diff --git a/dataFilter.py b/dataFilter.py index 0a31f81c86dcb0bddd6ec160e0c92eaaf5bd7e3b..39a1cb2f843f898e0a44a1716625067830a56330 100755 --- a/dataFilter.py +++ b/dataFilter.py @@ -16,13 +16,12 @@ import sys, os LEN_TIMESTAMP=len("1602626336") COL_TIMESTAMP=0 COL_TAG=8 -HEADER="#timestamp, milliseconds, date, time, unit, x, y, tagID, species, speciesnumber, weight_without_tag_mg, moon_real_mLux, moon_eco_mLux, skyglow_Lux\n" - def main(arg1,arg2=10): - global inFile, coolOff - inFile=arg1 - coolOff=arg2 + global inFile, coolOff, HEADER + inFile = arg1 + coolOff = arg2 + print(f"start filtering data with {coolOff} second coolOff") with open(inFile) as f: @@ -32,6 +31,8 @@ def main(arg1,arg2=10): filtered, recently = [], [] for line in lines: if line[0] == '#': + if line.startswith("#timestamp"): + HEADER = line continue l = line.split(',') @@ -50,7 +51,7 @@ def main(arg1,arg2=10): #cutOff from End print(recently) for ts, tag in recently: - print(f"cutoff tag {tag}",end='') + print(f"cutoff tag {tag} ... ",end='') i = len(filtered)-1 ndel=0 diff --git a/dataSync.py b/dataSync.py index e2e3b4528a7ecb8325205ee8da02f717fda410b6..eb7e45497ffc998db86bfd3b78d1190dcd8bc08c 100755 --- a/dataSync.py +++ b/dataSync.py @@ -43,14 +43,15 @@ import dataFilter blox=[ ("ecotron","./block-I/01-raw","./block-I/02-inspect","2020-07-21 00:00:00","2020-08-18 23:59:00"), # Block1: 21.7 - 18.8 ("ecotron","./block-II/01-raw","./block-II/02-inspect","2020-09-15 00:00:00","2020-10-13 23:59:00"), # Block2: 15.9 - 13.10 -("ecotrack","../camtrondata/test/in","../camtrondata/test/out","2019-06-28 00:00:00","2019-10-16 23:59:00"), -# ("test","./test","./test","2020-07-21 00:00:00","2020-08-18 23:59:00"), +# ("ecotrack","~/code/idiv/camtrondata/test/in","~/code/idiv/camtrondata/test/in","2019-06-28 00:00:00","2019-10-16 23:59:00"), +("ecotrack","~/code/idiv/camtrondata/test/in","~/code/idiv/camtrondata/test/out","2019-10-14 00:00:00","2019-10-16 23:59:00"), ] TIME_FMT='%Y-%m-%d %H:%M:00' noTime = dt.fromtimestamp(0) TAG_LEN=len("04B94A7F7288588022") SCRIPTPATH=os.path.dirname(os.path.abspath(__file__))+os.sep +RULER=f"{'#'*100}\n" def initFileStructure(): @@ -231,7 +232,7 @@ class Data: self.endLight = self.getLightTime( self.lightData[-1] ) global OUT_FILE - OUT_FILE = f"{OUTPUTDIR}{os.sep}{PROJ}-data-{self.startTrack.strftime(OUT_FMT)}-{self.endTrack.strftime(OUT_FMT)}" + OUT_FILE = f"{PROJ}-data-{self.startTrack.strftime(OUT_FMT)}-{self.endTrack.strftime(OUT_FMT)}" pp(self.header()) @@ -430,7 +431,7 @@ class Data: def header(self): h= f"# {PROJ} data\n" \ - f"#################################################################################################\n" \ + f"{RULER}" \ f"# len(Track): {len(self.trackData)}\n" \ f"# startTrack: {self.startTrack}\n" \ f"# endTrack: {self.endTrack}\n" \ @@ -439,30 +440,30 @@ class Data: h += f"# len(Light): {len(self.lightData)}\n" \ f"# startLight: {self.startLight}\n" \ f"# endLight: {self.endLight}\n" - h+= f"#################################################################################################\n" \ - f"#timestamp, ms, date, time, unit, habitat, x, y, tag, species, speciesnumber, weight_without_tag[mg]{', moon_real[mLux], moon_eco[mLux], skyglow[Lux]' if hasLight() else ''}\n" \ - f"#################################################################################################\n" - return h + h += RULER + h += f"#timestamp, ms, date, time, unit, habitat, x, y, tag, species, speciesnumber, weight_without_tag[mg]{', moon_real[mLux], moon_eco[mLux], skyglow[Lux]' if hasLight() else ''}\n" + h += RULER + return h def write(self): - + # os.makedirs(os.path.dirname(OUTPUTDIR), exist_ok=True) + os.makedirs(OUTPUTDIR, exist_ok=True) + os.chdir(OUTPUTDIR) pp("write to " + OUT_FILE) - os.makedirs(os.path.dirname(OUT_FILE), exist_ok=True) with open(OUT_FILE, "w") as f: f.writelines(self.header()) self.trackData=map(lambda x:x+'\n', self.trackData) f.writelines(self.trackData) - cmd_compress=f"tar -zcvf {OUT_FILE}.tgz {OUT_FILE}" # ">/dev/null 2>/dev/null" + cmd_compress=f"tar -C {OUTPUTDIR} -zcvf {OUT_FILE}.tgz {OUT_FILE}" # ">/dev/null 2>/dev/null" os.system(cmd_compress) def xtract(): for root, dirs, files in os.walk(INPUTDIR): #walk recursively for file in files: if file.endswith(".tgz") or file.endswith(".tar.gz"): - file=INPUTDIR + os.sep + file print(f"xtracting {file}\n") - cmd_xtract=f"tar -zxvf {file} --directory {INPUTDIR}" + cmd_xtract=f"tar -zxvf {file} -C {INPUTDIR}" os.system(cmd_xtract) return @@ -514,12 +515,12 @@ def main(): dataFilter.main(OUT_FILE) - print("update git to ignore big files (>50MB)") + print("update git to ignore large files") cmd=SCRIPTPATH+"updateGitIgnore.sh" os.system(cmd) if __name__ == "__main__" : - print(f" Process raw data for which block?\n {50*'#'}\n") + print(f" Process raw data for which block?\n {RULER}") for idx, b in enumerate(blox): print(f" {idx+1}) Project {b[0]}") print(f" from: {b[3]}") diff --git a/updateGitIgnore.sh b/updateGitIgnore.sh index d46bea8aafa5eeee93e16f124dca7b7af0bcd241..a890364b68ebf2f847eccf002929bbabca2cefce 100755 --- a/updateGitIgnore.sh +++ b/updateGitIgnore.sh @@ -1,5 +1,7 @@ #!/bin/bash +maxSize=50 + ignore=( "*.txt" "test/*" @@ -8,16 +10,16 @@ ignore=( __pycache__ ) -projdir=`dirname "$(realpath $0)"` -ignorefile="${projdir}/.gitignore" +projdir=`git rev-parse --show-toplevel` +gitignore="${projdir}/.gitignore" cd "${projdir}" -#ignore .txt files in find, since they are ignored anyway -find . -size +50M ! -name '*.txt' | sed 's/\.\///g' > "${ignorefile}" +echo "update gitignore for project in ${projdir}" + +#ignore .txt files in find, since they are ignored anyway. Also ignore .git folder +find . -size +"${maxSize}"M ! -name '*.txt' ! -wholename './.git*' ! -wholename './test/*'| sed 's/\.\///g' > "${gitignore}" for i in "${ignore[@]}" do - echo "$i" >> "${ignorefile}" + echo "$i" >> "${gitignore}" done - -#cd - \ No newline at end of file