From a4fdc2fd3457056cb8dd4a21bc4c577ba187b9b5 Mon Sep 17 00:00:00 2001 From: am0ebe <am0ebe@gmx.de> Date: Wed, 20 Jan 2021 12:31:20 +0100 Subject: [PATCH] [WIP] dataSync: adapt for usage with ecoTrack project (2019). updateGitignore.sh: use paths relative to projDir for compatibility --- .gitignore | 6 +- dataSync.py | 198 +++++++++++++++++++++++++++------------------ updateGitIgnore.sh | 19 +++-- 3 files changed, 132 insertions(+), 91 deletions(-) diff --git a/.gitignore b/.gitignore index 840a972..9b1b3ae 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,6 @@ -/home/sugu/code/idiv/ecotrondata/block-II/02-inspect/ecotron-data-20200915-20201013 -/home/sugu/code/idiv/ecotrondata/block-II/01-raw/backup-ecoTron-2020-10-28-06-00-1603861202.tgz -/home/sugu/code/idiv/ecotrondata/block-I/02-inspect/ecotron-data-20200721-20200818 +block-II/02-inspect/ecotron-data-20200915-20201013bla +block-II/01-raw/backup-ecoTron-2020-10-28-06-00-1603861202.tgz +block-I/02-inspect/ecotron-data-20200721-20200818 *.txt test/* *# diff --git a/dataSync.py b/dataSync.py index 63c6624..e2e3b45 100755 --- a/dataSync.py +++ b/dataSync.py @@ -36,57 +36,89 @@ from pprint import pprint as pp from datetime import datetime as dt import dataFilter -PROJ = "ecotron" -# PROJ = "schrebatron" - # Experiment Config! Time and paths. -# Track-start: 7.7.20 // Light start: 20.7.20 // Dark: 10.7.20 - 20.7.20 +##################################### +# ecotron -- Track-start: 7.7.20 // Light start: 20.7.20 // Dark: 10.7.20 - 20.7.20 +# Project, in, out, start, end blox=[ -("./block-I/01-raw","./block-I/02-inspect","2020-07-21 00:00:00","2020-08-18 23:59:00"), # Block1: 21.7 - 18.8 -("./block-II/01-raw","./block-II/02-inspect","2020-09-15 00:00:00","2020-10-13 23:59:00"), # Block2: 15.9 - 13.10 +("ecotron","./block-I/01-raw","./block-I/02-inspect","2020-07-21 00:00:00","2020-08-18 23:59:00"), # Block1: 21.7 - 18.8 +("ecotron","./block-II/01-raw","./block-II/02-inspect","2020-09-15 00:00:00","2020-10-13 23:59:00"), # Block2: 15.9 - 13.10 +("ecotrack","../camtrondata/test/in","../camtrondata/test/out","2019-06-28 00:00:00","2019-10-16 23:59:00"), +# ("test","./test","./test","2020-07-21 00:00:00","2020-08-18 23:59:00"), ] TIME_FMT='%Y-%m-%d %H:%M:00' noTime = dt.fromtimestamp(0) TAG_LEN=len("04B94A7F7288588022") +SCRIPTPATH=os.path.dirname(os.path.abspath(__file__))+os.sep + +def initFileStructure(): + + # trackfile + ########## + global T_DELIM, TIMESTAMP_LEN, T_COL_MS, T_COL_X, T_COL_Y, T_COL_TAG, T_MINLEN, T_MAXLEN, T_NCOLS + T_DELIM=';' + TIMESTAMP_LEN=10 + #t_col_timestamp=0 + T_COL_MS=1 + T_COL_X=2 + T_COL_Y=3 + T_COL_TAG=4 + #t_col_signalstrength=5 + if PROJ == "ecotron": + T_MINLEN = 38 # without newline. + T_MAXLEN = 40 # ms column varies 1-3. Will be padded later for uniform len + T_NCOLS = 6 + elif PROJ == "ecotrack": #ecotron temperature project 2019 + T_MINLEN = 36 + T_MAXLEN = 38 + T_NCOLS = 5 + + # lightfile + ########## + if hasLight(): + global L_DELIM, L_COL_MOON_REAL, L_COL_MOON_DMX, L_COL_SKYGLOW, L_MINLEN, L_MAXLEN, L_NCOLS + L_DELIM='\t' + L_COL_MOON_REAL=1 + L_COL_MOON_DMX=2 + L_COL_SKYGLOW=3 + L_MINLEN = 61 #2020-07-21 15:15:00 0.0 0 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + L_MAXLEN = 76 #2020-07-21 15:09:00 0.0 0 [15, 54, 4, 19, 0, 1, 5, 0, 40, 12, 123, 123] + L_NCOLS = 4 + + # animalfile + ########### + global A_DELIM, A_MINLEN, A_MAXLEN, A_NCOLS, A_COL_SPECIES, A_COL_SPECIES_IND, A_COL_WEIGHT_WO_TAG_MG, A_COL_TAG + if PROJ == "ecotron": + A_DELIM='\t' + A_MINLEN = 25 + A_MAXLEN = 30 + A_NCOLS = 4 + A_COL_SPECIES=0 + A_COL_SPECIES_IND=1 + A_COL_WEIGHT_WO_TAG_MG=2 + A_COL_TAG=3 + elif PROJ == "ecotrack": + A_DELIM=';' + A_MINLEN = 39 + A_MAXLEN = 44 + # A_MINLEN = 35 + # A_MAXLEN = 49 + A_NCOLS = 7 #/w comments + A_COL_SPECIES=1 + A_COL_SPECIES_IND=0 + A_COL_WEIGHT_WO_TAG_MG=2 + A_COL_TAG=5 + + + # out file + ########### + global OUT_FMT, DELIM, COL_UID, COL_TAG + OUT_FMT='%Y%m%d' + DELIM=',' + COL_UID=4 + COL_TAG=8 -# trackfile -T_DELIM=';' -TIMESTAMP_LEN=10 -#t_col_timestamp=0 -T_COL_MS=1 -T_COL_X=2 -T_COL_Y=3 -T_COL_TAG=4 -#t_col_signalstrength=5 -T_MINLEN = 38 # without newline. -T_MAXLEN = 40 # ms column varies 1-3. Will be padded later for uniform len -T_NCOLS = 6 - -# lightfile -L_DELIM='\t' -L_COL_MOON_REAL=1 -L_COL_MOON_DMX=2 -L_COL_SKYGLOW=3 -L_MINLEN = 61 #2020-07-21 15:15:00 0.0 0 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] -L_MAXLEN = 76 #2020-07-21 15:09:00 0.0 0 [15, 54, 4, 19, 0, 1, 5, 0, 40, 12, 123, 123] -L_NCOLS = 4 - -# animalfile -A_DELIM='\t' -A_MINLEN = 25 #guess -A_MAXLEN = 30 -A_NCOLS = 4 -A_COL_SPECIES=0 -A_COL_SPECIES_IND=1 -A_COL_WEIGHT_WO_TAG_MG=2 -A_COL_TAG=3 - -# out file -OUT_FMT='%Y%m%d' -DELIM=',' -COL_UID=4 -COL_TAG=8 #mlx (idx is also dmx-byte value) moonMap = [ 1.409, @@ -169,7 +201,7 @@ skyglowDict = { } def hasLight(): - return PROJ.lower() == "ecotron" + return PROJ == "ecotron" def getUID(fileName): """ extract and return unit-id from fileName """ @@ -182,19 +214,17 @@ class Data: def __init__(self,animalFile, skyglowFile, trackFiles): + self.animalData = self.getAnimalData(animalFile) + self.trackData = [] for file in trackFiles: self.trackData.extend( self.getTrackData(file) ) self.trackData.sort() - #cut off at end - self.startTrack = self.getTrackTime( self.trackData[0] ) self.endTrack = self.getTrackTime( self.trackData[-1] ) - self.animalData = self.getAnimalData(animalFile) - if hasLight(): self.lightData = self.getLightData(skyglowFile) self.startLight = self.getLightTime( self.lightData[0] ) @@ -206,7 +236,7 @@ class Data: pp(self.header()) def getTrackData(self, fileName): - """ get lines from tracklog, clean and add new columns""" + """ get lines from all tracklogs, clean, merge and add/rm columns""" pp(f"processing: {fileName}") with open(INPUTDIR+'/'+fileName) as f: @@ -222,7 +252,7 @@ class Data: idx_after_ms = TIMESTAMP_LEN + len('999') + len(DELIM) for idx, line in enumerate(lines): - if PROJ.upper() == "ECOTRON": + if PROJ in ["ecotron","ecotrack"]: # pad ms column with zeros to fit three digits cols=line.split(DELIM) ms_digits=len(cols[T_COL_MS]) @@ -240,8 +270,8 @@ class Data: # add columns (date,time,uid,isHabitat) line = line[:idx_after_ms] + DELIM + date + DELIM + time + DELIM + str(uid) + DELIM + str(isHabitat) + line[idx_after_ms:] - # remove signalstrength column - line = line[:line.rindex(DELIM)] + if PROJ == "ecotron": + line = line[:line.rindex(DELIM)] # remove signalstrength column lines[idx] = line @@ -264,11 +294,10 @@ class Data: with open(file) as f: lines = f.readlines() - lines = self.clean(lines, A_MINLEN, A_MAXLEN, A_NCOLS, A_DELIM) - + lines = self.clean(lines, A_MINLEN, A_MAXLEN, A_NCOLS, A_DELIM) #TODO: check for duplicate tags and print warning / ignore? - assert (lines), "skyglow: Not valid. Check times / formatting." - + + assert (lines), "tags: Not valid. Check times / formatting." return lines def clean(self, lines, minLen, maxLen, nCols, sep, timeFunc=None): @@ -313,12 +342,10 @@ class Data: #TODO SPEED UP!!! # ttags_not_found=[] - dflt=f'{DELIM}NA{DELIM}NA{DELIM}NA' notFound=0 for idx, tLine in enumerate(self.trackData): found=False ttag = tLine.split(DELIM)[COL_TAG] - for aLine in self.animalData: aLine = aLine.split(DELIM) atag = aLine[A_COL_TAG] @@ -330,7 +357,7 @@ class Data: break; if not found: - self.trackData[idx] += dflt + self.trackData[idx] += f'{DELIM}NA{DELIM}NA{DELIM}NA' notFound+=1 # ttags_not_found.append(ttag) @@ -402,7 +429,7 @@ class Data: return dt.fromtimestamp(timestamp) def header(self): - h= f"# {PROJ.upper()} TRACK DATA\n" \ + h= f"# {PROJ} data\n" \ f"#################################################################################################\n" \ f"# len(Track): {len(self.trackData)}\n" \ f"# startTrack: {self.startTrack}\n" \ @@ -413,7 +440,7 @@ class Data: f"# startLight: {self.startLight}\n" \ f"# endLight: {self.endLight}\n" h+= f"#################################################################################################\n" \ - f"#timestamp, milliseconds, date, time, unit, x, y, tagID, species, speciesnumber, weight_without_tag[mg]{', moon_real[mLux], moon_eco[mLux], skyglow[Lux]' if hasLight() else ''}\n" \ + f"#timestamp, ms, date, time, unit, habitat, x, y, tag, species, speciesnumber, weight_without_tag[mg]{', moon_real[mLux], moon_eco[mLux], skyglow[Lux]' if hasLight() else ''}\n" \ f"#################################################################################################\n" return h @@ -432,7 +459,7 @@ class Data: def xtract(): for root, dirs, files in os.walk(INPUTDIR): #walk recursively for file in files: - if file.endswith(".tgz"): + if file.endswith(".tgz") or file.endswith(".tar.gz"): file=INPUTDIR + os.sep + file print(f"xtracting {file}\n") cmd_xtract=f"tar -zxvf {file} --directory {INPUTDIR}" @@ -442,10 +469,10 @@ def xtract(): def getFileList(): animalFile, skyglowFile, trackFiles = '','',[] - print( "Looking for 'skyglow.log', 'tags' or files containing 'unit-X'" ) + print( "Looking for 'skyglow.log', 'tags' and files containing 'unit-X'" ) for root, dirs, files in os.walk(INPUTDIR): #walk recursively for file in files: - if "unit" in file: + if "unit" in file and not file.endswith(".tgz") and not file.endswith(".tar.gz"): trackFiles.append(file) elif file == "tags": animalFile = INPUTDIR+'/'+file @@ -459,25 +486,27 @@ def getFileList(): if not trackFiles: sys.exit("No tracking data found. (filename containing unit-X, where X is a number)") if hasLight() and not skyglowFile: - sys.exit(f"Couldn't find {skyglowFile}") + sys.exit(f"Couldn't find 'skyglow.log'") if not animalFile: - sys.exit(f"Couldn't find {animalFile}") + sys.exit(f"Couldn't find 'tags'") return animalFile, skyglowFile, trackFiles def make_rel_abs_path(path): if path.startswith('../'): - path= scriptPath()+path[3:] + path = os.path.abspath(SCRIPTPATH+path) elif path.startswith('./'): - path= scriptPath()+path[2:] - return path + path = SCRIPTPATH+path[2:] + elif path.startswith("~/"): + path = os.path.expanduser("~")+path[1:] -def scriptPath(): - return os.path.dirname(os.path.abspath(__file__))+os.sep + return path def main(): + initFileStructure() xtract() + animalFile, skyglowFile, trackFiles = getFileList() data = Data(animalFile,skyglowFile,trackFiles) data.merge() @@ -486,25 +515,34 @@ def main(): dataFilter.main(OUT_FILE) print("update git to ignore big files (>50MB)") - cmd=scriptPath()+"updateGitIgnore.sh" + cmd=SCRIPTPATH+"updateGitIgnore.sh" os.system(cmd) if __name__ == "__main__" : - print(" Process raw data for which block?") + print(f" Process raw data for which block?\n {50*'#'}\n") for idx, b in enumerate(blox): - print(f" {idx+1}) {b[2]} >> {b[3]}") + print(f" {idx+1}) Project {b[0]}") + print(f" from: {b[3]}") + print(f" to: {b[4]}") + print(f" in: {b[1]}") + print(f" out: {b[2]}\n") try: n=int(input())-1 - INPUTDIR = make_rel_abs_path( blox[n][0] ) - OUTPUTDIR = make_rel_abs_path( blox[n][1] ) - startTime = dt.strptime(blox[n][2], TIME_FMT) - endTime = dt.strptime(blox[n][3], TIME_FMT) + PROJ = blox[n][0].lower() + INPUTDIR = make_rel_abs_path( blox[n][1] ) + OUTPUTDIR = make_rel_abs_path( blox[n][2] ) + startTime = dt.strptime(blox[n][3], TIME_FMT) + endTime = dt.strptime(blox[n][4], TIME_FMT) + + known_projects=["ecotrack","ecotron","schrebatron"] + if PROJ not in known_projects: + print(f"Unknown Project '{PROJ}'. Choose one of {known_projects}") + exit() + except: print("Insert Block Number..") exit() - print(" inputdir: ", INPUTDIR) - print("outputdir: ", OUTPUTDIR) start = time.time() main() end = time.time() diff --git a/updateGitIgnore.sh b/updateGitIgnore.sh index ccdc13a..d46bea8 100755 --- a/updateGitIgnore.sh +++ b/updateGitIgnore.sh @@ -1,12 +1,6 @@ #!/bin/bash -dir=`dirname "$(realpath $0)"` -ignorefile="${dir}/.gitignore" - -#ignore .txt files in find, since they are ignored anyway -find "${dir}" -size +50M ! -name '*.txt' | sed 's/\.\///g' > "${ignorefile}" - -list=( +ignore=( "*.txt" "test/*" "*#" #ods-tmp-files @@ -14,7 +8,16 @@ list=( __pycache__ ) -for i in "${list[@]}" +projdir=`dirname "$(realpath $0)"` +ignorefile="${projdir}/.gitignore" +cd "${projdir}" + +#ignore .txt files in find, since they are ignored anyway +find . -size +50M ! -name '*.txt' | sed 's/\.\///g' > "${ignorefile}" + +for i in "${ignore[@]}" do echo "$i" >> "${ignorefile}" done + +#cd - \ No newline at end of file -- GitLab