diff --git a/dataSync.py b/dataSync.py index 41d3c87335af47a975da15353b8d5682506d35eb..e4f81883b0eb6443977e376f014836de7dc1ffa5 100755 --- a/dataSync.py +++ b/dataSync.py @@ -36,15 +36,15 @@ from pprint import pprint as pp from datetime import datetime as dt import dataFilter -# Experiment Config! Time and paths. +# Configuration of Time and paths ##################################### -# ecotron -- Track-start: 7.7.20 // Light start: 20.7.20 // Dark: 10.7.20 - 20.7.20 -# Project, in, out, start, end +# ecolux -- Track-start: 7.7.20 // Light start: 20.7.20 // Dark: 10.7.20 - 20.7.20 blox=[ -("ecotron","./block-I/01-raw","./block-I/02-inspect","2020-07-21 00:00:00","2020-08-18 23:59:00"), # Block1: 21.7 - 18.8 -("ecotron","./block-II/01-raw","./block-II/02-inspect","2020-09-15 00:00:00","2020-10-13 23:59:00"), # Block2: 15.9 - 13.10 -# ("ecotrack","~/code/idiv/camtrondata/test/in","~/code/idiv/camtrondata/test/in","2019-06-28 00:00:00","2019-10-16 23:59:00"), -("ecotrack","~/code/idiv/camtrondata/test/in","~/code/idiv/camtrondata/test/out","2019-10-14 00:00:00","2019-10-16 23:59:00"), +("ecolux","./block-I/01-raw","./block-I/02-inspect","2020-07-21 00:00:00","2020-08-18 23:59:00"), # Block1: 21.7 - 18.8 +("ecolux","./block-II/01-raw","./block-II/02-inspect","2020-09-15 00:00:00","2020-10-13 23:59:00"), # Block2: 15.9 - 13.10 +("foo","~/block-I/01-raw/../..","~/block-I/02-inspect/","2020-07-21 00:00:00","2020-08-18 23:59:00"), +("ecotrack","/opt/virtualBoxVMs/share/block-I/01-raw","/opt/virtualBoxVMs/share/block-I/02-inspect","2019-10-14 00:00:00","2019-10-16 23:59:00"), +## -- add here -- ## ] TIME_FMT='%Y-%m-%d %H:%M:00' @@ -66,7 +66,7 @@ def initFileStructure(): T_COL_Y=3 T_COL_TAG=4 #t_col_signalstrength=5 - if PROJ == "ecotron": + if PROJ == "ecolux": T_MINLEN = 38 # without newline. T_MAXLEN = 40 # ms column varies 1-3. 
Will be padded later for uniform len T_NCOLS = 6 @@ -90,7 +90,7 @@ def initFileStructure(): # animalfile ########### global A_DELIM, A_MINLEN, A_MAXLEN, A_NCOLS, A_COL_SPECIES, A_COL_SPECIES_IND, A_COL_WEIGHT_WO_TAG_MG, A_COL_TAG - if PROJ == "ecotron": + if PROJ == "ecolux": A_DELIM='\t' A_MINLEN = 25 A_MAXLEN = 30 @@ -109,7 +109,7 @@ def initFileStructure(): A_COL_SPECIES=1 A_COL_SPECIES_IND=0 A_COL_WEIGHT_WO_TAG_MG=2 - A_COL_TAG=5 + A_COL_TAG=5 # out file @@ -202,7 +202,7 @@ skyglowDict = { } def hasLight(): - return PROJ == "ecotron" + return PROJ == "ecolux" def getUID(fileName): """ extract and return unit-id from fileName """ @@ -240,7 +240,7 @@ class Data: """ get lines from all tracklogs, clean, merge and add/rm columns""" pp(f"processing: {fileName}") - with open(INPUTDIR+'/'+fileName) as f: + with open(INPUTDIR+os.sep+fileName) as f: lines = f.readlines() lines = self.clean(lines, T_MINLEN, T_MAXLEN, T_NCOLS, T_DELIM, self.getTrackTime) @@ -253,7 +253,7 @@ class Data: idx_after_ms = TIMESTAMP_LEN + len('999') + len(DELIM) for idx, line in enumerate(lines): - if PROJ in ["ecotron","ecotrack"]: + if PROJ in ["ecolux","ecotrack"]: # pad ms column with zeros to fit three digits cols=line.split(DELIM) ms_digits=len(cols[T_COL_MS]) @@ -271,7 +271,7 @@ class Data: # add columns (date,time,uid,isHabitat) line = line[:idx_after_ms] + DELIM + date + DELIM + time + DELIM + str(uid) + DELIM + str(isHabitat) + line[idx_after_ms:] - if PROJ == "ecotron": + if PROJ == "ecolux": line = line[:line.rindex(DELIM)] # remove signalstrength column lines[idx] = line @@ -304,7 +304,7 @@ class Data: def clean(self, lines, minLen, maxLen, nCols, sep, timeFunc=None): """ remove trailing newline, empty and comments and remove lines before startTime or after endTime""" lines = [l.strip() for l in lines] - lines = [l for l in lines if minLen <= len(l) <= maxLen] # EcoTron: ms are variable in length (0-999), SchrebaTron: ms is padded with zeros + lines = [l for l in lines if minLen 
<= len(l) <= maxLen] # ecolux: ms are variable in length (0-999), SchrebaTron: ms is padded with zeros lines = [l for l in lines if len(l.split(sep)) == nCols] lines = [l.replace(sep,DELIM) for l in lines] lines = list(filter(lambda q: q and q[0] != '#', lines)) @@ -459,27 +459,28 @@ class Data: os.system(cmd_compress) def xtract(): + # os.chdir(INPUTDIR) for root, dirs, files in os.walk(INPUTDIR): #walk recursively for file in files: if file.endswith(".tgz") or file.endswith(".tar.gz"): print(f"xtracting {file}\n") - cmd_xtract=f"tar -zxvf {file} -C {INPUTDIR}" + cmd_xtract=f"tar -zxvf {INPUTDIR}{os.sep}{file} -C {INPUTDIR}" os.system(cmd_xtract) - return + return def getFileList(): animalFile, skyglowFile, trackFiles = '','',[] - print( "Looking for 'skyglow.log', 'tags' and files containing 'unit-X'" ) + print( "Get files." ) for root, dirs, files in os.walk(INPUTDIR): #walk recursively for file in files: if "unit" in file and not file.endswith(".tgz") and not file.endswith(".tar.gz"): trackFiles.append(file) elif file == "tags": - animalFile = INPUTDIR+'/'+file + animalFile = INPUTDIR+os.sep+file print(f"Found animalFile: {animalFile}") - elif file == "skyglow.log": - skyglowFile = INPUTDIR+'/'+file + elif hasLight() and file == "skyglow.log": + skyglowFile = INPUTDIR+os.sep+file print(f"Found logfile: {skyglowFile}") else: pp(f"Ignoring ({file})") @@ -494,13 +495,9 @@ def getFileList(): return animalFile, skyglowFile, trackFiles def make_rel_abs_path(path): - if path.startswith('../'): - path = os.path.abspath(SCRIPTPATH+path) - elif path.startswith('./'): - path = SCRIPTPATH+path[2:] - elif path.startswith("~/"): - path = os.path.expanduser("~")+path[1:] - + path = os.path.normpath(path) + path = os.path.expanduser(path) + path = os.path.abspath(path) return path def main(): @@ -520,6 +517,7 @@ def main(): os.system(cmd) if __name__ == "__main__" : + known_projects=["ecotrack","ecolux","schrebatron"] print(f" Process raw data for which block?\n {RULER}") for 
idx, b in enumerate(blox): print(f" {idx+1}) Project {b[0]}") @@ -535,13 +533,16 @@ if __name__ == "__main__" : startTime = dt.strptime(blox[n][3], TIME_FMT) endTime = dt.strptime(blox[n][4], TIME_FMT) - known_projects=["ecotrack","ecotron","schrebatron"] if PROJ not in known_projects: - print(f"Unknown Project '{PROJ}'. Choose one of {known_projects}") - exit() - - except: - print("Insert Block Number..") + raise Exception(f"Unknown Project '{PROJ}'") + + except Exception as e: + print(f"Error: {e}\n") + print("You can add a configuration by adding a line to the 'blox' structure. It has following form:\n") + print(" (Project, inDir, outDir, start, end)\n" ) + print(f"> Project must be in {known_projects}" ) + print(f"> times must have Format like '{dt.now().strftime(TIME_FMT)}'" ) + print(f"> dirs can be relative. eg: ('./data','../data','~/data')") exit() start = time.time()