diff --git a/dataFilterCooloff.py b/dataFilterCooloff.py index 87467b92b78280d7bd27aed941ba985d7e945103..49187198584d95cbf6d8cc24208db78274a45ba3 100755 --- a/dataFilterCooloff.py +++ b/dataFilterCooloff.py @@ -1,7 +1,7 @@ #!/usr/bin/python3 #todo -# plateaus? when beetle chills on sensor for a day? (no patchcrossevent - pce) +# think about plateaus? e.g. when a beetle chills on sensor for a day? (no patchcrossevent - pce) import sys, os diff --git a/dataSync.py b/dataSync.py index ecd15997481369083dd65c957d65507be0418177..8d0afe7a15e21510f0b8d072643bb45784ed3ebd 100755 --- a/dataSync.py +++ b/dataSync.py @@ -2,25 +2,19 @@ # clean, sort, merge raw data # output should be one huge table with all necessary fields for further processing/vizualization # +# files are 'skyglow.log','tags' and trackfiles (containing 'unit-x' in name, where x is a number) +# ############################ # TODO ############################ -# use NA instead of X if not found (R-compatible) -# add isHabitat col dependent on Pos -# cutoff continues track until end (dead beetle on sensor) -# merge H's species -# -# parse moonmap and skyglowdict from skyglow Project sources (submodule ... ?) -# -# -# include temperature and humidity data? -# integrate with ST +# > parse moonmap and skyglowdict from skyglow Project sources (submodule ... ?) +# > integrate with ST # > make Skyglow optional -> TEST # > ST-Filenames should include 'unit-X'! +# > how to process skylog error in merge() +# > include temperature and humidity data? # -# how to process skylog error in merge() -# -# Speed Up! (takes 16minutes for 8mio lines...) +# > Speed Up! (takes 16minutes for 8mio lines...) # 1. Threads # > see https://realpython.com/python-concurrency/#multiprocessing-version # use threads for @@ -28,22 +22,11 @@ # > merging (intermediate) # * parse chunks of trackData. call sort at end # * calc indices of data dependent on numThreads -# # 2. rewrite animalmerge for loops # 3. 
Consider using Pandas, NumPy, Cython, Pypy, Koalas, inline C++/C/Bash, sql (SQLAlchemy) # -# how to profile to find bottlenecks? -# -# for lists: use append() instead of + / += operators -# -# Experiment Times -#################### -# Track start 7.7 -# Light start 20.7 -# Dark: 10.7 - 20.7 -# Block1: 21.7 - 18.8 -# Block2: 15.9 - 13.10 -# +# > how to profile to find bottlenecks? +# > for lists: use append() instead of + / += operators import sys, getopt, os, re, datetime, string, time from pprint import pprint as pp @@ -52,12 +35,14 @@ from datetime import datetime as dt PROJ = "ecotron" # PROJ = "schrebatron" +# Experiment Times! +################### +# Track-start: 7.7 // Light start: 20.7 // Dark: 10.7 - 20.7 +blox=[("./block-I/01-raw","./block-I/02-inspect","2020-07-21 00:00:00","2020-08-18 23:59:00"), # Block1: 21.7 - 18.8 + ("./block-II/01-raw","./block-II/02-inspect","2020-09-15 00:00:00","2020-10-13 23:59:00")] # Block2: 15.9 - 13.10 + TIME_FMT='%Y-%m-%d %H:%M:00' noTime = dt.fromtimestamp(0) -startTime = dt.strptime('2020-07-21 00:00:00', TIME_FMT) -endTime = dt.strptime('2020-08-18 23:59:00', TIME_FMT) -# startTime = endTime = noTime - TAG_LEN=len("04B94A7F7288588022") # trackfile @@ -478,6 +463,11 @@ def getFileList(): return animalFile, skyglowFile, trackFiles +def make_rel_abs_path(path): + if path.startswith('.'): + path=os.path.dirname(os.path.abspath(__file__))+os.sep+path + return path + def main(): xtract() @@ -487,33 +477,25 @@ def main(): data.write() if __name__ == "__main__" : - if len(sys.argv) == 1: - this = os.path.basename(__file__) - sys.exit(f'Usage:\n' - f' {this} <dir> - search files and write to <dir>\n' - f' {this} <in> <out> - search files in <in> and write to <out>\n\n' - f" files are 'skyglow.log','tags' and trackfiles (containing 'unit-x' in name, where x is a number)") - elif len(sys.argv) == 2: - INPUTDIR = sys.argv[1] - OUTPUTDIR = INPUTDIR - else: - INPUTDIR = sys.argv[1] - OUTPUTDIR = sys.argv[2] - - print(" INPUTDIR: ", 
INPUTDIR) - print("OUTPUTDIR: ", OUTPUTDIR) - - #Block-I - # startTime = dt.strptime('2020-07-21 00:00:00', TIME_FMT) - # endTime = dt.strptime('2020-08-18 23:59:00', TIME_FMT) - #Block-II - # startTime = dt.strptime('2020-09-15 00:00:00', TIME_FMT) - # endTime = dt.strptime('2020-10-13 23:59:00', TIME_FMT) - - #ask for block - # nBlock=input("choose") - + print(" Process raw data for which block?") + for idx, b in enumerate(blox): + print(f" {idx+1}) {b[2]} >> {b[3]}") + try: + n=int(input())-1 + INPUTDIR = make_rel_abs_path( blox[n][0] ) + OUTPUTDIR = make_rel_abs_path( blox[n][1] ) + startTime = dt.strptime(blox[n][2], TIME_FMT) + endTime = dt.strptime(blox[n][3], TIME_FMT) + except: + print("Insert Block Number..") + exit() + + print(" inputdir: ", INPUTDIR) + print("outputdir: ", OUTPUTDIR) start = time.time() main() end = time.time() print(f"Took {end - start} seconds") + + #call dataFilterCoolOff? + #call updateGitIgnore.sh? \ No newline at end of file