From fdfc786ec849bee5caee1577b7f2973212c2d5dc Mon Sep 17 00:00:00 2001 From: am0ebe <am0ebe@gmx.de> Date: Wed, 27 Jan 2021 18:25:03 +0100 Subject: [PATCH] fix paths. normalize,expand home,convert to absolute if relative,get correct scriptdir + [WIP] rewrite dataFilter.py --- .gitignore | 2 +- dataFilter.py | 114 ++++++++++++++++++++++++++++---------------------- dataSync.py | 20 ++++----- 3 files changed, 74 insertions(+), 62 deletions(-) diff --git a/.gitignore b/.gitignore index 9b1b3ae..0f6f294 100644 --- a/.gitignore +++ b/.gitignore @@ -4,5 +4,5 @@ block-I/02-inspect/ecotron-data-20200721-20200818 *.txt test/* *# -*skyglow.log +skyglow.log __pycache__ diff --git a/dataFilter.py b/dataFilter.py index 39a1cb2..93c20c2 100755 --- a/dataFilter.py +++ b/dataFilter.py @@ -1,6 +1,14 @@ #!/usr/bin/python3 #todo +# +# define trackevent: start,end,dur (end = no trackevent in start+coolOff) +# +# time since last detection + +# sensor-crossing. SxE +# > patch-crossing? PxE +# # dont cutOff first trackEvent from plateau to end # # print more info: @@ -10,88 +18,94 @@ # # def coolOff() and def cutOff() # think about plateaus? eg. when a beetle chills on sensor for a day? (no patchcrossevent - pce) - +# import sys, os LEN_TIMESTAMP=len("1602626336") COL_TIMESTAMP=0 COL_TAG=8 -def main(arg1,arg2=10): - global inFile, coolOff, HEADER - inFile = arg1 - coolOff = arg2 +def cutOff(data): + print("cutOff from End") + # print(recently) + # for ts, tag in recently: + # print(f"cutoff tag {tag} ... ",end='') + + # i = len(filtered)-1 + # ndel=0 + # while i: + # line=filtered[i] + # timestamp=int(line[:LEN_TIMESTAMP]) + # if timestamp - ts <= -coolOff: + # #assert no more deadbeetle before ts... + # break + # if tag == line.split(',')[COL_TAG]: + # del filtered[i] + # ndel += 1 + # ts = timestamp - coolOff + + # i -= 1 + + # print(f"done. Deleted {ndel} lines up to ts: {ts} idx: {i}") + return data + - print(f"start filtering data with {coolOff} second coolOff") - with open(inFile) as f: - lines = f.readlines() +def squash(data): + print("squash plateaus") - #filter coolOff + coolOff = 3 #also try 2 + begin = end = dur = 0 filtered, recently = [], [] - for line in lines: + for line in data: if line[0] == '#': if line.startswith("#timestamp"): HEADER = line + #prepend to data continue l = line.split(',') timestamp = int(l[COL_TIMESTAMP]) tag = l[ COL_TAG ] - #delete old entries from beginning of list - if len(recently) and timestamp - recently[0][0] >= coolOff: - del recently[0] + # count as track if tag hasnt been detected for coolOff seconds + if len(recently): + while timestamp - recently[0][0] >= coolOff: + end = recently[0] + del recently[0] - #add tag if its not in list - if all(tag != t2 for t1,t2 in recently): + # add tag if its not in list + if all(tag != recent_tag for recent_ts,recent_tag in recently): recently.append((timestamp,tag)) filtered.append(line) - #cutOff from End - print(recently) - for ts, tag in recently: - print(f"cutoff tag {tag} ... ",end='') - - i = len(filtered)-1 - ndel=0 - while i: - line=filtered[i] - timestamp=int(line[:LEN_TIMESTAMP]) - if timestamp - ts <= -coolOff: - #assert no more deadbeetle before ts... - break - - if tag == line.split(',')[COL_TAG]: - del filtered[i] - ndel += 1 - ts = timestamp - coolOff - - i -= 1 - - print(f"done. Deleted {ndel} lines up to ts: {ts} idx: {i}") - - # [print(f, end='') for f in filtered] - write(filtered) + return data -def write(data): - outFile=f"{inFile}-cutOff-coolOff-{coolOff}" +def write(outFile, data): print(f"writing to {outFile}") with open(outFile, "w") as f: - f.writelines(HEADER) f.writelines(data) cmd_compress=f"tar -zcvf {outFile}.tgz {outFile}" # ">/dev/null 2>/dev/null" os.system(cmd_compress) +def main(filename): + with open(filename) as f: + data = f.readlines() + + data = squash( data ) + data = cutOff( data ) + + # write(filename + "-filtered", data) + if __name__ == "__main__" : + main("ecolux-data-20200915-20200916") + exit() if len(sys.argv) == 1: this = os.path.basename(__file__) - sys.exit(f' "Usage: {this} dataFile [coolOff]\n\n' - f' Cooloff defines the time in seconds that needs to pass before a tag can be detected again. Default = 10\n' - f' Will also cut off tags at end. (eg tag lying on sensor)') - - main(sys.argv[1:]) -# else: -# print("called as a module via import") + sys.exit( f'Usage: {this} dataFile\n\n' + f' Squash plateaus\n' + f' Cut off tags at end. (eg tag lying on sensor)') + + main(sys.argv[1]) diff --git a/dataSync.py b/dataSync.py index e4f8188..3b1fdc6 100755 --- a/dataSync.py +++ b/dataSync.py @@ -42,16 +42,16 @@ import dataFilter blox=[ ("ecolux","./block-I/01-raw","./block-I/02-inspect","2020-07-21 00:00:00","2020-08-18 23:59:00"), # Block1: 21.7 - 18.8 ("ecolux","./block-II/01-raw","./block-II/02-inspect","2020-09-15 00:00:00","2020-10-13 23:59:00"), # Block2: 15.9 - 13.10 -("foo","~/block-I/01-raw/../..","~/block-I/02-inspect/","2020-07-21 00:00:00","2020-08-18 23:59:00"), ("ecotrack","/opt/virtualBoxVMs/share/block-I/01-raw","/opt/virtualBoxVMs/share/block-I/02-inspect","2019-10-14 00:00:00","2019-10-16 23:59:00"), +("ecolux","./test","./test","2020-09-15 00:00:00","2020-09-16 23:59:00"), ## -- add here -- ## ] TIME_FMT='%Y-%m-%d %H:%M:00' noTime = dt.fromtimestamp(0) TAG_LEN=len("04B94A7F7288588022") -SCRIPTPATH=os.path.dirname(os.path.abspath(__file__))+os.sep RULER=f"{'#'*100}\n" +rootPath = os.path.abspath( os.path.dirname( __file__ ) ) #uses cwd(), which is set to scriptdir at start def initFileStructure(): @@ -109,7 +109,7 @@ def initFileStructure(): A_COL_SPECIES=1 A_COL_SPECIES_IND=0 A_COL_WEIGHT_WO_TAG_MG=2 - A_COL_TAG=5w + A_COL_TAG=5 # out file @@ -446,7 +446,6 @@ class Data: return h def write(self): - # os.makedirs(os.path.dirname(OUTPUTDIR), exist_ok=True) os.makedirs(OUTPUTDIR, exist_ok=True) os.chdir(OUTPUTDIR) pp("write to " + OUT_FILE) @@ -459,7 +458,6 @@ class Data: os.system(cmd_compress) def xtract(): - # os.chdir(INPUTDIR) for root, dirs, files in os.walk(INPUTDIR): #walk recursively for file in files: if file.endswith(".tgz") or file.endswith(".tar.gz"): @@ -494,11 +492,12 @@ def getFileList(): return animalFile, skyglowFile, trackFiles + def make_rel_abs_path(path): - path = os.path.normpath(path) path = os.path.expanduser(path) - path = os.path.abspath(path) - return path + if not os.path.isabs(path): + path = os.path.abspath(rootPath+os.sep+path) + return os.path.normpath(path) def main(): @@ -513,8 +512,7 @@ def main(): dataFilter.main(OUT_FILE) if( platform.system() == "Linux" ): - cmd=SCRIPTPATH+"updateGitIgnore.sh" - os.system(cmd) + os.system(rootPath+"/updateGitIgnore.sh") if __name__ == "__main__" : known_projects=["ecotrack","ecolux","schrebatron"] @@ -541,8 +539,8 @@ if __name__ == "__main__" : print("You can add a configuration by adding a line to the 'blox' structure. It has following form:\n") print(" (Project, inDir, outDir, start, end)\n" ) print(f"> Project must be in {known_projects}" ) - print(f"> times must have Format like '{dt.now().strftime(TIME_FMT)}'" ) print(f"> dirs can be relative. eg: ('./data','../data','~/data')") + print(f"> times must have Format like '{dt.now().strftime(TIME_FMT)}'" ) exit() start = time.time() -- GitLab