Skip to content
Snippets Groups Projects
Commit fdfc786e authored by am0ebe's avatar am0ebe
Browse files

fix paths. normalize,expand home,convert to absolute if relative,get correct...

 fix paths. normalize,expand home,convert to absolute if relative,get correct scriptdir + [WIP] rewrite dataFilter.py
parent e3573d01
No related branches found
No related tags found
No related merge requests found
...@@ -4,5 +4,5 @@ block-I/02-inspect/ecotron-data-20200721-20200818 ...@@ -4,5 +4,5 @@ block-I/02-inspect/ecotron-data-20200721-20200818
*.txt *.txt
test/* test/*
*# *#
*skyglow.log skyglow.log
__pycache__ __pycache__
#!/usr/bin/python3 #!/usr/bin/python3
#todo #todo
#
# define trackevent: start,end,dur (end = no trackevent in start+coolOff)
#
# time since last detection
# sensor-crossing. SxE
# > patch-crossing? PxE
#
# dont cutOff first trackEvent from plateau to end # dont cutOff first trackEvent from plateau to end
# #
# print more info: # print more info:
...@@ -10,88 +18,94 @@ ...@@ -10,88 +18,94 @@
# #
# def coolOff() and def cutOff() # def coolOff() and def cutOff()
# think about plateaus? eg. when a beetle chills on sensor for a day? (no patchcrossevent - pce) # think about plateaus? eg. when a beetle chills on sensor for a day? (no patchcrossevent - pce)
#
import sys, os import sys, os
LEN_TIMESTAMP=len("1602626336") LEN_TIMESTAMP=len("1602626336")
COL_TIMESTAMP=0 COL_TIMESTAMP=0
COL_TAG=8 COL_TAG=8
def main(arg1,arg2=10): def cutOff(data):
global inFile, coolOff, HEADER print("cutOff from End")
inFile = arg1 # print(recently)
coolOff = arg2 # for ts, tag in recently:
# print(f"cutoff tag {tag} ... ",end='')
# i = len(filtered)-1
# ndel=0
# while i:
# line=filtered[i]
# timestamp=int(line[:LEN_TIMESTAMP])
# if timestamp - ts <= -coolOff:
# #assert no more deadbeetle before ts...
# break
# if tag == line.split(',')[COL_TAG]:
# del filtered[i]
# ndel += 1
# ts = timestamp - coolOff
# i -= 1
# print(f"done. Deleted {ndel} lines up to ts: {ts} idx: {i}")
return data
print(f"start filtering data with {coolOff} second coolOff")
with open(inFile) as f: def squash(data):
lines = f.readlines() print("squash plateaus")
#filter coolOff coolOff = 3 #also try 2
begin = end = dur = 0
filtered, recently = [], [] filtered, recently = [], []
for line in lines: for line in data:
if line[0] == '#': if line[0] == '#':
if line.startswith("#timestamp"): if line.startswith("#timestamp"):
HEADER = line HEADER = line
#prepend to data
continue continue
l = line.split(',') l = line.split(',')
timestamp = int(l[COL_TIMESTAMP]) timestamp = int(l[COL_TIMESTAMP])
tag = l[ COL_TAG ] tag = l[ COL_TAG ]
#delete old entries from beginning of list # count as track if tag hasnt been detected for coolOff seconds
if len(recently) and timestamp - recently[0][0] >= coolOff: if len(recently):
del recently[0] while timestamp - recently[0][0] >= coolOff:
end = recently[0]
del recently[0]
#add tag if its not in list # add tag if its not in list
if all(tag != t2 for t1,t2 in recently): if all(tag != recent_tag for recent_ts,recent_tag in recently):
recently.append((timestamp,tag)) recently.append((timestamp,tag))
filtered.append(line) filtered.append(line)
#cutOff from End return data
print(recently)
for ts, tag in recently:
print(f"cutoff tag {tag} ... ",end='')
i = len(filtered)-1
ndel=0
while i:
line=filtered[i]
timestamp=int(line[:LEN_TIMESTAMP])
if timestamp - ts <= -coolOff:
#assert no more deadbeetle before ts...
break
if tag == line.split(',')[COL_TAG]:
del filtered[i]
ndel += 1
ts = timestamp - coolOff
i -= 1
print(f"done. Deleted {ndel} lines up to ts: {ts} idx: {i}")
# [print(f, end='') for f in filtered]
write(filtered)
def write(data): def write(outFile, data):
outFile=f"{inFile}-cutOff-coolOff-{coolOff}"
print(f"writing to {outFile}") print(f"writing to {outFile}")
with open(outFile, "w") as f: with open(outFile, "w") as f:
f.writelines(HEADER)
f.writelines(data) f.writelines(data)
cmd_compress=f"tar -zcvf {outFile}.tgz {outFile}" # ">/dev/null 2>/dev/null" cmd_compress=f"tar -zcvf {outFile}.tgz {outFile}" # ">/dev/null 2>/dev/null"
os.system(cmd_compress) os.system(cmd_compress)
def main(filename):
with open(filename) as f:
data = f.readlines()
data = squash( data )
data = cutOff( data )
# write(filename + "-filtered", data)
if __name__ == "__main__" : if __name__ == "__main__" :
main("ecolux-data-20200915-20200916")
exit()
if len(sys.argv) == 1: if len(sys.argv) == 1:
this = os.path.basename(__file__) this = os.path.basename(__file__)
sys.exit(f' "Usage: {this} dataFile [coolOff]\n\n' sys.exit( f'Usage: {this} dataFile\n\n'
f' Cooloff defines the time in seconds that needs to pass before a tag can be detected again. Default = 10\n' f' Squash plateaus\n'
f' Will also cut off tags at end. (eg tag lying on sensor)') f' Cut off tags at end. (eg tag lying on sensor)')
main(sys.argv[1:]) main(sys.argv[1])
# else:
# print("called as a module via import")
...@@ -42,16 +42,16 @@ import dataFilter ...@@ -42,16 +42,16 @@ import dataFilter
blox=[ blox=[
("ecolux","./block-I/01-raw","./block-I/02-inspect","2020-07-21 00:00:00","2020-08-18 23:59:00"), # Block1: 21.7 - 18.8 ("ecolux","./block-I/01-raw","./block-I/02-inspect","2020-07-21 00:00:00","2020-08-18 23:59:00"), # Block1: 21.7 - 18.8
("ecolux","./block-II/01-raw","./block-II/02-inspect","2020-09-15 00:00:00","2020-10-13 23:59:00"), # Block2: 15.9 - 13.10 ("ecolux","./block-II/01-raw","./block-II/02-inspect","2020-09-15 00:00:00","2020-10-13 23:59:00"), # Block2: 15.9 - 13.10
("foo","~/block-I/01-raw/../..","~/block-I/02-inspect/","2020-07-21 00:00:00","2020-08-18 23:59:00"),
("ecotrack","/opt/virtualBoxVMs/share/block-I/01-raw","/opt/virtualBoxVMs/share/block-I/02-inspect","2019-10-14 00:00:00","2019-10-16 23:59:00"), ("ecotrack","/opt/virtualBoxVMs/share/block-I/01-raw","/opt/virtualBoxVMs/share/block-I/02-inspect","2019-10-14 00:00:00","2019-10-16 23:59:00"),
("ecolux","./test","./test","2020-09-15 00:00:00","2020-09-16 23:59:00"),
## -- add here -- ## ## -- add here -- ##
] ]
TIME_FMT='%Y-%m-%d %H:%M:00' TIME_FMT='%Y-%m-%d %H:%M:00'
noTime = dt.fromtimestamp(0) noTime = dt.fromtimestamp(0)
TAG_LEN=len("04B94A7F7288588022") TAG_LEN=len("04B94A7F7288588022")
SCRIPTPATH=os.path.dirname(os.path.abspath(__file__))+os.sep
RULER=f"{'#'*100}\n" RULER=f"{'#'*100}\n"
rootPath = os.path.abspath( os.path.dirname( __file__ ) ) #uses cwd(), which is set to scriptdir at start
def initFileStructure(): def initFileStructure():
...@@ -109,7 +109,7 @@ def initFileStructure(): ...@@ -109,7 +109,7 @@ def initFileStructure():
A_COL_SPECIES=1 A_COL_SPECIES=1
A_COL_SPECIES_IND=0 A_COL_SPECIES_IND=0
A_COL_WEIGHT_WO_TAG_MG=2 A_COL_WEIGHT_WO_TAG_MG=2
A_COL_TAG=5w A_COL_TAG=5
# out file # out file
...@@ -446,7 +446,6 @@ class Data: ...@@ -446,7 +446,6 @@ class Data:
return h return h
def write(self): def write(self):
# os.makedirs(os.path.dirname(OUTPUTDIR), exist_ok=True)
os.makedirs(OUTPUTDIR, exist_ok=True) os.makedirs(OUTPUTDIR, exist_ok=True)
os.chdir(OUTPUTDIR) os.chdir(OUTPUTDIR)
pp("write to " + OUT_FILE) pp("write to " + OUT_FILE)
...@@ -459,7 +458,6 @@ class Data: ...@@ -459,7 +458,6 @@ class Data:
os.system(cmd_compress) os.system(cmd_compress)
def xtract(): def xtract():
# os.chdir(INPUTDIR)
for root, dirs, files in os.walk(INPUTDIR): #walk recursively for root, dirs, files in os.walk(INPUTDIR): #walk recursively
for file in files: for file in files:
if file.endswith(".tgz") or file.endswith(".tar.gz"): if file.endswith(".tgz") or file.endswith(".tar.gz"):
...@@ -494,11 +492,12 @@ def getFileList(): ...@@ -494,11 +492,12 @@ def getFileList():
return animalFile, skyglowFile, trackFiles return animalFile, skyglowFile, trackFiles
def make_rel_abs_path(path): def make_rel_abs_path(path):
path = os.path.normpath(path)
path = os.path.expanduser(path) path = os.path.expanduser(path)
path = os.path.abspath(path) if not os.path.isabs(path):
return path path = os.path.abspath(rootPath+os.sep+path)
return os.path.normpath(path)
def main(): def main():
...@@ -513,8 +512,7 @@ def main(): ...@@ -513,8 +512,7 @@ def main():
dataFilter.main(OUT_FILE) dataFilter.main(OUT_FILE)
if( platform.system() == "Linux" ): if( platform.system() == "Linux" ):
cmd=SCRIPTPATH+"updateGitIgnore.sh" os.system(rootPath+"/updateGitIgnore.sh")
os.system(cmd)
if __name__ == "__main__" : if __name__ == "__main__" :
known_projects=["ecotrack","ecolux","schrebatron"] known_projects=["ecotrack","ecolux","schrebatron"]
...@@ -541,8 +539,8 @@ if __name__ == "__main__" : ...@@ -541,8 +539,8 @@ if __name__ == "__main__" :
print("You can add a configuration by adding a line to the 'blox' structure. It has following form:\n") print("You can add a configuration by adding a line to the 'blox' structure. It has following form:\n")
print(" (Project, inDir, outDir, start, end)\n" ) print(" (Project, inDir, outDir, start, end)\n" )
print(f"> Project must be in {known_projects}" ) print(f"> Project must be in {known_projects}" )
print(f"> times must have Format like '{dt.now().strftime(TIME_FMT)}'" )
print(f"> dirs can be relative. eg: ('./data','../data','~/data')") print(f"> dirs can be relative. eg: ('./data','../data','~/data')")
print(f"> times must have Format like '{dt.now().strftime(TIME_FMT)}'" )
exit() exit()
start = time.time() start = time.time()
......
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment