am0ebe authored
> only full raw data in ecotrack/all/01-raw
> all other ecotrack/{I,II,III} only link to this -> should speed up git and reduce total size and redundancy
> getDates.sh: add earliest/latest date for all units, write to file, add to dataSync

-------------------------------
All the squashed old commits below:
-------------------------------

commit baafb4ad (HEAD -> dev, tag: ecotrack-data, tag: ecolux-data, origin/dev, master)
Author: am0ebe <am0ebe@gmx.de>
Date:   Thu Mar 18 17:57:32 2021 +0100

    recalc *ALL* data + ecotrack improvement + ...
    > see below
    unknown-tags.sh: fix error message. always call both functions and write to files (tags_info and tags_unknowntags)
    update-gitignore.sh: change name
    dataSync: calc mean temperature for all 4 sensors, filter temp below threshold (cur=12°C)
    dataFilter: fix format bug where zeros were prepended to the species col
    duplicate func make_rel_abs_path (in dataSync and dataFilter) to avoid import error due to circular dependency

commit 84cafbb7
Author: am0ebe <am0ebe@gmx.de>
Date:   Wed Mar 17 18:00:05 2021 +0100

    fix config. filter: infer proj from filename. add ecotrack data I,II,III. start temp: calc mean for

commit 68f4680e
Author: am0ebe <am0ebe@gmx.de>
Date:   Tue Mar 16 15:28:27 2021 +0100

    continue conf.json and dataSync/filter implementation. filter: derive proj for blox from header

commit b419a53b
Author: am0ebe <am0ebe@gmx.de>
Date:   Mon Mar 15 14:48:05 2021 +0100

    ignore .csv, add ecotrack data, parse conf.json
    > add blox and project configuration

commit 99322855
Author: am0ebe <am0ebe@gmx.de>
Date:   Thu Mar 11 18:29:56 2021 +0100

    recalc *ALL*. adjust unknown-tags script
    > create tagInfo and unknown-tags automatically from dataSync

commit a43a6e59
Author: am0ebe <am0ebe@gmx.de>
Date:   Thu Mar 11 16:01:30 2021 +0100

    put blox configuration into file. unknown-tags.sh takes dir as arg and is being called properly from dataSync

commit 75f647a9
Author: am0ebe <am0ebe@gmx.de>
Date:   Thu Mar 11 14:22:05 2021 +0100

    add ecotrack data. rename data dirs

commit ac955cb8
Author: am0ebe <am0ebe@gmx.de>
Date:   Thu Mar 11 13:35:30 2021 +0100

    add ecotrack support | fix time bug
    > see below

    fix daylight-saving-time bug:
    > set timedelta to hours, not days

    ecotrack support:
    > 4 sensors per unit
    > merge on unit and habitat and time!
    > sensors log once every 5 minutes -> find the closest reading for each track detection
    > calcHabitat() to determine which habitat, depending on x, y and the PROJ var
    > initTempData() gets temp data and stores it in a list of lists of tuples -> tempData[unit][habitat] returns (time, temp)
    > mergeTemp
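The temperature merge described in the last commit above (4 sensors per unit, readings every 5 minutes, matched to the nearest track detection) lives in dataSync, which is not shown on this page. The following is only a rough sketch of the tempData[unit][habitat] -> (time, temp) lookup the commit message describes, assuming time-sorted lists of (unix_time, temp) tuples and a hypothetical helper name closest_temp:

from bisect import bisect_left

def closest_temp(tempData, unit, habitat, detection_time):
    # tempData[unit][habitat]: list of (unix_time, temp) tuples, sorted by time
    readings = tempData[unit][habitat]
    times = [t for t, _ in readings]
    i = bisect_left(times, detection_time)
    # the nearest reading is either just before or just after the insertion point
    candidates = readings[max(i - 1, 0):i + 1]
    return min(candidates, key=lambda r: abs(r[0] - detection_time))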
dataFilter.py 5.86 KiB
#!/usr/bin/python3
#
# Todo:
# > (opt) use global vars for Col-indices. each function that changes 'em updates 'em
#
import sys, os, json
from datetime import datetime as dt
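# Tunable filter parameters (see the usage text at the bottom of this file):
#   cutOff  - if True, plateaus that run all the way to the end of the data are cut back to zero duration
#   ncut    - total duration (seconds) removed by the cutOff step; appended to the output file name
#   coolOff - max gap in seconds for a new detection to still be merged into the current plateau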
cutOff = True
ncut = 0
coolOff = 5
DELIM=","
IN_FMT="%Y-%m-%d %H:%M:%S"
OUT_FMT="%d-%m-%Y %H:%M:%S"
confFileName="./conf.json"
def squash(data):
print("squash plateaus")
col_timestamp=0
col_habitat=4
col_x=6
col_y=7
col_tag=8
nfast=0
recent = []
filtered = []
omit_col_begin=4 #omit ts,ms,date,time
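    # recent holds the currently open plateaus, at most one per tag:
    # recent[i] = [begin_ts, last_seen_ts, dur] + raw columns from index 4 on,
    # so recent[i][5]/[6] are x/y and recent[i][7] is the tag (see the checks below)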
for line in data:
if line[0] == '#' or line[0] == '\n':
if line.startswith("#timestamp"):
global header
header="begin, end, dur, last-detect, block, SXE, PEE, PXE," + DELIM.join(line.split(',')[omit_col_begin:])
continue
l = line.split(DELIM)
timestamp = int(l[col_timestamp])
tag = l[ col_tag ]
x = l[ col_x ]
y = l[ col_y ]
tagFound = False
i=0
while i < len(recent):
samePos=(recent[i][5] == x and recent[i][6] == y)
recentBeetle=recent[i][7] == tag
fastBeetle = recentBeetle and not samePos
ago = timestamp - recent[i][1]
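            # ago: seconds since this plateau was last extended
            # fastBeetle: same tag detected again at a different position -> close the old plateau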
# print(i,":",recent[i])
# print(f"tag: {tag}| timestamp: {timestamp}| ago:{ago}| coolOff:{coolOff}| fastBeetle:{fastBeetle}| {x},{y}:{recent[i][5]},{recent[i][6]}|recentBeetle:{recentBeetle}")
if ago > coolOff or fastBeetle:
if fastBeetle:
nfast += 1
## add new squashed / trackevent
filtered.append(DELIM.join(map(str,recent[i])))
del recent[i] # del old
continue # dont incr idx!
if recentBeetle:
## update
tagFound = True
dur = timestamp-recent[i][0]
recent[i][1] = timestamp
recent[i][2] = dur
i +=1
if not tagFound:
## add recent
recent.append([timestamp,timestamp,0]+l[omit_col_begin:])
for r in recent:
## cutOff
if cutOff:
global ncut
ncut += r[2]
r[1] = r[0]
r[2] = 0
filtered.append(DELIM.join(map(str,r)))
print(f" cutOff: {ncut}")
print(f" nfast: {nfast}")
return filtered
def add_cols( data ):
print("add cols")
#after insertions
col_start = 0
col_end = 1
col_lastdetect = 3
col_block = 4
col_sxe = 5
col_pee = 6
col_pxe = 7
col_habitat = 9
col_x = 10
col_y = 11
known = {} #{tag: line}
npee = 0
nsxe = 0
npxe = 0
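    # known maps tag -> its most recently emitted (post-insert) row; used to derive:
    #   SXE = sensor cross event (x/y changed since the last detection of this tag)
    #   PEE = patch enter event  (habitat changed and the new habitat is not '0')
    #   PXE = patch cross event  (PEE where the previous habitat was also not '0')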
for i,line in enumerate(data):
l = line.split(DELIM)
tag = l[7]
##block
block = "NA"
now = dt.fromtimestamp(int( l[0] ))
for b in blox:
if b[1] <= now < b[2]:
block=b[0]
continue
if tag not in known:
lastdetect = "NA"
sxe = False
pee = False
pxe = False
else:
##lastdetect
last_end = int(known[tag][col_end])
this_start = int(l[col_start])
lastdetect = this_start - last_end
##sxe
last_pos = known[tag][col_x], known[tag][col_y]
this_pos = l[5], l[6]
sxe = last_pos != this_pos
if sxe:
nsxe+=1
##pee
last_habitat = known[tag][col_habitat]
this_habitat = l[4]
pee = last_habitat != this_habitat and this_habitat != '0'
if pee:
npee+=1
##pxe
pxe = pee and last_habitat != '0'
if pxe:
npxe +=1
l.insert(col_lastdetect,lastdetect)
l.insert(col_block, block)
l.insert(col_sxe, sxe)
l.insert(col_pee, pee)
l.insert(col_pxe, pxe)
known[tag] = l
data[i] = DELIM.join(map(str,l))
print(f" nsxe: {nsxe}")
print(f" npee: {npee}")
print(f" npxe: {npxe}")
return data
def format(data):
"sort and human-readable times"
print("format")
#sort by end-time
data.sort(key=lambda l: l.split(DELIM)[1])
for i,d in enumerate(data):
d=d.split(DELIM)
d[0]= dt.fromtimestamp(int( d[0] )).strftime(OUT_FMT) #convert timestamp to OUT_FMT
d[1]= dt.fromtimestamp(int( d[1] )).strftime(OUT_FMT)
d[2]=d[2].zfill(4) # dur
if d[3] != "NA" :
d[3]=d[3].zfill(5) # lastdetect
d[7]=d[7].zfill(1) # unit
data[i]=DELIM.join(map(str,d))
return data
def write(outFile, data):
outFile += f"-cut-{ncut}-filtered"
print(f"writing to {outFile}")
with open(outFile, "w") as f:
f.writelines(header)
f.writelines(data)
print("compress")
cmd_compress=f"tar -zcvf {outFile}.tgz {outFile}" # ">/dev/null 2>/dev/null"
os.system(cmd_compress)
def make_rel_abs_path(path):
#! the same method exists in dataFilter and dataSync. Don't forget to update em both if making changes!
global rootPath
rootPath = os.path.abspath( os.path.dirname( __file__ ) ) #uses cwd(), which is set to scriptdir at start
path = os.path.expanduser(path)
if not os.path.isabs(path):
path = os.path.abspath(rootPath+os.sep+path)
return os.path.normpath(path)
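# Expected shape of conf.json (inferred from readConf() below; names and dates are illustrative only):
#   { "blox": { "<PROJ>": [ ["<block-name>", "YYYY-MM-DD", "YYYY-MM-DD"], ... ] } }
# Each entry defines a block with its start and end date; the dates are parsed with IN_FMT.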
def readConf(PROJ):
""" -.- """
global blox
confFile = make_rel_abs_path(confFileName)
with open(confFile, 'r') as f:
blox = json.load(f)["blox"][PROJ]
for b in blox:
b[1]=dt.strptime(f"{b[1]} 00:00:00",IN_FMT)
b[2]=dt.strptime(f"{b[2]} 00:00:00",IN_FMT)
def main(filename):
proj = filename.split(os.sep)[-1].split("-")[0]
readConf(proj)
with open(filename) as f:
data = f.readlines()
data = squash( data )
data = add_cols( data )
data = format( data )
write(filename, data)
if __name__ == "__main__":
if len(sys.argv) == 1:
this = os.path.basename(__file__)
sys.exit( f'Usage: {this} dataFile coolOff cutOff\n\n'
              f' coolOff: another detection for a given tag and position\n'
              f'   * WITHIN coolOff seconds will be added to the last plateau\n'
              f'   * AFTER coolOff seconds will be used for a new plateau\n'
              f'   default: {coolOff}\n'
              f'\n'
              f' cutOff: cut off a plateau which goes all the way to the end of the data\n'
              f'   (e.g. beetle / lost tag on sensor)\n'
              f'   default: {cutOff}\n'
              f'\n'
              f' > squash plateaus\n'
              f' > cutOff tags from end [optional]\n'
              f' > add block according to time\n'
              f' > add time-since-last-detection\n'
              f' > add Patch Cross Event (PXE)\n'
              f' > add Patch Enter Event (PEE)\n'
              f' > add Sensor Cross Event (SXE)\n'
              f' > format\n')
if len(sys.argv) >= 3:
coolOff = int(sys.argv[2])
if len(sys.argv) >= 4:
if sys.argv[3] in ('0','False'):
cutOff = False
print(sys.argv[1])
main(sys.argv[1])