Skip to content
Snippets Groups Projects
  • am0ebe's avatar
    4174320b
    !!!REWRITE HISTORY, SAVE SPACE FOR ECOTRACK, SEE FULL COMMIT MSG FOR DETAILS!!! · 4174320b
    am0ebe authored
    > only full raw data in ecotrack/all/01-raw
    > all other ecotrack/{I,II,III} only link to this
    -> should speed up git and reduce total size and redundancy
    
    > getDates.sh:
    	add earliest latest date for all units
    	write to file
    	add to dataSync
    
    -------------------------------
    All the squashed old commits below:
    -------------------------------
    
    commit baafb4ad (HEAD -> dev, tag: ecotrack-data, tag: ecolux-data, origin/dev, master)
    Author: am0ebe <am0ebe@gmx.de>
    Date:   Thu Mar 18 17:57:32 2021 +0100
    
        recalc *ALL* data + ecotrackimprovement + ... > see below
    
        unknown-tags.sh: fix error message. always call both functions and write to files (tags_info and tags_unknowntags)
        update-gitignore.sh: change name
        dataSync:
          calc mean temperature for all 4 sensors
          filter temp below threshold (cur=12°C)
        datafilter:
          fix format bug where zeros were prepended to species col
          duplicate func make_rel_abs_path (in dataSync and dataFilter) to avoid import error due to circular dependency
    
    commit 84cafbb7
    Author: am0ebe <am0ebe@gmx.de>
    Date:   Wed Mar 17 18:00:05 2021 +0100
    
        fix config. filter: infer proj from filename. add ecotrack data I,II,III. start temp: calc mean for
    
    commit 68f4680e
    Author: am0ebe <am0ebe@gmx.de>
    Date:   Tue Mar 16 15:28:27 2021 +0100
    
        continue conf.json and datasync/filter implementation. filter: derive proj for blox from header
    
    commit b419a53b
    Author: am0ebe <am0ebe@gmx.de>
    Date:   Mon Mar 15 14:48:05 2021 +0100
    
        ignore .csv, add ecotrack data, parse conf.json > add blox and project configuration.
    
    commit 99322855
    Author: am0ebe <am0ebe@gmx.de>
    Date:   Thu Mar 11 18:29:56 2021 +0100
    
        recalc *ALL*. adjust unknowntags script > create tagInfo and unknown-tags automatically from datasync
    
    commit a43a6e59
    Author: am0ebe <am0ebe@gmx.de>
    Date:   Thu Mar 11 16:01:30 2021 +0100
    
        put blox-configuration into file. unknown-tags.sh takes dir as arg and is being called properly from dataSync
    
    commit 75f647a9
    Author: am0ebe <am0ebe@gmx.de>
    Date:   Thu Mar 11 14:22:05 2021 +0100
    
        add ecotrack data. rename data dirs
    
    commit ac955cb8
    Author: am0ebe <am0ebe@gmx.de>
    Date:   Thu Mar 11 13:35:30 2021 +0100
    
        add ecotrack support | fix time bug > see below
    
        fix daylightsavingtimes bug:
        > set timedelta to hours not days
    
        ecotrack support:
        > 4 sensors per unit > merge on unit and habitat and time!
        > sensors log time once every 5 minutes. find closest track detection
    
        > calcHabitat() to determine what habitat depending on x,y and PROJ var
        > initTempData() get temp data and store in list of list of tuples
        -> tempData[unit][habitat] returns (time,temp)
        > mergeTemp
    4174320b
    History
    !!!REWRITE HISTORY, SAVE SPACE FOR ECOTRACK, SEE FULL COMMIT MSG FOR DETAILS!!!
    am0ebe authored
    > only full raw data in ecotrack/all/01-raw
    > all other ecotrack/{I,II,III} only link to this
    -> should speed up git and reduce total size and redundancy
    
    > getDates.sh:
    	add earliest latest date for all units
    	write to file
    	add to dataSync
    
    -------------------------------
    All the squashed old commits below:
    -------------------------------
    
    commit baafb4ad (HEAD -> dev, tag: ecotrack-data, tag: ecolux-data, origin/dev, master)
    Author: am0ebe <am0ebe@gmx.de>
    Date:   Thu Mar 18 17:57:32 2021 +0100
    
        recalc *ALL* data + ecotrackimprovement + ... > see below
    
        unknown-tags.sh: fix error message. always call both functions and write to files (tags_info and tags_unknowntags)
        update-gitignore.sh: change name
        dataSync:
          calc mean temperature for all 4 sensors
          filter temp below threshold (cur=12°C)
        datafilter:
          fix format bug where zeros were prepended to species col
          duplicate func make_rel_abs_path (in dataSync and dataFilter) to avoid import error due to circular dependency
    
    commit 84cafbb7
    Author: am0ebe <am0ebe@gmx.de>
    Date:   Wed Mar 17 18:00:05 2021 +0100
    
        fix config. filter: infer proj from filename. add ecotrack data I,II,III. start temp: calc mean for
    
    commit 68f4680e
    Author: am0ebe <am0ebe@gmx.de>
    Date:   Tue Mar 16 15:28:27 2021 +0100
    
        continue conf.json and datasync/filter implementation. filter: derive proj for blox from header
    
    commit b419a53b
    Author: am0ebe <am0ebe@gmx.de>
    Date:   Mon Mar 15 14:48:05 2021 +0100
    
        ignore .csv, add ecotrack data, parse conf.json > add blox and project configuration.
    
    commit 99322855
    Author: am0ebe <am0ebe@gmx.de>
    Date:   Thu Mar 11 18:29:56 2021 +0100
    
        recalc *ALL*. adjust unknowntags script > create tagInfo and unknown-tags automatically from datasync
    
    commit a43a6e59
    Author: am0ebe <am0ebe@gmx.de>
    Date:   Thu Mar 11 16:01:30 2021 +0100
    
        put blox-configuration into file. unknown-tags.sh takes dir as arg and is being called properly from dataSync
    
    commit 75f647a9
    Author: am0ebe <am0ebe@gmx.de>
    Date:   Thu Mar 11 14:22:05 2021 +0100
    
        add ecotrack data. rename data dirs
    
    commit ac955cb8
    Author: am0ebe <am0ebe@gmx.de>
    Date:   Thu Mar 11 13:35:30 2021 +0100
    
        add ecotrack support | fix time bug > see below
    
        fix daylightsavingtimes bug:
        > set timedelta to hours not days
    
        ecotrack support:
        > 4 sensors per unit > merge on unit and habitat and time!
        > sensors log time once every 5 minutes. find closest track detection
    
        > calcHabitat() to determine what habitat depending on x,y and PROJ var
        > initTempData() get temp data and store in list of list of tuples
        -> tempData[unit][habitat] returns (time,temp)
        > mergeTemp
Code owners
Assign users and groups as approvers for specific file changes. Learn more.
dataFilter.py 5.86 KiB
#!/usr/bin/python3
#
# Todo:
#	> (opt) use global vars for Col-indices. each function that changes 'em updates 'em
#

import sys, os, json
from datetime import datetime as dt

# When true, squash() shortens every plateau that is still open at the end of
# the data to zero length (end = begin, dur = 0). Switched off via argv[3].
cutOff = True
# Sum of the durations removed by cutOff; write() embeds it in the output name.
ncut = 0
# Largest gap between two detections that squash() still merges into one
# plateau (seconds, according to the usage text). Overridden via argv[2].
coolOff = 5

# Field separator used when splitting and re-joining data lines.
DELIM=","
# strptime format readConf() uses for the block boundaries from the config.
IN_FMT="%Y-%m-%d %H:%M:%S"
# strftime format format() uses for the begin/end columns of the output.
OUT_FMT="%d-%m-%Y %H:%M:%S"
# Config file; a relative path is resolved against the script directory
# by make_rel_abs_path().
confFileName="./conf.json"

def squash(data):
	"""Collapse repeated detections of a tag at one position into plateaus.

	Lines starting with '#' and blank lines are skipped; a line starting with
	"#timestamp" is turned into the output header and stored in the
	module-level ``header``.

	Every other line is split on DELIM. A detection extends the open plateau
	of its tag when x and y are unchanged and it arrives at most ``coolOff``
	after the plateau's last detection. An open plateau is closed (emitted)
	as soon as a detection arrives more than ``coolOff`` after its end, or
	when its tag shows up at a different x/y ("fast beetle").

	Plateaus still open when the data ends are emitted last. With ``cutOff``
	set, their durations are added to the module-level ``ncut`` and they are
	shortened to zero length (end = begin, dur = 0).

	Args:
		data: iterable of raw text lines; line endings are optional.

	Returns:
		List of strings "begin,end,dur,<raw columns from omit_col_begin on>",
		each terminated by exactly one newline, in the order the plateaus
		were closed.
	"""
	global header, ncut

	print("squash plateaus")

	# column indices of a raw input line
	col_timestamp=0
	col_x=6
	col_y=7
	col_tag=8
	omit_col_begin=4 #omit ts,ms,date,time

	# a plateau record is [begin, end, dur] + raw[omit_col_begin:]
	rec_begin, rec_end, rec_dur = 0, 1, 2
	shift = 3 - omit_col_begin
	rec_x = col_x + shift
	rec_y = col_y + shift
	rec_tag = col_tag + shift

	nfast=0
	recent = []		# plateaus that may still grow
	filtered = []	# closed plateaus, already serialized

	def emit(rec):
		# Terminate every record explicitly instead of relying on the newline
		# carried by the last raw field: a final input line without "\n"
		# would otherwise be glued to the following record on output.
		filtered.append(DELIM.join(map(str,rec)) + "\n")

	for line in data:
		line = line.rstrip("\r\n")
		if not line.strip():
			continue	# blank or whitespace-only line
		if line[0] == '#':
			if line.startswith("#timestamp"):
				header="begin, end, dur, last-detect, block, SXE, PEE, PXE," + DELIM.join(line.split(DELIM)[omit_col_begin:]) + "\n"
			continue

		l = line.split(DELIM)
		timestamp = int(l[col_timestamp])
		tag = l[ col_tag ]
		x = l[ col_x ]
		y = l[ col_y ]

		tagFound = False
		still_open = []
		for rec in recent:
			recentBeetle = rec[rec_tag] == tag
			samePos = rec[rec_x] == x and rec[rec_y] == y
			fastBeetle = recentBeetle and not samePos
			ago = timestamp - rec[rec_end]

			if ago > coolOff or fastBeetle:
				if fastBeetle:
					nfast += 1

				## plateau is over: emit it and drop it from the open ones
				emit(rec)
				continue

			if recentBeetle:
				## same tag, same position, within coolOff: extend the plateau
				tagFound = True
				rec[rec_end] = timestamp
				rec[rec_dur] = timestamp - rec[rec_begin]

			still_open.append(rec)
		recent = still_open

		if not tagFound:
			## start a new plateau for this tag
			recent.append([timestamp,timestamp,0]+l[omit_col_begin:])

	for r in recent:

		## cutOff: plateaus running into the end of the data are zeroed
		if cutOff:
			ncut += r[rec_dur]
			r[rec_end] = r[rec_begin]
			r[rec_dur] = 0

		emit(r)

	print(f"\tcutOff: {ncut}")
	print(f"\tnfast: {nfast}")
	return filtered

def add_cols( data ):
	"""Insert the last-detect, block, SXE, PEE and PXE columns into each record.

	Each record is a DELIM-separated string as produced by squash(). Five
	values are inserted at positions 3..7:

	* last-detect: begin of this record minus end of the previous record of
	  the same tag, "NA" for the first record of a tag
	* block: label (entry[0]) of the last entry in the global ``blox`` whose
	  [entry[1], entry[2]) range contains the record's begin time, else "NA"
	* SXE: x/y differ from the previous record of the tag
	* PEE: habitat differs from the previous record and is not '0'
	* PXE: PEE and the previous habitat was not '0'

	The list is modified in place and also returned.
	"""
	print("add cols")

	# field positions in an incoming record (before the insertions)
	in_begin, in_habitat, in_x, in_y, in_tag = 0, 4, 5, 6, 7
	# field positions in a remembered record (after the insertions)
	out_end, out_habitat, out_x, out_y = 1, 9, 10, 11
	insert_at = 3

	previous = {}	# tag -> field list of the last record seen for that tag
	nsxe = npee = npxe = 0

	for idx, record in enumerate(data):

		fields = record.split(DELIM)
		tag = fields[in_tag]

		## block: every entry is checked, so the last matching one wins
		begin_time = dt.fromtimestamp(int( fields[in_begin] ))
		hits = [b[0] for b in blox if b[1] <= begin_time < b[2]]
		block = hits[-1] if hits else "NA"

		prev = previous.get(tag)
		if prev is None:
			lastdetect, sxe, pee, pxe = "NA", False, False, False
		else:
			##lastdetect
			prev_end = int(prev[out_end])
			lastdetect = int(fields[in_begin]) - prev_end

			##sxe
			sxe = (prev[out_x], prev[out_y]) != (fields[in_x], fields[in_y])

			##pee
			old_habitat = prev[out_habitat]
			new_habitat = fields[in_habitat]
			pee = old_habitat != new_habitat and new_habitat != '0'

			##pxe
			pxe = pee and old_habitat != '0'

			if sxe:
				nsxe += 1
			if pee:
				npee += 1
			if pxe:
				npxe += 1

		# one slice assignment places the five new columns at 3..7
		fields[insert_at:insert_at] = [lastdetect, block, sxe, pee, pxe]

		previous[tag] = fields
		data[idx] = DELIM.join(str(value) for value in fields)

	print(f"\tnsxe: {nsxe}")
	print(f"\tnpee: {npee}")
	print(f"\tnpxe: {npxe}")

	return data


def format(data):
	"""Sort records by end time and make their time columns human-readable.

	Each record is a DELIM-separated string whose first columns are begin,
	end, dur and last-detect. The list is sorted in place by the numeric end
	timestamp, then per record:

	* begin and end are converted from epoch seconds to OUT_FMT (local time,
	  via datetime.fromtimestamp)
	* dur is zero-padded to 4 characters
	* last-detect is zero-padded to 5 characters unless it is "NA"

	Args:
		data: list of record strings; modified in place.

	Returns:
		The same list object.

	Raises:
		ValueError: if a begin or end field is not an integer.
	"""
	print("format")

	# Sort by end-time. The key must be numeric: compared as strings,
	# "1000000000" would sort before "999999999".
	data.sort(key=lambda rec: int(rec.split(DELIM)[1]))

	for i,rec in enumerate(data):
		d = rec.split(DELIM)
		d[0] = dt.fromtimestamp(int( d[0] )).strftime(OUT_FMT) #convert timestamp to OUT_FMT
		d[1] = dt.fromtimestamp(int( d[1] )).strftime(OUT_FMT)
		d[2] = d[2].zfill(4)	# dur
		if d[3] != "NA":
			d[3] = d[3].zfill(5)	# lastdetect
		# zfill(1) only changes an empty field (it becomes "0"); kept so the
		# output stays identical for existing data
		d[7] = d[7].zfill(1)
		data[i] = DELIM.join(d)

	return data

def write(outFile, data):
	"""Write header and records to "<outFile>-cut-<ncut>-filtered" and tar-gzip it.

	The module-level ``header`` (set by squash() when it meets a "#timestamp"
	line) is written first, followed by the records in ``data``. Afterwards
	the file is packed into "<name>.tgz" with the external ``tar`` command.

	Args:
		outFile: path prefix of the output file.
		data: iterable of newline-terminated record strings.

	Raises:
		NameError: if ``header`` was never set.
		OSError: if the output file cannot be opened or written.
	"""
	import subprocess

	outFile += f"-cut-{ncut}-filtered"

	print(f"writing to {outFile}")
	with open(outFile, "w") as f:
		f.write(header)
		f.writelines(data)

	# Compress only after the file has been closed: while it is open the
	# records may still sit in the write buffer and tar would archive an
	# empty or truncated file.
	print("compress")
	try:
		# argument list instead of a shell string, so paths containing spaces
		# or shell metacharacters are passed through unchanged
		subprocess.run(["tar", "-zcvf", f"{outFile}.tgz", outFile], check=False)
	except OSError as err:
		# compression stays best-effort, as it was with os.system
		print(f"\tcould not run tar: {err}")

def make_rel_abs_path(path):
	"""Return ``path`` as a normalized absolute path.

	"~" is expanded first. A relative path is resolved against the directory
	that contains this script (taken from __file__), not against the current
	working directory. As a side effect the module-level ``rootPath`` is set
	to that script directory.
	"""
	#! the same method exists in dataFilter and dataSync. Don't forget to update em both if making changes!

	global rootPath
	script_dir = os.path.dirname( __file__ )
	rootPath = os.path.abspath(script_dir)

	expanded = os.path.expanduser(path)
	if os.path.isabs(expanded):
		return os.path.normpath(expanded)

	anchored = rootPath + os.sep + expanded
	return os.path.normpath(os.path.abspath(anchored))

def readConf(PROJ):
	"""Load the block list of project ``PROJ`` into the module-level ``blox``.

	Reads the JSON file named by ``confFileName`` (resolved through
	make_rel_abs_path) and takes its ["blox"][PROJ] entry. In every block
	entry the values at index 1 and 2 are replaced by datetime objects,
	parsed with IN_FMT after " 00:00:00" has been appended to them.
	"""
	global blox

	with open(make_rel_abs_path(confFileName), 'r') as conf:
		blox = json.load(conf)["blox"][PROJ]

	for entry in blox:
		# index 1 and 2 hold the two date boundaries of the block
		for boundary in (1, 2):
			entry[boundary] = dt.strptime(f"{entry[boundary]} 00:00:00", IN_FMT)

def main(filename):
	"""Run the whole filter pipeline on one data file.

	The project name is the part of the file's last path component before
	the first "-"; it selects the block configuration. The file's lines are
	then passed through squash, add_cols and format, and the result is
	handed to write together with the input file name.
	"""
	last_component = filename.rsplit(os.sep, 1)[-1]
	project = last_component.partition("-")[0]
	readConf(project)

	with open(filename) as source:
		lines = list(source)

	records = format( add_cols( squash( lines ) ) )

	write(filename, records)

if __name__ == "__main__":

	# Command line: dataFile [coolOff [cutOff]]
	if len(sys.argv) == 1:
		this = os.path.basename(__file__)

		# The sections are separated by an explicit "\n": the empty string
		# literals used before added nothing, so the paragraphs ran together.
		usage = (
			f'Usage:\t{this} dataFile coolOff cutOff\n\n'
			f'\tanother detection for a given tag and position\n'
			f'\t\t* WITHIN coolOff seconds will be added to the last plateau\n'
			f'\t\t* AFTER coolOff seconds will be used for a new plateau\n'
			f'\t\tdefault: {coolOff}\n'
			'\n'
			f'\tcutOff plateau which goes all the way to the end.\n'
			f'\t\teg beetle / lost tag on sensor\n'
			f'\t\tdefault: {cutOff}\n'
			'\n'
			f'\t> squash plateaus\n'
			f'\t> cutOff tags from end [optional]\n'
			f'\t> add block according to time\n'
			f'\t> add time-since-last-detection\n'
			f'\t> add Patch Cross Event (PXE)\n'
			f'\t> add Patch Enter Event (PEE)\n'
			f'\t> add Sensor Cross Event (SXE)\n'
			f'\t> format\n'
		)
		# sys.exit with a string prints it to stderr and exits with status 1
		sys.exit(usage)

	# optional 2nd argument: overrides the module-level coolOff
	if len(sys.argv) >= 3:
		coolOff = int(sys.argv[2])

	# optional 3rd argument: only '0' or 'False' switch cutOff off
	if len(sys.argv) >= 4:
		if sys.argv[3] in ('0','False'):
			cutOff = False

	print(sys.argv[1])
	main(sys.argv[1])