From fdfc786ec849bee5caee1577b7f2973212c2d5dc Mon Sep 17 00:00:00 2001
From: am0ebe <am0ebe@gmx.de>
Date: Wed, 27 Jan 2021 18:25:03 +0100
Subject: [PATCH] Fix paths: normalize, expand home, convert to absolute if
 relative, get correct script dir + [WIP] rewrite dataFilter.py

---
 .gitignore    |   2 +-
 dataFilter.py | 114 ++++++++++++++++++++++++++++----------------------
 dataSync.py   |  20 ++++-----
 3 files changed, 74 insertions(+), 62 deletions(-)

diff --git a/.gitignore b/.gitignore
index 9b1b3ae..0f6f294 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,5 +4,5 @@ block-I/02-inspect/ecotron-data-20200721-20200818
 *.txt
 test/*
 *#
-*skyglow.log
+skyglow.log
 __pycache__
diff --git a/dataFilter.py b/dataFilter.py
index 39a1cb2..93c20c2 100755
--- a/dataFilter.py
+++ b/dataFilter.py
@@ -1,6 +1,14 @@
 #!/usr/bin/python3
 
 #todo 
+#
+#	define trackevent: start,end,dur (end = no trackevent in start+coolOff)
+#	
+#	time since last detection
+
+#	sensor-crossing. SxE
+#	> patch-crossing? PxE
+#
 #	dont cutOff first trackEvent from plateau to end
 #
 #	print more info: 
@@ -10,88 +18,94 @@
 #
 #	def coolOff() and def cutOff()
 #	think about plateaus? eg. when a beetle chills on sensor for a day? (no patchcrossevent - pce)
-
+#
 import sys, os
 
 LEN_TIMESTAMP=len("1602626336")
 COL_TIMESTAMP=0
 COL_TAG=8
 
-def main(arg1,arg2=10):
-	global inFile, coolOff, HEADER
-	inFile = arg1
-	coolOff = arg2
+def cutOff(data):
+	print("cutOff from End")
+	# print(recently)
+	# for ts, tag in recently:
+	# 	print(f"cutoff tag {tag} ... ",end='')
+		
+	# 	i = len(filtered)-1
+	# 	ndel=0
+	# 	while i:
+	# 		line=filtered[i]
+	# 		timestamp=int(line[:LEN_TIMESTAMP])
+	# 		if timestamp - ts <= -coolOff:
+	# 			#assert no more deadbeetle before ts...
+	# 			break
+	# 		if tag == line.split(',')[COL_TAG]:
+	# 				del filtered[i]
+	# 				ndel += 1
+	# 				ts = timestamp - coolOff
+
+	# 		i -= 1
+
+	# 	print(f"done. Deleted {ndel} lines up to ts: {ts} idx: {i}")
+	return data
+
 
-	print(f"start filtering data with {coolOff} second coolOff")
 
-	with open(inFile) as f:
-		lines = f.readlines()
+def squash(data):
+	print("squash plateaus")
 
-	#filter coolOff
+	coolOff = 3 #also try 2
+	begin = end = dur = 0
 	filtered, recently = [], []
-	for line in lines:
+	for line in data:
 		if line[0] == '#':
 			if line.startswith("#timestamp"):
 				HEADER = line
+				#prepend to data
 			continue
 		l = line.split(',')
 
 		timestamp = int(l[COL_TIMESTAMP])
 		tag = l[ COL_TAG ]
 
-		#delete old entries from beginning of list
-		if len(recently) and timestamp - recently[0][0] >= coolOff:
-			del recently[0]
+		# count as track if tag hasn't been detected for coolOff seconds
+		if len(recently):
+			while timestamp - recently[0][0] >= coolOff:
+				end = recently[0]
+				del recently[0]
 
-		#add tag if its not in list
-		if all(tag != t2 for t1,t2 in recently):
+		# add tag if it's not in list
+		if all(tag != recent_tag for recent_ts,recent_tag in recently):
 			recently.append((timestamp,tag))
 			filtered.append(line)
 
-	#cutOff from End
-	print(recently)
-	for ts, tag in recently:
-		print(f"cutoff tag {tag} ... ",end='')
-		
-		i = len(filtered)-1
-		ndel=0
-		while i:
-			line=filtered[i]
-			timestamp=int(line[:LEN_TIMESTAMP])
-			if timestamp - ts <= -coolOff:
-				#assert no more deadbeetle before ts...
-				break
-
-			if tag == line.split(',')[COL_TAG]:
-					del filtered[i]
-					ndel += 1
-					ts = timestamp - coolOff
-
-			i -= 1
-
-		print(f"done. Deleted {ndel} lines up to ts: {ts} idx: {i}")
-
-	# [print(f, end='') for f in filtered]
-	write(filtered)
+	return data
 
-def write(data):
-	outFile=f"{inFile}-cutOff-coolOff-{coolOff}"
+def write(outFile, data):
 	print(f"writing to {outFile}")
 	with open(outFile, "w") as f:
-		f.writelines(HEADER)
 		f.writelines(data)
 
 	cmd_compress=f"tar -zcvf {outFile}.tgz {outFile}" # ">/dev/null 2>/dev/null"
 	os.system(cmd_compress)
 
+def main(filename):
+	with open(filename) as f:
+		data = f.readlines()
+
+	data = squash( data )
+	data = cutOff( data )
+
+	# write(filename + "-filtered", data)
+
 if __name__ == "__main__" :
+	main("ecolux-data-20200915-20200916")
+	exit()
 	if len(sys.argv) == 1:
 		this = os.path.basename(__file__)
 		
-		sys.exit(f'   "Usage:	{this} dataFile [coolOff]\n\n'
-			f' Cooloff defines the time in seconds that needs to pass before a tag can be detected again. Default = 10\n'
-			f' Will also cut off tags at end. (eg tag lying on sensor)')
-	
-	main(sys.argv[1:])
-# else:
-# 	print("called as a module via import")
+		sys.exit(	f'Usage:	{this} dataFile\n\n'
+					f'	Squash plateaus\n'
+					f'	Cut off tags at end. (eg tag lying on sensor)')
+
+	main(sys.argv[1])
diff --git a/dataSync.py b/dataSync.py
index e4f8188..3b1fdc6 100755
--- a/dataSync.py
+++ b/dataSync.py
@@ -42,16 +42,16 @@ import dataFilter
 blox=[
 ("ecolux","./block-I/01-raw","./block-I/02-inspect","2020-07-21 00:00:00","2020-08-18 23:59:00"), 		# Block1: 21.7 - 18.8
 ("ecolux","./block-II/01-raw","./block-II/02-inspect","2020-09-15 00:00:00","2020-10-13 23:59:00"),		# Block2: 15.9 - 13.10
-("foo","~/block-I/01-raw/../..","~/block-I/02-inspect/","2020-07-21 00:00:00","2020-08-18 23:59:00"),
 ("ecotrack","/opt/virtualBoxVMs/share/block-I/01-raw","/opt/virtualBoxVMs/share/block-I/02-inspect","2019-10-14 00:00:00","2019-10-16 23:59:00"),
+("ecolux","./test","./test","2020-09-15 00:00:00","2020-09-16 23:59:00"),
 ## -- add here -- ##
 ]
 
 TIME_FMT='%Y-%m-%d %H:%M:00'
 noTime = dt.fromtimestamp(0)
 TAG_LEN=len("04B94A7F7288588022")
-SCRIPTPATH=os.path.dirname(os.path.abspath(__file__))+os.sep
 RULER=f"{'#'*100}\n"
+rootPath = os.path.abspath( os.path.dirname( __file__ ) ) #uses cwd(), which is set to scriptdir at start
 
 def initFileStructure():
 
@@ -109,7 +109,7 @@ def initFileStructure():
 		A_COL_SPECIES=1
 		A_COL_SPECIES_IND=0
 		A_COL_WEIGHT_WO_TAG_MG=2
-		A_COL_TAG=5w
+		A_COL_TAG=5
 
 
 	# out file
@@ -446,7 +446,6 @@ class Data:
 		return h
 
 	def write(self):
-		# os.makedirs(os.path.dirname(OUTPUTDIR), exist_ok=True)
 		os.makedirs(OUTPUTDIR, exist_ok=True)
 		os.chdir(OUTPUTDIR)
 		pp("write to " + OUT_FILE)
@@ -459,7 +458,6 @@ class Data:
 		os.system(cmd_compress)
 
 def xtract():
-	# os.chdir(INPUTDIR)
 	for root, dirs, files in os.walk(INPUTDIR): #walk recursively
 		for file in files:
 			if file.endswith(".tgz") or file.endswith(".tar.gz"):
@@ -494,11 +492,12 @@ def getFileList():
 
 	return animalFile, skyglowFile, trackFiles
 
+
 def make_rel_abs_path(path):
-	path = os.path.normpath(path)
 	path = os.path.expanduser(path)
-	path = os.path.abspath(path)
-	return path
+	if not os.path.isabs(path):
+		path = os.path.abspath(rootPath+os.sep+path)
+	return os.path.normpath(path)
 
 def main():
 
@@ -513,8 +512,7 @@ def main():
 	dataFilter.main(OUT_FILE)
 
 	if( platform.system() == "Linux" ):
-		cmd=SCRIPTPATH+"updateGitIgnore.sh"
-		os.system(cmd)
+		os.system(rootPath+"/updateGitIgnore.sh")
 
 if __name__ == "__main__" :
 	known_projects=["ecotrack","ecolux","schrebatron"]
@@ -541,8 +539,8 @@ if __name__ == "__main__" :
 		print("You can add a configuration by adding a line to the 'blox' structure. It has following form:\n")
 		print("			(Project, inDir, outDir, start, end)\n" )
 		print(f"> Project must be in {known_projects}" )
-		print(f"> times must have Format like '{dt.now().strftime(TIME_FMT)}'" )
 		print(f"> dirs can be relative. eg: ('./data','../data','~/data')")
+		print(f"> times must have Format like '{dt.now().strftime(TIME_FMT)}'" )
 		exit()
 
 	start = time.time()
-- 
GitLab