From a4fdc2fd3457056cb8dd4a21bc4c577ba187b9b5 Mon Sep 17 00:00:00 2001
From: am0ebe <am0ebe@gmx.de>
Date: Wed, 20 Jan 2021 12:31:20 +0100
Subject: [PATCH] [WIP] dataSync: adapt for usage with ecoTrack project
 (2019). updateGitIgnore.sh: use paths relative to projDir for compatibility

---
 .gitignore         |   6 +-
 dataSync.py        | 198 +++++++++++++++++++++++++++------------------
 updateGitIgnore.sh |  19 +++--
 3 files changed, 132 insertions(+), 91 deletions(-)

diff --git a/.gitignore b/.gitignore
index 840a972..9b1b3ae 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,6 +1,6 @@
-/home/sugu/code/idiv/ecotrondata/block-II/02-inspect/ecotron-data-20200915-20201013
-/home/sugu/code/idiv/ecotrondata/block-II/01-raw/backup-ecoTron-2020-10-28-06-00-1603861202.tgz
-/home/sugu/code/idiv/ecotrondata/block-I/02-inspect/ecotron-data-20200721-20200818
+block-II/02-inspect/ecotron-data-20200915-20201013bla
+block-II/01-raw/backup-ecoTron-2020-10-28-06-00-1603861202.tgz
+block-I/02-inspect/ecotron-data-20200721-20200818
 *.txt
 test/*
 *#
diff --git a/dataSync.py b/dataSync.py
index 63c6624..e2e3b45 100755
--- a/dataSync.py
+++ b/dataSync.py
@@ -36,57 +36,89 @@ from pprint import pprint as pp
 from datetime import datetime as dt
 import dataFilter
 
-PROJ = "ecotron"
-# PROJ = "schrebatron"
-
 # Experiment Config! Time and paths.
-# Track-start: 7.7.20 // Light start: 20.7.20 // Dark: 10.7.20 - 20.7.20
+#####################################
+# ecotron -- Track-start: 7.7.20 // Light start: 20.7.20 // Dark: 10.7.20 - 20.7.20
+# Project, in, out, start, end
 blox=[
-("./block-I/01-raw","./block-I/02-inspect","2020-07-21 00:00:00","2020-08-18 23:59:00"), 	# Block1: 21.7 - 18.8
-("./block-II/01-raw","./block-II/02-inspect","2020-09-15 00:00:00","2020-10-13 23:59:00"),	# Block2: 15.9 - 13.10
+("ecotron","./block-I/01-raw","./block-I/02-inspect","2020-07-21 00:00:00","2020-08-18 23:59:00"), 		# Block1: 21.7 - 18.8
+("ecotron","./block-II/01-raw","./block-II/02-inspect","2020-09-15 00:00:00","2020-10-13 23:59:00"),	# Block2: 15.9 - 13.10
+("ecotrack","../camtrondata/test/in","../camtrondata/test/out","2019-06-28 00:00:00","2019-10-16 23:59:00"),
+# ("test","./test","./test","2020-07-21 00:00:00","2020-08-18 23:59:00"),
 ]
 
 TIME_FMT='%Y-%m-%d %H:%M:00'
 noTime = dt.fromtimestamp(0)
 TAG_LEN=len("04B94A7F7288588022")
+SCRIPTPATH=os.path.dirname(os.path.abspath(__file__))+os.sep
+
+def initFileStructure():
+
+	# trackfile
+	##########
+	global T_DELIM, TIMESTAMP_LEN, T_COL_MS, T_COL_X, T_COL_Y, T_COL_TAG, T_MINLEN, T_MAXLEN, T_NCOLS
+	T_DELIM=';'
+	TIMESTAMP_LEN=10
+	#t_col_timestamp=0
+	T_COL_MS=1
+	T_COL_X=2
+	T_COL_Y=3
+	T_COL_TAG=4
+	#t_col_signalstrength=5
+	if PROJ == "ecotron":
+		T_MINLEN = 38 # without newline.
+		T_MAXLEN = 40 # ms column varies 1-3. Will be padded later for uniform len
+		T_NCOLS = 6
+	elif PROJ == "ecotrack": #ecotron temperature project 2019
+		T_MINLEN = 36
+		T_MAXLEN = 38
+		T_NCOLS = 5
+
+	# lightfile
+	##########
+	if hasLight():
+		global L_DELIM, L_COL_MOON_REAL, L_COL_MOON_DMX, L_COL_SKYGLOW, L_MINLEN, L_MAXLEN, L_NCOLS
+		L_DELIM='\t'
+		L_COL_MOON_REAL=1
+		L_COL_MOON_DMX=2
+		L_COL_SKYGLOW=3
+		L_MINLEN = 61 #2020-07-21 15:15:00 	0.0	0	[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
+		L_MAXLEN = 76 #2020-07-21 15:09:00 	0.0	0	[15, 54, 4, 19, 0, 1, 5, 0, 40, 12, 123, 123]
+		L_NCOLS = 4
+	
+	# animalfile
+	###########
+	global A_DELIM, A_MINLEN, A_MAXLEN, A_NCOLS, A_COL_SPECIES, A_COL_SPECIES_IND, A_COL_WEIGHT_WO_TAG_MG, A_COL_TAG
+	if PROJ == "ecotron":
+		A_DELIM='\t'
+		A_MINLEN = 25
+		A_MAXLEN = 30
+		A_NCOLS = 4
+		A_COL_SPECIES=0
+		A_COL_SPECIES_IND=1
+		A_COL_WEIGHT_WO_TAG_MG=2
+		A_COL_TAG=3
+	elif PROJ == "ecotrack":
+		A_DELIM=';'
+		A_MINLEN = 39
+		A_MAXLEN = 44
+		# A_MINLEN = 35
+		# A_MAXLEN = 49
+		A_NCOLS = 7 #/w comments
+		A_COL_SPECIES=1
+		A_COL_SPECIES_IND=0
+		A_COL_WEIGHT_WO_TAG_MG=2
+		A_COL_TAG=5
+
+
+	# out file
+	###########
+	global OUT_FMT, DELIM, COL_UID, COL_TAG
+	OUT_FMT='%Y%m%d'
+	DELIM=','
+	COL_UID=4
+	COL_TAG=8
 
-# trackfile
-T_DELIM=';'
-TIMESTAMP_LEN=10
-#t_col_timestamp=0
-T_COL_MS=1
-T_COL_X=2
-T_COL_Y=3
-T_COL_TAG=4
-#t_col_signalstrength=5
-T_MINLEN = 38 # without newline.
-T_MAXLEN = 40 # ms column varies 1-3. Will be padded later for uniform len
-T_NCOLS = 6
-
-# lightfile
-L_DELIM='\t'
-L_COL_MOON_REAL=1
-L_COL_MOON_DMX=2
-L_COL_SKYGLOW=3
-L_MINLEN = 61 #2020-07-21 15:15:00 	0.0	0	[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
-L_MAXLEN = 76 #2020-07-21 15:09:00 	0.0	0	[15, 54, 4, 19, 0, 1, 5, 0, 40, 12, 123, 123]
-L_NCOLS = 4
-
-# animalfile
-A_DELIM='\t'
-A_MINLEN = 25 #guess
-A_MAXLEN = 30
-A_NCOLS = 4
-A_COL_SPECIES=0
-A_COL_SPECIES_IND=1
-A_COL_WEIGHT_WO_TAG_MG=2
-A_COL_TAG=3
-
-# out file
-OUT_FMT='%Y%m%d'
-DELIM=','
-COL_UID=4
-COL_TAG=8
 
 #mlx (idx is also dmx-byte value)
 moonMap = [ 1.409,
@@ -169,7 +201,7 @@ skyglowDict = {
 }
 
 def hasLight():
-	return PROJ.lower() == "ecotron"
+	return PROJ == "ecotron"
 
 def getUID(fileName):
 	""" extract and return unit-id from fileName """
@@ -182,19 +214,17 @@ class Data:
 
 	def __init__(self,animalFile, skyglowFile, trackFiles):
 
+		self.animalData = self.getAnimalData(animalFile)
+
 		self.trackData = []
 		for file in trackFiles:
 			self.trackData.extend( self.getTrackData(file) )
 
 		self.trackData.sort()
 
-		#cut off at end
-
 		self.startTrack = self.getTrackTime( self.trackData[0] )
 		self.endTrack = self.getTrackTime( self.trackData[-1] )
 
-		self.animalData = self.getAnimalData(animalFile)
-
 		if hasLight():
 			self.lightData = self.getLightData(skyglowFile)
 			self.startLight = self.getLightTime( self.lightData[0] )
@@ -206,7 +236,7 @@ class Data:
 		pp(self.header())
 
 	def getTrackData(self, fileName):
-		""" get lines from tracklog, clean and add new columns"""
+		""" get lines from one tracklog, clean and add/rm columns (the caller merges all tracklogs)"""
 
 		pp(f"processing: {fileName}")
 		with open(INPUTDIR+'/'+fileName) as f:
@@ -222,7 +252,7 @@ class Data:
 		idx_after_ms = TIMESTAMP_LEN + len('999') + len(DELIM)
 		for idx, line in enumerate(lines):
 
-			if PROJ.upper() == "ECOTRON":
+			if PROJ in ["ecotron","ecotrack"]:
 				# pad ms column with zeros to fit three digits
 				cols=line.split(DELIM)
 				ms_digits=len(cols[T_COL_MS])
@@ -240,8 +270,8 @@ class Data:
 			# add columns (date,time,uid,isHabitat)
 			line = line[:idx_after_ms] + DELIM + date + DELIM + time + DELIM + str(uid) + DELIM + str(isHabitat) + line[idx_after_ms:]
 
-			# remove signalstrength column
-			line = line[:line.rindex(DELIM)]
+			if PROJ == "ecotron":
+				line = line[:line.rindex(DELIM)] # remove signalstrength column
 
 			lines[idx] = line
 
@@ -264,11 +294,10 @@ class Data:
 		with open(file) as f:
 			lines = f.readlines()
 
-		lines = self.clean(lines, A_MINLEN, A_MAXLEN, A_NCOLS, A_DELIM)
-		
+		lines = self.clean(lines, A_MINLEN, A_MAXLEN, A_NCOLS, A_DELIM)		
 		#TODO: check for duplicate tags and print warning / ignore?
-		assert (lines), "skyglow: Not valid. Check times / formatting."
-
+		
+		assert (lines), "tags: Not valid. Check times / formatting."
 		return lines
 
 	def clean(self, lines, minLen, maxLen, nCols, sep, timeFunc=None):
@@ -313,12 +342,10 @@ class Data:
 		#TODO SPEED UP!!! 
 
 		# ttags_not_found=[]
-		dflt=f'{DELIM}NA{DELIM}NA{DELIM}NA'
 		notFound=0
 		for idx, tLine in enumerate(self.trackData):
 			found=False
 			ttag = tLine.split(DELIM)[COL_TAG]
-			
 			for aLine in self.animalData:
 				aLine = aLine.split(DELIM)
 				atag = aLine[A_COL_TAG]
@@ -330,7 +357,7 @@ class Data:
 					break;
 
 			if not found:
-				self.trackData[idx] += dflt
+				self.trackData[idx] += f'{DELIM}NA{DELIM}NA{DELIM}NA'
 				notFound+=1
 				# ttags_not_found.append(ttag)
 
@@ -402,7 +429,7 @@ class Data:
 		return dt.fromtimestamp(timestamp)
 
 	def header(self):
-		h= f"# {PROJ.upper()} TRACK DATA\n" \
+		h= f"# {PROJ} data\n" \
 		f"#################################################################################################\n" \
 		f"#     len(Track): {len(self.trackData)}\n" \
 		f"#     startTrack: {self.startTrack}\n" \
@@ -413,7 +440,7 @@ class Data:
 					f"#     startLight: {self.startLight}\n" \
 					f"#       endLight: {self.endLight}\n"
 		h+= f"#################################################################################################\n" \
-			f"#timestamp, milliseconds, date, time, unit, x, y, tagID, species, speciesnumber, weight_without_tag[mg]{', moon_real[mLux], moon_eco[mLux], skyglow[Lux]' if hasLight() else ''}\n" \
+			f"#timestamp, ms, date, time, unit, habitat, x, y, tag, species, speciesnumber, weight_without_tag[mg]{', moon_real[mLux], moon_eco[mLux], skyglow[Lux]' if hasLight() else ''}\n" \
 			f"#################################################################################################\n"
 		return  h
 
@@ -432,7 +459,7 @@ class Data:
 def xtract():
 	for root, dirs, files in os.walk(INPUTDIR): #walk recursively
 		for file in files:
-			if file.endswith(".tgz"):
+			if file.endswith(".tgz") or file.endswith(".tar.gz"):
 				file=INPUTDIR + os.sep + file
 				print(f"xtracting {file}\n")
 				cmd_xtract=f"tar -zxvf {file} --directory {INPUTDIR}"
@@ -442,10 +469,10 @@ def xtract():
 def getFileList():
 
 	animalFile, skyglowFile, trackFiles = '','',[]
-	print( "Looking for 'skyglow.log', 'tags' or files containing 'unit-X'" )
+	print( "Looking for 'skyglow.log', 'tags' and files containing 'unit-X'" )
 	for root, dirs, files in os.walk(INPUTDIR): #walk recursively
 		for file in files:
-			if "unit" in file:
+			if "unit" in file and not file.endswith(".tgz") and not file.endswith(".tar.gz"):
 				trackFiles.append(file)
 			elif file == "tags":
 				animalFile = INPUTDIR+'/'+file
@@ -459,25 +486,27 @@ def getFileList():
 	if not trackFiles:
 		sys.exit("No tracking data found. (filename containing unit-X, where X is a number)")
 	if hasLight() and not skyglowFile:
-		sys.exit(f"Couldn't find {skyglowFile}")
+		sys.exit(f"Couldn't find 'skyglow.log'")
 	if not animalFile:
-		sys.exit(f"Couldn't find {animalFile}")
+		sys.exit(f"Couldn't find 'tags'")
 
 	return animalFile, skyglowFile, trackFiles
 
 def make_rel_abs_path(path):
 	if path.startswith('../'):
-		path= scriptPath()+path[3:]
+		path = os.path.abspath(SCRIPTPATH+path)
 	elif path.startswith('./'):
-		path= scriptPath()+path[2:]
-	return path
+		path = SCRIPTPATH+path[2:]
+	elif path.startswith("~/"):
+		path = os.path.expanduser("~")+path[1:]
 
-def scriptPath():
-	return os.path.dirname(os.path.abspath(__file__))+os.sep
+	return path
 
 def main():
 
+	initFileStructure()
 	xtract()
+	
 	animalFile, skyglowFile, trackFiles = getFileList()
 	data = Data(animalFile,skyglowFile,trackFiles)
 	data.merge()
@@ -486,25 +515,34 @@ def main():
 	dataFilter.main(OUT_FILE)
 
 	print("update git to ignore big files (>50MB)")
-	cmd=scriptPath()+"updateGitIgnore.sh"
+	cmd=SCRIPTPATH+"updateGitIgnore.sh"
 	os.system(cmd)
 
 if __name__ == "__main__" :
-	print("	Process raw data for which block?")
+	print(f"	Process raw data for which block?\n {50*'#'}\n")
 	for idx, b in enumerate(blox):
-		print(f"    {idx+1}) {b[2]} >> {b[3]}")
+		print(f"    {idx+1}) Project {b[0]}")
+		print(f"        from: {b[3]}")
+		print(f"          to: {b[4]}")
+		print(f"          in: {b[1]}")
+		print(f"         out: {b[2]}\n")
 	try:
 		n=int(input())-1
-		INPUTDIR = make_rel_abs_path( blox[n][0] )
-		OUTPUTDIR = make_rel_abs_path( blox[n][1] )
-		startTime = dt.strptime(blox[n][2], TIME_FMT)
-		endTime = dt.strptime(blox[n][3], TIME_FMT)
+		PROJ = blox[n][0].lower()
+		INPUTDIR = make_rel_abs_path( blox[n][1] )
+		OUTPUTDIR = make_rel_abs_path( blox[n][2] )
+		startTime = dt.strptime(blox[n][3], TIME_FMT)
+		endTime = dt.strptime(blox[n][4], TIME_FMT)
 	except:
 		print("Insert Block Number..")
 		exit()
+
+	# Validate after the try: exit() raises SystemExit, which the bare except above would swallow.
+	known_projects=["ecotrack","ecotron","schrebatron"] # NOTE(review): initFileStructure() has no "schrebatron" branch yet
+	if PROJ not in known_projects:
+		print(f"Unknown Project '{PROJ}'. Choose one of {known_projects}")
+		exit()
 
-	print(" inputdir: ", INPUTDIR)
-	print("outputdir: ", OUTPUTDIR)
 	start = time.time()
 	main()
 	end = time.time()
diff --git a/updateGitIgnore.sh b/updateGitIgnore.sh
index ccdc13a..d46bea8 100755
--- a/updateGitIgnore.sh
+++ b/updateGitIgnore.sh
@@ -1,12 +1,6 @@
 #!/bin/bash
 
-dir=`dirname "$(realpath $0)"`
-ignorefile="${dir}/.gitignore"
-
-#ignore .txt files in find, since they are ignored anyway
-find "${dir}" -size +50M ! -name '*.txt' | sed 's/\.\///g' > "${ignorefile}"
-
-list=(
+ignore=(
 "*.txt"
 "test/*"
 "*#" #ods-tmp-files
@@ -14,7 +8,16 @@ list=(
 __pycache__
 )
 
-for i in "${list[@]}"
+projdir=$(dirname "$(realpath "$0")")
+ignorefile="${projdir}/.gitignore"
+cd "${projdir}" || exit 1 # never rebuild .gitignore from the wrong directory
+
+#ignore .txt files in find, since they are ignored anyway
+find . -size +50M ! -name '*.txt' | sed 's|^\./||' > "${ignorefile}"
+
+for i in "${ignore[@]}"
 do
 	echo "$i" >> "${ignorefile}"
 done
+
+#cd -
\ No newline at end of file
-- 
GitLab