From 32ae5a703d814428440c3ac08fd5cf553eb2eb68 Mon Sep 17 00:00:00 2001
From: am0ebe <am0ebe@gmx.de>
Date: Thu, 11 Feb 2021 23:43:18 +0100
Subject: [PATCH] dataSync: filter out tags consisting only of digits. Assert:
 tags always contain at least 1 letter

---
 dataFilter.py   | 1 -
 dataSync.py     | 9 +++++----
 unknown-tags.sh | 3 ++-
 3 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/dataFilter.py b/dataFilter.py
index 634d8c3..f87b401 100755
--- a/dataFilter.py
+++ b/dataFilter.py
@@ -256,4 +256,3 @@ if __name__ == "__main__" :
 			cutOff = False
 
 	main(sys.argv[1])
-
diff --git a/dataSync.py b/dataSync.py
index 4121d5a..d7272ff 100755
--- a/dataSync.py
+++ b/dataSync.py
@@ -39,7 +39,7 @@ blox=[
 ("ecolux","./block-I/01-raw","./block-I/02-inspect","2020-07-21 00:00:00","2020-08-18 23:59:00"), 		# Block1: 21.7 - 18.8
 ("ecolux","./block-II/01-raw","./block-II/02-inspect","2020-09-15 00:00:00","2020-10-13 23:59:00"),		# Block2: 15.9 - 13.10
 ("ecotrack","/opt/virtualBoxVMs/share/block-I/01-raw","/opt/virtualBoxVMs/share/block-I/02-inspect","2019-10-14 00:00:00","2019-10-16 23:59:00"),
-("ecolux","./test","./test","2020-09-14 00:00:00","2020-09-16 23:59:00"),
+("ecolux","./test","./test","2019-09-14 00:00:00","2021-09-16 23:59:00"),
 ## -- add here -- ##
 ]
 
@@ -238,6 +238,7 @@ class Data:
 
 			lines = f.readlines()
 			lines = self.clean(lines, T_MINLEN, T_MAXLEN, T_NCOLS, T_DELIM, self.getTrackTime)
+			lines = [l for l in lines if not l.split(DELIM)[T_COL_TAG].isdigit()]
 
 			if not lines:
 				pp("-> empty // non-usable")
@@ -306,12 +307,12 @@ class Data:
 		return lines
 
 	def clean(self, lines, minLen, maxLen, nCols, sep, timeFunc=None):
-		""" remove trailing newline, empty and comments and remove lines before startTime or after endTime"""
+		""" remove trailing newline, empty, comments and all-digits and remove lines before startTime or after endTime"""
 		lines = [l.strip() for l in lines]
 		lines = [l for l in lines if minLen <= len(l) <= maxLen] # ecolux: ms are variable in length (0-999), SchrebaTron: ms is padded with zeros
+		lines = list(filter(lambda q: q and q[0] != '#', lines))
 		lines = [l for l in lines if len(l.split(sep)) == nCols]
 		lines = [l.replace(sep,DELIM) for l in lines]
-		lines = list(filter(lambda q: q and q[0] != '#', lines))
 		if timeFunc:
 			lines = list(filter(lambda q: startTime <= timeFunc(q) < endTime, lines)) #only between start and endTime
 
@@ -342,7 +343,7 @@ class Data:
 
 	def merge_animals(self):
 		""" merge AnimalData into TrackData on TagID """
-		pp("merging animal data into track data. This might take a while. Go grab a Coffee or do some pushups!")
+		pp("merging animal data into track data. This might take a while. Time to move around. Or grab a Coffee.")
 
 		#TODO SPEED UP!!! 
 
diff --git a/unknown-tags.sh b/unknown-tags.sh
index 910bd5a..5cff709 100755
--- a/unknown-tags.sh
+++ b/unknown-tags.sh
@@ -14,7 +14,6 @@ if (( $# < 1 )); then
 	exit
 fi
 
-in=`ls ${scriptdir}/block-${1}/02-inspect/ecolux-data-*-filtered | head -1`
 # filename_suffix=`basename "$1" | cut -d'-' -f5-`
 # if [ "$filename_suffix" == "" ]; then
 # 	#before
@@ -25,6 +24,8 @@ in=`ls ${scriptdir}/block-${1}/02-inspect/ecolux-data-*-filtered | head -1`
 COL_TAG=12 
 HEADER_SIZE=1
 # fi
+
+in=`ls ${scriptdir}/block-${1}/02-inspect/ecolux-data-*-filtered | head -1`
 LEN_W_HEADER=`wc -l "$in" | cut -d" " -f1`
 LEN=$(( $LEN_W_HEADER - $HEADER_SIZE))
 
-- 
GitLab