From 32ae5a703d814428440c3ac08fd5cf553eb2eb68 Mon Sep 17 00:00:00 2001 From: am0ebe <am0ebe@gmx.de> Date: Thu, 11 Feb 2021 23:43:18 +0100 Subject: [PATCH] dataSync: filter out tags only consisting of digits. Assert: tag always contain at least 1 letter --- dataFilter.py | 1 - dataSync.py | 9 +++++---- unknown-tags.sh | 3 ++- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/dataFilter.py b/dataFilter.py index 634d8c3..f87b401 100755 --- a/dataFilter.py +++ b/dataFilter.py @@ -256,4 +256,3 @@ if __name__ == "__main__" : cutOff = False main(sys.argv[1]) - diff --git a/dataSync.py b/dataSync.py index 4121d5a..d7272ff 100755 --- a/dataSync.py +++ b/dataSync.py @@ -39,7 +39,7 @@ blox=[ ("ecolux","./block-I/01-raw","./block-I/02-inspect","2020-07-21 00:00:00","2020-08-18 23:59:00"), # Block1: 21.7 - 18.8 ("ecolux","./block-II/01-raw","./block-II/02-inspect","2020-09-15 00:00:00","2020-10-13 23:59:00"), # Block2: 15.9 - 13.10 ("ecotrack","/opt/virtualBoxVMs/share/block-I/01-raw","/opt/virtualBoxVMs/share/block-I/02-inspect","2019-10-14 00:00:00","2019-10-16 23:59:00"), -("ecolux","./test","./test","2020-09-14 00:00:00","2020-09-16 23:59:00"), +("ecolux","./test","./test","2019-09-14 00:00:00","2021-09-16 23:59:00"), ## -- add here -- ## ] @@ -238,6 +238,7 @@ class Data: lines = f.readlines() lines = self.clean(lines, T_MINLEN, T_MAXLEN, T_NCOLS, T_DELIM, self.getTrackTime) + lines = [l for l in lines if not l.split(DELIM)[T_COL_TAG].isdigit()] if not lines: pp("-> empty // non-usable") @@ -306,12 +307,12 @@ class Data: return lines def clean(self, lines, minLen, maxLen, nCols, sep, timeFunc=None): - """ remove trailing newline, empty and comments and remove lines before startTime or after endTime""" + """ remove trailing newline, empty, comments and all-digits and remove lines before startTime or after endTime""" lines = [l.strip() for l in lines] lines = [l for l in lines if minLen <= len(l) <= maxLen] # ecolux: ms are variable in length (0-999), SchrebaTron: ms is padded with zeros + lines = list(filter(lambda q: q and q[0] != '#', lines)) lines = [l for l in lines if len(l.split(sep)) == nCols] lines = [l.replace(sep,DELIM) for l in lines] - lines = list(filter(lambda q: q and q[0] != '#', lines)) if timeFunc: lines = list(filter(lambda q: startTime <= timeFunc(q) < endTime, lines)) #only between start and endTime @@ -342,7 +343,7 @@ class Data: def merge_animals(self): """ merge AnimalData into TrackData on TagID """ - pp("merging animal data into track data. This might take a while. Go grab a Coffee or do some pushups!") + pp("merging animal data into track data. This might take a while. Time to move around. Or grab a Coffee.") #TODO SPEED UP!!! diff --git a/unknown-tags.sh b/unknown-tags.sh index 910bd5a..5cff709 100755 --- a/unknown-tags.sh +++ b/unknown-tags.sh @@ -14,7 +14,6 @@ if (( $# < 1 )); then exit fi -in=`ls ${scriptdir}/block-${1}/02-inspect/ecolux-data-*-filtered | head -1` # filename_suffix=`basename "$1" | cut -d'-' -f5-` # if [ "$filename_suffix" == "" ]; then # #before @@ -25,6 +24,8 @@ in=`ls ${scriptdir}/block-${1}/02-inspect/ecolux-data-*-filtered | head -1` COL_TAG=12 HEADER_SIZE=1 # fi + +in=`ls ${scriptdir}/block-${1}/02-inspect/ecolux-data-*-filtered | head -1` LEN_W_HEADER=`wc -l "$in" | cut -d" " -f1` LEN=$(( $LEN_W_HEADER - $HEADER_SIZE)) -- GitLab