diff --git a/conf b/conf new file mode 100644 index 0000000000000000000000000000000000000000..6533b0510bd8d01089f045fa5b0f0f1e06ee5373 --- /dev/null +++ b/conf @@ -0,0 +1,22 @@ +# Configuration of Time and paths +##################################### +# +# You can add a configuration by adding a line. It has the following form: +# +# Project,inDir,outDir,start,end (comma-separated; no parentheses, quotes or spaces around the commas) +# +# > Project must be in (ecotrack, ecolux) +# > dirs can be relative. eg: ('./data','../data','~/data') +# > times must have a format like '2021-03-11 14:30:12' +######################################## +#ecolux -- Track-start: 7.7.20 // Light start: 20.7.20 // Dark: 10.7.20 - 20.7.20 +#ecotrack,in,out,2019-06-29 00:00:00,2019-07-18 00:00:00 #block1 +#ecotrack,in,out,2019-08-05 00:00:00,2019-08-16 00:00:00 #block2 +#ecotrack,in,out,2019-09-27 00:00:00,2019-10-11 00:00:00 #block3 +########################################## +ecolux,./ecolux/I/01-raw,./ecolux/I/02-inspect,2020-07-21 00:00:00,2020-08-18 23:59:00 +ecolux,./ecolux/II/01-raw,./ecolux/II/02-inspect,2020-09-15 00:00:00,2020-10-13 23:59:00 +ecolux,./ecolux/all/01-raw,./ecolux/all/02-inspect,2020-07-21 00:00:00,2020-11-11 23:59:00 +ecotrack,./ecotrack/all/01-raw,./ecotrack/all/02-inspect,2019-09-26 00:00:00,2019-10-21 23:59:00 +ecotrack,./test,./test,2019-09-26 00:00:00,2019-10-21 23:59:00 +ecotrack,/opt/virtualBoxVMs/share/block-I/01-raw,/opt/virtualBoxVMs/share/block-I/02-inspect,2019-09-26 00:00:00,2019-10-21 23:59:00 diff --git a/dataSync.py b/dataSync.py index b63e440a6e77a435b08fff1dc9147864b44278d9..a3e898e82464e08fe0dec05080cfc32ef63c2236 100755 --- a/dataSync.py +++ b/dataSync.py @@ -1,20 +1,20 @@ #!/usr/bin/python3 # clean, sort, merge raw data -# ecolux and ecotrack Projects +# for ecolux and ecotrack Projects # output one huge table with all necessary fields for further processing/vizualization # ############################ # TODO ############################ +# > ecotrack time is in CEST (both track and temp) => UTC+2 => C'est
une problème? # > rewrite getXData to initXData -# > add block name to blox. > add it to update-tags.sh # # > parse moonmap and skyglowdict from skyglow Project sources # -> maybe integrate skyglow project as submodule # > integrate with ST -# > make Skyglow optional -> TEST # > ST-Filenames should include 'unit-X'! # > how to process skylog error in merge() + # > Speed Up! (takes 2min per 1mio lines) # 1. Use Threads for # https://realpython.com/python-concurrency/#multiprocessing-version @@ -31,27 +31,6 @@ from pprint import pprint as pp from datetime import datetime as dt import dataFilter -# Configuration of Time and paths -##################################### -# ecolux -- Track-start: 7.7.20 // Light start: 20.7.20 // Dark: 10.7.20 - 20.7.20 -blox=[ -("ecolux","./ecolux/I/01-raw","./ecolux/I/02-inspect","2020-07-21 00:00:00","2020-08-18 23:59:00"), # Block1: 21.7 - 18.8 -("ecolux","./ecolux/II/01-raw","./ecolux/I/02-inspect","2020-09-15 00:00:00","2020-10-13 23:59:00"), # Block2: 15.9 - 13.10 -("ecolux","./ecolux/all/01-raw","./ecolux/I/all/02-inspect","2020-07-21 00:00:00","2020-11-11 23:59:00"), # Blox1&2 + 4Weex -("ecotrack","./ecotrack/all/01-raw","./ecotrack/all/02-inspect","2019-09-26 00:00:00","2019-10-21 23:59:00"), -# ("ecotrack","/opt/virtualBoxVMs/share/block-I/01-raw","/opt/virtualBoxVMs/share/block-I/02-inspect","2019-09-26 00:00:00","2019-10-21 23:59:00"), -#temp 26.9 - 21.10 -("ecotrack","./test","./test","2019-09-26 00:00:00","2019-10-21 23:59:00"), -# -#NOTE: ecotrack time is in CEST (both track and temp) => UTC+2 -#("ecotrack",in,out,"2019-06-29 00:00:00", "2019-07-18 00:00:00") #block1 -#("ecotrack",in,out,"2019-08-05 00:00:00", "2019-08-16 00:00:00") #block2 -#("ecotrack",in,out,"2019-09-27 00:00:00","2019-10-11 00:00:00") #block3 -# -## -- add here -- ## -] - -# TIME_FMT='%Y-%m-%d %H:%M:00' TIME_FMT='%Y-%m-%d %H:%M:%S' noTime = dt.fromtimestamp(0) TAG_LEN=len("04B94A7F7288588022") @@ -653,24 +632,35 @@ def make_rel_abs_path(path): def 
main(): - initFileStructure() - xtract() + # initFileStructure() + # xtract() - animalFile, skyglowFile, tempFile, trackFiles = getFileList() - data = Data(animalFile,skyglowFile,tempFile,trackFiles) - data.merge() - data.write() + # animalFile, skyglowFile, tempFile, trackFiles = getFileList() + # data = Data(animalFile,skyglowFile,tempFile,trackFiles) + # data.merge() + # data.write() - dataFilter.main(OUT_FILE) + # dataFilter.main(OUT_FILE) if( platform.system() == "Linux" ): - os.system(rootPath+"/updateGitIgnore.sh") - os.system(rootPath+"/unknown-tags.sh ") # TODO + os.system(rootPath + "/updateGitIgnore.sh") + os.system(rootPath + "/unknown-tags.sh " + OUTPUTDIR) + +def readConf(): + global PROJ, OUTPUTDIR, INPUTDIR, startTime, endTime + + confFileName="conf" + confFile = make_rel_abs_path(confFileName) + with open(confFile) as f: + lines = f.readlines() + + lines = [l.strip() for l in lines] + lines = list(filter(lambda q: q and q[0] != '#', lines)) + lines = [l.split(',') for l in lines] -if __name__ == "__main__" : known_projects=["ecotrack","ecolux"] #,"schrebatron" print(f" Process raw data for which block?\n {RULER}") - for idx, b in enumerate(blox): + for idx, b in enumerate(lines): print(f" {idx+1}) Project {b[0]}") print(f" from: {b[3]}") print(f" to: {b[4]}") @@ -678,23 +668,21 @@ if __name__ == "__main__" : print(f" out: {b[2]}\n") try: n=int(input())-1 - PROJ = blox[n][0].lower() - INPUTDIR = make_rel_abs_path( blox[n][1] ) - OUTPUTDIR = make_rel_abs_path( blox[n][2] ) - startTime = dt.strptime(blox[n][3], TIME_FMT) - endTime = dt.strptime(blox[n][4], TIME_FMT) + PROJ = lines[n][0].lower() + INPUTDIR = make_rel_abs_path( lines[n][1] ) + OUTPUTDIR = make_rel_abs_path( lines[n][2] ) + startTime = dt.strptime(lines[n][3], TIME_FMT) + endTime = dt.strptime(lines[n][4], TIME_FMT) if PROJ not in known_projects: raise Exception(f"Unknown Project '{PROJ}'") except Exception as e: - print(f"Error: {e}\n") - print("You can add a configuration by adding a 
line to the 'blox' structure. It has following form:\n") - print(" (Project, inDir, outDir, start, end)\n" ) - print(f"> Project must be in {known_projects}" ) - print(f"> dirs can be relative. eg: ('./data','../data','~/data')") - print(f"> times must have Format like '{dt.now().strftime(TIME_FMT)}'" ) - exit() + print(f"Error: {e}\n") + print(f"You can add a configuration by adding a line to the file {confFileName}.") + +if __name__ == "__main__" : + readConf() start = time.time() main() diff --git a/info b/info deleted file mode 100644 index c7110e9c5500a8eab1629afcce1f28db40ba3261..0000000000000000000000000000000000000000 --- a/info +++ /dev/null @@ -1,45 +0,0 @@ -##### -## BLOCK-ALL -###After################################################ -## Tags - total: 907 - known: 322 (35.50%) -unknown: 585 (64.49%) - -## Detections - total: 76645 - known: 62446 (81.47%) -unknown: 14199 (18.52%) - -###### -## BLOCK-I -###Before#####################After##################### -## Tags - total: 305 total: 305 - known: 132 (43.27%) known: 132 (43.27%) -unknown: 173 (56.72%) unknown: 173 (56.72%) - -## Detections - total: 3609863 total: 10294 - known: 3561725 (98.66%) known: 8442 (82.00%) -unknown: 48138 ( 1.33%) unknown: 1852 (17.99%) - - top 2 unknown: 1033 (10.0%) - top 10 unknown: 1428 (13.8%) - top 20 unknown: 1609 (15.6%) -####### -## BLOCK-II -###Before#####################After##################### -## Tags - total: 453 total: 453 - known: 158 (34.87%) known: 158 (34.87%) -unknown: 295 (65.12%) unknown: 295 (65.12%) - -## Detections - total: 8617060 total: 42575 - known: 8601009 (99.81%) known: 35255 (82.80%) -unknown: 16051 (00.18%) unknown: 7320 (17.19%) - - top 2 unknown: 3257 (7.6%) - top 10 unknown: 5032 (11.8%) - top 20 unknown: 5672 (13.3%) diff --git a/readme b/readme index 1769f7a53b3a4ed8398783bfc45e0df43b89913b..0158cc88d9b8e172443050664e8c1092a5b6b54d 100644 --- a/readme +++ b/readme @@ -11,19 +11,20 @@ You can also filter data independently, if you don't 
want to process everything again [takes quite long] or tweak some filter parameters. Run dataFilter.py for Usage instructions. - + Optional: + get time of last tracking-event per unit: source getLastDates.sh cd block-I/01-raw getLastDateAllUnits -print information about known and unknown tags and detections for Block-I - unknowntags.sh I +print information about known and unknown tags and detections + unknown-tags.sh ecolux/all/02-inspect -write number of unkown tags for block-II along with their occurrences to file: - unknowntags.sh II list +write number of unknown tags along with their occurrences to file: + unknown-tags.sh ecotrack/II/02-inspect list print tags in tagfile which length deviate: checkTagLengths.sh block-II/tags2 diff --git a/unknown-tags.sh b/unknown-tags.sh index 5d1987d182a77545fc1f27ff4806df75c71eef6a..758a43059bab359390dda41dd85008febde61dcf 100755 --- a/unknown-tags.sh +++ b/unknown-tags.sh @@ -1,31 +1,23 @@ #!/bin/bash - -scriptdir=`dirname $0` +# scriptdir=`dirname $0` this=`basename $0` if (( $# < 1 )); then echo "Usage:" - echo "1. $this project X" + echo "1. $this dir" echo " prints info about tags and detections" echo "" - echo "2. $this n list" + echo "2. $this dir list" echo " write list of unknown tags together with their occurrences to file unknowntags-block-{n}" echo "" - echo "Looks for data-file ( ${scriptdir}/block-{n}/02-inspect/eco*-data-*-filtered )" + echo "Looks for data-file ( {dir}/*-data-*-filtered ) in {dir}" exit fi -# filename_suffix=`basename "$1" | cut -d'-' -f5-` -# if [ "$filename_suffix" == "" ]; then -# #before -# COL_TAG=9 -# HEADER_SIZE=`grep -c "#" "$1"` -# else - #after col-insertions from dataFilter.py +dir=$1 COL_TAG=13 HEADER_SIZE=1 -# fi -in=`ls ${scriptdir}/${project}-${1}/02-inspect/eco*-data-*-filtered | head -1` +in=`ls ${dir}/*-data-*-filtered | head -1` LEN_W_HEADER=`wc -l "$in" | cut -d" " -f1` LEN=$(( $LEN_W_HEADER - $HEADER_SIZE))