diff --git a/.gitignore b/.gitignore index 92729c96c051b819b9b1f879173dcda445a180ce..c11786d300a0fbf52bb8105ae6b47740c185db73 100644 --- a/.gitignore +++ b/.gitignore @@ -1,14 +1,8 @@ block-II/02-inspect/ecotron-data-20200915-20201013 -block-II/02-inspect/ecotron-data-20200915-20201013-cutOff-coolOff-10 -block-II/01-raw/data-eco-unit-2-node-22-start-2020-07-08-file.txt -block-II/01-raw/data-eco-unit-11-node-113-start-2020-07-08-file.txt block-II/01-raw/backup-ecoTron-2020-10-28-06-00-1603861202.tgz -block-II/01-raw/data-eco-unit-11-node-111-start-2020-07-08-file.txt -block-II/01-raw/data-eco-unit-9-node-91-start-2020-07-08-file.txt block-I/02-inspect/ecotron-data-20200721-20200818 -block-I/01-raw/data-eco-unit-11-node-113-start-2020-07-08-file.txt -block-I/01-raw/data-eco-unit-9-node-91-start-2020-07-08-file.txt *.txt test/* *# *skyglow.log +__pycache__ diff --git a/block-I/02-inspect/ecotron-data-20200721-20200818.tgz b/block-I/02-inspect/ecotron-data-20200721-20200818.tgz index 25634c449a532908efad2b0ba828cc7414f2d4b3..4eab63985d85f62e5dc0fd7fe94208ecb233afe4 100644 Binary files a/block-I/02-inspect/ecotron-data-20200721-20200818.tgz and b/block-I/02-inspect/ecotron-data-20200721-20200818.tgz differ diff --git a/block-II/01-raw/blubb b/block-II/01-raw/blubb deleted file mode 100644 index 30c7bdb5a5e1cc5f25a78edfeb5a180c9cf99d49..0000000000000000000000000000000000000000 --- a/block-II/01-raw/blubb +++ /dev/null @@ -1,15 +0,0 @@ - -2020-10-04 13:22:07 data-eco-unit-7-node-71-start-2020-07-08-file.txt -2020-10-12 17:39:20 data-eco-unit-13-node-131-start-2020-07-08-file.txt - -2020-10-15 17:30:50 data-eco-unit-2-node-22-start-2020-07-08-file.txt -2020-10-15 21:54:05 data-eco-unit-12-node-121-start-2020-07-08-file.txt - -2020-10-27 14:30:19 data-eco-unit-10-node-103-start-2020-07-08-file.txt -2020-10-27 20:20:16 data-eco-unit-5-node-51-start-2020-07-08-file.txt -2020-10-27 22:01:38 data-eco-unit-3-node-33-start-2020-07-08-file.txt -2020-10-27 22:39:46 data-eco-unit-9-node-91-start-2020-07-08-file.txt -2020-10-28 04:43:10 data-eco-unit-11-node-113-start-2020-07-08-file.txt -2020-10-28 05:11:59 data-eco-unit-6-node-63-start-2020-07-08-file.txt -2020-10-28 05:15:18 data-eco-unit-4-node-43-start-2020-07-08-file.txt -2020-10-28 05:35:37 data-eco-unit-8-node-83-start-2020-07-08-file.txt diff --git a/block-II/02-inspect/ecotron-data-20200915-20201013-cutOff-coolOff-10.tgz b/block-II/02-inspect/ecotron-data-20200915-20201013-cutOff-coolOff-10.tgz index 00e36c6e551289392d0c8603f727f9464f023eea..62129e622d72a61a4ab73852e6e95aa8bc605740 100644 Binary files a/block-II/02-inspect/ecotron-data-20200915-20201013-cutOff-coolOff-10.tgz and b/block-II/02-inspect/ecotron-data-20200915-20201013-cutOff-coolOff-10.tgz differ diff --git a/block-II/02-inspect/ecotron-data-20200915-20201013.tgz b/block-II/02-inspect/ecotron-data-20200915-20201013.tgz index 53ac0a67595b6743e05a5786bcabf27c8cc50fc2..91e1568db0ba6d9216c191ae1986901435b86408 100644 Binary files a/block-II/02-inspect/ecotron-data-20200915-20201013.tgz and b/block-II/02-inspect/ecotron-data-20200915-20201013.tgz differ diff --git a/dataFilterCooloff.py b/dataFilterCoolOff.py similarity index 77% rename from dataFilterCooloff.py rename to dataFilterCoolOff.py index 49187198584d95cbf6d8cc24208db78274a45ba3..048a6c81e00bcd381776f1943f4841b2ba03da6b 100755 --- a/dataFilterCooloff.py +++ b/dataFilterCoolOff.py @@ -1,6 +1,12 @@ #!/usr/bin/python3 #todo +# print more info: +# nLines before coolOff +# nLines after coolOff +# nLines after cutOff (total nDel lines) +# +# def coolOff() and def cutOff() # think about plateaus? eg. when a beetle chills on sensor for a day? (no patchcrossevent - pce) import sys, os @@ -10,7 +16,14 @@ COL_TIMESTAMP=0 COL_TAG=8 HEADER="#timestamp, milliseconds, date, time, unit, x, y, tagID, species, speciesnumber, weight_without_tag_mg, moon_real_mLux, moon_eco_mLux, skyglow_Lux\n" -def main(): + +def main(arg1,arg2=10): + global inFile, coolOff + inFile=arg1 + coolOff=arg2 + + with open(inFile) as f: + lines = f.readlines() filtered, recently = [], [] for line in lines: @@ -30,7 +43,6 @@ def main(): recently.append((timestamp,tag)) filtered.append(line) - #cutOffEnd print(recently) for ts, tag in recently: @@ -54,21 +66,19 @@ def main(): print(f"done. Deleted {ndel} lines up to ts: {ts} idx: {i}") - # [print(f, end='') for f in filtered] write(filtered) - def write(data): - fileName=f"{file}-cutOff-coolOff-{coolOff}" - with open(fileName, "w") as f: + outFile=f"{inFile}-cutOff-coolOff-{coolOff}" + print(f"writing to {outFile}") + with open(outFile, "w") as f: f.writelines(HEADER) f.writelines(data) - cmd_compress=f"tar -zcvf {fileName}.tgz {fileName}" # ">/dev/null 2>/dev/null" + cmd_compress=f"tar -zcvf {outFile}.tgz {outFile}" # ">/dev/null 2>/dev/null" os.system(cmd_compress) - if __name__ == "__main__" : if len(sys.argv) == 1: this = os.path.basename(__file__) @@ -76,11 +86,7 @@ if __name__ == "__main__" : sys.exit(f' "Usage: {this} dataFile [coolOff]\n\n' f' Cooloff defines the time in seconds that needs to pass before a tag can be detected again. Default = 10\n' f' Will also cut off tags at end. (eg tag lying on sensor)') - - file = sys.argv[1] - coolOff = int(sys.argv[2]) if len(sys.argv) >= 3 else 10 - with open(file) as f: - lines = f.readlines() - - main() \ No newline at end of file + main(sys.argv[1:]) +# else: +# print("called as a module via import") diff --git a/dataSync.py b/dataSync.py index 8d0afe7a15e21510f0b8d072643bb45784ed3ebd..c6548b2ee29981ff5713d579f2b03c394843f436 100755 --- a/dataSync.py +++ b/dataSync.py @@ -7,6 +7,7 @@ ############################ # TODO ############################ +# > could add COOLOFF to blox-config. Now defaulting to 10 # > parse moonmap and skyglowdict from skyglow Project sources (submodule ... ?) # > integrate with ST # > make Skyglow optional -> TEST @@ -31,15 +32,19 @@ import sys, getopt, os, re, datetime, string, time from pprint import pprint as pp from datetime import datetime as dt +import dataFilterCoolOff PROJ = "ecotron" # PROJ = "schrebatron" -# Experiment Times! -################### -# Track-start: 7.7 // Light start: 20.7 // Dark: 10.7 - 20.7 -blox=[("./block-I/01-raw","./block-I/02-inspect","2020-07-21 00:00:00","2020-08-18 23:59:00"), # Block1: 21.7 - 18.8 - ("./block-II/01-raw","./block-II/02-inspect","2020-09-15 00:00:00","2020-10-13 23:59:00")] # Block2: 15.9 - 13.10 +# Experiment Config! Time and paths. +# Track-start: 7.7.20 // Light start: 20.7.20 // Dark: 10.7.20 - 20.7.20 +blox=[ +("./block-I/01-raw","./block-I/02-inspect","2020-07-21 00:00:00","2020-08-18 23:59:00"), # Block1: 21.7 - 18.8 +("./block-II/01-raw","./block-II/02-inspect","2020-09-15 00:00:00","2020-10-13 23:59:00"), # Block2: 15.9 - 13.10 +("./test","./test","2020-07-21 00:00:00","2020-08-18 23:59:00"), # test +] + TIME_FMT='%Y-%m-%d %H:%M:00' noTime = dt.fromtimestamp(0) @@ -79,6 +84,7 @@ A_COL_TAG=3 # out file OUT_FMT='%Y%m%d' +OUT_FILE='default' DELIM=',' COL_UID=4 COL_TAG=8 @@ -195,6 +201,9 @@ class Data: self.startLight = self.getLightTime( self.lightData[0] ) self.endLight = self.getLightTime( self.lightData[-1] ) + global OUT_FILE + OUT_FILE = f"{OUTPUTDIR}{os.sep}{PROJ}-data-{self.startTrack.strftime(OUT_FMT)}-{self.endTrack.strftime(OUT_FMT)}" + pp(self.header()) def getTrackData(self, fileName): @@ -312,9 +321,6 @@ class Data: ttag = tLine.split(DELIM)[COL_TAG] for aLine in self.animalData: - # print(tLine) - # print(aLine) - # print(atag) aLine = aLine.split(DELIM) atag = aLine[A_COL_TAG] @@ -414,17 +420,14 @@ class Data: def write(self): - fileName = OUTPUTDIR+f"/{PROJ}-data-{self.startTrack.strftime(OUT_FMT)}-{self.endTrack.strftime(OUT_FMT)}" - pp("write to " + fileName) - - os.makedirs(os.path.dirname(fileName), exist_ok=True) - with open(fileName, "w") as f: + pp("write to " + OUT_FILE) + os.makedirs(os.path.dirname(OUT_FILE), exist_ok=True) + with open(OUT_FILE, "w") as f: f.writelines(self.header()) self.trackData=map(lambda x:x+'\n', self.trackData) f.writelines(self.trackData) - - cmd_compress=f"tar -zcvf {fileName}.tgz {fileName}" # ">/dev/null 2>/dev/null" + cmd_compress=f"tar -zcvf {OUT_FILE}.tgz {OUT_FILE}" # ">/dev/null 2>/dev/null" os.system(cmd_compress) def xtract(): @@ -469,14 +472,24 @@ def make_rel_abs_path(path): return path def main(): - + xtract() animalFile, skyglowFile, trackFiles = getFileList() data = Data(animalFile,skyglowFile,trackFiles) data.merge() data.write() + dataFilterCoolOff.main(OUT_FILE) + #call updateGitIgnore.sh? + if __name__ == "__main__" : + + print( os.path.dirname(os.path.abspath(__file__)) ) + cmd_compress=f"updateGitIgnore.sh" + os.system(cmd_compress) + exit() + + print(" Process raw data for which block?") for idx, b in enumerate(blox): print(f" {idx+1}) {b[2]} >> {b[3]}") @@ -496,6 +509,3 @@ if __name__ == "__main__" : main() end = time.time() print(f"Took {end - start} seconds") - - #call dataFilterCoolOff? - #call updateGitIgnore.sh? \ No newline at end of file diff --git a/readme b/readme index 94a5a6b9807cd0962ec8f8e599c99dcc69cc2c6b..343774b48f0ebdaef6be78dc7628007bfe927ec1 100644 --- a/readme +++ b/readme @@ -1,8 +1,6 @@ -1. extract .tgz -2. run dataSync on dir --> should output a cleaned table with light, track and animalData -3. filterCooloff.py data 10 --> only count tracks with difference of 10 seconds inbetween! (this greatly reduces the size) +1. run dataSync on dir containing compressed data (.tgz suffix) +-> should output a cleaned table containing light, track and animalData +-> should also produce a compressed version of the table Optional: @@ -19,3 +17,7 @@ print tags in tagfile which length deviate: update gitignore to not add large files (>50M) to git repo: updateGitIgnore.sh + +filterCooloff.py data 10 +-> only count tracks with difference of 10 seconds inbetween! (this greatly reduces the size) +-> this script is called automatically from dataSync.py but can be invoked manually if you want t o change the coolOff phase \ No newline at end of file diff --git a/test.py b/test.py deleted file mode 100755 index 2a47db064e59752b6080edb88c9ed54fc0187b5d..0000000000000000000000000000000000000000 --- a/test.py +++ /dev/null @@ -1,7 +0,0 @@ -#!/usr/bin/python3 -import sys - -import "../../skyglow/skyglow.py" -lightFile=r"../../skyglow/skyglow.py" -# from lightFile import - diff --git a/updateGitIgnore.sh b/updateGitIgnore.sh index 11a880fcf6038ff7e93d6ded0c68ae6e1032edc9..1af2ebd2ebfc1eca0aecd856b169e1743337badd 100755 --- a/updateGitIgnore.sh +++ b/updateGitIgnore.sh @@ -1,12 +1,14 @@ #!/bin/bash -find . -size +50M | sed 's/\.\///g' > .gitignore +#ignore .txt files in find +find . -size +50M ! -name '*.txt' | sed 's/\.\///g' > .gitignore list=( "*.txt" "test/*" "*#" #ods-tmp-files *skyglow.log +__pycache__ ) for i in "${list[@]}"