put block config in list of tuples. make script interactive. ask for block at...

put block config in list of tuples. make script interactive. ask for block at startup. Choose set of dirs and times.

put block config in list of tuples. make script interactive. ask for block at...
53fc2566 · am0ebe · 4695a22f · 53fc2566 · 53fc2566
Commit 53fc2566 authored 4 years ago by am0ebe
--- a/dataFilterCooloff.py
+++ b/dataFilterCooloff.py
 #!/usr/bin/python3

 #todo 
-#	plateaus? when beetle chills on sensor for a day? (no patchcrossevent - pce)
+#	think about plateaus? eg. when a beetle chills on sensor for a day? (no patchcrossevent - pce)

 import sys, os


--- a/dataSync.py
+++ b/dataSync.py
@@ -2,25 +2,19 @@
 # clean, sort, merge raw data
 # output should be one huge table with all necessary fields for further processing/visualization
 #
+# files are 'skyglow.log','tags' and trackfiles (containing 'unit-x' in name, where x is a number)
+#
 ############################
 # TODO
 ############################
-#	use NA instead of X if not found (R-compatible)
-#	add isHabitat col dependent on Pos
-#	cutoff continues track until end (dead beetle on sensor)
-#	merge H's species
-#
-#	parse moonmap and skyglowdict from skyglow Project sources (submodule ... ?)
-#
-#
-#	include temperature and humidity data?
-#	integrate with ST
+# > parse moonmap and skyglowdict from skyglow Project sources (submodule ... ?)
+# > integrate with ST
 #		> make Skyglow optional -> TEST
 #		> ST-Filenames should include 'unit-X'!
+# > how to process skylog error in merge()
+# > include temperature and humidity data?
 #
-# how to process skylog error in merge()
-#
-# Speed Up! (takes 16minutes for 8mio lines...)
+# > Speed Up! (takes 16minutes for 8mio lines...)
 # 	1. Threads 
 # 	> see https://realpython.com/python-concurrency/#multiprocessing-version
 # 	use threads for 
@@ -28,22 +22,11 @@
 # 	> merging (intermediate)
 #		* parse chunks of trackData. call sort at end
 #		* calc indices of data dependent on numThreads
-#
 #	2. rewrite animalmerge for loops
 #	3. Consider using Pandas, NumPy, Cython, Pypy, Koalas, inline C++/C/Bash, sql (SQLAlchemy)
 #
-#	how to profile to find bottlenecks?
-#
-#	for lists: use append() instead of + / += operators
-#
-# Experiment Times
-####################
-# Track start 	 7.7
-# Light start 	20.7
-# Dark: 10.7 - 20.7 
-# Block1: 21.7 - 18.8
-# Block2: 15.9 - 13.10
-#
+# > how to profile to find bottlenecks?
+#	> for lists: use append() instead of + / += operators

 import sys, getopt, os, re, datetime, string, time
 from pprint import pprint as pp
@@ -52,12 +35,14 @@ from datetime import datetime as dt
 PROJ = "ecotron"
 # PROJ = "schrebatron"

+# Experiment Times!
+###################
+# Track-start: 7.7 // Light start: 20.7 // Dark: 10.7 - 20.7
+blox=[("./block-I/01-raw","./block-I/02-inspect","2020-07-21 00:00:00","2020-08-18 23:59:00"), 	# Block1: 21.7 - 18.8
+      ("./block-II/01-raw","./block-II/02-inspect","2020-09-15 00:00:00","2020-10-13 23:59:00")]	# Block2: 15.9 - 13.10
+
 TIME_FMT='%Y-%m-%d %H:%M:00'
 noTime = dt.fromtimestamp(0)
-startTime = dt.strptime('2020-07-21 00:00:00', TIME_FMT)
-endTime = dt.strptime('2020-08-18 23:59:00', TIME_FMT)
-# startTime = endTime = noTime
-
 TAG_LEN=len("04B94A7F7288588022")

 # trackfile
@@ -478,6 +463,11 @@ def getFileList():

 	return animalFile, skyglowFile, trackFiles

+def make_rel_abs_path(path):
+	if path.startswith('.'):
+		path=os.path.dirname(os.path.abspath(__file__))+os.sep+path
+	return path
+
 def main():

 	xtract()
@@ -487,33 +477,25 @@ def main():
 	data.write()

 if __name__ == "__main__" :
-	if len(sys.argv) == 1:
-		this = os.path.basename(__file__)
-		sys.exit(f'Usage:\n'
-				f'   {this} <dir> - search files and write to <dir>\n'
-				f'   {this} <in> <out> - search files in <in> and write to <out>\n\n'
-				f"   files are 'skyglow.log','tags' and trackfiles (containing 'unit-x' in name, where x is a number)")
-	elif len(sys.argv) == 2:
-		INPUTDIR = sys.argv[1]
-		OUTPUTDIR = INPUTDIR
-	else:
-		INPUTDIR = sys.argv[1]
-		OUTPUTDIR = sys.argv[2]
-
-	print(" INPUTDIR: ", INPUTDIR)
-	print("OUTPUTDIR: ", OUTPUTDIR)
-
-	#Block-I
-	# startTime = dt.strptime('2020-07-21 00:00:00', TIME_FMT)
-	# endTime = dt.strptime('2020-08-18 23:59:00', TIME_FMT)
-	#Block-II
-	# startTime = dt.strptime('2020-09-15 00:00:00', TIME_FMT)
-	# endTime = dt.strptime('2020-10-13 23:59:00', TIME_FMT)
-
-	#ask for block
-	# nBlock=input("choose")
-
+	print("	Process raw data for which block?")
+	for idx, b in enumerate(blox):
+		print(f"    {idx+1}) {b[2]} >> {b[3]}")
+	try:
+		n=int(input())-1
+		INPUTDIR = make_rel_abs_path( blox[n][0] )
+		OUTPUTDIR = make_rel_abs_path( blox[n][1] )
+		startTime = dt.strptime(blox[n][2], TIME_FMT)
+		endTime = dt.strptime(blox[n][3], TIME_FMT)
+	except:
+		print("Insert Block Number..")
+		exit()
+
+	print(" inputdir: ", INPUTDIR)
+	print("outputdir: ", OUTPUTDIR)
 	start = time.time()
 	main()
 	end = time.time()
 	print(f"Took {end - start} seconds")
+
+	#call dataFilterCooloff.py?
+	#call updateGitIgnore.sh?
\ No newline at end of file