Skip to content
Snippets Groups Projects
Select Git revision
  • a72dcded621a82feb277f571f4b7989d7310d628
  • master default
  • dev
  • new_temp_table
  • ecotrack-data
  • ecolux-data
6 results

dataSync.py

Blame
  • user avatar
    am0ebe authored
    c5ad2ad9
    History
    Code owners
    Assign users and groups as approvers for specific file changes. Learn more.
    dataSync.py 20.15 KiB
    #!/usr/bin/python3
    #
    # clean, sort, merge raw data
    # for ecolux and ecotrack Projects
    # output one huge table with all necessary fields for further processing/vizualization
    #
    ############################
    # TODO / MISC
    ############################
    # rewrite conf.json structure to reflect projects > [conf, blox]
    # > ecotrack time is in CEST (both track and temp) => UTC+2 => C'est une problème?
    # > parse moonmap and skyglowdict from skyglow Project sources 
    # -> maybe integrate skyglow project as submodule
    # > integrate with ST
    #		> ST-Filenames should include 'unit-X'!
    # > how to process skylog error in merge()
    
    # > Speed Up! (takes 2min per 1mio lines)
    #	1. Use Threads for
    #	https://realpython.com/python-concurrency/#multiprocessing-version
    #		> reading files (easy)
    #		> merging (intermediate)
    #		* parse chunks of trackData. call sort at end
    #		* calc indices of data dependent on numThreads
    #	2. rewrite animalmerge for loops
    #	3. Consider using Pandas, NumPy, Cython, Pypy, Koalas, inline C++/C/Bash, sql (SQLAlchemy)
    #Q: wieviel track-events wurden von clean rausgefiltert?
    import sys, getopt, os, re, string, time, platform, json, datetime
    from pprint import pp as p
    from datetime import datetime as dt
    import dataFilter
    
    TIME_FMT='%Y-%m-%d %H:%M:%S'
    noTime = dt.fromtimestamp(0)
    TAG_LEN=len("04B94A7F7288588022")
    RULER=f"{'#'*100}"
    N_UNITS=12
    confFileName="./conf.json"
    min_temp=14
    temp_precision=2
    
    def initFileStructure():
    
    	## trackFiles
    	global T_DELIM, TIMESTAMP_LEN, T_COL_MS, T_COL_X, T_COL_Y, T_COL_TAG, T_MINLEN, T_MAXLEN, T_NCOLS
    	T_DELIM=';'
    	TIMESTAMP_LEN=10
    	#t_col_timestamp=0
    	T_COL_MS=1
    	T_COL_X=2
    	T_COL_Y=3
    	T_COL_TAG=4
    	#t_col_signalstrength=5
    	if PROJ == "ecolux": #track+light
    		T_MINLEN = 38 # without newline.
    		T_MAXLEN = 40 # ms are variable in length (0-999). Will be padded later for uniform len
    		T_NCOLS = 6
    	elif PROJ == "ecotrack": #track+temp 2019
    		T_MINLEN = 36
    		T_MAXLEN = 38
    		T_NCOLS = 5
    	# elif PROJ == "schrebatron":
     		#SchrebaTron: ms is padded with zeros
    
    	if hasLight():
    	## lightFile
    		global L_DELIM, L_COL_MOON_REAL, L_COL_MOON_DMX, L_COL_SKYGLOW, L_MINLEN, L_MAXLEN, L_NCOLS
    		L_DELIM='\t'
    		L_COL_MOON_REAL=1
    		L_COL_MOON_DMX=2
    		L_COL_SKYGLOW=3
    		L_MINLEN = 61 #2020-07-21 15:15:00 	0.0	0	[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
    		L_MAXLEN = 76 #2020-07-21 15:09:00 	0.0	0	[15, 54, 4, 19, 0, 1, 5, 0, 40, 12, 123, 123]
    		L_NCOLS = 4
    
    	if hasTemp():
    	## tempFile (C for Celsius)
    		global C_TIME_FMT, C_COL_TIME, C_COL_UNIT, C_COL_HABITAT, C_COL_TEMP, C_MIN_LEN, C_MAX_LEN, C_DELIM, C_NCOLS
    		C_TIME_FMT = "%Y-%m-%d %H:%M:%S.%f"
    		C_COL_TIME = 0
    		C_COL_UNIT = 1
    		C_COL_HABITAT = 2
    		C_COL_TEMP = 3
    		C_MIN_LEN = 30 #coarse approximation
    		C_MAX_LEN = 40
    		C_DELIM = ';'
    		C_NCOLS = 4
    
    	## animalFile
    	global A_DELIM, A_MINLEN, A_MAXLEN, A_NCOLS, A_COL_SPECIES, A_COL_SPECIES_IND, A_COL_WEIGHT_WO_TAG_MG, A_COL_TAG
    	if PROJ == "ecolux":
    		A_DELIM='\t'
    		A_MINLEN = 25
    		A_MAXLEN = 30
    		A_NCOLS = 4
    		A_COL_SPECIES=0
    		A_COL_SPECIES_IND=1
    		A_COL_WEIGHT_WO_TAG_MG=2
    		A_COL_TAG=3
    	elif PROJ == "ecotrack":
    		A_DELIM=';'
    		A_MINLEN = 30
    		A_MAXLEN = 45
    		A_NCOLS = 7 #/w comments
    		A_COL_SPECIES=1
    		A_COL_SPECIES_IND=0
    		A_COL_WEIGHT_WO_TAG_MG=2
    		A_COL_TAG=5
    
    
    	# out file
    	###########
    	global OUT_FMT, DELIM, COL_UID, COL_HID, COL_TAG
    	OUT_FMT='%Y%m%d'
    	DELIM=','
    	COL_UID=4
    	COL_TAG=8
    
    
    #mlx (idx is also dmx-byte value)
    moonMap = [ 1.409,
    		 8.369,
    		12.333,
    		16.210,
    		20.563,
    		24.716,
    		28.907,
    		33.363,
    		36.843,
    		41.137,
    		46.527,
    		50.880,
    		54.727,
    		59.617,
    		63.967,
    		67.817,
    		73.173,
    		77.213,
    		82.827,
    		87.037,
    		91.033,
    		95.320,
    		99.470,
    		104.366,
    		108.833,
    		113.667, #25
    		123.733,
    		128.567,
    		133.2,
    		138.3,
    		143.367,
    		148.233,
    		152.233,
    		156.933,
    		162.5,
    		167.5,
    		172.1,
    		176.767,
    		181.567,
    		187,
    		192.067,
    		196.433,
    		201.467,
    		206.5,
    		211.733,
    		216.4,
    		221.033,
    		226,
    		231.1,
    		235.733,
    		240.4, #50
    		245.6,
    		250.433,
    		254.567,
    		259.5,
    		263.933,
    		268.3,
    		272.6, #274 == new max
    		277.267,
    		281.433,
    		287.467, #60
    	]
    
    #Mapping skyglow dmx+nled to lux
    skyglowDict = { 
    			 15: 0.08,
    			 54: 0.3,
    			  4: 1,
    			 19: 0.1,
    			  0: 0,
    			  1: 0.01,
    			  5: 0.03,
    			  0: 0,
    			 40: 10,
    			 12: 3,
    			123: 30,
    			123: 30,
    }
    
    def hasLight():
    	return PROJ == "ecolux"
    
    def hasTemp():
    	return PROJ == "ecotrack"
    
    def getUID(fileName):
    	""" extract and return unit-id from fileName """
    	f=fileName.split('-')
    	idx=f.index("unit")+1
    	uid=int(f[int(idx)])
    	return 1 if uid == 13 else uid
    
    class Data:
    
    	def __init__(self):
    
    		animalFile, skyglowFile, tempFile, trackFiles = getFileList()
    
    		self.initAnimalData(animalFile)
    		self.initTrackData(trackFiles)
    
    		if hasLight():
    			self.initLightData(skyglowFile)
    
    		if hasTemp():
    			self.initTempData(tempFile)
    
    		global OUT_FILE
    		OUT_FILE = f"{PROJ}-data-{self.startTrack.strftime(OUT_FMT)}-{self.endTrack.strftime(OUT_FMT)}"
    
    		p(self.header())
    
    	def initTrackData(self,trackFiles):
    		self.trackData = []
    		for file in trackFiles:
    			self.trackData.extend( self.getTrackData(file) )
    
    		# sort and make unique (no duplicat lines)
    		lenBefore=len(self.trackData)
    		self.trackData = list( set( self.trackData ) )
    		self.trackData.sort()
    		nDuplicate=lenBefore-len(self.trackData)
    		if nDuplicate:
    			p(f"removed {nDuplicate} duplicate lines")
    
    		self.startTrack = self.getTrackTime( self.trackData[0] )
    		self.endTrack = self.getTrackTime( self.trackData[-1] )
    
    	def getTrackData(self, fileName):
    		""" get lines from all tracklogs, clean, merge and add/rm columns"""
    
    		p(f"processing: {fileName}")
    		with open(INPUTDIR+os.sep+fileName) as f:
    
    			lines = f.readlines()
    			lines = self.clean(lines, T_MINLEN, T_MAXLEN, T_NCOLS, T_DELIM, self.getTrackTime)
    			lines = [l for l in lines if not l.split(DELIM)[T_COL_TAG].isdigit()]
    
    			if not lines:
    				p("-> empty // non-usable")
    				return []
    
    		uid=getUID(fileName)
    		idx_after_ms = TIMESTAMP_LEN + len('999') + len(DELIM)
    		for idx, line in enumerate(lines):
    
    			if PROJ in ["ecolux","ecotrack"]:
    				# pad ms column with zeros to fit three digits
    				cols=line.split(DELIM)
    				ms_digits=len(cols[T_COL_MS])
    				if ms_digits < 3:
    					line = line[:TIMESTAMP_LEN+1] + '0'*(3-ms_digits) + line[TIMESTAMP_LEN+1:]
    
    			trackTime = self.getTrackTime(line)
    			date = trackTime.strftime('%Y-%m-%d')
    			time = trackTime.strftime('%H:%M')
    
    			x = int(cols[T_COL_X])
    			y = int(cols[T_COL_Y])
    
    			habitat = self.calcHabitat(x,y)
    
    			# add columns (date,time,uid,habitat)
    			line = line[:idx_after_ms] + DELIM + date + DELIM + time + DELIM + str(uid) + DELIM + str(habitat) + line[idx_after_ms:]
    
    			if PROJ == "ecolux":
    				line = line[:line.rindex(DELIM)] # remove signalstrength column
    
    			lines[idx] = line
    
    		assert (lines), "trackData: Not valid. Check times / formatting."
    		return lines
    
    	def calcHabitat(self,x,y):
    		if PROJ == "ecolux":
    			if x in (1,2) and y in (1,2):
    				return 1
    			elif x in (5,6) and y in (1,2):
    				return 2
    			elif x in (1,2) and y in (5,6):
    				return 3
    			elif x in (5,6) and y in (5,6):
    				return 4
    			else:
    				return 0
    		elif PROJ == "ecotrack":
    			if x in (1,2) and y in (1,2,5,6):
    				return 2
    			elif x in (1,2) and y in (3,4,7,8):
    				return 3
    			elif x in (3,4) and y in (1,2,5,6):
    				return 3
    			elif x in (3,4) and y in (3,4,7,8):
    				return 4
    			else:
    				return "NA"
    		else:
    			return "NA"
    
    	def initLightData(self,file):
    
    		with open(file) as f:
    			lines = f.readlines()
    			lines = self.clean(lines, L_MINLEN, L_MAXLEN, L_NCOLS, L_DELIM, self.getLightTime)
    			pattern = re.compile(r"\s\t") # skylog used to have an xtra space (changed 200915) TODO remove
    			lines = [re.sub(pattern, "\t", x) for x in lines ]
    
    		assert (lines), "skyglow: Not valid. Check times / formatting."
    
    		self.lightData = lines
    		self.startLight = self.getLightTime( self.lightData[0] )
    		self.endLight = self.getLightTime( self.lightData[-1] )
    		
    
    	def initTempData(self,file):
    		""" get temp data and save in a list of list of tuples >> tempData[unit][habitat] returns (time,temp)"""
    		p(f"processing: {file.split(os.sep)[-1]}")
    		with open(file) as f:
    			lines = f.readlines()
    
    		#Note: add timedelta-margin so trackdata can be merged correctly at the edges
    		lines = self.clean(lines, C_MIN_LEN, C_MAX_LEN, C_NCOLS, C_DELIM, self.getTempTime, datetime.timedelta(minutes=30))
    		lines.sort()
    
    		self.startTemp = self.getTempTime( lines[0] )
    		self.endTemp = self.getTempTime( lines[-1] )
    		self.lenTemp = len(lines)
    
     		#list of empty lists
    		self.tempData = [[] for _ in range(N_UNITS) ]
    		for l in lines:
    			cols = l.split(DELIM)
    
    			uid = int( cols[C_COL_UNIT] ) - 1
    			time = dt.strptime( cols[C_COL_TIME],C_TIME_FMT )
    			temp = float( cols[C_COL_TEMP] )
    			if temp < min_temp:
    				continue
    
    			if self.tempData[uid]:
    				lastTime = self.tempData[uid][-1][0]
    				if time == lastTime:
    					self.tempData[uid][-1].append( temp )
    				else:
    					self.tempData[uid].append( [time,temp] )
    			else:
    				self.tempData[uid].append( [time,temp] )
    
    		#calc mean for all 4 sensors
    		for uid in range(N_UNITS):
    			for i,x in enumerate( self.tempData[uid] ):
    				n=0
    				mean_temp=0
    				for temp in x[1:]:
    					mean_temp+=temp
    					n+=1
    
    				mean_temp= round( mean_temp/n, temp_precision )
    
    				self.tempData[uid][i] = [time,mean_temp]
    
    
    	def initAnimalData(self,file):
    		with open(file) as f:
    			lines = f.readlines()
    
    		lines = list(filter(lambda l: not l.split(A_DELIM)[A_COL_TAG].startswith("XXX"), lines)) #filter "XXX" tags
    		lines = list(filter(lambda l: l.split(A_DELIM)[A_COL_TAG] != "049E4C5EF2F44880CE", lines)) #filter thomas control tag
    		lines = [ l[:l.rindex(A_DELIM)+1] for l in lines ] #remove comments
    		lines = self.clean(lines, A_MINLEN, A_MAXLEN, A_NCOLS, A_DELIM)
    
    		#TODO: check for duplicate tags and pp warning / ignore?
    		assert (lines), "tags: Not valid. Check times / formatting."
    
    		self.animalData = lines
    
    	def clean(self, lines, minLen, maxLen, nCols, sep, timeFunc=None, delta=datetime.timedelta(minutes=0)):
    		""" remove trailing newline, empty, comments and all-digits and remove lines before startTime or after endTime"""
    		lines = [l.strip() for l in lines]
    		lines = [l for l in lines if minLen <= len(l) <= maxLen]
    		lines = list(filter(lambda q: q and q[0] != '#', lines))
    		lines = [l for l in lines if len(l.split(sep)) == nCols]
    		lines = [l.replace(sep,DELIM) for l in lines]
    		lines = [l.replace('\ufeff','') for l in lines] #cutout BOM (ByteOrderMark). Showed up in ecotrack temp-csv.
    
    		if timeFunc:
    			# global startTime, endTime
    			lines = list(filter(lambda q: startTime-delta <= timeFunc(q) < endTime+delta, lines)) #only between start and endTime
    
    		return lines
    
    	def parseLightLine(self, line, uid):
    		if not hasLight():
    			return
    
    		cols=line.split(DELIM)
    		moon_real = float(cols[L_COL_MOON_REAL])*1000 #convert to mLux
    		moon_dmx = int(cols[L_COL_MOON_DMX])
    		moon_eco = moonMap[moon_dmx]
    
    		#int list from string format like '[1, 2, 3, 4, 5]'
    		skyglowList = cols[L_COL_SKYGLOW:]
    		skyglowList[0] = skyglowList[0][1:] 	#remove '['
    		skyglowList[-1] = skyglowList[-1][:-1]	#remove ']'
    		skyglow = skyglowDict.get(int(skyglowList[uid-1]),'')
    
    		return (moon_real, moon_eco, skyglow)
    
    	def merge(self):
    		
    		self.mergeAnimals()
    		if hasLight():
    			self.mergeLight()
    		if hasTemp():
    			self.mergeTemp()
    
    	def mergeAnimals(self):
    		""" merge AnimalData into TrackData on TagID """
    		p("merging animal data into track data. This might take a while. Time to move around. Or grab a Coffee.")
    
    		#TODO SPEED UP!!! 
    
    		# ttags_not_found=[]
    		notFound=0
    		for idx, tLine in enumerate(self.trackData):
    			found=False
    			ttag = tLine.split(DELIM)[COL_TAG]
    			for aLine in self.animalData:
    				aLine = aLine.split(DELIM)
    				atag = aLine[A_COL_TAG]
    
    				if ttag == atag:
    					found=True
    					aLine = DELIM + aLine[A_COL_SPECIES_IND] + DELIM + aLine[A_COL_SPECIES] + DELIM + aLine[A_COL_WEIGHT_WO_TAG_MG]
    					self.trackData[idx] += aLine
    					break;
    
    			if not found:
    				self.trackData[idx] += f'{DELIM}NA{DELIM}NA{DELIM}NA'
    				notFound+=1
    				# ttags_not_found.append(ttag)
    
    		p(f"Didn't find Tags for {notFound} tracking events!")
    
    	def mergeLight(self):
    		"""merge LightData into TrackData on Time"""
    		p("merging light data into track data")
    
    		track_idx=0
    		light_idx=0
    		lightTime = self.startLight
    		trackTime = self.startTrack
    		
    		while trackTime < lightTime:
    			self.trackData[track_idx] += f'{DELIM}NA{DELIM}NA{DELIM}NA'
    			track_idx+=1
    			if track_idx >= len(self.trackData):
    				p("skyglow.log starts after track.log...")
    				return
    			trackTime = self.getTrackTime(self.trackData[track_idx])
    
    		p(f"start light merge @ {trackTime} -- idx {track_idx}")
    
    		for trackLine in self.trackData[track_idx:]:
    			trackTime = self.getTrackTime(trackLine)
    			
    			uid = int(trackLine.split(DELIM)[COL_UID])
    
    			while lightTime < trackTime and light_idx < len(self.lightData) - 1: # -> minute precision
    				light_idx+=1
    				lightTime = self.getLightTime(self.lightData[light_idx])
    
    			#add light cols
    			moon_real, moon_eco, skyglow = self.parseLightLine(self.lightData[light_idx],uid)
    			trackLine += DELIM + "{:.3f}".format(moon_real) + DELIM + "{:.3f}".format(moon_eco) + DELIM + str(skyglow)
    
    			self.trackData[track_idx] = trackLine
    			track_idx+=1
    
    	def mergeTemp(self):
    		"""merge tempData into TrackData on Time"""
    		p("merging temperature data into track data")
    
    		track_idx = 0
    		trackTime = self.startTrack
    		tids = [0 for _ in range(N_UNITS) ]
    
    		#fill NA's for tracking events starting before temperature
    		while trackTime < self.startTemp:
    			self.trackData[track_idx] += f'{DELIM}NA'
    			track_idx+=1
    			if track_idx >= len(self.trackData):
    				p("temperature starts after trackdata...")
    				return
    			trackTime = self.getTrackTime(self.trackData[track_idx])
    
    
    		p(f"start temperature merge @ {trackTime} -- idx {track_idx}")
    
    
    		for trackLine in self.trackData[track_idx:]:
    
    			cols = trackLine.split(DELIM)
    			uid = int(cols[COL_UID]) - 1
    			trackTime = self.getTrackTime(trackLine)
    
    			# get temp idx for temp within 2:30min. TempSensors record once every 5 minutes.
    			delta = datetime.timedelta(minutes=2, seconds=30)
    			i = tids[uid]
    			while i < len(self.tempData[uid]) and self.tempData[uid][i][0] < trackTime-delta:
    				# p(f"{self.tempData[uid][hid][i][0]} < {trackTime} - {delta} ?? {self.tempData[uid][hid][i][0] < trackTime-delta}")
    				i += 1
    			tids[uid]= i
    
    			tempTime, temp = self.tempData[uid][i]
    
    			# add temp col
    			trackLine += DELIM + f"%.{temp_precision}f"%temp #fill up to 2 digits after comma
    
    			self.trackData[track_idx] = trackLine
    			track_idx+=1
    
    	def getLightTime(self,line):
    		if not hasLight():
    			return
    
    		cols = line.strip().split(DELIM)
    		sTime = cols[0].strip()
    		if "Error" in sTime:
    			return noTime
    		
    		lightTime = dt.strptime(sTime,TIME_FMT)
    
    		#convert to UTC from local BerlinTZ and respect DST sun 25 october 3AM -> -1 hour = 2AM again
    		winterTime = dt.strptime("2020-10-25 03:00:00",TIME_FMT)
    		hours= ( 2 if lightTime < winterTime else 1 )
    
    		lightTime -= datetime.timedelta(hours=hours)	# -2h in summmer, -1h in winter
    		return lightTime
    
    
    	def getTrackTime(self,line):
    		""" convert timestamp string to datetime """
    		try:
    			timestamp=int(line[:TIMESTAMP_LEN])
    		except ValueError as e:
    			p("Caught:",e)
    			p("Presumably binary data in txt file... or some other corruption")
    			return noTime
    
    		return dt.fromtimestamp(timestamp)
    
    	def getTempTime(self,line):
    		cols = line.split(DELIM)
    		return dt.strptime(cols[C_COL_TIME],C_TIME_FMT)
    
    	def header(self):
    		h= f"# {PROJ} data\n" \
    		f"{RULER}\n" \
    		f"#       len(Track): {len(self.trackData)}\n" \
    		f"#       startTrack: {self.startTrack}\n" \
    		f"#         endTrack: {self.endTrack}\n" \
    		f"#      len(animal): {len(self.animalData)}\n"
    		if hasLight():
    			h += 	f"#       len(Light): {len(self.lightData)}\n" \
    					f"#       startLight: {self.startLight}\n" \
    					f"#         endLight: {self.endLight}\n"
    		if hasTemp():
    			h += 	f"# len(Temperature): {self.lenTemp}\n" \
    					f"#        startTemp: {self.startTemp}\n" \
    					f"#          endTemp: {self.endTemp}\n"
    		h += RULER + "\n"
    		h += f"#timestamp, ms, date, time, unit, habitat, x, y, tag, species, speciesnumber, weight_without_tag[mg]"
    		if hasLight():
    			h+=', moon_real[mLux], moon_eco[mLux], skyglow[Lux]'
    		if hasTemp():
    			h+=', temp[C°]'
    		h +="\n"
    		h += RULER + "\n"
    		return h
    
    	def write(self):
    		os.makedirs(OUTPUTDIR, exist_ok=True)
    		os.chdir(OUTPUTDIR)
    
    		p("delete old data")
    		cmd_delete_data=f"rm *-data-* tagInfo unkownTags"
    		os.system(cmd_delete_data)
    
    		p("write to " + OUT_FILE)
    		with open(OUT_FILE, "w") as f:
    			f.writelines(self.header())
    			self.trackData=map(lambda x:x+'\n', self.trackData)
    			f.writelines(self.trackData)
    
    		p("compress")
    		cmd_compress=f"tar -C {OUTPUTDIR} -zcvf {OUT_FILE}.tgz {OUT_FILE}" # ">/dev/null 2>/dev/null"
    		os.system(cmd_compress)
    
    def xtract():
    	for root, dirs, files in os.walk(INPUTDIR): #walk recursively
    		for file in files:
    			if file.endswith(".tgz") or file.endswith(".tar.gz"):
    				p(f"xtracting {file}")
    				p(f"")
    				cmd_xtract=f"tar -zxvf {INPUTDIR}{os.sep}{file} -C {INPUTDIR}"
    				os.system(cmd_xtract)
    		return
    
    def getFileList():
    
    	p("")
    	p( "Searching for files..." )
    	animalFile, skyglowFile, tempFile, trackFiles = '','','',[]
    	animalFiles=["tags","list-individuals.csv"]
    	skyglowFiles=["skyglow.log"]
    
    	for root, dirs, files in os.walk(INPUTDIR): #walk recursively
    		for file in files:
    			if file.endswith(".tgz") or file.endswith(".tar.gz"):
    				continue
    			elif "unit-" in file:
    				trackFiles.append(file)
    			elif file in animalFiles:
    				animalFile = INPUTDIR+os.sep+file
    				p(f"   found animalFile: {file}")
    			elif hasLight() and file in skyglowFiles:
    				skyglowFile = INPUTDIR+os.sep+file
    				p(f"   found logfile: {file}")
    			elif hasTemp() and "emp" in file :
    				tempFile = INPUTDIR+os.sep+file
    				p(f"   found tempFile: {file}")
    			else: 
    				p(f"   ignoring ({file})")
    
    	if hasTemp() and not tempFile:
    		sys.exit(f"Couldn't find temperature data (filename containing 'temp' or 'Temp')")
    	if not trackFiles:
    		sys.exit(f"No tracking data found. (filenames containing 'unit-X', where X is a number)")
    	if hasLight() and not skyglowFile:
    		sys.exit(f"Couldn't find {skyglowFiles}")
    	if not animalFile:
    		sys.exit(f"Couldn't find {animalFiles}")
    
    	return animalFile, skyglowFile, tempFile, trackFiles
    
    
    def make_rel_abs_path(path):
    	#! the same method exists in dataFilter and dataSync. Don't forget to update em both if making changes!
    	
    	global rootPath
    	rootPath = os.path.abspath( os.path.dirname( __file__ ) ) #uses cwd(), which is set to scriptdir at start
    
    	path = os.path.expanduser(path)
    	if not os.path.isabs(path):
    		path = os.path.abspath(rootPath+os.sep+path)
    	return os.path.normpath(path)
    
    def main():
    
    	initFileStructure()
    	# xtract()
    
    	data = Data()
    	data.merge()
    	data.write()
    
    	dataFilter.main(OUT_FILE)
    
    	if platform.system() == "Linux":
    		os.system(rootPath + "/getDates.sh " + INPUTDIR + " > " + OUTPUTDIR + "/dates")
    		os.system(rootPath + "/update-gitignore.sh")
    		os.system(rootPath + "/unknown-tags.sh " + OUTPUTDIR)
    
    def readConf():
    	global PROJ, OUTPUTDIR, INPUTDIR, startTime, endTime, blox
    
    	confFile = make_rel_abs_path(confFileName)
    	with open(confFile, 'r') as f:
    		content = json.load(f)
    	
    	conf = content["conf"]
    
    	known_projects=["ecotrack","ecolux","schrebatron"]
    	p(f"    Process raw data for which configuration?")
    	p(f"{RULER}")
    	for idx, c in enumerate(conf):
    		p(f"    {idx+1}) Project {c[0]}")
    		p(f"        from: {c[3]}")
    		p(f"          to: {c[4]}")
    		p(f"          in: {c[1]}")
    		p(f"         out: {c[2]}")
    		p(f"")
    	try:
    		n=int(input())-1
    		PROJ = conf[n][0].lower()
    		INPUTDIR = make_rel_abs_path( conf[n][1] )
    		OUTPUTDIR = make_rel_abs_path( conf[n][2] )
    		startTime = dt.strptime(f"{conf[n][3]} 00:00:00", TIME_FMT)
    		endTime = dt.strptime(f"{conf[n][4]} 00:00:00", TIME_FMT)
    
    		if PROJ not in known_projects:
    			raise Exception(f"Unknown Project '{PROJ}'")
    
    		blox = content["blox"][PROJ]
    		for b in blox:
    			b[1]=dt.strptime(f"{b[1]} 00:00:00",TIME_FMT)
    			b[2]=dt.strptime(f"{b[2]} 00:00:00",TIME_FMT)
    
    	except Exception as e:
    		p(f"Error: {e}")
    		p(f"")
    		p(f"You can add a configuration to {confFileName}")
    		p(f"It has following form:")
    		p(f"")
    		p(f"		['Project', 'inDir', 'outDir', 'start', 'end']")
    		p(f"")
    		p(f"  > Project must be in {known_projects}")
    		p(f"  > dirs can be relative. eg: ('./data','../data','~/data')")
    		p(f"  > times must have Format like '{dt.now().strftime('%Y-%m-%d')}'")
    		exit()
    
    
    if __name__ == "__main__" :
    
    	readConf()
    
    	start = time.time()
    	main()
    	end = time.time()
    	p(f"Took {end - start} seconds")