diff --git a/dataFilter.py b/dataFilter.py index 713e860997f2a79ccfe54fdf97af74667ecc0f97..1d3767bbf60cfe87355282d2aeb420ebf8b34c16 100755 --- a/dataFilter.py +++ b/dataFilter.py @@ -128,7 +128,7 @@ def add_cols( data ): for b in blox: if b[1] <= now < b[2]: block=b[0] - continue + break if tag not in known: lastdetect = "NA" @@ -176,7 +176,6 @@ def add_cols( data ): return data - def format(data): "sort and human-readable times" print("format") diff --git a/dataSync.py b/dataSync.py index 0cc6364a1b504066bc6f27fc9b9c20be207ee846..9ed598344a53fd68d95eb0443d875afa9d3501ed 100755 --- a/dataSync.py +++ b/dataSync.py @@ -25,6 +25,7 @@ # 2. rewrite animalmerge for loops # 3. Consider using Pandas, NumPy, Cython, Pypy, Koalas, inline C++/C/Bash, sql (SQLAlchemy) #Q: wieviel track-events wurden von clean rausgefiltert? + import sys, getopt, os, re, string, time, platform, json, datetime from pprint import pp as p from datetime import datetime as dt @@ -36,8 +37,29 @@ TAG_LEN=len("04B94A7F7288588022") RULER=f"{'#'*100}" N_UNITS=12 confFileName="./conf.json" -min_temp=14 -temp_precision=2 + +# month, sunrise, sunset +# sa: https://www.laenderdaten.info/Europa/Deutschland/sonnenuntergang.php +suntimes = [ + ( time(8,7),time(16,24) ), + ( time(7,20),time(17,20) ), + ( time(6,18),time(18,12) ), + ( time(6,5),time(20,06) ), + ( time(5,7),time(20,57) ), + ( time(4,40),time(21,33) ), + ( time(5,0),time(21,24) ), + ( time(5,47),time(20,34) ), + ( time(6,39),time(19,23) ), + ( ,time(7,30),time(18,13) ), + ( ,time(7,26),time(16,15) ), + ( ,time(8,8),time(15,54) ), +] + +def isDay( cur ): + # expects datetime obj + rise = suntimes( cur.month-1 )[1] + set = suntimes( cur.month-1 )[2] + return rise <= cur.time() < set def initFileStructure(): @@ -53,7 +75,7 @@ def initFileStructure(): #t_col_signalstrength=5 if PROJ == "ecolux": #track+light T_MINLEN = 38 # without newline. - T_MAXLEN = 40 # ms are variable in length (0-999). Will be padded later for uniform len + T_MAXLEN = 40 # ms are variable in len (0-999). Will be padded later for uniform len T_NCOLS = 6 elif PROJ == "ecotrack": #track+temp 2019 T_MINLEN = 36 @@ -76,6 +98,7 @@ def initFileStructure(): if hasTemp(): ## tempFile (C for Celsius) global C_TIME_FMT, C_COL_TIME, C_COL_UNIT, C_COL_HABITAT, C_COL_TEMP, C_MIN_LEN, C_MAX_LEN, C_DELIM, C_NCOLS + global min_temp, temp_precision C_TIME_FMT = "%Y-%m-%d %H:%M:%S.%f" C_COL_TIME = 0 C_COL_UNIT = 1 @@ -85,6 +108,8 @@ def initFileStructure(): C_MAX_LEN = 40 C_DELIM = ';' C_NCOLS = 4 + min_temp=14 + temp_precision=0 ## animalFile global A_DELIM, A_MINLEN, A_MAXLEN, A_NCOLS, A_COL_SPECIES, A_COL_SPECIES_IND, A_COL_WEIGHT_WO_TAG_MG, A_COL_TAG @@ -210,6 +235,15 @@ def getUID(fileName): uid=int(f[int(idx)]) return 1 if uid == 13 else uid +def calc_mean(ds, precison=temp_precision): + return round( sum(ds)/len(ds) , precison) + +def getBlock( time ): + for b in blox: + if b[1] <= time < b[2]: + return b + return "NA" + class Data: def __init__(self): @@ -335,49 +369,83 @@ class Data: def initTempData(self,file): """ get temp data and save in a list of list of tuples >> tempData[unit][habitat] returns (time,temp)""" p(f"processing: {file.split(os.sep)[-1]}") + with open(file) as f: + #Note: add timedelta-margin so trackdata can be merged correctly at the edges lines = f.readlines() - - #Note: add timedelta-margin so trackdata can be merged correctly at the edges - lines = self.clean(lines, C_MIN_LEN, C_MAX_LEN, C_NCOLS, C_DELIM, self.getTempTime, datetime.timedelta(minutes=30)) - lines.sort() + lines = self.clean(lines, C_MIN_LEN, C_MAX_LEN, C_NCOLS, C_DELIM, self.getTempTime, datetime.timedelta(minutes=30)) + lines.sort() self.startTemp = self.getTempTime( lines[0] ) self.endTemp = self.getTempTime( lines[-1] ) self.lenTemp = len(lines) + self.tempData = [[] for _ in range(N_UNITS) ] #list of empty lists + meanTemps = [[{} for _ in range(N_UNITS)] for _ in range(2)] #list containing 2 lists of empty dicts + # > will contain night/day mean temps for blox for units - #list of empty lists - self.tempData = [[] for _ in range(N_UNITS) ] for l in lines: - cols = l.split(DELIM) + cols = l.split(DELIM) uid = int( cols[C_COL_UNIT] ) - 1 - time = dt.strptime( cols[C_COL_TIME],C_TIME_FMT ) - temp = float( cols[C_COL_TEMP] ) + cur_time = dt.strptime( cols[C_COL_TIME],C_TIME_FMT ) + cur_temp = float( cols[C_COL_TEMP] ) + if temp < min_temp: + # ignore unreasonable low sensor values (eg. negative values) continue - if self.tempData[uid]: - lastTime = self.tempData[uid][-1][0] - if time == lastTime: - self.tempData[uid][-1].append( temp ) + try: + prev_time = self.tempData[uid][-1][0] + + if cur_time == prev_time: + # aggregate sensor values for same time + self.tempData[uid][-1].append( cur_temp ) + else: - self.tempData[uid].append( [time,temp] ) - else: - self.tempData[uid].append( [time,temp] ) + # calc mean for sensor-values for prev_time + # (mostly 4 sensor-values per time) + mean = calc_mean(self.tempData[uid][-1][1:]) + self.tempData[uid][-1][1:] = [mean] # replace list with mean + + # add to temp means + # TODO min/max pro unit pro block + block = getBlock( prev_time ) + day = isDay(prev_time) + try: + meanTemps[day][uid][block].append( mean ) + + except KeyError as new_block: + meanTemps[day][uid][block] = mean + + raise IndexError #add_new + + except IndexError as add_new: - #calc mean for all 4 sensors + self.tempData[uid].append( [cur_time,cur_temp] ) + + #TODO: check last lines and calc mean. edge case! + #testexcept. other exceptions? other indice errors then the one intended? + + for day in [0,1]: + for uid in range(N_UNITS): + for block, temps in self.meanTemps[day][uid].items(): + #Assert: temps is a list of floats + mean = calc_mean(temps) + self.meanTemps[day][uid][block] = mean #replace list with mean + + print("meanTemps: "+meanTemps) + input() + + #constructed day-and-night means. now add them to tempData for uid in range(N_UNITS): - for i,x in enumerate( self.tempData[uid] ): - n=0 - mean_temp=0 - for temp in x[1:]: - mean_temp+=temp - n+=1 + for lst in self.tempData[uid] + time = lst[0] + block = getBlock(time) - mean_temp= round( mean_temp/n, temp_precision ) + mean_night_temp = self.meanTemps[0][uid][block] + mean_day_temp = self.meanTemps[1][uid][block] - self.tempData[uid][i] = [time,mean_temp] + self.tempData[uid].extend( [mean_night_temp, mean_day_temp] ) def initAnimalData(self,file): @@ -502,7 +570,7 @@ class Data: track_idx = 0 trackTime = self.startTrack - tids = [0 for _ in range(N_UNITS) ] + temp_indices = [0 for _ in range(N_UNITS) ] #fill NA's for tracking events starting before temperature while trackTime < self.startTemp: @@ -523,18 +591,21 @@ class Data: uid = int(cols[COL_UID]) - 1 trackTime = self.getTrackTime(trackLine) - # get temp idx for temp within 2:30min. TempSensors record once every 5 minutes. - delta = datetime.timedelta(minutes=2, seconds=30) - i = tids[uid] - while i < len(self.tempData[uid]) and self.tempData[uid][i][0] < trackTime-delta: - # p(f"{self.tempData[uid][hid][i][0]} < {trackTime} - {delta} ?? {self.tempData[uid][hid][i][0] < trackTime-delta}") - i += 1 - tids[uid]= i + # update temp index + # Note: temp-sensors record once per minute! Get temp idx for nearest reading + delta = datetime.timedelta(seconds=30) + while temp_indices[uid] < len(self.tempData[uid]) and self.tempData[uid][temp_indices[uid]][0] <= trackTime-delta: + temp_indices[uid] += 1 - tempTime, temp = self.tempData[uid][i] + temp = self.tempData[uid][temp_indices[uid]][1] # add temp col - trackLine += DELIM + f"%.{temp_precision}f"%temp #fill up to 2 digits after comma + trackLine += DELIM + f"%.{temp_precision}f"%temp #fill up to temp_precision digits after comma + + # TODO + # for unit-block + # add min/max + # add mean-day/night self.trackData[track_idx] = trackLine track_idx+=1