#!/bin/bash
#
# Summarise tags and detections found in a filtered data file.
#
# Usage: <script> dir [list]
#
# Picks the first file matching {dir}/*-data-*-filtered and writes two
# reports next to it:
#   {dir}/tags_info     - number of total/known/unknown tags and detections
#   {dir}/tags_unknown  - "<count> : <tag>" per unknown tag, highest count first
#
# The data file is comma-separated with HEADER_SIZE leading header lines; the
# tag is field COL_TAG. A row counts as "unknown" when the text "NA" appears
# anywhere in field COL_TAG or any later field.
# NOTE(review): the "NA" test is a plain substring match (as it always was),
# so a value such as "BANANA" also marks a row unknown - confirm against the
# real data whether an exact field match is wanted.
# NOTE(review): the usage text describes a separate "list" mode, but both
# reports have always been written regardless of a second argument; that
# behaviour is kept here.

set -uo pipefail

readonly THIS="${0##*/}"
readonly OUT_UNKNOWN="tags_unknown"
readonly OUT_INFO="tags_info"
readonly COL_TAG=13
readonly HEADER_SIZE=1

#######################################
# Print the usage text.
# Outputs: usage text on stdout (callers redirect as needed)
#######################################
usage() {
  cat <<EOF
Usage:
1. ${THIS} dir
 write info about tags and detections to ${OUT_INFO}

2. ${THIS} dir list
 write list of unknown tags together with their occurrences to '${OUT_UNKNOWN}'

Looks for data-file ( {dir}/*-data-*-filtered )
EOF
}

#######################################
# Classify every data row of a file in a single pass.
# Header lines are skipped by line number, so a missing trailing newline
# does not cost a data row.
# Globals:   COL_TAG, HEADER_SIZE (read)
# Arguments: $1 - path of the data file
# Outputs:   one line per data row: "U,<tag>" (unknown) or "K,<tag>" (known)
# Returns:   non-zero if the file cannot be read
#######################################
_classify_rows() {
  # The file is fed through stdin so a path containing '=' is never
  # mistaken by awk for a variable assignment.
  awk -F',' -v col="$COL_TAG" -v skip="$HEADER_SIZE" '
    NR <= skip { next }
    {
      flag = "K"
      for (i = col; i <= NF; i++) {
        if (index($i, "NA") > 0) { flag = "U"; break }
      }
      print flag "," $col
    }
  ' < "$1"
}

#######################################
# Print part/total as a percentage with two decimals.
# The value is truncated (not rounded) and printed without a leading zero
# (".50", "33.33", "0"), i.e. in the format the former
# `bc <<< "scale=2; part * 100 / total"` call produced with GNU bc, so
# existing report files keep their exact layout.
# Arguments: $1 - part, $2 - total (non-negative integers)
# Outputs:   the percentage, or "0" when total is 0 (avoids division by zero)
#######################################
_percent() {
  local -r part=$1 total=$2
  if (( total == 0 )); then
    printf '0\n'
    return 0
  fi
  local -r hundredths=$(( part * 10000 / total ))
  if (( hundredths == 0 )); then
    printf '0\n'
  elif (( hundredths < 100 )); then
    printf '.%02d\n' "$hundredths"
  else
    printf '%d.%02d\n' "$(( hundredths / 100 ))" "$(( hundredths % 100 ))"
  fi
}

#######################################
# List the unknown tags with the number of data rows carrying each tag.
# The count compares the tag column for equality, so a tag that is a
# substring of another tag (or of some other column) is not over-counted.
# Arguments: $1 - directory (unused, kept for call compatibility)
#            $2 - path of the data file
# Outputs:   "<count> : <tag>" lines, sorted by count, highest first
# Returns:   non-zero if the file cannot be read
#######################################
unknownTags() {
  local -r file=$2
  _classify_rows "$file" \
    | awk '
        {
          tag = substr($0, 3)
          count[tag]++
          if (substr($0, 1, 1) == "U") unknown[tag] = 1
        }
        END {
          for (tag in unknown) printf "%d : %s\n", count[tag], tag
        }
      ' \
    | sort -r -n -t : -k 1,1
}

#######################################
# Print tag and detection statistics for a data file.
# Arguments: $1 - path of the data file
# Outputs:   the "## Tags" / "## Detections" report on stdout
# Returns:   non-zero if the file cannot be read
#######################################
info() {
  local -r file=$1
  local stats
  stats=$(
    _classify_rows "$file" \
      | awk '
          {
            tag = substr($0, 3)
            rows++
            seen[tag] = 1
            if (substr($0, 1, 1) == "U") { det_unknown++; unknown[tag] = 1 }
            else                         { det_known++;   known[tag] = 1 }
          }
          END {
            for (tag in seen)    n_total++
            for (tag in known)   n_known++
            for (tag in unknown) n_unknown++
            printf "%d %d %d %d %d %d\n",
              n_total, n_known, n_unknown, det_known, det_unknown, rows
          }
        '
  ) || return 1

  local tag_total tag_known tag_unknown det_known det_unknown det_total
  IFS=' ' read -r tag_total tag_known tag_unknown \
    det_known det_unknown det_total <<< "$stats"

  printf '%s\n' "## Tags"
  printf '%s\n' " total: ${tag_total}"
  printf '%s\n' " known: ${tag_known} ($(_percent "$tag_known" "$tag_total")%)"
  printf '%s\n' "unknown: ${tag_unknown} ($(_percent "$tag_unknown" "$tag_total")%)"
  printf '%s\n' ""
  printf '%s\n' "## Detections"
  printf '%s\n' " total: ${det_total}"
  printf '%s\n' " known: ${det_known} ($(_percent "$det_known" "$det_total")%)"
  printf '%s\n' "unknown: ${det_unknown} ($(_percent "$det_unknown" "$det_total")%)"
}

#######################################
# Entry point: locate the data file and write both reports.
# Arguments: $1 - directory holding the data file
# Outputs:   progress messages on stdout, errors and usage on stderr
# Returns:   0 on success, 1 on missing argument, missing data file or
#            a failed report
#######################################
main() {
  if (( $# < 1 )); then
    usage >&2
    return 1
  fi

  local -r dir=$1
  # Let the shell expand the pattern instead of parsing `ls`; the first
  # match is used. Without a match the literal pattern remains and fails -f.
  local candidates=("${dir}"/*-data-*-filtered)
  local -r in=${candidates[0]}
  if [[ ! -f "$in" ]]; then
    printf '%s: no data file matching %s/*-data-*-filtered\n' \
      "$THIS" "$dir" >&2
    return 1
  fi

  echo "write info about tags to ${OUT_INFO}"
  info "$in" > "${dir}/${OUT_INFO}" || return 1

  echo "print list of unknown tags along with the number of their occurrences to ${OUT_UNKNOWN}"
  unknownTags "$dir" "$in" > "${dir}/${OUT_UNKNOWN}" || return 1
}

# Last command: the script's exit status is main's return status.
main "$@"