#!/bin/bash
#
# chunk-input.sh - distribute the files of a directory tree over chunk dirs
#
# Provenance (recovered from the patch this script was delivered in):
#   Commit:  c41444d14e6af3c189682ae717a349559f1cae70
#   From:    Christian Krause <christian.krause@idiv.de>
#   Date:    Mon, 23 Jul 2018 11:10:57 +0200
#   Subject: [PATCH] adds chunk input script
#
# README.md section added by the same patch
# ("# zamba predict job pipeline"):
#
#   ## chunk input
#
#   Zamba does not scale out. We can, however, reduce the turnaround time by
#   chunking the input videos and submitting one job per chunk.
#
#       bash chunk-input.sh \
#         /data/GROUP/videos \
#         /data/GROUP/videos-chunked-10 \
#         10
#
# Usage: chunk-input.sh input_dir output_dir chunk_size
#
#   input_dir   existing directory; every regular file below it is processed
#   output_dir  directory that receives the chunk-1, chunk-2, ... subdirs
#   chunk_size  maximum number of files per chunk dir (decimal integer > 0)
#
# Each file is hard-linked (ln, no copy) into its chunk directory.
#
# Exit status: 0 if every directory and link was created, 1 on a usage or
# argument error or if any mkdir/ln call failed.

prog=$(basename -- "$0")
readonly prog

# Print "<prog>: <message>" on stderr and terminate with status 1.
die() {
  printf '%s: %s\n' "$prog" "$*" >&2
  exit 1
}

# -----------------------------------------------------------------------------
# command line arguments
# -----------------------------------------------------------------------------

if [[ $# -ne 3 ]]; then
  printf 'usage: %s input_dir output_dir chunk_size\n' "$prog" >&2
  exit 1
fi

input_dir=$1
output_dir=$2
chunk_size=$3

[[ -d $input_dir ]] || die "$input_dir does not exist"

# Accept plain decimal digits only. Feeding the raw argument to an arithmetic
# comparison would evaluate it as an expression: "010" would be read as octal
# 8, "08" would be a syntax error, and arbitrary expressions would be run.
[[ $chunk_size =~ ^[0-9]+$ ]] || die "chunk size should be greater than 0"
chunk_size=$(( 10#$chunk_size ))
(( chunk_size > 0 )) || die "chunk size should be greater than 0"

# -----------------------------------------------------------------------------
# create chunked dirs
# -----------------------------------------------------------------------------

current_chunk=1     # number used in the name of the chunk dir being filled
current_element=1   # 1-based position of the next file within that chunk
failures=0          # count of mkdir/ln calls that returned non-zero
chunk_dir=

# The file list is NUL-delimited so any file name survives intact, and it is
# sorted bytewise so the file-to-chunk assignment is the same on every run.
# sort emits nothing before it has read all of find's output, so links created
# by this run are never picked up again by the traversal, even when output_dir
# lies inside input_dir.
# The loop reads from a process substitution rather than a pipe so that it
# runs in the current shell and the failure counter is still set afterwards.
while IFS= read -r -d '' file; do
  if (( current_element == 1 )); then
    chunk_dir="$output_dir/chunk-$current_chunk"
    mkdir -p -- "$chunk_dir" || failures=$(( failures + 1 ))
  fi

  # Best effort: ln prints its own diagnostic (e.g. cross-device link or a
  # name that already exists in the chunk dir); remember it and carry on.
  ln -t "$chunk_dir" -- "$file" || failures=$(( failures + 1 ))

  if (( current_element < chunk_size )); then
    current_element=$(( current_element + 1 ))
  else
    current_element=1
    current_chunk=$(( current_chunk + 1 ))
  fi
done < <(find "$input_dir" -type f -print0 | LC_ALL=C sort -z)

if (( failures > 0 )); then
  die "$failures operation(s) failed, see messages above"
fi