From c41444d14e6af3c189682ae717a349559f1cae70 Mon Sep 17 00:00:00 2001
From: Christian Krause <christian.krause@idiv.de>
Date: Mon, 23 Jul 2018 11:10:57 +0200
Subject: [PATCH] adds chunk input script

---
 README.md      | 11 +++++++++++
 chunk-input.sh | 47 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 58 insertions(+)
 create mode 100644 chunk-input.sh

diff --git a/README.md b/README.md
index 40f46fc..f0cfb34 100644
--- a/README.md
+++ b/README.md
@@ -1 +1,12 @@
 # zamba predict job pipeline
+
+## chunk input
+
+Zamba does not scale out. We can, however, reduce the turnaround time by chunking the input videos and submitting one job per chunk.
+
+```bash
+bash chunk-input.sh \
+  /data/GROUP/videos \
+  /data/GROUP/videos-chunked-10 \
+  10
+```
diff --git a/chunk-input.sh b/chunk-input.sh
new file mode 100644
index 0000000..faf2b8d
--- /dev/null
+++ b/chunk-input.sh
@@ -0,0 +1,47 @@
+#!/bin/bash
+
+# -----------------------------------------------------------------------------
+# command line arguments
+# -----------------------------------------------------------------------------
+
+[[ $# -eq 3 ]] || {
+  echo "usage: $(basename "$0") input_dir output_dir chunk_size" >&2
+  exit 1
+}
+
+input_dir=$1
+output_dir=$2
+chunk_size=$3
+
+[[ -d $input_dir ]] || {
+  echo "$(basename "$0"): $input_dir does not exist" >&2
+  exit 1
+}
+
+[[ $chunk_size -gt 0 ]] || {
+  echo "$(basename "$0"): chunk size should be greater than 0" >&2
+  exit 1
+}
+
+# -----------------------------------------------------------------------------
+# create chunked dirs
+# -----------------------------------------------------------------------------
+
+current_chunk=1
+current_element=1
+
+find "$input_dir" -type f | while read -r file ; do
+  if [[ $current_element -eq 1 ]] ; then
+    chunk_dir="$output_dir/chunk-$current_chunk"
+    mkdir -p "$chunk_dir"
+  fi
+
+  ln -t "$chunk_dir" "$file"
+
+  if [[ $current_element -lt $chunk_size ]] ; then
+    current_element=$(( current_element + 1 ))
+  else
+    current_element=1
+    current_chunk=$(( current_chunk + 1 ))
+  fi
+done
-- 
GitLab