add import_oldnews.sh helper script
author Stefan Metzmacher <metze@samba.org>
Wed, 5 Aug 2015 11:06:22 +0000 (13:06 +0200)
committer Stefan Metzmacher <metze@samba.org>
Wed, 5 Aug 2015 11:09:05 +0000 (13:09 +0200)
This can be used like this:

./import_oldnews.sh generated_news/latest_10_headlines.html generated_news/latest_10_bodies.html

Signed-off-by: Stefan Metzmacher <metze@samba.org>
import_oldnews.sh [new file with mode: 0755]

diff --git a/import_oldnews.sh b/import_oldnews.sh
new file mode 100755 (executable)
index 0000000..f6ab6bc
--- /dev/null
@@ -0,0 +1,93 @@
+#!/bin/sh
+#
+# Split the generated news HTML into per-item files below posted_news/:
+# one *.headline.html and one *.body.html for every news item.
+#
+# Usage: ./import_oldnews.sh <headlines.html> <bodies.html>
+#
+# Each '<li>' line of the headlines file is expected to look like
+#     <li>DATE <a href="#NAME">...
+# and its body starts at the '<h5><a name="NAME">' line of the bodies
+# file, ending on the line before the next '<h5>' line.
+#
+# Uses GNU-style options (date --date/--utc, grep --line-number/-A).
+# The posted_news/ directory must already exist in the current directory.
+#
+
+# Run all tools in the C locale.
+LC_ALL=C
+export LC_ALL
+LANG=C
+export LANG
+LANGUAGE=C
+export LANGUAGE
+
+set -u
+set -e
+umask 0022
+
+if [ $# -lt 2 ]; then
+       echo "Usage: $0 <headlines.html> <bodies.html>" >&2
+       exit 1
+fi
+
+HEADLINES=${1}
+BODIES=${2}
+
+# All headline entries, one per line.
+LINES="$(grep '<li>' "${HEADLINES}")"
+NLINES=$(printf '%s\n' "${LINES}" | wc -l)
+
+# "<line number>:<text>" of every <h5> line; used to find where a body ends.
+BODYH5="$(grep --line-number '<h5>' "${BODIES}")"
+
+for i in $(seq 1 "${NLINES}"); do
+       # i-th headline, leading whitespace stripped
+       l=$(printf '%s\n' "${LINES}" | sed -n -e "${i}p" | sed -e 's!^[\t ]*!!')
+       # text between <li> and the link (fed to date below)
+       d=$(printf '%s\n' "${l}" | sed -e 's!^<li>\([^<]*\)<a href=.*!\1!')
+       # anchor name taken from href="#NAME"
+       n=$(printf '%s\n' "${l}" | sed -e 's!.*a href="#\([^"]*\)".*!\1!')
+       utctime=$(date --date="${d}" --utc +"%Y%m%d-%H%M%S")
+       headlinefile="posted_news/${utctime}.${n}.headline.html"
+       bodyfile="posted_news/${utctime}.${n}.body.html"
+       echo "${i}: ${headlinefile}"
+       {
+               echo "<!-- BEGIN: ${headlinefile} -->"
+               printf '%s\n' "${l}"
+               echo "<!-- END: ${headlinefile} -->"
+       } > "${headlinefile}"
+
+       # Line number of this item's <h5> anchor and, if any, of the next <h5>.
+       # -F: the anchor name is matched literally, not as a regex.
+       boundary=$(printf '%s\n' "${BODYH5}" | grep -F -A1 "<h5><a name=\"${n}\">" | cut -d ':' -f1)
+       if [ -z "${boundary}" ]; then
+               echo "no body found for '${n}' in ${BODIES}" >&2
+               exit 1
+       fi
+       tmp=$(printf '%s\n' "${boundary}" | wc -l)
+       case "${tmp}" in
+       2)
+               begin=$(printf '%s\n' "${boundary}" | head -n 1)
+               next=$(printf '%s\n' "${boundary}" | tail -n 1)
+               # Lines between the two anchors. $(( )) instead of expr:
+               # expr exits non-zero on a 0 result, which aborts under set -e.
+               len=$((${next} - ${begin} - 1))
+               ;;
+       1)
+               # No following <h5>: take at most 10000 lines after the anchor.
+               len="10000"
+               ;;
+       *)
+               echo "invalid boundary:" >&2
+               echo "${boundary}" >&2
+               exit 1
+               ;;
+       esac
+       echo "${i}: ${bodyfile}"
+       {
+               echo "<!-- BEGIN: ${bodyfile} -->"
+               grep -F -A"${len}" "<h5><a name=\"${n}\">" "${BODIES}"
+               echo "<!-- END: ${bodyfile} -->"
+       } > "${bodyfile}"
+done