#!/usr/bin/env bash

## RSS to PDF
## This tool reads a single RSS feed on stdin and renders each item as its
## own article PDF. The intended naming scheme is [sectiontitle-hash.pdf],
## which has the added benefit of random sorting within each section; the
## current code names each file [category|title.pdf] instead.

## Usage:
##   cat input.rss | rsstopdf <output.pdf>

# Spool the feed arriving on stdin into a temp file so it can be queried
# more than once; tmpdir is the scratch directory for generated files.
tmpdir="$(mktemp -d)"
rawxmlfile="$(mktemp)"
cat - > "$rawxmlfile"

# Various functions that are handy later

# Absolute path of the directory this script lives in; the LaTeX template
# is looked up relative to it.
scriptdir="$(
    cd "$(dirname "${BASH_SOURCE[0]}")" > /dev/null \
        && pwd
)"
# Escape LaTeX special characters in plain text.
#
# Reads text on stdin and writes it to stdout with each of
#   \ { } ~ ^ # $ % & _
# replaced by a LaTeX sequence that prints that character literally.
#
# The mapping is applied one character at a time in a single pass, so the
# backslashes and braces introduced by one replacement are never escaped
# again by another (chained substitutions would turn the "\textasciitilde{}"
# produced for "~" into garbage once "\" and "{" are handled).
function tex_escape {
    awk '
        BEGIN {
            map["\\"] = "\\textbackslash{}"
            map["{"]  = "\\{"
            map["}"]  = "\\}"
            map["~"]  = "\\textasciitilde{}"
            map["^"]  = "\\textasciicircum{}"
            map["#"]  = "\\#"
            map["$"]  = "\\$"
            map["%"]  = "\\%"
            map["&"]  = "\\&"
            map["_"]  = "\\_"
        }
        {
            len = length($0)
            for (i = 1; i <= len; i++) {
                ch = substr($0, i, 1)
                # Anything not in the map (including non-ASCII bytes) is
                # copied through untouched.
                printf "%s", ((ch in map) ? map[ch] : ch)
            }
            printf "\n"
        }
    '
}

# Convert HTML on stdin to LaTeX on stdout by way of pandoc.
#
# pandoc is run in standalone mode, after which the first 49 lines and the
# last 2 lines of its output are thrown away and every \rule{3in} is
# narrowed to \rule{1in}.
# NOTE(review): the 49/2 line counts presumably strip the preamble and the
# closing lines of pandoc's standalone LaTeX template; they depend on the
# installed pandoc version -- confirm before relying on this.
tex_escape2() {
    pandoc --standalone --from=html --to=latex \
        | tail -n +50 \
        | head -n -2 \
        | sed 's|\\rule{3in}|\\rule{1in}|g'
}

# Need to know date, category, title, author, and body

# Ordinal suffix for each possible last digit of a day number
suffixes=("th" "st" "nd" "rd" "th" "th" "th" "th" "th" "th")

# Print the English ordinal suffix ("st", "nd", "rd" or "th") for a number.
# Arguments: $1 - day of the month; a leading zero is accepted
# Outputs:   the suffix on stdout, without a trailing newline
ordinal_suffix() {
    # Force base 10 so that "08" and "09" are not rejected as bad octal
    local day=$((10#$1))
    if (( day % 100 >= 11 && day % 100 <= 13 )); then
        # 11th, 12th, 13th: the last digit alone would give st/nd/rd
        printf '%s' "th"
    else
        printf '%s' "${suffixes[day % 10]}"
    fi
}

# Read the clock once so every field comes from the same instant
read -r date_weekday date_month date_day date_year < <(date '+%A %B %d %Y')
date_day=$((10#$date_day))   # drop the leading zero
date="$date_weekday, $date_month $date_day, $date_year"
dayindex=$((date_day % 10))
daysuffix="$(ordinal_suffix "$date_day")"
# Tuesday, October 2nd, 2018
datestring="$date_weekday, $date_month ${date_day}${daysuffix}, $date_year"

title_delim="|" # Because we use `cut`, keep this a single character

# Fill "~~name~~" placeholders in a template and print the result.
#
# Usage:     render_template <template-file> [<name> <value>]...
# Arguments: $1 - path of the template file
#            then pairs of placeholder name and replacement text
# Outputs:   the filled-in template on stdout
# Returns:   non-zero if the template cannot be read
#
# Both the placeholder and the value are treated as literal text, so values
# may contain backslashes, "&", "|" and newlines. (Feeding TeX-escaped text
# to a sed replacement would turn "\&" back into "&" and break on "|" or a
# newline.) Pairs are applied in the order given.
render_template() {
    local template_file=$1
    shift
    local text
    text="$(< "$template_file")" || return
    while (( $# >= 2 )); do
        # Quoting the pattern disables globbing; quoting the replacement
        # keeps "&" literal even when bash's patsub_replacement is on.
        text=${text//"~~$1~~"/"$2"}
        shift 2
    done
    printf '%s\n' "$text"
}

# Count the items in the feed that was read from stdin
numberofitems="$(xpath "$rawxmlfile" 'count(/rss/channel/item)')"
numberofitems="${numberofitems//[[:space:]]/}"
if ! [[ "$numberofitems" =~ ^[0-9]+$ ]]; then
    echo "rsstopdf: could not count the items in the feed" >&2
    numberofitems=0
fi

for (( i = 1; i <= numberofitems; i++ ))
do
    node="/rss/channel/item[$i]"

    # Collect
    category="$(xpath "$node/category/text()" < "$rawxmlfile")"
    title="$(xpath "$node/title/text()" < "$rawxmlfile")"
    # Keep only the text inside the parentheses of the author field
    author="$(xpath "$node/author/text()" < "$rawxmlfile" | sed 's/.*(\(.*\))/\1/')"
    org="NPR"
    body="$(xpath "$node/content:encoded/text()" < "$rawxmlfile")"

    # Escape
    category="$(printf '%s\n' "$category" | tex_escape)"
    title="$(printf '%s\n' "$title" | tex_escape)"
    author="$(printf '%s\n' "$author" | tex_escape)"
    org="$(printf '%s\n' "$org" | tex_escape)"
    body="$(printf '%s\n' "$body" | tex_escape)"

    # Replace the last word of the body with an \hbox and \textbullet
    # ...

    # Build the file name "<category><delim><title>.tex". A "/" would be
    # read as a directory separator, and the delimiter must appear exactly
    # once so the name can be split again later, so both become "-".
    # NOTE(review): other TeX-special characters in the file name may still
    # confuse pdflatex; a hash-based name would sidestep that.
    file_category=${category//\//-}
    file_category=${file_category//"$title_delim"/-}
    file_title=${title//\//-}
    file_title=${file_title//"$title_delim"/-}

    # Mix with tex template
    texout="$tmpdir/${file_category}${title_delim}${file_title}.tex"
    render_template "$scriptdir/standard.tex.template" \
        category "$category" \
        title "$title" \
        date "$datestring" \
        author "$author" \
        body "$body" \
        org "$org" \
        > "$texout"

    pdflatex \
        -halt-on-error \
        -output-directory "$tmpdir" \
        "$texout"
done

# Create the bookmarks list--count the number of articles in each
# category, so we can build a nested list.

unite_ordered_paths=()   # article PDFs in the order they will be merged
page_count=1             # page on which the next article will start

# Print pdfmark outline entries for the article PDFs in a directory.
#
# Article files are named "<category><delim><title>.pdf". For every
# category (sorted, unique) one section entry is printed, carrying the
# number of articles and the page the section starts on, followed by one
# entry per article with that article's starting page.
#
# Globals:   unite_ordered_paths - each article path is appended, in the
#                                  same order as the printed outline
#            page_count          - advanced by each article's page count
# Arguments: $1 - directory holding the article PDFs
#            $2 - delimiter between category and title in file names
# Outputs:   pdfmark lines on stdout, warnings on stderr
build_bookmarks() {
    local dir=$1 delim=$2
    local pdf base category title num_pages article_path section_start
    local -a categories=() articles=() article_index=()

    # Take the category from each file name up to the first delimiter.
    # Globbing (rather than parsing ls) keeps names with spaces intact.
    readarray -t categories < <(
        for pdf in "$dir"/*.pdf; do
            [[ -e "$pdf" ]] || continue      # glob matched nothing
            base=${pdf##*/}
            [[ "$base" == *"$delim"* ]] || continue
            category=${base%%"$delim"*}
            printf '%s\n' "$category"
        done | sort -u
    )

    for category in "${categories[@]}"; do
        # The quoted prefix is matched literally; only the "*" is a glob
        articles=()
        for pdf in "$dir/${category}${delim}"*.pdf; do
            [[ -e "$pdf" ]] && articles+=("$pdf")
        done

        section_start=$page_count
        article_index=()
        for article_path in "${articles[@]}"; do
            unite_ordered_paths+=("$article_path")

            num_pages=$(pdfinfo "$article_path" | awk '/^Pages:/ {print $NF}')
            if ! [[ "$num_pages" =~ ^[0-9]+$ ]]; then
                echo "rsstopdf: cannot read page count of $article_path" >&2
                num_pages=0
            fi

            # Title is the file name minus the category prefix and ".pdf"
            title=${article_path##*/}
            title=${title#*"$delim"}
            title=${title%.pdf}

            article_index+=("[/Page $page_count /Title ($title) /OUT pdfmark")
            page_count=$(( page_count + num_pages ))
        done

        # A section entry must come before the child entries it counts
        echo "[/Count ${#articles[@]} /Page $section_start /Title ($category) /OUT pdfmark"
        if (( ${#article_index[@]} > 0 )); then
            printf '%s\n' "${article_index[@]}"
        fi
    done
}

build_bookmarks "$tmpdir" "$title_delim"

# Merge the article PDFs, in bookmark order, into the final PDF. The
# destination is the first script argument; /tmp/test.pdf is kept as the
# fallback when none is given.
output_pdf="${1:-/tmp/test.pdf}"
if (( ${#unite_ordered_paths[@]} > 0 )); then
    pdfunite "${unite_ordered_paths[@]}" "$output_pdf"
else
    echo "rsstopdf: no article PDFs found in '$tmpdir'; nothing to merge" >&2
fi
#rm -rf "$tmpdir" "$rawxmlfile"
echo "$tmpdir"
# Take all the PDFs and organize them by category, then create the final
# PDF

# Echo the location of the final pdf