/ Published in: Bash
This script is just a wrapper around pandoc, textutil, and pdftohtml. It will try to convert html, tex, latex, rst, pdf, ps, odt, doc, docx, rtf, rtfd, wordml, and webarchive files to markdown, detecting the file type based on the extension.
Expand |
Embed | Plain Text
#!/bin/bash
#
# Wrapper around pandoc, html2markdown, textutil and pdftohtml.
#
# Converts each input file to markdown on stdout, picking the converter from
# the file extension. If any of -t/-a/-d is given, a pandoc-style metadata
# block ("% title", "% author", "% date", blank line) is written before each
# converted file. With no file arguments, stdin is spooled to a temporary
# file and that file is processed instead.

#######################################
# Print the help text.
# Outputs: usage message on stdout
#######################################
usage() {
  local prog=${0##*/}
  cat <<EOF
Usage: $prog [options] file(s)
       cat file | $prog [options]

OPTIONS:
   -h            Show this message
   -t <title>    Specify title of document
   -a <author>   Specify author of document
   -d <date>     Specify creation date of document

NOTE: This script is sloppy when it comes to adding metadata. In
particular, if you pass it a document that already has pandoc-style
metadata, this will not check for that, and will simply prepend metadata
to the original.
EOF
}

#######################################
# Test whether a variable is set (an empty value still counts as set).
# Arguments: $1 - NAME of the variable to test (not its value)
# Returns:   0 if the variable is set, non-zero otherwise
#######################################
defined() {
  [[ -n "${!1+set}" ]]
}

#######################################
# Convert one file to markdown, choosing the tool from the extension.
# The extension match is case-sensitive.
# Arguments: the path of the file to convert
# Outputs:   markdown on stdout; diagnostics on stderr
# Returns:   status of the converter, or 1 for an unsupported extension
#######################################
convertFile() {
  # "$*" (not "$1") keeps the historical behaviour of joining all arguments
  # into a single path.
  local orig="$*"
  local ext=${orig##*.}

  case "$ext" in
    md | markdown | mdown | txt)
      # Already markdown / plain text: pass through untouched.
      cat -- "$orig"
      ;;
    html | htm | shtml)
      html2markdown -s --no-wrap --reference-links "$orig"
      ;;
    tex | latex | rst)
      pandoc -s --no-wrap --reference-links -t markdown "$orig"
      ;;
    pdf | ps)
      pdftohtml -noframes -stdout "$orig" \
        | html2markdown -s --no-wrap --reference-links
      ;;
    odt | doc | docx | rtf | rtfd | wordml | webarchive)
      textutil -convert html -stdout "$orig" \
        | html2markdown -s --no-wrap --reference-links
      ;;
    *)
      echo "unsupported file type" >&2
      return 1
      ;;
  esac
}

#######################################
# Parse options, then convert every file argument (or stdin).
# Globals:   title, author, date (written from -t/-a/-d),
#            any2md_tmp (written; temp file holding spooled stdin)
# Arguments: the script's command line
# Outputs:   metadata block and markdown on stdout; diagnostics on stderr
# Returns:   0 if every input converted, 1 otherwise
#######################################
main() {
  local option file
  local OPTIND=1
  local status=0
  local -a files=()

  # Start from a clean slate so that inherited environment variables with
  # these names do not trigger a metadata block.
  unset title author date

  while getopts "ht:a:d:" option; do
    case "$option" in
      h)
        usage
        exit 1 # exit status kept as it has always been for -h
        ;;
      t) title=$OPTARG ;;
      d) date=$OPTARG ;;
      a) author=$OPTARG ;;
      *)
        usage >&2
        exit 1
        ;;
    esac
  done
  shift $((OPTIND - 1))

  if (($# == 0)); then
    # No file arguments: spool stdin to a temp file and process that.
    # NOTE(review): the temp file has no recognised extension, so convertFile
    # reports "unsupported file type" for it — confirm which input format
    # stdin is supposed to be treated as.
    any2md_tmp=$(mktemp "${TMPDIR:-/tmp}/any2md.XXXXXX") || {
      echo "could not create temporary file" >&2
      return 1
    }
    trap 'rm -f -- "$any2md_tmp"' EXIT
    cat >"$any2md_tmp"
    files=("$any2md_tmp")
  else
    files=("$@")
  fi

  for file in "${files[@]}"; do
    if [[ -f "$file" && -r "$file" ]]; then
      # Emit the metadata block as soon as any one of the fields was given;
      # fields that were not given are printed empty.
      if defined title || defined author || defined date; then
        printf '%% %s\n' "$title"
        printf '%% %s\n' "$author"
        printf '%% %s\n' "$date"
        printf '\n'
      fi
      convertFile "$file" || status=1
    else
      echo "$file is not a file or is unreadable; skipping..." >&2
      status=1
    fi
  done

  return "$status"
}

main "$@"
You need to login to post a comment.
