Posted By

dsanson on 09/10/09


Tagged

osx markdown pandoc textutil


Versions (?)

any2md


 / Published in: Bash
 

This script is just a wrapper around pandoc, textutil, and pdftohtml. It tries to convert html, tex, latex, rst, pdf, ps, odt, doc, docx, rtf, rtfd, wordml, and webarchive files to markdown, detecting the file type from the extension.

  1. #!/bin/bash
  # usage
  # Print the command-line help text to stdout.
  # The program name shown is the last path component of $0, taken with a
  # parameter expansion so that paths containing spaces are handled and no
  # external `basename` process is needed.
  # Arguments: none
  # Outputs:   the help text on stdout
  usage() {
    local prog=${0##*/}
    # printf with one argument per line is used instead of a here-document so
    # the function does not depend on a terminator sitting at column 0.
    printf '%s\n' \
      "Usage: ${prog} [options] file(s)" \
      "cat file | ${prog} [options]" \
      "" \
      "OPTIONS:" \
      "-h Show this message" \
      "-t <title> Specify title of document" \
      "-a <author> Specify author of document" \
      "-d <date> Specify creation date of document" \
      "NOTE:" \
      "This script is sloppy when it comes to adding metadata." \
      "In particular, if you pass it a document that already has" \
      "pandoc-style metadata, this will not check for that, and will" \
      "simply prepend metadata to the original."
  }
  20.  
  # defined NAME
  # Test whether the shell variable called NAME is set.
  # A variable that is set to the empty string counts as defined.
  # Arguments: $1 - name of the variable to test
  # Returns:   0 if the variable is set, non-zero if it is unset or if no
  #            (or an empty) name was given
  defined() {
    # Without a name there is nothing to dereference; report "not defined"
    # instead of attempting an indirect expansion of an unset parameter.
    [[ -n ${1-} ]] || return 1
    # ${!1+set} expands to "set" only when the variable named by $1 is set.
    [[ -n ${!1+set} ]]
  }
  25.  
  # convertFile [PATH]
  # Write a markdown rendering of PATH to stdout, choosing the converter from
  # the text after the last "." in PATH:
  #   md, markdown, mdown, txt      -> copied through unchanged with cat
  #   html, htm, shtml              -> html2markdown
  #   tex, latex, rst               -> pandoc -t markdown
  #   pdf, ps                       -> pdftohtml piped into html2markdown
  #   odt, doc, docx, rtf, rtfd,
  #   wordml, webarchive            -> textutil piped into html2markdown
  # Arguments: $1 - path of the file to convert; when omitted, the value of
  #                 the global variable `orig` is used instead
  # Outputs:   converted text on stdout; "unsupported file type" on stderr
  # Returns:   1 for an unrecognised extension, otherwise the exit status of
  #            the converter (for the piped cases, of the final html2markdown)
  # NOTE(review): newer pandoc releases replaced --no-wrap with --wrap=none;
  # confirm the flags against the installed pandoc/html2markdown versions.
  convertFile() {
    # The RHS is expanded before `local` shadows the name, so the fallback
    # reads the caller's global `orig`.
    local orig=${1-$orig}
    local ext=${orig##*.}

    case "$ext" in
      md | markdown | mdown | txt)
        cat -- "$orig"
        ;;
      html | htm | shtml)
        html2markdown -s --no-wrap --reference-links "$orig"
        ;;
      tex | latex | rst)
        pandoc -s --no-wrap --reference-links -t markdown "$orig"
        ;;
      pdf | ps)
        pdftohtml -noframes -stdout "$orig" \
          | html2markdown -s --no-wrap --reference-links
        ;;
      odt | doc | docx | rtf | rtfd | wordml | webarchive)
        textutil -convert html -stdout "$orig" \
          | html2markdown -s --no-wrap --reference-links
        ;;
      *)
        # Diagnostics go to stderr so they never end up in the markdown output.
        echo "unsupported file type" >&2
        return 1
        ;;
    esac
  }
  44.  
  # parse options
  #
  # Recognised flags: -h (help), -t <title>, -a <author>, -d <date>.
  # The metadata variables title/author/date are assigned only when their
  # flag is present, so "is it set?" checks can tell them apart from empty.
  while getopts "ht:a:d:" option; do
    case "$option" in
      h)
        # Help was requested: print the usage text and stop (status 1).
        usage
        exit 1
        ;;
      t)
        title=$OPTARG
        ;;
      d)
        date=$OPTARG
        ;;
      a)
        author=$OPTARG
        ;;
      *)
        # Unknown flag or missing argument (getopts reports these as "?"):
        # show the usage text on stderr and fail with a non-zero status.
        usage >&2
        exit 1
        ;;
    esac
  done
  # Drop everything getopts consumed (including a terminating "--") in one
  # step, leaving only the file operands in the positional parameters.
  shift $((OPTIND - 1))
  72.  
  # If no arguments, process STDIN
  #
  # The remaining positional parameters are the input files; they are kept in
  # an array so names containing whitespace survive intact.
  files=("$@")
  if [ "$#" -eq 0 ]; then
    # Spool stdin into a private temporary directory that is removed on exit.
    # NOTE(review): the format of the piped data is unknown; it is stored as
    # "stdin.md" so the extension-based dispatch passes it through as
    # markdown text -- confirm this is the intended default.
    tmpdir=$(mktemp -d "${TMPDIR:-/tmp}/any2md.XXXXXX") || exit 1
    trap 'rm -rf -- "$tmpdir"' EXIT
    cat > "$tmpdir/stdin.md"
    files=("$tmpdir/stdin.md")
  fi

  for file in "${files[@]}"; do
    if [[ -f "$file" && -r "$file" ]]; then
      # Emit a pandoc-style title block when any metadata flag was supplied.
      if defined title || defined author || defined date; then
        printf '%% %s\n' "$title" "$author" "$date"
        echo
      fi
      # Publish the path through the global `orig` as well as passing it as
      # $1, since convertFile's body reads the path from `orig`.
      orig=$file
      convertFile "$file"
    else
      # Report on stderr so the message is not mixed into the markdown output.
      echo "$file is not a file or is unreadable; skipping..." >&2
    fi
  done

Report this snippet  

You need to login to post a comment.