Posted By

dsanson on 09/10/09


Tagged

osx markdown pandoc textutil


Versions (?)

any2md


 / Published in: Bash
 

This script is just a wrapper around pandoc, textutil, and pdftohtml. It tries to convert html, tex, latex, rst, pdf, ps, odt, doc, docx, rtf, rtfd, wordml, and webarchive files to markdown, detecting the file type from the extension.

  1. #!/bin/bash
  #######################################
  # Print the command-line help text.
  # Globals:   none (reads $0 for the program name)
  # Arguments: none
  # Outputs:   the help text on stdout
  # Returns:   0
  #######################################
  usage() {
    # ${0##*/} strips the directory part without forking basename and keeps
    # working when the script path contains whitespace.
    local prog=${0##*/}

    # printf with a single '%s\n' format prints one argument per line.
    printf '%s\n' \
      "Usage: ${prog} [options] file(s)" \
      "cat file | ${prog} [options]" \
      '' \
      'OPTIONS:' \
      '-h Show this message' \
      '-t <title> Specify title of document' \
      '-a <author> Specify author of document' \
      '-d <date> Specify creation date of document' \
      'NOTE:' \
      'This script is sloppy when it comes to adding metadata.' \
      'In particular, if you pass it a document that already has' \
      'pandoc-style metadata, this will not check for that, and will' \
      'simply prepend metadata to the original.'
  }
  20.  
  #######################################
  # Test whether a shell variable is set.
  # Arguments: $1 - name of the variable to test (the name, not its value)
  # Returns:   0 if the variable is set, even to the empty string;
  #            1 if it is unset, or if $1 is missing or not a valid
  #            identifier
  #######################################
  defined() {
    # Reject empty or malformed names up front: indirect expansion of such a
    # name is an expansion error in bash rather than a clean "false".
    [[ ${1-} =~ ^[A-Za-z_][A-Za-z0-9_]*$ ]] || return 1

    # ${!1+set} expands to "set" only when the variable named by $1 is set,
    # so an empty-but-set variable still counts as defined.
    [[ -n ${!1+set} ]]
  }
  25.  
  #######################################
  # Convert one file to Markdown, choosing the converter from the file's
  # extension (the text after the last '.').
  # Arguments: $1 - path of the file to convert
  # Outputs:   the converted document on stdout; a diagnostic on stderr
  #            when the extension is not recognised
  # Returns:   the exit status of the converter that was run, or 1 when the
  #            extension is not recognised
  #######################################
  convertFile() {
    local orig=${1-}
    local ext=${orig##*.}

    case "$ext" in
      md | markdown | mdown | txt)
        # Already Markdown / plain text: pass through unchanged.
        cat -- "$orig"
        ;;
      html | htm | shtml)
        html2markdown -s --no-wrap --reference-links "$orig"
        ;;
      tex | latex | rst)
        pandoc -s --no-wrap --reference-links -t markdown "$orig"
        ;;
      pdf | ps)
        pdftohtml -noframes -stdout "$orig" \
          | html2markdown -s --no-wrap --reference-links
        ;;
      odt | doc | docx | rtf | rtfd | wordml | webarchive)
        # textutil renders these formats to HTML, which is then converted.
        textutil -convert html -stdout "$orig" \
          | html2markdown -s --no-wrap --reference-links
        ;;
      *)
        # Diagnostics go to stderr so they never end up inside the
        # Markdown written to stdout.
        echo "unsupported file type" >&2
        return 1
        ;;
    esac
  }
  45.  
  # ---- Option parsing -------------------------------------------------
  # -t, -a and -d record document metadata in $title, $author and $date;
  # -h prints the help text. The options are shifted away once, after the
  # loop, so a "--" terminator and clustered flags are handled by getopts
  # itself and only file operands remain in "$@".
  while getopts "ht:a:d:" option; do
    case "$option" in
      h)
        usage
        exit 0
        ;;
      t) title=$OPTARG ;;
      d) date=$OPTARG ;;
      a) author=$OPTARG ;;
      *)
        # Unknown option or missing option argument (getopts has already
        # printed its own diagnostic): show the help on stderr and fail.
        usage >&2
        exit 1
        ;;
    esac
  done
  shift $((OPTIND - 1))

  # Print a pandoc-style "% title / % author / % date" header followed by
  # a blank line, but only when at least one metadata option was given.
  printMetadata() {
    if defined title || defined author || defined date; then
      # The format is reused for each argument, giving one "% ..." line each.
      printf '%% %s\n' "$title" "$author" "$date"
      echo
    fi
  }

  # ---- Main -----------------------------------------------------------
  if [ "$#" -eq 0 ]; then
    # No file operands: process STDIN. There is no file name to take an
    # extension from, so the stream is passed through as-is after the
    # optional metadata header.
    # NOTE(review): this assumes piped input is already Markdown/plain
    # text - confirm that matches the intended use of "cat file | any2md".
    printMetadata
    cat
  else
    # Iterate over "$@" directly so every operand stays a separate word,
    # including names that contain spaces.
    for file in "$@"; do
      if [[ -f "$file" && -r "$file" ]]; then
        printMetadata
        convertFile "$file"
      else
        # Diagnostics go to stderr to keep the Markdown on stdout clean.
        echo "$file is not a file or is unreadable; skipping..." >&2
      fi
    done
  fi

Report this snippet  

You need to login to post a comment.