Posted By

dsanson on 09/10/09


Tagged

textmate markdown bibtex


Versions (?)

bibout


 / Published in: Ruby
 

  1. #!/usr/bin/ruby
  2.  
  3. # bibout 0.1 by David Sanson
  4. #
  5. # This is a ruby script that tries to parse bibtex files
  6. # and spit out either citekeys or markdown formatted
  7. # references. Try bibout -h for details.
  8. #
  9.  
  10. require 'optparse'
  11. require 'ostruct'
  12.  
  13. # Configure shortcuts to your favorite bibtex files
  14.  
  15. bibfiles = {}
  16. bibfiles['small'] = "/users/foobar/mysmallbib.bib"
  17. bibfiles['big'] = "/users/foobar/mybigbib.bib"
  18. bibfiles['default'] = bibfiles['small']
  19.  
  20. # Default Command Line Options
  21.  
  22. options = OpenStruct.new
  23. options.bibfile = bibfiles['default']
  24. options.url = false
  25. options.task = "biblio"
  26. options.bibliobullets = false
  27. options.minbib = false
  28. options.all = false
  29.  
  30. # Regexps for cleaning bibtex files
  31.  
  32. regexps = [/\sBdsk-.*?\},?\n?/m, /\sKeywords\s=.*?\},?\n?/m,
  33. /\sJstor_.*?=.*?\},?\n?/m, /\sLocal-Url\s=.*?\},?\n?/m,
  34. /\sDate-(Added|Modified)\s=.*?\},?\n?/m,
  35. /\sAnnote\s=.*?\},?\n?/m,
  36. /\sRead\s=.*?\},?\n?/m,
  37. /\sGluck\s=.*?\},?\n?/m,
  38. /\sItuneserror\s=.*?\},?\n?/m,
  39. /\sNot-Online\s=.*?\},?\n?/m,
  40. /\sNote\s=\s\{ArticleType.*?\},?\n?/m,
  41. ]
  42.  
  43.  
  44. # Titlecase class, from http://moose56.com/blog/2008/05/22/titlecase/
  45.  
  46. class TitleCase
  47.  
  48. @small_words = %w(a an and as at but by en for if in of on or the to v[.]? via vs[.]?);
  49. @small_words_r = /(#{@small_words.join("|")})\b/
  50. @inline_period = /[[:alpha:]|[:digit:]][.][[:alpha:]|[:digit:]]/
  51. @special_case = /AT&T|Q&A/i
  52. @uppercase_word = /[[:upper:]]{2,}/
  53.  
  54. # Static/Class method to do the work
  55. def TitleCase.parse(str)
  56.  
  57. result = []
  58.  
  59. # split on white space
  60. str.each("\s") do |word|
  61.  
  62. word.strip!
  63.  
  64. # do not downcase SEC etc.
  65. if @uppercase_word.match(word) then result << word; next end
  66.  
  67. word.downcase!
  68. # capitalise all but small_words_r and inline_period
  69. word.capitalize! unless @small_words_r.match(word) || @inline_period.match(word)
  70. # deal with special cases
  71. word.upcase! if @special_case.match(word)
  72.  
  73. result << word
  74. end
  75.  
  76. # capitalize first and last word
  77. result[0].capitalize! unless @inline_period.match(result[0])
  78. result[result.size-1].capitalize! unless @inline_period.match(result[result.size-1])
  79.  
  80. result.join(" ")
  81. end
  82.  
  83. end
  84.  
  85.  
  86. # Parse Command Line Options
  87.  
  88. OptionParser.new do |opts|
  89. opts.banner = "Usage: bibout [options] str1 str2 str3"
  90. opts.on("-u", "--url", "Include URL in Bibliographic Entries (implies -b)") do |u|
  91. options.url = true
  92. end
  93. opts.on("-b", "--bibliography", "Output Bibliographic Entries (Markdown)") do |b|
  94. options.task = "biblio"
  95. end
  96. opts.on("-l", "--bullets", "Format Bibliography Entries with Bullets") do |l|
  97. options.bibliobullets = true
  98. end
  99. opts.on("-c", "--cite", "Output Cite Keys") do |c|
  100. options.task = "cite"
  101. end
  102. opts.on("-t", "--bibtex", "Output BibTeX Entries") do |t|
  103. options.task = "bibtex"
  104. end
  105. opts.on("-f", "--bib BIBFILE",
  106. "Use BIBFILE") do |bib|
  107. options.bibfile = bib
  108. end
  109. opts.on("-m", "--minbib", "Output Minimal BibTex Entries (implies -t)") do |m|
  110. options.minbib = true
  111. end
  112. opts.on("-a", "--all", "Output all entries in BibTex file") do |a|
  113. options.all = true
  114. end
  115. end.parse!
  116.  
  117. if bibfiles[options.bibfile]
  118. bibfile = bibfiles[options.bibfile]
  119. else
  120. bibfile = options.bibfile
  121. end
  122. if options.minbib
  123. options.task = "bibtex"
  124. end
  125. if options.url
  126. options.task = "biblio"
  127. end
  128.  
  129. # Subroutines
  130.  
  131. ## get a list of bibtex entries given an array of cite keys
  132.  
  133. def extractbibtex(keys, bibfile, options, regexps)
  134. results = []
  135. File.open(bibfile) { |file|
  136. text = file.read
  137. entries = text.split("\n@")
  138. keys.each { |key|
  139. results = results + entries.grep(/^.*?\{#{key},/)
  140. }
  141. }
  142. cleaned_results = []
  143. results.each { |result|
  144. if options.minbib
  145. regexps.each { |regexp|
  146. result.gsub!(regexp, "")
  147. }
  148. end
  149. cleaned_results = cleaned_results + ["\@#{result}\n"]
  150. }
  151. return cleaned_results
  152. end
  153.  
  154. ## get a sorted list of cite keys that match an array of strings
  155.  
  156. def extractcitekeys(somestrings, bibfile, options)
  157.  
  158. keys = []
  159.  
  160. somestrings.each { |match|
  161. match.chomp
  162. File.open(bibfile) { |file|
  163. text = file.read
  164. keys = keys + text.scan(/@.*?\{(#{match}.*?),/i)
  165. }
  166. }
  167. keys.uniq!
  168. keys.sort!
  169. return keys
  170. end
  171.  
  172. ## get a bibliography given an array of cite keys
  173.  
  174. def extractbiblio(somekeys, bibfile, options)
  175. output =""
  176. somekeys.each { |match|
  177. author = ""
  178. editor = ""
  179. booktitle = ""
  180. title = ""
  181. journal = ""
  182. year = ""
  183. volume = ""
  184. number = ""
  185. url = ""
  186. type = "article"
  187. publisher = ""
  188. address = ""
  189. pages = ""
  190. chapter = ""
  191. crossref = ""
  192. File.open(bibfile) { |file|
  193. text = file.read
  194. entries = text.split("\n@")
  195. entries.each { |entry|
  196. if entry.match(/^.*?\{#{match},/)
  197. entry.each { |line|
  198. if line.match(/^(.*?)\{#{match},/)
  199. type = $1
  200. end
  201. if line.match(/Crossref\s*=\s*\{(.*?)\}/i)
  202. crossref = $1
  203. end
  204. if line.match(/Author\s*=\s*\{(.*?)\}/i)
  205. author = $1
  206. end
  207. if line.match(/Editor\s*=\s*\{(.*?)\}/i)
  208. editor = $1
  209. end
  210. if line.match(/Booktitle\s*=\s*\{(.*?)\}/i)
  211. booktitle = $1
  212. booktitle.gsub!(/^``?(.*?)''?$/, '\1')
  213. booktitle.gsub!(/``?(.*?)''?/, '"\1"')
  214. booktitle.gsub!(/\\emph\{(.*?)\}/, '*\1*')
  215. booktitle.gsub!(/\{(.*?)\}/, '\1')
  216. booktitle = TitleCase.parse(booktitle)
  217. end
  218. if line.match(/Title\s*=\s*\{(.*?)\},/i)
  219. title = $1
  220. title.gsub!(/^``?(.*?)''?$/, '\1')
  221. title.gsub!(/``?(.*?)''?/, '"\1"')
  222. title.gsub!(/\\emph\{(.*?)\}/, '*\1*')
  223. title.gsub!(/\{(.*?)\}/, '\1')
  224. title = TitleCase.parse(title)
  225.  
  226. end
  227. if line.match(/Journal\s*=\s*\{(.*?)\}/i)
  228. journal = $1
  229. journal = TitleCase.parse(journal)
  230. end
  231. if line.match(/Year\s*=\s*\{(.*?)\}/i)
  232. year = $1
  233. end
  234. if line.match(/Volume\s*=\s*\{(.*?)\}/i)
  235. volume = $1
  236. end
  237. if line.match(/Number\s*=\s*\{(.*?)\}/i)
  238. number = $1
  239. end
  240. if line.match(/Publisher\s*=\s*\{(.*?)\}/i)
  241. publisher = $1
  242. end
  243. if line.match(/Address\s*=\s*\{(.*?)\}/i)
  244. address = $1
  245. end
  246. if line.match(/Pages\s*=\s*\{(.*?)\}/)
  247. pages = $1
  248. end
  249. if line.match(/Chapter\s*=\s*\{(.*?)\}/)
  250. chapter = $1
  251. chapter.gsub!(/^``?(.*?)''?$/, '\1')
  252. chapter.gsub!(/``?(.*?)''?/, '"\1"')
  253. chapter.gsub!(/\\emph\{(.*?)\}/, '*\1*')
  254. chapter.gsub!(/\{(.*?)\}/, '\1')
  255. chapter = TitleCase.parse(chapter)
  256. end
  257. if line.match(/[^-]Url\s*=\s*\{(.*?)\}/i)
  258. url = $1
  259. end
  260. if url == ""
  261. if line.match(/Bdsk-Url.*\s*=\s*\{(.*?)\}/i)
  262. url = $1
  263. end
  264. end
  265. }
  266. if options.url && url != ""
  267. if title == ""
  268. title = match
  269. end
  270. title = "[#{title}](#{url})"
  271. end
  272. if options.bibliobullets
  273. output = output + "*\t"
  274. end
  275. if author != ""
  276. output = output + author
  277. elsif editor != ""
  278. output = output + editor
  279. else
  280. output = output + "Anonymous"
  281. end
  282. if year != ""
  283. output = output + " (#{year}). "
  284. else
  285. output = output + ", "
  286. end
  287. if type == "article"
  288. output = output + "\"" + title + ",\" *" + journal + "*"
  289. if volume != ""
  290. output = output + " #{volume}"
  291. end
  292. if number != ""
  293. output = output + ":#{number}"
  294. end
  295. if pages != ""
  296. output = output + " #{pages}"
  297. end
  298. output = output + "."
  299. elsif type == "book"
  300. output = output + "*#{title}*."
  301. if address != ""
  302. output = output + " #{address}: "
  303. end
  304. if publisher != ""
  305. output = output + publisher
  306. end
  307. output = output + "."
  308. elsif type == "incollection"
  309. output = output + "\"" + title + ",\" in "
  310. if editor != ""
  311. output = output + "#{editor}, "
  312. end
  313. output = output + "*#{booktitle}*."
  314. if address != ""
  315. output = output + " #{address}: "
  316. end
  317. if publisher != ""
  318. output = output + publisher
  319. end
  320. output = output + "."
  321. elsif type == "inbook"
  322. output = output + "\"" + chapter + ",\" in *#{title}*"
  323. if address != ""
  324. output = output + " #{address}: "
  325. end
  326. if publisher != ""
  327. output = output + publisher
  328. end
  329. output = output + "."
  330. else
  331. output = output + "\"" + title + ".\""
  332. end
  333. if crossref != ""
  334. output = output + " In #{crossref}."
  335. end
  336. output = output + "\n"
  337. end
  338. }
  339. }
  340. }
  341. return output
  342. end
  343.  
  344. # Read in additional arguments, from ARGV or STDIN
  345.  
  346. if options.all
  347. matches = [""]
  348. elsif ARGV[0]
  349. matches = ARGV
  350. else
  351. matches = STDIN.read.split("\n")
  352. end
  353.  
  354. # Process arguments, according to options.task
  355.  
  356. if options.task == "biblio"
  357. keys = extractcitekeys(matches, bibfile, options)
  358. entries = extractbiblio(keys, bibfile, options)
  359. puts entries
  360. end
  361. if options.task == "cite"
  362. keys = extractcitekeys(matches, bibfile, options)
  363. puts keys
  364. end
  365. if options.task == "bibtex"
  366. keys = extractcitekeys(matches, bibfile, options)
  367. entries = extractbibtex(keys, bibfile, options, regexps)
  368. puts entries
  369. end

Report this snippet  

You need to log in to post a comment.