/ Published in: Ruby
Expand |
Embed | Plain Text
#!/usr/bin/ruby
# bibout 0.1 by David Sanson
#
# This is a ruby script that tries to parse bibtex files
# and spit out either citekeys or markdown formatted
# references. Try bibout -h for details.
#
require 'optparse'
require 'ostruct'

# Configure shortcuts to your favorite bibtex files
bibfiles = {}
bibfiles['small'] = "/users/foobar/mysmallbib.bib"
bibfiles['big'] = "/users/foobar/mybigbib.bib"
bibfiles['default'] = bibfiles['small']

# Default Command Line Options
options = OpenStruct.new
options.bibfile = bibfiles['default']
options.url = false
options.task = "biblio"
options.bibliobullets = false
options.minbib = false
options.all = false

# Regexps for cleaning bibtex files (only applied when --minbib is given)
regexps = [
  /\sBdsk-.*?\},?\n?/m,
  /\sKeywords\s=.*?\},?\n?/m,
  /\sJstor_.*?=.*?\},?\n?/m,
  /\sLocal-Url\s=.*?\},?\n?/m,
  /\sDate-(Added|Modified)\s=.*?\},?\n?/m,
  /\sAnnote\s=.*?\},?\n?/m,
  /\sRead\s=.*?\},?\n?/m,
  /\sGluck\s=.*?\},?\n?/m,
  /\sItuneserror\s=.*?\},?\n?/m,
  /\sNot-Online\s=.*?\},?\n?/m,
  /\sNote\s=\s\{ArticleType.*?\},?\n?/m,
]

# Titlecase class, from http://moose56.com/blog/2008/05/22/titlecase/
class TitleCase
  SMALL_WORDS = %w(a an and as at but by en for if in of on or the to v[.]? via vs[.]?).freeze
  # Anchored at the start of the word: without the anchor any word that merely
  # *ends* in a small word ("data", "information") was left in lower case.
  SMALL_WORD = /\A(?:#{SMALL_WORDS.join("|")})\b/
  INLINE_PERIOD = /[[:alpha:]|[:digit:]][.][[:alpha:]|[:digit:]]/
  SPECIAL_CASE = /AT&T|Q&A/i
  UPPERCASE_WORD = /[[:upper:]]{2,}/

  # Title-cases +str+.
  #
  # The string is split on whitespace; words containing a run of two or more
  # capitals are kept untouched, small words and words with an inline period
  # stay lower case, everything else is capitalised, and the first and last
  # words always get a leading capital.
  #
  # @param str [String] the raw title
  # @return [String] the title-cased text, words joined by single spaces
  #   ("" when +str+ contains no words)
  def self.parse(str)
    words = str.split.map { |word| convert(word) }
    return "" if words.empty?

    words[0] = capitalize_edge(words[0])
    words[-1] = capitalize_edge(words[-1])
    words.join(" ")
  end

  # Applies the per-word rules (everything except the first/last-word rule).
  def self.convert(word)
    # do not downcase SEC etc.
    return word if UPPERCASE_WORD.match(word)

    word = word.downcase
    # capitalise all but small words and words with an inline period
    word = word.capitalize unless SMALL_WORD.match(word) || INLINE_PERIOD.match(word)
    # deal with special cases
    word = word.upcase if SPECIAL_CASE.match(word)
    word
  end

  # Upcases only the first letter of a first/last word. String#capitalize
  # would also lower-case the rest and undo "SEC" or "AT&T".
  def self.capitalize_edge(word)
    return word if INLINE_PERIOD.match(word)

    word.sub(/\A[[:lower:]]/) { |letter| letter.upcase }
  end

  private_class_method :convert, :capitalize_edge
end

# Parse Command Line Options
OptionParser.new do |opts|
  opts.banner = "Usage: bibout [options] str1 str2 str3"
  opts.on("-u", "--url", "Include URL in Bibliographic Entries (implies -b)") { options.url = true }
  opts.on("-b", "--bibliography", "Output Bibliographic Entries (Markdown)") { options.task = "biblio" }
  opts.on("-l", "--bullets", "Format Bibliography Entries with Bullets") { options.bibliobullets = true }
  opts.on("-c", "--cite", "Output Cite Keys") { options.task = "cite" }
  opts.on("-t", "--bibtex", "Output BibTeX Entries") { options.task = "bibtex" }
  opts.on("-f", "--bib BIBFILE", "Use BIBFILE") { |bib| options.bibfile = bib }
  opts.on("-m", "--minbib", "Output Minimal BibTex Entries (implies -t)") { options.minbib = true }
  opts.on("-a", "--all", "Output all entries in BibTex file") { options.all = true }
end.parse!

# -f accepts either a configured shortcut name or a literal path
bibfile = bibfiles.fetch(options.bibfile, options.bibfile)

# -m implies -t; -u implies -b (and wins when both are given, as before)
options.task = "bibtex" if options.minbib
options.task = "biblio" if options.url

# Subroutines

# Start of an entry once the leading "@" is gone: "type{key,".
BIB_ENTRY_HEAD = /\A\s*(\w+)\s*\{\s*([^,\s{}=]+)\s*,/

## split a bibtex file into entries; none of them keeps its leading "@"
def read_bib_entries(bibfile)
  # Splitting on "\n@" leaves the "@" on an entry at the very top of the
  # file, so strip that one explicitly.
  File.read(bibfile).sub(/\A\s*@/, "").split("\n@")
end

## map each cite key to the entries declared under it, in file order
def bib_entries_by_key(bibfile)
  read_bib_entries(bibfile).each_with_object({}) do |entry, index|
    head = BIB_ENTRY_HEAD.match(entry)
    # Chunks without a "type{key," head (leading comments, @string, ...) are skipped.
    (index[head[2]] ||= []) << entry if head
  end
end

## last value of field +name+ in +entry+, or "" when the field is absent
def bib_field(entry, name, closing = '\}')
  # The lookbehind keeps "Title" from matching "Booktitle" and "Url" from
  # matching "Bdsk-Url" / "Local-Url".
  found = entry.scan(/(?<![\w-])#{name}\s*=\s*\{(.*?)#{closing}/i).last
  found ? found.first : ""
end

## turn LaTeX quoting/emphasis into markdown, drop braces, and title-case
def clean_bib_title(text)
  cleaned = text.gsub(/^``?(.*?)''?$/, '\1')
                .gsub(/``?(.*?)''?/, '"\1"')
                .gsub(/\\emph\{(.*?)\}/, '*\1*')
                .gsub(/\{(.*?)\}/, '\1')
  TitleCase.parse(cleaned)
end

## " Address: Publisher" built from whichever parts are present ("" if none)
def bib_imprint(fields)
  imprint = [fields[:address], fields[:publisher]].reject(&:empty?).join(": ")
  imprint.empty? ? "" : " #{imprint}"
end

## the type-specific part of a reference (everything after author and year)
def bib_reference_body(type, fields)
  imprint = bib_imprint(fields)
  case type
  when "article"
    body = "\"#{fields[:title]},\" *#{fields[:journal]}*"
    body += " #{fields[:volume]}" unless fields[:volume].empty?
    body += ":#{fields[:number]}" unless fields[:number].empty?
    body += " #{fields[:pages]}" unless fields[:pages].empty?
    "#{body}."
  when "book"
    "*#{fields[:title]}*." + (imprint.empty? ? "" : "#{imprint}.")
  when "incollection"
    editor = fields[:editor].empty? ? "" : "#{fields[:editor]}, "
    "\"#{fields[:title]},\" in #{editor}*#{fields[:booktitle]}*." + (imprint.empty? ? "" : "#{imprint}.")
  when "inbook"
    "\"#{fields[:chapter]},\" in *#{fields[:title]}*#{imprint}."
  else
    "\"#{fields[:title]}.\""
  end
end

## format one bibtex entry as a single markdown reference line
def format_bib_entry(key, entry, options)
  # Entry types are compared lower-case so "@Article" formats like "@article".
  type = BIB_ENTRY_HEAD.match(entry)[1].downcase
  fields = {
    author: bib_field(entry, "Author"),
    editor: bib_field(entry, "Editor"),
    booktitle: clean_bib_title(bib_field(entry, "Booktitle")),
    title: clean_bib_title(bib_field(entry, "Title", '\},')),
    journal: TitleCase.parse(bib_field(entry, "Journal")),
    year: bib_field(entry, "Year"),
    volume: bib_field(entry, "Volume"),
    number: bib_field(entry, "Number"),
    publisher: bib_field(entry, "Publisher"),
    address: bib_field(entry, "Address"),
    pages: bib_field(entry, "Pages"),
    chapter: clean_bib_title(bib_field(entry, "Chapter")),
  }
  crossref = bib_field(entry, "Crossref")

  # Prefer an explicit Url field; fall back to the first Bdsk-Url-N field.
  url = bib_field(entry, "Url")
  url = entry[/Bdsk-Url[^=\n]*=\s*\{(.*?)\}/i, 1].to_s if url.empty?
  if options.url && !url.empty?
    fields[:title] = key if fields[:title].empty?
    fields[:title] = "[#{fields[:title]}](#{url})"
  end

  parts = []
  parts << "*\t" if options.bibliobullets
  parts << ([fields[:author], fields[:editor]].find { |name| !name.empty? } || "Anonymous")
  parts << (fields[:year].empty? ? ", " : " (#{fields[:year]}). ")
  parts << bib_reference_body(type, fields)
  parts << " In #{crossref}." unless crossref.empty?
  parts << "\n"
  parts.join
end

## get a list of bibtex entries given an array of cite keys
#
# Returns an Array of Strings, each "@<entry>\n", in the order of +keys+.
# When options.minbib is set, every pattern in +regexps+ is removed from each
# entry first. The file is not opened when +keys+ is empty.
def extractbibtex(keys, bibfile, options, regexps)
  wanted = Array(keys).flatten.map(&:strip)
  return [] if wanted.empty?

  index = bib_entries_by_key(bibfile)
  wanted.flat_map { |key| index.fetch(key, []) }.map do |entry|
    entry = regexps.reduce(entry) { |text, regexp| text.gsub(regexp, "") } if options.minbib
    "@#{entry}\n"
  end
end

## get a sorted list of cite keys that match an array of strings
#
# Each string is used as a case-insensitive regexp fragment that must match
# the start of a cite key ("" matches every key). Returns a sorted Array of
# unique key Strings. The file is not opened when +somestrings+ is empty.
def extractcitekeys(somestrings, bibfile, options)
  return [] if somestrings.empty?

  text = File.read(bibfile)
  found = somestrings.flat_map do |match|
    pattern = match.chomp
    # scan returns one array of captures per hit; the key is the first capture.
    text.scan(/@.*?\{(#{pattern}.*?),/i).map { |captures| captures.first.strip }
  end
  found.uniq.sort
end

## get a bibliography given an array of cite keys
#
# Returns a String with one markdown reference per line, in the order of
# +somekeys+ ("" when nothing matches). options.url turns titles into links
# and options.bibliobullets prefixes each line with "*\t". The file is not
# opened when +somekeys+ is empty.
def extractbiblio(somekeys, bibfile, options)
  wanted = Array(somekeys).flatten.map(&:strip)
  return "" if wanted.empty?

  index = bib_entries_by_key(bibfile)
  wanted.flat_map do |key|
    index.fetch(key, []).map { |entry| format_bib_entry(key, entry, options) }
  end.join
end

# Read in additional arguments, from ARGV or STDIN
matches =
  if options.all
    [""]
  elsif ARGV[0]
    ARGV
  else
    STDIN.read.split("\n")
  end

# Process arguments, according to options.task
keys = extractcitekeys(matches, bibfile, options)
case options.task
when "biblio"
  puts extractbiblio(keys, bibfile, options)
when "cite"
  puts keys
when "bibtex"
  puts extractbibtex(keys, bibfile, options, regexps)
end
You need to login to post a comment.
