Posted By

iblis on 02/16/09


Tagged

ISBN web bibtex isbndb


Versions (?)

Who likes this?

1 person has marked this snippet as a favorite

adrpater


Convert ISBN file names to BibTeX records


 / Published in: Perl
 

  1. #!/usr/bin/env perl
  2.  
  3. use strict;
  4. use warnings;
  5.  
  6. use Encoding "utf8";
  7. use Text::BibTeX;
  8. use WebService::ISBNDB::API::Books;
  9. use Getopt::Long;
  10. use Pod::Usage;
  11.  
  12. my %options;
  13. GetOptions('usage|?' => \$options{usage},
  14. 'h|help' => \$options{help}
  15. );
  16. pod2usage(1) if $options{usage};
  17. pod2usage(-verbose => 2) if $options{help};
  18.  
  19. my $api_key = $ENV{ISBNDB_KEY} || 'TMDKWJSX';
  20.  
  21. my $dir = shift || '.';
  22. my $file = shift || '&STDOUT';
  23. my $bib = Text::BibTeX::File->new('>'.$file);
  24.  
  25.  
  26. opendir my $dh, $dir
  27. or die "Cannot open $dir: $!\n";
  28. my @files = grep { -f && m{/\d{9}[x|\d]\.pdf$}i }
  29. map {"$dir/$_"}
  30. readdir $dh;
  31.  
  32. foreach my $file (@files) {
  33.  
  34. # extract isbn from file name
  35. my $isbn = $file =~ /(\d{9}[x|\d])\.pdf$/i ? $1 : '0000000000' ;
  36. # check database for isbn number, loop if failed
  37. my $book = WebService::ISBNDB::API::Books->find( { api_key => $api_key, isbn => $isbn } );
  38. next unless $book;
  39.  
  40. # set new bibtex entry
  41. my $entry = new Text::BibTeX::Entry;
  42. $entry->set_metatype(BTE_REGULAR);
  43. $entry->set_type('book');
  44. $entry->set_key($isbn);
  45.  
  46. # set title field
  47. $entry->set( 'title', $book->get_longtitle || $book->get_title );
  48.  
  49. # set author or editor field
  50. my $authors = $book->get_authors_text;
  51. # some clean-up
  52. $authors =~ s/^by //;
  53. $authors =~ s/,$//;
  54. $authors =~ s/,\s+/ and /g;
  55. $authors =~ s/;\s+/ and /g;
  56. # authors or editors ?
  57. if ( $authors =~ /^\s*\[?edited by\s+\]?(.*)$/i ) {
  58. (my $editors = $1) =~ s/with/and/;
  59. $entry->set('editor', $editors);
  60. }
  61. elsif ( $authors =~ /\(Editor\)/i ) {
  62. $authors =~ s/\s*\(Editor\)//gi;
  63. }
  64. else {
  65. $entry->set('author', $authors);
  66. }
  67.  
  68. # parse publisher and edition fields for publisher and year data
  69. if ( $book->get_publisher_text =~ m/^(.*?),\s+c?(\d{4}).?$/ ) {
  70. $entry->set( 'publisher', $1 ) ;
  71. $entry->set( 'year', $2 );
  72.  
  73. }
  74. else {
  75. $entry->set( 'publisher', $book->get_publisher_text ) ;
  76. if ( $book->get_edition_info =~ m/(\d{4})/ ) {
  77. $entry->set( 'year', $1 );
  78. }
  79. }
  80.  
  81. # miscellaneous fields
  82. my $notes = $book->get_notes;
  83. $entry->set( 'notes', $notes ) if $notes ;
  84. my $abstract = $book->get_summary;
  85. $entry->set( 'abstract', $abstract ) if $abstract ;
  86.  
  87. $entry->set( 'local-url', $file);
  88.  
  89. $entry->write($bib);
  90.  
  91. # sleep 2;
  92. }
  93.  
  94. __END__
  95.  
  96.  
  97. =head1 NAME
  98.  
  99. isbn2bibtex.pl - Convert ISBN file names to BibTeX records
  100.  
  101. =head1 SYNOPSIS
  102.  
  103. isbn2bibtex.pl [-? | --help] | [directory] [outfile.bib]
  104.  
  105. =head1 DESCRIPTION
  106.  
  107. Scans a directory for PDF files whose name are ISBN-10 identifiers,
  108. fetches the corresponding book's data from isbndb.com, parses data
  109. fields to get rid of inconsistencies, and finally, outputs a bibtex
  110. file with all fields set accordingly.
  111.  
  112. -? print usage
  113. -h --help verbose help message
  114.  
  115. If no directory is given, scans the current directory. Outputs result
  116. to STDOUT, unless a second argument is given.
  117.  
  118. An API key is required to access isbndb.com services. You can either
  119. paste it in the source code or set the environment variable ISBNDB_KEY.
  120.  
  121. =head1 LICENSE
  122.  
  123. Free to use and modifiy, same terms as Perl itself.
  124.  
  125. =head1 AUTHOR
  126.  
  127. i-blis, I<i-blis yandex ru>.
  128.  
  129. =cut

Report this snippet  

You need to login to post a comment.