Return to Snippet

Revision: 11730
at November 11, 2010 02:29 by iblis


Updated Code
#!/usr/bin/env perl

use strict; 
use warnings;

use Encoding "utf8";
use Text::BibTeX;
use WebService::ISBNDB::API::Books;
use Getopt::Long;
use Pod::Usage;

# Parse command-line switches. An unrecognised switch makes GetOptions
# return false; print the short usage and exit instead of carrying on.
my %options;
GetOptions(
	'usage|?' => \$options{usage},
	'h|help'  => \$options{help},
) or pod2usage(2);
pod2usage(1) if $options{usage};
pod2usage(-verbose => 2) if $options{help};

# The isbndb.com key comes from the environment when available.
# NOTE(review): the literal fallback below is a credential embedded in the
# source; prefer exporting ISBNDB_KEY and replacing this value locally.
my $api_key = $ENV{ISBNDB_KEY} || 'TMDKWJSX';

# Positional arguments: directory to scan, then output file.
# '&STDOUT' combined with the '>' prefix below duplicates standard output.
my $dir = shift || '.';
my $file = shift || '&STDOUT';

# The constructor returns false when the file cannot be opened; fail now
# rather than on the first write inside the main loop.
my $bib = Text::BibTeX::File->new('>'.$file)
	or die "Cannot open $file for writing: $!\n";


# Collect the regular files in $dir whose base name is an ISBN-10 followed
# by ".pdf": nine digits plus a final digit or the check character X
# (case-insensitive). The class is [\dx] -- a '|' inside a bracketed class
# is a literal pipe, not alternation -- and \z rejects a trailing newline.
opendir my $dh, $dir
	or die "Cannot open $dir: $!\n";
my @files = grep { -f $_ && m{/\d{9}[\dx]\.pdf\z}i }
		map {"$dir/$_"}
			readdir $dh;
closedir $dh;

# Build one BibTeX @book entry per ISBN-named PDF and write it to $bib.
# Files whose ISBN cannot be extracted or looked up are skipped.
foreach my $file (@files) {

	# extract isbn from file name: nine digits plus a digit or X check
	# character; skip the file rather than querying a dummy ISBN
	my ($isbn) = $file =~ /(\d{9}[\dx])\.pdf\z/i
		or next;

	# check database for isbn number, move on to the next file if not found
	my $book = WebService::ISBNDB::API::Books->find( { api_key => $api_key, isbn => $isbn } );
	next unless $book;

	# set new bibtex entry
	my $entry = Text::BibTeX::Entry->new;
	$entry->set_metatype(BTE_REGULAR);
	$entry->set_type('book');
	$entry->set_key($isbn);

	# set title field, preferring the long title when there is one
	my $title = $book->get_longtitle || $book->get_title;
	$entry->set( 'title', $title ) if defined $title && length $title;

	# set author or editor field
	my $authors = $book->get_authors_text;
	$authors = '' unless defined $authors;
	# some clean-up: drop a leading "by" and a trailing comma, then turn
	# comma/semicolon separators into BibTeX's " and " name separator
	$authors =~ s/^by //;
	$authors =~ s/,$//;
	$authors =~ s/,\s+/ and /g;
	$authors =~ s/;\s+/ and /g;
	# authors or editors ?
	if ( $authors =~ /^\s*\[?edited by\s+\]?(.*)$/i ) {
		# "X with Y" lists a co-editor; match whole words only so that
		# names containing "with" are left intact
		(my $editors = $1) =~ s/\bwith\b/and/g;
		$entry->set('editor', $editors);
	}
	elsif ( $authors =~ /\(Editor\)/i ) {
		# names tagged "(Editor)": strip the tag and record them as editors
		$authors =~ s/\s*\(Editor\)//gi;
		$entry->set('editor', $authors);
	}
	elsif ( $authors =~ /\S/ ) {
		$entry->set('author', $authors);
	}

	# parse publisher and edition fields for publisher and year data
	my $publisher = $book->get_publisher_text;
	$publisher = '' unless defined $publisher;
	if ( my ($name, $year) = $publisher =~ m/^(.*?),\s+c?(\d{4}).?$/ ) {
		# publisher text of the form "Name, c2005." carries the year itself
		$entry->set( 'publisher', $name );
		$entry->set( 'year', $year );
	}
	else {
		$entry->set( 'publisher', $publisher ) if length $publisher;
		# otherwise fall back to the first four-digit run in the edition info
		my $edition = $book->get_edition_info;
		if ( defined $edition && $edition =~ m/(\d{4})/ ) {
			$entry->set( 'year', $1 );
		}
	}

	# miscellaneous fields, only written when the record provides them
	my $notes = $book->get_notes;
	$entry->set( 'notes', $notes ) if $notes ;
	my $abstract = $book->get_summary;
	$entry->set( 'abstract', $abstract ) if $abstract ;

	# path of the PDF this entry was generated from
	$entry->set( 'local-url', $file);

	$entry->write($bib);

	# sleep 2;
}

__END__


=head1 NAME

isbn2bibtex.pl - Convert ISBN file names to BibTeX records 

=head1 SYNOPSIS

isbn2bibtex.pl [-? | --help] | [directory] [outfile.bib]

=head1 DESCRIPTION

Scans a directory for PDF files whose names are ISBN-10 identifiers,
fetches the corresponding book's data from isbndb.com, parses data
fields to get rid of inconsistencies, and finally, outputs a bibtex 
file with all fields set accordingly.

	-?             print usage
	-h --help      verbose help message
	
If no directory is given, scans the current directory. Outputs result
to STDOUT, unless a second argument is given.

An API key is required to access isbndb.com services. You can either 
paste it in the source code or set the environment variable ISBNDB_KEY.

=head1 LICENSE

Free to use and modify, same terms as Perl itself.

=head1 AUTHOR

i-blis, I<i-blis yandex ru>. 

=cut

Revision: 11729
at February 16, 2009 21:19 by iblis


Initial Code
#!/usr/bin/env perl

use strict; 
use warnings;

use Encoding "utf8";
use Text::BibTeX;
use WebService::ISBNDB::API::Books;
use Getopt::Long;
use Pod::Usage;

# Parse command-line switches. An unrecognised switch makes GetOptions
# return false; print the short usage and exit instead of carrying on.
my %options;
GetOptions(
	'usage|?' => \$options{usage},
	'h|help'  => \$options{help},
) or pod2usage(2);
pod2usage(1) if $options{usage};
pod2usage(-verbose => 2) if $options{help};

# The isbndb.com key comes from the environment when available.
# NOTE(review): the literal fallback below is a credential embedded in the
# source; prefer exporting ISBNDB_KEY and replacing this value locally.
my $api_key = $ENV{ISBNDB_KEY} || 'TMDKWJSX';

# Positional arguments: directory to scan, then output file.
# '&STDOUT' combined with the '>' prefix below duplicates standard output.
my $dir = shift || '.';
my $file = shift || '&STDOUT';

# The constructor returns false when the file cannot be opened; fail now
# rather than on the first write inside the main loop.
my $bib = Text::BibTeX::File->new('>'.$file)
	or die "Cannot open $file for writing: $!\n";


# Collect the regular files in $dir whose base name is an ISBN-10 followed
# by ".pdf": nine digits plus a final digit or the check character X
# (case-insensitive). The class is [\dx] -- a '|' inside a bracketed class
# is a literal pipe, not alternation -- and \z rejects a trailing newline.
opendir my $dh, $dir
	or die "Cannot open $dir: $!\n";
my @files = grep { -f $_ && m{/\d{9}[\dx]\.pdf\z}i }
		map {"$dir/$_"}
			readdir $dh;
closedir $dh;

# Build one BibTeX @book entry per ISBN-named PDF and write it to $bib.
# Files whose ISBN cannot be extracted or looked up are skipped.
foreach my $file (@files) {

	# extract isbn from file name: nine digits plus a digit or X check
	# character; skip the file rather than querying a dummy ISBN
	my ($isbn) = $file =~ /(\d{9}[\dx])\.pdf\z/i
		or next;

	# check database for isbn number, move on to the next file if not found
	my $book = WebService::ISBNDB::API::Books->find( { api_key => $api_key, isbn => $isbn } );
	next unless $book;

	# set new bibtex entry
	my $entry = Text::BibTeX::Entry->new;
	$entry->set_metatype(BTE_REGULAR);
	$entry->set_type('book');
	$entry->set_key($isbn);

	# set title field, preferring the long title when there is one
	my $title = $book->get_longtitle || $book->get_title;
	$entry->set( 'title', $title ) if defined $title && length $title;

	# set author or editor field
	my $authors = $book->get_authors_text;
	$authors = '' unless defined $authors;
	# some clean-up: drop a leading "by" and a trailing comma, then turn
	# comma/semicolon separators into BibTeX's " and " name separator
	$authors =~ s/^by //;
	$authors =~ s/,$//;
	$authors =~ s/,\s+/ and /g;
	$authors =~ s/;\s+/ and /g;
	# authors or editors ?
	if ( $authors =~ /^\s*\[?edited by\s+\]?(.*)$/i ) {
		# "X with Y" lists a co-editor; match whole words only so that
		# names containing "with" are left intact
		(my $editors = $1) =~ s/\bwith\b/and/g;
		$entry->set('editor', $editors);
	}
	elsif ( $authors =~ /\(Editor\)/i ) {
		# names tagged "(Editor)": strip the tag and record them as editors
		$authors =~ s/\s*\(Editor\)//gi;
		$entry->set('editor', $authors);
	}
	elsif ( $authors =~ /\S/ ) {
		$entry->set('author', $authors);
	}

	# parse publisher and edition fields for publisher and year data
	my $publisher = $book->get_publisher_text;
	$publisher = '' unless defined $publisher;
	if ( my ($name, $year) = $publisher =~ m/^(.*?),\s+c?(\d{4}).?$/ ) {
		# publisher text of the form "Name, c2005." carries the year itself
		$entry->set( 'publisher', $name );
		$entry->set( 'year', $year );
	}
	else {
		$entry->set( 'publisher', $publisher ) if length $publisher;
		# otherwise fall back to the first four-digit run in the edition info
		my $edition = $book->get_edition_info;
		if ( defined $edition && $edition =~ m/(\d{4})/ ) {
			$entry->set( 'year', $1 );
		}
	}

	# miscellaneous fields, only written when the record provides them
	my $notes = $book->get_notes;
	$entry->set( 'notes', $notes ) if $notes ;
	my $abstract = $book->get_summary;
	$entry->set( 'abstract', $abstract ) if $abstract ;

	# path of the PDF this entry was generated from
	$entry->set( 'local-url', $file);

	$entry->write($bib);

	# sleep 2;
}

__END__


=head1 NAME

isbn2bibtex.pl - Convert ISBN file names to BibTeX records 

=head1 SYNOPSIS

isbn2bibtex.pl [-? | --help] | [directory] [outfile.bib]

=head1 DESCRIPTION

Scans a directory for PDF files whose names are ISBN-10 identifiers,
fetches the corresponding book's data from isbndb.com, parses data
fields to get rid of inconsistencies, and finally, outputs a bibtex 
file with all fields set accordingly.

	-?             print usage
	-h --help      verbose help message
	
If no directory is given, scans the current directory. Outputs result
to STDOUT, unless a second argument is given.

An API key is required to access isbndb.com services. You can either 
paste it in the source code or set the environment variable ISBNDB_KEY.

=head1 LICENSE

Free to use and modify, same terms as Perl itself.

=head1 AUTHOR

i-blis, I<i-blis yandex ru>. 

=cut

Initial URL

                                

Initial Description

                                

Initial Title
Convert ISBN file names to BibTeX records

Initial Tags
web

Initial Language
Perl