Fetch RIFIAS on-line archive


/ Published in: Perl
Save to your folder(s)

Retrieves the GIF page images of a document from the RIFIAS on-line archive and combines them into a single PDF file.


Copy this code and paste it in your HTML
  #!/usr/bin/perl
  #
  # Fetch a document from the RIFIAS on-line archive.
  #
  # Usage: <this script> CODE
  #
  # Steps:
  #   1. Load the archive's navigation page for object "r<CODE>" and read the
  #      page count from the "... of N" text that follows an <input> tag.
  #   2. Download every page image (numbered 1..N) into the current directory
  #      as 001.gif, 002.gif, ...
  #   3. Combine the downloaded images into <CODE>.pdf with Image::Magick.
  #
  # Pages that cannot be downloaded are reported on STDERR and left out of the
  # PDF; the script dies if the page count cannot be determined or if no page
  # at all could be downloaded.
  #
  use strict;
  use warnings;
  use LWP::Simple;
  use LWP::UserAgent;
  use Image::Magick;

  # Pause between two consecutive downloads, in seconds.
  # NOTE(review): presumably there to avoid hammering the server - confirm.
  my $DELAY_SECONDS = 2;

  die "Usage: $0 <document code>\n" if !defined( $ARGV[0] );

  my $code = shift;

  # LWP::Simple::get() returns undef on failure, so check before matching.
  my $nav_url = "http://webapp1.dlib.indiana.edu:8080/metsnav/rifias/navigate.do?oid=r$code";
  my $page    = get($nav_url);
  die "Could not retrieve $nav_url\n" if !defined $page;

  # Require at least one digit: an empty capture would otherwise be treated
  # as a (silent) zero-page document.
  my ($pagenb) = $page =~ m/<input type.*> of (\d+)/;
  die "No pages found for $code\n" if !defined $pagenb || $pagenb < 1;

  print "retrieving $pagenb page images\n";
  # Debugging aid: uncomment to fetch only the first two pages.
  #$pagenb = 2;

  my $directory = '';    # prefix for the downloaded files ('' = current directory)
  my @filelist;          # images that were actually downloaded, in page order

  # One user agent is enough for all requests.
  my $user_agent = LWP::UserAgent->new;

  for my $index ( 1 .. $pagenb ) {
      my $download = sprintf(
          "http://purl.dlib.indiana.edu/iudl/rifias/page/r%s-%08d-full.gif",
          $code, $index
      );
      my $file = $directory . sprintf( "%03d.gif", $index );

      # ':content_file' makes LWP store the response body directly in $file.
      my $response = $user_agent->get( $download, ':content_file' => $file );

      # LWP reports an exception raised while handling the body (for example
      # a file that cannot be written) through the X-Died response header.
      my $died = $response->header('X-Died');
      if ( $response->is_success && !defined $died ) {
          push @filelist, $file;
      }
      else {
          my $reason = defined $died ? $died : $response->status_line;
          warn "Skipping page $index ($download): $reason\n";
      }

      # No need to wait once the last page has been requested.
      sleep $DELAY_SECONDS if $index < $pagenb;
  }

  die "No page images could be downloaded for $code\n" if !@filelist;

  print "preparing $code.pdf out of @filelist\n";

  my $magick = Image::Magick->new( format => "pdf" );

  # PerlMagick idiom: the returned status stringifies to an error message,
  # which is empty when the call succeeded.
  my $status = $magick->Read(@filelist);
  warn "Read failed: $status" if "$status";

  $status = $magick->Write("pdf:$code.pdf");
  warn "Write failed: $status" if "$status";

  # Alternative using the ImageMagick command line tool:
  #system("convert *.gif $code.pdf");

Report this snippet


Comments

RSS Icon Subscribe to comments

You need to login to post a comment.