Posted By

jerryvig on 09/03/11


Tagged

java Firefox scrape selenium


Versions (?)

Scrape Contact Information from IECA with Java Selenium RC


 / Published in: Java
 

This is a Java program that scrapes contact information from IECA using Java Selenium RC.

http://www.iecaonline.com/cfm_PublicSearch/pg_PublicSearch.cfm?mode=entry

  1. import com.thoughtworks.selenium.DefaultSelenium;
  2. import java.io.IOException;
  3. import java.io.BufferedWriter;
  4. import java.io.FileWriter;
  5.  
  6. public class IECA {
  7. public static void main( String[] args ) {
  8. try {
  9.  
  10. BufferedWriter myWriter = new BufferedWriter( new FileWriter( "./IECA.csv" ) );
  11.  
  12. DefaultSelenium selena = new DefaultSelenium( "localhost",4444,"*firefox","http://www.iecaonline.com/cfm_PublicSearch/pg_PublicSearch.cfm?mode=execute");
  13. selena.start();
  14.  
  15. for ( int i=0; i<1000; i++ ) {
  16. selena.open( "http://www.iecaonline.com/cfm_PublicSearch/wnd_MemberDataWindow.cfm?MemberID=" + Integer.toString(i) );
  17. String bodyTxt = selena.getBodyText();
  18. String[] lines = bodyTxt.split("\n");
  19. try {
  20. String bPhone = "";
  21. String fax = "";
  22. String web = "";
  23. String email = "";
  24. String addy = "";
  25. String name = lines[1].trim();
  26. int bPhoneLine = 0;
  27. for ( int j=0; j<lines.length; j++ ) {
  28. if ( lines[j].trim().startsWith("Business Phone:") ) {
  29. bPhone = lines[j].split(":")[1].trim();
  30. bPhoneLine = j;
  31. }
  32. if ( lines[j].trim().startsWith("Fax:") ) {
  33. fax = lines[j].split(":")[1].trim();
  34. }
  35. if ( lines[j].trim().startsWith("Email:") ) {
  36. email = lines[j].split(":")[1].trim();
  37. }
  38. if ( lines[j].trim().startsWith("Web:") ) {
  39. web = lines[j].split(":")[1].trim();
  40. }
  41. }
  42.  
  43. for ( int j=2; j<bPhoneLine; j++ ) {
  44. addy += lines[j] + "\n";
  45. }
  46.  
  47. myWriter.write( "\"" + name + "\",\"" + addy + "\",\"" + bPhone + "\",\"" + fax + "\",\"" + email + "\",\"" + web + "\"\n" );
  48.  
  49. // myWriter.write( lines[0] + "\n" );
  50.  
  51. }
  52.  
  53. myWriter.close();
  54.  
  55. }
  56. catch ( Exception e ) { e.printStackTrace(); }
  57. }
  58. }

Report this snippet  

You need to login to post a comment.