Posted By

jerryvig on 09/29/11


Tagged

java auctions gov


Versions (?)

Gov Auctions 15


 / Published in: Java
 

Version 15 of the government auction scraping program. This code scrapes 17 different government auction websites and aggregates the data into an SQL database.

  1. package com.mktneutral;
  2.  
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLEncoder;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.regex.Pattern;
import org.htmlparser.Node;
import org.htmlparser.Parser;
import org.htmlparser.util.NodeList;
import org.htmlparser.util.ParserException;
  26.  
  27. public class GovAuctions {
  28. private static String searchString;
  29. private static String responseData;
  30. private static Connection hsql;
  31. private static Statement stmt;
  32. private static String searchState;
  33. private static HashMap<String,String> stateMap;
  34.  
  35. public static void main( String[] args ) {
  36. String resp = runScrapers("ford",15,"ALL");
  37. System.out.println( resp );
  38. }
  39.  
  40. public static String runScrapers( String _searchString, int searchEngineInt, String _searchState ) {
  41. try {
  42. Class.forName("org.hsqldb.jdbcDriver");
  43. } catch ( ClassNotFoundException cnfe ) { cnfe.printStackTrace(); }
  44.  
  45. String[] stateNameList = { "Alabama", "Alaska", "Arizona", "Arkansas", "California", "Colorado", "Connecticut", "Delaware", "District of Columbia", "Florida", "Georgia", "Hawaii", "Idaho", "Illinois", "Indiana", "Iowa", "Kansas", "Kentucky", "Louisiana", "Maine", "Maryland", "Massachusetts", "Michigan", "Minnesota", "Mississippi", "Missouri", "Montana", "Nebraska", "Nevada", "New Hampshire", "New Jersey", "New Mexico", "New York", "North Carolina", "North Dakota", "Ohio", "Oklahoma", "Oregon", "Pennsylvania", "Rhode Island", "South Carolina", "South Dakota", "Tennessee", "Texas", "Utah", "Vermont", "Virginia", "Washington", "West Virginia", "Wisconsin", "Wyoming", "American Samoa", "Guam", "Northern Mariana Islands", "Puerto Rico" };
  46. String[] stateAbbrevList = { "AL", "AK", "AZ", "AR", "CA", "CO", "CT", "DE", "DC", "FL", "GA", "HI", "ID", "IL", "IN", "IA", "KS", "KY", "LA", "ME", "MD", "MA", "MI", "MN", "MS", "MO", "MT", "NE", "NV", "NH", "NJ", "NM", "NY", "NC", "ND", "OH", "OK", "OR", "PA", "RI", "SC", "SD", "TN", "TX", "UT", "VT", "VA", "WA", "WV", "WI", "WY", "AS", "GU", "MP", "PR" };
  47.  
  48. stateMap = new HashMap<String,String>();
  49. for ( int i=0; i<stateNameList.length; i++ ) {
  50. stateMap.put( stateAbbrevList[i], stateNameList[i] );
  51. }
  52.  
  53. hsql = null;
  54. try {
  55. hsql = DriverManager.getConnection( "jdbc:hsqldb:file:govauctionsdb", "SA", "" );
  56. }
  57. catch ( SQLException sqle ) { sqle.printStackTrace(); }
  58.  
  59. try {
  60. stmt = hsql.createStatement();
  61. stmt.executeUpdate( "DROP TABLE IF EXISTS auction_records" );
  62. stmt.executeUpdate( "CREATE TABLE auction_records ( website VARCHAR(64), url VARCHAR(1024), results_content VARCHAR(1536) )" );
  63. } catch ( SQLException sqle ) { sqle.printStackTrace(); }
  64.  
  65. searchState = _searchState;
  66. searchString = _searchString;
  67.  
  68. responseData = "";
  69.  
  70. if ( searchEngineInt == 0 ) {
  71. GovLiquidation();
  72. }
  73. if ( searchEngineInt == 1 ) {
  74. GSAAuctions();
  75. }
  76. if ( searchEngineInt == 2 ) {
  77. IRSAuctions();
  78. }
  79. if ( searchEngineInt == 3 ) {
  80. BidCorp();
  81. }
  82. if ( searchEngineInt == 4 ) {
  83. bid4Assets();
  84. }
  85. if ( searchEngineInt == 5 ) {
  86. BankruptcySales();
  87. }
  88. if ( searchEngineInt == 6 ) {
  89. PublicSurplus();
  90. }
  91. if ( searchEngineInt == 7 ) {
  92. GovernmentAuction();
  93. }
  94. if ( searchEngineInt == 8 ) {
  95. HomeSteps();
  96. }
  97. if ( searchEngineInt == 9 ) {
  98. HomePath();
  99. }
  100. if ( searchEngineInt == 10 ) {
  101. HomeSalesGov();
  102. }
  103. if ( searchEngineInt == 16 ) {
  104. HudHomeStore();
  105. }
  106. if ( searchEngineInt == 11 ) {
  107. GSAGov();
  108. }
  109. if ( searchEngineInt == 12 ) {
  110. GovSales();
  111. }
  112. if ( searchEngineInt == 13 ) {
  113. LonestarOnline();
  114. }
  115. if ( searchEngineInt == 14 ) {
  116. GovDeals();
  117. }
  118. if ( searchEngineInt == 15 ) {
  119. IllinoisIbid();
  120. }
  121.  
  122. try {
  123. hsql.close();
  124. }
  125. catch ( SQLException sqle ) { sqle.printStackTrace(); }
  126.  
  127. return responseData;
  128. }
  129.  
  130. public static void GovLiquidation() {
  131. String location = "";
  132. String ntt = "";
  133. String ntk = "";
  134.  
  135. if ( !searchState.equals("ALL") ) {
  136. ntt = "US_" + searchState + "|" + searchString;
  137. location = "&location=US_" + searchState;
  138. ntk = "P_Auction_Country_State|P_Lot_Title";
  139. }
  140. else {
  141. ntt = searchString;
  142. location = "";
  143. ntk = "P_Lot_Title";
  144. }
  145.  
  146. String message = "Ntt=" + ntt + "&Ntk=" + ntk + "&Ntx=mode+matchall|mode+matchall&N=0&Nty=1&Ns=P_Lot_Number|0" + location + "&words=" + searchString + "&cmd=keyword";
  147.  
  148. try {
  149. URL theUrl = new URL( "http://www.govliquidation.com/auction/endecaSearch?" + message );
  150. String resp = "";
  151. BufferedReader reader = new BufferedReader(new InputStreamReader(theUrl.openStream()));
  152. String line;
  153. while ((line = reader.readLine()) != null) {
  154. resp += line;
  155. }
  156. reader.close();
  157.  
  158. int startIdx = resp.indexOf("<TABLE CELLSPACING=\"0\" CELLPADDING=\"0\" border=\"0\" WIDTH=\"748\" ALIGN=\"CENTER\" id=\"lots_table\">");
  159. int endIdx = resp.indexOf("<!-- Load the Brightcove JavaScript API -->");
  160.  
  161. Parser htmlParser = new Parser();
  162. if ( endIdx > 0 && startIdx > 0 ) {
  163. try {
  164. htmlParser.setInputHTML( resp.substring( startIdx, endIdx ) );
  165. NodeList nList = htmlParser.parse(null);
  166. if ( nList.size() > 0 ) {
  167. NodeList trList = nList.elementAt(0).getChildren();
  168. for ( int i=0; i<trList.size(); i++ ) {
  169. if ( trList.elementAt(i).getText().contains("resultsBackground1") || trList.elementAt(i).getText().contains("resultsBackground2") ) {
  170. NodeList tdList = trList.elementAt(i).getChildren();
  171. for ( int j=0; j<tdList.size(); j++ ) {
  172. if ( (tdList.elementAt(j).getText()).trim().equals("TD VALIGN=\"top\" class=\"annotationText1\"") ) {
  173. String resultsContent = (tdList.elementAt(j).toPlainTextString()).trim();
  174. resultsContent = resultsContent.replaceAll("\"", "'" );
  175. resultsContent = resultsContent.replaceAll("\n", "");
  176. resultsContent = resultsContent.replaceAll("\t", "");
  177.  
  178. NodeList aList = tdList.elementAt(j).getChildren();
  179. for ( int k=0; k<aList.size(); k++ ) {
  180. if ( aList.elementAt(k).getText().contains("A HREF=\"http://www.govliquidation.com/auction/view") ) {
  181. String anchorHref = aList.elementAt(k).getText().trim();
  182. String[] pieces = anchorHref.split("\"");
  183. anchorHref = pieces[1].trim();
  184.  
  185. try {
  186. System.out.println( "INSERT INTO auction_records ( website, url, results_content ) VALUES ( 'http://www.govliquidation.com/', '" + anchorHref + "', '" + resultsContent + "' )" );
  187.  
  188. stmt.executeUpdate( "INSERT INTO auction_records ( website, url, results_content ) VALUES ( 'http://www.govliquidation.com/', '" + anchorHref + "', '" + resultsContent + "' )" );
  189. } catch ( SQLException sqle ) { sqle.printStackTrace(); }
  190. }
  191. }
  192. }
  193. }
  194. }
  195. }
  196. }
  197. } catch ( ParserException pe ) { pe.printStackTrace(); }
  198. }
  199. } catch ( MalformedURLException me ) { me.printStackTrace(); }
  200. catch ( IOException ioe ) { ioe.printStackTrace(); }
  201. }
  202.  
  203. public static void GSAAuctions() {
  204. if ( searchState.equals("ALL") ) {
  205. try {
  206. String message = "Continue=" + URLEncoder.encode("Continue");
  207.  
  208. URL theUrl = new URL("http://gsaauctions.gov/gsaauctions/gsaauctions/");
  209. HttpURLConnection conn = (HttpURLConnection) theUrl.openConnection();
  210. conn.setDoOutput(true);
  211. conn.setRequestMethod("POST");
  212. conn.setRequestProperty("Content-Type","application/x-www-form-urlencoded");
  213. conn.setRequestProperty("User-Agent","Mozilla/5.0");
  214. OutputStream os = conn.getOutputStream();
  215. os.write(message.getBytes("UTF-8"));
  216. os.close();
  217.  
  218. BufferedReader myReader = new BufferedReader(new InputStreamReader(conn.getInputStream()));
  219. String line;
  220. String resp = "";
  221. while ((line = myReader.readLine()) != null) {
  222. resp += line;
  223. }
  224. myReader.close();
  225.  
  226. int scIdx = resp.indexOf("name=\"scParam\" VALUE=\"");
  227. if ( scIdx > 0 ) {
  228. String[] pieces = resp.substring( scIdx+10, scIdx+80 ).split("VALUE=\"");
  229. if ( pieces.length > 1 ) {
  230. String[] piecesII = pieces[1].split("\"");
  231. String scParam = piecesII[0].trim();
  232.  
  233. int webPcmIdx = resp.indexOf("name=\"WEBPCMTRANSID\" VALUE=\"");
  234. if ( webPcmIdx > 1 ) {
  235. pieces = resp.substring( webPcmIdx+10, webPcmIdx+80 ).split("VALUE=\"");
  236. if ( pieces.length > 0 ) {
  237. piecesII = pieces[1].split("\"");
  238. String webPcmParam = piecesII[0].trim();
  239.  
  240. String msg2 = "scParam=" + URLEncoder.encode(scParam) + "&WEBPCMTRANSID=" + URLEncoder.encode( webPcmParam ) + "&sc=&scAction=" + URLEncoder.encode("AUCALSRH") + "&scRegn=&scSalNo=&scLotNo=&scQuery=&query=&order=&af2eq=&scCatCode=&scStCode=&scPassUpd=&scSrchDesc=" + URLEncoder.encode( searchString ) + "&scSrchDescCtxt=&catMenuNew=";
  241.  
  242. URL url2 = new URL("http://gsaauctions.gov/gsaauctions/aucindx");
  243. HttpURLConnection con2 = (HttpURLConnection) url2.openConnection();
  244. con2.setDoOutput(true);
  245. con2.setRequestMethod("POST");
  246. con2.setRequestProperty("Content-Type","application/x-www-form-urlencoded");
  247. con2.setRequestProperty("User-Agent","Mozilla/5.0");
  248.  
  249. OutputStream writer = con2.getOutputStream();
  250. writer.write(msg2.getBytes("UTF-8"));
  251. writer.close();
  252.  
  253. BufferedReader leer = new BufferedReader(new InputStreamReader(con2.getInputStream()));
  254. line = "";
  255. resp = "";
  256. while ((line = leer.readLine()) != null) {
  257. resp += line;
  258. }
  259. leer.close();
  260.  
  261. int startIdx = resp.indexOf("var tab=[");
  262. int endIdx = resp.indexOf("bldTr();");
  263.  
  264. pieces = resp.substring( startIdx, endIdx ).split(Pattern.quote("["));
  265. if ( pieces.length > 1 ) {
  266. piecesII = pieces[1].split(Pattern.quote("]"));
  267. pieces = piecesII[0].split("\",\"");
  268.  
  269. String resultsContent = "";
  270. String anchorHref = "";
  271. if ( pieces.length > 6 ) {
  272. for ( int j=0; j<pieces.length; j+=7 ) {
  273. resultsContent = pieces[j].replace("\"","") + " " + pieces[j+1] + " " + pieces[j+2] + " " + pieces[j+3] + " " + pieces[j+4] + " " + pieces[j+5] + " " + pieces[j+6].replace("\",","");
  274. anchorHref = "javascript:itemSel('" + pieces[j+1].substring(0,11) + "','" + pieces[j+1].substring(11) + "');";
  275. try {
  276. stmt.executeUpdate( "INSERT INTO auction_records ( website, url, results_content ) VALUES ( 'http://gsaauctions.gov/', '" + anchorHref + "', '" + resultsContent + "' )" );
  277. } catch ( SQLException sqle ) { sqle.printStackTrace(); }
  278. }
  279. }
  280. }
  281. }
  282. }
  283. }
  284. }
  285.  
  286. } catch ( MalformedURLException me ) { me.printStackTrace(); }
  287. catch ( IOException ioe ) { ioe.printStackTrace(); }
  288. }
  289. }
  290.  
  291. public static void IRSAuctions() {
  292. if ( searchState.equals("ALL") ) {
  293. try {
  294. URL theUrl = new URL("http://search.treas.gov/search?q=" + URLEncoder.encode(searchString) + "&output=xml_no_dtd&sort=date%3AD%3AL%3Ad1&ie=UTF-8&client=auctions&oe=UTF-8&proxystylesheet=auctions&site=Auctions");
  295. String resp = "";
  296. BufferedReader reader = new BufferedReader(new InputStreamReader(theUrl.openStream()));
  297. String line;
  298. while ((line = reader.readLine()) != null) {
  299. resp += line;
  300. }
  301. reader.close();
  302.  
  303. int startIdx = resp.indexOf("<p><font size=\"-2\">");
  304. int endIdx = resp.indexOf("In order to show you");
  305.  
  306. if ( startIdx > 0 && endIdx > startIdx ) {
  307. Parser htmlParser = new Parser();
  308. try {
  309. htmlParser.setInputHTML( resp.substring(startIdx,endIdx) );
  310. NodeList nList = htmlParser.parse(null);
  311.  
  312. for ( int i=0; i<nList.size(); i++ ) {
  313. if ( nList.elementAt(i).getText().startsWith("p") ) {
  314. NodeList pChildList = nList.elementAt(i).getChildren();
  315. for ( int j=0; j<pChildList.size(); j++ ) {
  316. String anchorHref = "";
  317. if ( pChildList.elementAt(j).getText().startsWith("a href=\"http://www.treasury") ) {
  318. String[] pieces = pChildList.elementAt(j).getText().split("\"");
  319. anchorHref = pieces[1].trim();
  320. }
  321. if ( pChildList.elementAt(j).getText().startsWith("span class=\"s\"") ) {
  322. String resultsContent = pChildList.elementAt(j).toPlainTextString().trim();
  323. resultsContent = resultsContent.replaceAll("\"", "");
  324. resultsContent = resultsContent.replaceAll("\n", "");
  325. try {
  326. stmt.executeUpdate( "INSERT INTO auction_records VALUES ( 'http://search.treas.gov/', '" + anchorHref + "', '" + resultsContent + "' )" );
  327. } catch ( SQLException sqle ) { sqle.printStackTrace(); }
  328. }
  329. }
  330. }
  331. }
  332. } catch ( ParserException pe ) { pe.printStackTrace(); }
  333. }
  334. } catch ( IOException ioe ) { ioe.printStackTrace(); }
  335.  
  336. }
  337. }
  338.  
  339. public static void BidCorp() {
  340. if ( searchState.equals("ALL" ) ) {
  341. try {
  342. URL theUrl = new URL("http://www.bidcorp.com/AdvancedSearch.aspx");
  343. String resp = "";
  344. BufferedReader reader = new BufferedReader(new InputStreamReader(theUrl.openStream()));
  345. String line;
  346. while ((line = reader.readLine()) != null) {
  347. resp += line;
  348. }
  349. reader.close();
  350.  
  351. int vsIdx = resp.indexOf("id=\"__VIEWSTATE\"");
  352. int endIdx = resp.indexOf("=\" />");
  353. String viewState = resp.substring(vsIdx,endIdx+1).split("value=\"")[1];
  354. vsIdx = resp.indexOf("id=\"__EVENTVALIDATION\"");
  355. endIdx = resp.indexOf("<table class=\"mainTable\"");
  356. String[] pieces = resp.substring(vsIdx,endIdx).split("value=\"");
  357. String evtValidation = pieces[1].split("\"")[0].trim();
  358.  
  359. String message = "__EVENTTARGET=&__EVENTARGUMENT=&__VIEWSTATE=" + URLEncoder.encode(viewState) + "&__EVENTVALIDATION=" + URLEncoder.encode(evtValidation) + "&ctl00$ContentPlaceHolder1$txtKeywords=" + URLEncoder.encode( searchString ) + "&ctl00$ContentPlaceHolder1$rdoKeywordStatus=" + URLEncoder.encode("3") + "&ctl00$ContentPlaceHolder1$btnSubmitKeyword=" + URLEncoder.encode("Submit") + "&ctl00$ContentPlaceHolder1$txtLotNumber=";
  360.  
  361. URL url2 = new URL("http://www.bidcorp.com/AdvancedSearch.aspx");
  362. HttpURLConnection conn = (HttpURLConnection) url2.openConnection();
  363. conn.setDoOutput(true);
  364. conn.setRequestMethod("POST");
  365. conn.setRequestProperty("Content-Type","application/x-www-form-urlencoded");
  366. conn.setRequestProperty("User-Agent","Mozilla/5.0");
  367.  
  368. OutputStream writer = conn.getOutputStream();
  369. writer.write(message.getBytes("UTF-8"));
  370. writer.close();
  371.  
  372. BufferedReader myReader = new BufferedReader(new InputStreamReader(conn.getInputStream()));
  373. line = "";
  374. resp = "";
  375. while ((line = myReader.readLine()) != null) {
  376. resp += line;
  377. }
  378. myReader.close();
  379.  
  380. int startIdx = resp.indexOf("<table cellspacing=\"0\" cellpadding=\"4\"");
  381. endIdx = resp.indexOf("<div style=\"margin: 5px; ");
  382.  
  383. if ( startIdx > 0 && endIdx > startIdx ) {
  384. Parser htmlParser = new Parser();
  385. try {
  386. htmlParser.setInputHTML( resp.substring(startIdx,endIdx) );
  387. NodeList nList = htmlParser.parse(null);
  388. if ( nList.size() > 0 ) {
  389. NodeList trList = nList.elementAt(0).getChildren();
  390. for ( int i=0; i<trList.size(); i++ ) {
  391. if ( trList.elementAt(i).getText().contains("bgcolor=\"White\"") || trList.elementAt(i).getText().contains("bgcolor=\"#CCCCCC\"") ) {
  392. NodeList tdList = trList.elementAt(i).getChildren();
  393. if ( tdList.size() > 2 ) {
  394. String resultsContent = (tdList.elementAt(2).toPlainTextString()).trim();
  395. resultsContent = resultsContent.replaceAll("\"", "");
  396. resultsContent = resultsContent.replaceAll("\n", "");
  397. resultsContent = resultsContent.replaceAll("\t", "");
  398.  
  399. String[] piecesII = (tdList.elementAt(2).getChildren().elementAt(1).getText()).split("'");
  400. String anchorHref = "http://www.bidcorp.com/" + piecesII[1].trim();
  401. try {
  402.  
  403. stmt.executeUpdate( "INSERT INTO auction_records VALUES ( 'http://www.bidcorp.com/', '" + anchorHref + "', '" + resultsContent + "' )" );
  404.  
  405. } catch ( SQLException sqle ) { sqle.printStackTrace(); }
  406.  
  407. }
  408. }
  409. }
  410. }
  411. } catch ( ParserException pe ) { pe.printStackTrace(); }
  412. }
  413. } catch ( IOException ioe ) { ioe.printStackTrace(); }
  414. }
  415. }
  416.  
  417. public static void bid4Assets() {
  418. try {
  419. String stateSearch = "";
  420. if ( searchState.equals("ALL") ) {
  421. stateSearch = "all"; }
  422. else { stateSearch = searchState; }
  423.  
  424. String message = "fuseaction=" + URLEncoder.encode("search") + "&type=" + URLEncoder.encode( "powerSearch" ) + "&criteria=" + URLEncoder.encode( searchString ) + "&keywordType=" + URLEncoder.encode( "allWords" ) + "&channel=" + URLEncoder.encode("all") + "&cat2=" + URLEncoder.encode("all") + "&cat3=" + URLEncoder.encode("all") + "&LocationChoice=" + URLEncoder.encode("1") + "&locatedState=" + URLEncoder.encode( stateSearch ) + "&ZIp=&ZipRadius=" + URLEncoder.encode("1") + "&assetstatus=" + URLEncoder.encode("Live") + "&DateHistory=" + URLEncoder.encode("6") + "&sort=" + URLEncoder.encode("bidCloseTime");
  425.  
  426. URL theUrl = new URL( "http://www.bid4assets.com/search/index.cfm" );
  427. HttpURLConnection connection = (HttpURLConnection) theUrl.openConnection();
  428. connection.setDoOutput( true );
  429. connection.setRequestMethod( "POST" );
  430. connection.setRequestProperty( "Content-Type", "application/x-www-form-urlencoded" );
  431.  
  432. OutputStream writer = connection.getOutputStream();
  433. writer.write( message.getBytes("UTF-8") );
  434. writer.close();
  435.  
  436. BufferedReader reader = new BufferedReader(new InputStreamReader(connection.getInputStream()));
  437. String line;
  438. String responseContent = "";
  439.  
  440. while ((line = reader.readLine()) != null) {
  441. responseContent += line;
  442. }
  443. reader.close();
  444.  
  445. int startIdx = responseContent.indexOf( "<table width=\"100%\" border=\"0\" cellspacing=\"0\" cellpadding=\"3\">" );
  446. int endIdx = responseContent.indexOf( "<!-- google_ad_section_end" );
  447.  
  448. if ( startIdx<0 || endIdx<0 ) return;
  449.  
  450. Parser htmlParser = new Parser();
  451. try {
  452. // System.out.println( startIdx + " " + endIdx );
  453. if ( startIdx > 0 && endIdx > 0 ) {
  454.  
  455. htmlParser.setInputHTML( responseContent.substring( startIdx, endIdx ) );
  456. NodeList nList = htmlParser.parse( null );
  457.  
  458. if ( nList.size() > 4 ) {
  459.  
  460. NodeList trList = nList.elementAt(4).getChildren();
  461.  
  462. for ( int i=0; i<trList.size(); i++ ) {
  463. if ( !trList.elementAt(i).toPlainTextString().trim().equals("") ) {
  464. String resultsContent = trList.elementAt(i).toPlainTextString().trim();
  465. resultsContent = resultsContent.replaceAll("\"", "");
  466. resultsContent = resultsContent.replaceAll("\n", " ");
  467. resultsContent = resultsContent.replaceAll("\t", " ");
  468. resultsContent = resultsContent.replace("\\", "");
  469. resultsContent = resultsContent.replaceAll("'", "");
  470.  
  471. NodeList tdList = trList.elementAt(i).getChildren();
  472. try {
  473. for ( int j=0; j<tdList.size(); j++ ) {
  474. if ( tdList.elementAt(j).getText().contains("mediumArial02") ) {
  475. NodeList aList = tdList.elementAt(j).getChildren();
  476. for ( int k=0; k<aList.size(); k++ ) {
  477. if ( aList.elementAt(k).getText().contains("href") ) {
  478. String href = aList.elementAt(k).getText().trim();
  479.  
  480. String anchorHref = href.split("=\"")[1];
  481. anchorHref = "http://www.bid4assets.com/" + anchorHref.substring(0,anchorHref.length()-1);
  482. //System.out.println( "6" );
  483. try {
  484. stmt.executeUpdate( "INSERT INTO auction_records VALUES ( 'http://www.bid4assets.com/', '" + anchorHref + "', '" + resultsContent + "' )" );
  485. } catch ( SQLException sqle ) { sqle.printStackTrace(); }
  486.  
  487. }
  488. }
  489. }
  490. }
  491. } catch ( NullPointerException npe ) { npe.printStackTrace(); }
  492. }
  493. }
  494. }
  495. }
  496. } catch ( ParserException pe ) { pe.printStackTrace(); }
  497. } catch ( IOException ioe ) { ioe.printStackTrace(); }
  498. }
  499.  
  500. public static void BankruptcySales() {
  501. if ( searchState.equals("ALL" ) ) {
  502. try {
  503. String msg = "keyWord=" + URLEncoder.encode( searchString );
  504.  
  505. URL theUrl = new URL("http://www.bankruptcysales.com/assets_search2.cfm");
  506. HttpURLConnection conn = (HttpURLConnection) theUrl.openConnection();
  507. conn.setDoOutput(true);
  508. conn.setRequestMethod("POST");
  509. conn.setRequestProperty("Content-Type","application/x-www-form-urlencoded");
  510. conn.setRequestProperty("User-Agent","Mozilla/5.0");
  511. OutputStream os = conn.getOutputStream();
  512. os.write(msg.getBytes("UTF-8"));
  513. os.close();
  514.  
  515. BufferedReader lector = new BufferedReader(new InputStreamReader(conn.getInputStream()));
  516. String line;
  517. String resp = "";
  518. while ((line = lector.readLine())!= null) {
  519. resp = resp.concat(line);
  520. }
  521. lector.close();
  522.  
  523. int startIdx = resp.indexOf("<!-- InstanceBeginEditable name=\"Body\" -->");
  524. int endIdx = resp.indexOf("CLASS=\"bottomLinks\"");
  525.  
  526. Parser htmlParser = new Parser();
  527. try {
  528. htmlParser.setInputHTML( resp.substring(startIdx,endIdx) );
  529. NodeList nList = htmlParser.parse(null);
  530.  
  531. for ( int i=0; i<nList.size(); i++ ) {
  532. if ( nList.elementAt(i).getText().startsWith("TABLE WIDTH=\"95%\"") ) {
  533. NodeList nextChildren = nList.elementAt(i).getChildren();
  534. for ( int j=0; j<nextChildren.size(); j++ ) {
  535. if ( nextChildren.elementAt(j).getText().equals("TR") ) {
  536. NodeList nxtChildren = nextChildren.elementAt(j).getChildren();
  537. if ( nxtChildren.size() > 0 ) {
  538. NodeList pChildren = nxtChildren.elementAt(1).getChildren();
  539. for ( int k=0; k<pChildren.size(); k++ ) {
  540. if ( pChildren.elementAt(k).getText().startsWith("TABLE WIDTH=\"95%\"") ) {
  541. NodeList trList = pChildren.elementAt(k).getChildren();
  542. for ( int l=2; l<trList.size(); l++ ) {
  543. if ( !trList.elementAt(l).toPlainTextString().trim().equals("") ) {
  544. String resultsContent = trList.elementAt(l).toPlainTextString().trim();
  545. resultsContent = resultsContent.replaceAll("\t","");
  546. resultsContent = resultsContent.replaceAll("\n","");
  547. while ( resultsContent.contains(" ") ) {
  548. resultsContent = resultsContent.replaceAll(" ","");
  549. }
  550.  
  551. String[] hrefCandidates = trList.elementAt(l).toHtml().split("HREF=\"");
  552. if ( hrefCandidates.length > 0 ) {
  553. String anchorHref = hrefCandidates[1].split("\"")[0];
  554. anchorHref = "http://www.bankruptcysales.com/" + anchorHref;
  555. try {
  556. //System.out.println( "INSERT INTO auction_records VALUES ( 'http://www.bankruptcysales.com/', '" + anchorHref + "', '" + resultsContent + "' )" );
  557. stmt.executeUpdate( "INSERT INTO auction_records VALUES ( 'http://www.bankruptcysales.com/', '" + anchorHref + "', '" + resultsContent + "' )" );
  558. } catch ( SQLException sqle ) { sqle.printStackTrace(); }
  559. }
  560.  
  561. }
  562. }
  563. }
  564. }
  565. }
  566. }
  567. }
  568. }
  569. }
  570.  
  571. } catch ( ParserException pe ) { pe.printStackTrace(); }
  572. } catch ( IOException ioe ) { ioe.printStackTrace(); }
  573. }
  574. }
  575.  
  576. public static void PublicSurplus() {
  577. try {
  578. String stateSearch = searchState;
  579. if ( searchState.equals("ALL") ) stateSearch = "";
  580.  
  581. String message = "http://www.publicsurplus.com/sms/browse/search?posting=y&slth=&page=0&sortBy=timeLeft&keyWord=" + searchString + "&catId=-1&endHours=-1&startHours=-1&lowerPrice=0&higherPrice=0&milesLocation=-1&zipCode=&region=" + "all%2C" + stateSearch;
  582.  
  583. URL theUrl = new URL( message );
  584. String resp = "";
  585. BufferedReader reader = new BufferedReader(new InputStreamReader(theUrl.openStream()));
  586. String line;
  587. while ((line = reader.readLine()) != null) {
  588. resp += line;
  589. }
  590. reader.close();
  591.  
  592. int startIdx = resp.indexOf("<div class=\"SepTable\">");
  593. int endIdx = resp.indexOf("Hide Images");
  594.  
  595. if ( startIdx > 0 && endIdx > 0 ) {
  596. //System.out.println( resp.substring(startIdx,endIdx) );
  597.  
  598. Parser htmlParser = new Parser();
  599. try {
  600. htmlParser.setInputHTML(resp.substring(startIdx,endIdx));
  601. NodeList nList = htmlParser.parse(null);
  602.  
  603. for ( int i=0; i<nList.size(); i++ ) {
  604. if ( nList.elementAt(i).getText().equals("table width=\"100%\" border=\"0\" cellspacing=\"0\" cellpadding=\"5\" class=\"tabCurr\"") ) {
  605. NodeList tHeadList = nList.elementAt(i).getChildren();
  606. for ( int j=4; j<tHeadList.size(); j++ ) {
  607. if ( tHeadList.elementAt(j).getText().startsWith("tr") ) {
  608. String resultsContent = tHeadList.elementAt(j).toPlainTextString().trim();
  609. resultsContent = resultsContent.replaceAll("&nbsp;","");
  610. resultsContent = resultsContent.replaceAll("\n","");
  611. resultsContent = resultsContent.replaceAll("\t","");
  612. while ( resultsContent.contains(" ") ) {
  613. resultsContent = resultsContent.replaceAll(" "," ");
  614. }
  615. //System.out.println( resultsContent );
  616. String[] pieces = tHeadList.elementAt(j).toHtml().split("href=\"");
  617. String[] piecesII = pieces[1].split("\"");
  618. String anchorHref = "http://www.publicsurplus.com" + piecesII[0].trim();
  619. try {
  620. stmt.executeUpdate( "INSERT INTO auction_records VALUES ( 'http://http://www.publicsurplus.com/', '" + anchorHref + "', '" + resultsContent + "' )" );
  621. } catch ( SQLException sqle ) { sqle.printStackTrace(); }
  622. }
  623. }
  624. }
  625. }
  626.  
  627. } catch ( ParserException pe ) { pe.printStackTrace(); }
  628. }
  629. } catch ( IOException ioe ) { ioe.printStackTrace(); }
  630. }
  631.  
  632. public static void GovernmentAuction() {
  633. if ( searchState.equals("ALL" ) ) {
  634. String message = "http://auctions.governmentauction.com/view-auctions/individual-lots/?key=" + searchString + "&page=1&cat=&catm=any&order=timeleft&live=yes&timed=yes&regular=yes&buynow=yes&makeoffer=yes&auctioneer=&minprice=&maxprice=&items=48";
  635. try {
  636. URL theUrl = new URL( message );
  637. String resp = "";
  638. BufferedReader reader = new BufferedReader(new InputStreamReader(theUrl.openStream()));
  639. String line;
  640. while ((line = reader.readLine()) != null) {
  641. resp += line;
  642. }
  643. reader.close();
  644.  
  645. int startIdx = resp.indexOf("<div id=\"lac");
  646. int endIdx = resp.indexOf("<div id=\"footer\">");
  647.  
  648. Parser prse = new Parser();
  649. try {
  650. prse.setInputHTML( resp.substring(startIdx,endIdx) );
  651. NodeList nList = prse.parse(null);
  652.  
  653. for ( int i=0; i<nList.size(); i++ ) {
  654. if ( nList.elementAt(i).getText().contains("id=\"lac") ) {
  655. NodeList divList = nList.elementAt(i).getChildren();
  656. for ( int j=0; j<divList.size(); j++ ) {
  657. if ( divList.elementAt(j).getText().equals("div class=\"name\"") ) {
  658. String resultsContent = divList.elementAt(j).toPlainTextString();
  659. resultsContent = resultsContent.replaceAll("\n","");
  660. resultsContent = resultsContent.replaceAll("\t","");
  661. while ( resultsContent.contains(" ") ) {
  662. resultsContent = resultsContent.replaceAll(" "," ");
  663. }
  664. //System.out.println( resultsContent );
  665. String[] pieces = divList.elementAt(j).toHtml().split("href=\"");
  666. String[] piecesII = pieces[1].split("\"");
  667. String anchorHref = piecesII[0].trim();
  668. anchorHref = "http://auctions.governmentauction.com" + anchorHref;
  669. try {
  670. stmt.executeUpdate( "INSERT INTO auction_records VALUES ( 'http://auctions.governmentauction.com', '" + anchorHref + "', '" + resultsContent + "' )" );
  671. } catch ( SQLException sqle ) { sqle.printStackTrace(); }
  672. }
  673. }
  674. }
  675. }
  676. } catch ( ParserException pe ) { pe.printStackTrace(); }
  677. } catch ( IOException ioe ) { ioe.printStackTrace(); }
  678. }
  679. }
  680.  
  681. public static void HomeSteps() {
  682. if ( searchState.equals("ALL" ) ) {
  683. return;
  684. }
  685. String msg = "City=&County=&State=" + URLEncoder.encode(searchState) + "&Zip=&PriceRangeLow=" + URLEncoder.encode("*") + "&PriceRangeHigh=" + URLEncoder.encode("*") + "&TotalRooms=" + URLEncoder.encode("*") + "&NumberBedrooms=" + URLEncoder.encode("*") + "&NumberBathrooms=" + URLEncoder.encode("*") + "&B1=Find Homes&IPP=50";
  686.  
  687. try {
  688. URL theUrl = new URL("http://www.homesteps.com/cgi-bin/dynamic/formsearch.cgi");
  689. HttpURLConnection conn = (HttpURLConnection) theUrl.openConnection();
  690. conn.setDoOutput(true);
  691. conn.setRequestMethod("POST");
  692. conn.setRequestProperty("Content-Type","application/x-www-form-urlencoded");
  693. conn.setRequestProperty("User-Agent","Mozilla/5.0");
  694. OutputStream os = conn.getOutputStream();
  695. os.write(msg.getBytes("UTF-8"));
  696. os.close();
  697.  
  698. BufferedReader myReader = new BufferedReader(new InputStreamReader(conn.getInputStream()));
  699. String line;
  700. String resp = "";
  701. while ((line = myReader.readLine())!= null) {
  702. resp = resp.concat(line);
  703. }
  704. myReader.close();
  705.  
  706. int startIdx = resp.indexOf("<table cellpadding=1 cellspacing=1 class=tblHomesteps>");
  707. int endIdx = resp.indexOf("Freddie Mac takes");
  708.  
  709. Parser prsr = new Parser();
  710. try {
  711. prsr.setInputHTML(resp.substring(startIdx,endIdx));
  712. NodeList nList = prsr.parse(null);
  713. for ( int i=0; i<nList.size(); i++ ) {
  714. if ( nList.elementAt(i).getText().equals("table cellpadding=1 cellspacing=1 class=tblHomesteps") ) {
  715. NodeList trList = nList.elementAt(i).getChildren();
  716. for ( int j=2; j<trList.size(); j++ ) {
  717. if ( trList.elementAt(j).getText().equals("tr") ) {
  718. String resultsContent = trList.elementAt(j).toPlainTextString();
  719. resultsContent = resultsContent.replaceAll("\n","");
  720. resultsContent = resultsContent.replaceAll("\t","");
  721. while ( resultsContent.contains(" ") ) {
  722. resultsContent = resultsContent.replaceAll(" "," ");
  723. }
  724. String anchorHref = "http://www.homesteps.com/featuresearch.html";
  725. try {
  726. stmt.executeUpdate( "INSERT INTO auction_records VALUES ( 'http://www.homesteps.com/featuresearch.html', '" + anchorHref + "', '" + resultsContent + "' )" );
  727. } catch ( SQLException sqle ) { sqle.printStackTrace(); }
  728. }
  729. }
  730. }
  731. }
  732. } catch ( ParserException pe ) { pe.printStackTrace(); }
  733. } catch ( IOException ioe ) { ioe.printStackTrace(); }
  734. }
  735.  
  736. public static void HomePath() {
  737. if ( searchState.equals("ALL" ) ) {
  738. return;
  739. }
  740.  
  741. String message = "http://www.homepath.com/search.html?st=" + searchState + "&cno=000&ci=&zip=&src_ref=&mlsid=&pi=&pa=&bdi=&bhi=&x=26&y=11&ms=&xs=";
  742. try {
  743. URL ubl = new URL( message );
  744. String resp = "";
  745. BufferedReader reader = new BufferedReader(new InputStreamReader(ubl.openStream()));
  746. String line;
  747. while ((line = reader.readLine()) != null) {
  748. resp += line;
  749. }
  750. reader.close();
  751.  
  752. int startIdx = resp.indexOf("<table class=\"multipleLinesSearchResults\"");
  753. int endIdx = resp.indexOf("Get FREE HomePath");
  754.  
  755. if ( startIdx > 0 && endIdx > 0 ) {
  756. Parser htmlParser = new Parser();
  757. try {
  758. htmlParser.setInputHTML(resp.substring(startIdx,endIdx));
  759. NodeList nList = htmlParser.parse(null);
  760.  
  761. for ( int i=0; i<nList.size(); i++ ) {
  762. if ( nList.elementAt(i).getText().equals("table class=\"multipleLinesSearchResults\" cellpadding=\"0\" cellspacing=\"0\" border=\"0\"") ) {
  763. NodeList trList = nList.elementAt(i).getChildren();
  764. for ( int j=0; j<trList.size(); j++ ) {
  765. if ( trList.elementAt(j).getText().startsWith("tr class") ) {
  766. String resultsContent = trList.elementAt(j).toPlainTextString().trim();
  767. resultsContent = resultsContent.replaceAll("\n","");
  768. resultsContent = resultsContent.replaceAll("\t","");
  769. resultsContent = resultsContent.replaceAll("&nbsp;","");
  770. resultsContent = resultsContent.replaceAll("SaveMap","");
  771. while ( resultsContent.contains(" ") ) {
  772. resultsContent = resultsContent.replaceAll(" "," ");
  773. }
  774. //System.out.println( resultsContent );
  775. String[] pieces = trList.elementAt(j).toHtml().split("href=\"");
  776. String[] piecesII = pieces[1].split("\"");
  777. String anchorHref = piecesII[0].trim();
  778. anchorHref = anchorHref;
  779. anchorHref = "http://www.homepath.com" + anchorHref;
  780. if ( !resultsContent.equals("") ) {
  781. try {
  782. stmt.executeUpdate( "INSERT INTO auction_records VALUES ( 'http://www.homepath.com/', '" + anchorHref + "', '" + resultsContent + "' )" );
  783. } catch ( SQLException sqle ) { sqle.printStackTrace(); }
  784. }
  785. }
  786. }
  787. }
  788. }
  789. } catch ( ParserException pe ) { pe.printStackTrace(); }
  790. }
  791. } catch ( IOException ioe ) { ioe.printStackTrace(); }
  792. }
  793.  
  794. public static void HomeSalesGov() {
  795. if ( searchState.equals("ALL" ) ) {
  796. return;
  797. }
  798. String msg = "selectedCities=" + URLEncoder.encode("ALL") + "&selBedrooms=&selBaths=&pageAction=" + URLEncoder.encode("Search for Properties") + "&stateName=" + URLEncoder.encode( stateMap.get(searchState) ) + "&state=" + URLEncoder.encode(searchState) + "&propertyType=" + URLEncoder.encode("RESIDENTIAL");
  799. try {
  800. URL theUrl = new URL("http://www.homesales.gov/homesales/mainAction.do");
  801. HttpURLConnection conn = (HttpURLConnection) theUrl.openConnection();
  802. conn.setDoOutput(true);
  803. conn.setRequestMethod("POST");
  804. conn.setRequestProperty("Content-Type","application/x-www-form-urlencoded");
  805. conn.setRequestProperty("User-Agent","Mozilla/5.0");
  806. OutputStream os = conn.getOutputStream();
  807. os.write(msg.getBytes("UTF-8"));
  808. os.close();
  809.  
  810. BufferedReader myReader = new BufferedReader(new InputStreamReader(conn.getInputStream()));
  811. String line;
  812. String resp = "";
  813. while ((line = myReader.readLine())!= null) {
  814. resp = resp.concat(line);
  815. }
  816. myReader.close();
  817.  
  818. int startIdx = resp.indexOf("<table width=\"80%\" align=\"center\" border=\"0\" rules=\"none\">");
  819. int endIdx = resp.indexOf(" Total Result Pages:");
  820.  
  821. if ( startIdx > 0 && endIdx > startIdx ) {
  822. try {
  823. Parser prs = new Parser();
  824. prs.setInputHTML(resp.substring(startIdx,endIdx));
  825.  
  826. NodeList nList = prs.parse(null);
  827. for ( int i=0; i<nList.size(); i++ ) {
  828. if ( nList.elementAt(i).getText().startsWith("table width=\"80%\"") ) {
  829. NodeList trList = nList.elementAt(i).getChildren();
  830. for ( int j=0; j<trList.size(); j++ ) {
  831. if ( trList.elementAt(j).getText().equals("tr") ) {
  832. String resultsContent = trList.elementAt(j).toPlainTextString();
  833. resultsContent = resultsContent.replaceAll("&nbsp;","");
  834. resultsContent = resultsContent.replaceAll("\n","");
  835. resultsContent = resultsContent.replaceAll("\t","");
  836. while ( resultsContent.contains(" ") ) {
  837. resultsContent = resultsContent.replaceAll(" "," ");
  838. }
  839.  
  840. String[] pieces = trList.elementAt(j).toHtml().split("href=\"");
  841. if ( pieces.length > 1 ) {
  842. String[] piecesII = pieces[1].split("\"");
  843. if ( piecesII.length > 0 ) {
  844. String anchorHref = piecesII[0].trim();
  845. anchorHref = anchorHref;
  846. try {
  847. stmt.executeUpdate( "INSERT INTO auction_records VALUES ( 'http://www.homepath.com/', '" + anchorHref + "', '" + resultsContent + "' )" );
  848. } catch ( SQLException sqle ) { sqle.printStackTrace(); }
  849. }
  850. }
  851. }
  852. }
  853. }
  854. }
  855. } catch ( ParserException pe ) { pe.printStackTrace(); }
  856. }
  857. } catch ( IOException ioe ) { ioe.printStackTrace(); }
  858. }
  859.  
  860. public static void HudHomeStore() {
  861. if ( searchState.equals("ALL" ) ) {
  862. return;
  863. }
  864.  
  865. String message = "https://hudhomestore.secureportalk.net/HUD/PropertySearchResult.aspx?PageId=1&zipCode=&city=&county=&sState="+searchState+"&fromPrice=0&toPrice=0&caseNumber=&bed=0&bath=0&street=&buyerType=0&specialProgram=&Status=0&OrderbyName=SCASENUMBER&OrderbyValue=ASC&sPageSize=50";
  866. try {
  867. URL ubl = new URL( message );
  868. String resp = "";
  869. BufferedReader reader = new BufferedReader(new InputStreamReader(ubl.openStream()));
  870. String line;
  871. while ((line = reader.readLine()) != null) {
  872. resp += line;
  873. }
  874. reader.close();
  875.  
  876. int startIdx = resp.indexOf("<tr class=\"FormTablerow\"");
  877. int endIdx = resp.indexOf("<!-- ============= Data List");
  878.  
  879. try {
  880. Parser p = new Parser();
  881. p.setInputHTML(resp.substring(startIdx,endIdx));
  882. NodeList nList = p.parse(null);
  883. for ( int i=0; i<nList.size(); i++ ) {
  884. if ( nList.elementAt(i).getText().startsWith("tr class=\"FormTablerow") ) {
  885. String resultsContent = nList.elementAt(i).toPlainTextString();
  886. resultsContent = resultsContent.replaceAll("&nbsp;","");
  887. resultsContent = resultsContent.replaceAll("\n","");
  888. resultsContent = resultsContent.replaceAll("\t","");
  889. resultsContent = resultsContent.replaceAll("Exclusive","");
  890. resultsContent = resultsContent.replaceAll("View","");
  891. resultsContent = resultsContent.replaceAll("Street","");
  892. resultsContent = resultsContent.replaceAll("Map it","");
  893. resultsContent = resultsContent.replaceAll("Email","");
  894. resultsContent = resultsContent.replaceAll("Info","");
  895. resultsContent = resultsContent.replaceAll("Extended","");
  896. resultsContent = resultsContent.replaceAll("Lottery","");
  897. while ( resultsContent.contains(" ") ) {
  898. resultsContent = resultsContent.replaceAll(" "," ");
  899. }
  900. //System.out.println( resultsContent );
  901. NodeList tdList = nList.elementAt(i).getChildren();
  902. for ( int j=0; j<tdList.size(); j++ ) {
  903. if ( tdList.elementAt(j).getText().startsWith("td align=\"center\" valign=\"middle\"") ) {
  904. NodeList aList = tdList.elementAt(j).getChildren();
  905. for ( int k=0; k<aList.size(); k++ ) {
  906. if ( aList.elementAt(k).getText().startsWith("a href=\"#;\"") ) {
  907. String[] pieces = aList.elementAt(k).getText().split(Pattern.quote("getGoogleTranslationstring(&#039;"));
  908. if ( pieces.length > 0 ) {
  909. String[] piecesII = pieces[1].split(Pattern.quote("&#039;"));
  910. if ( piecesII.length > 0 ) {
  911. String anchorHref = piecesII[0].trim();
  912. try {
  913. System.out.println( "INSERT INTO auction_records VALUES ( 'http://www.hudhomestore.com/', '" + anchorHref + "', '" + resultsContent + "' )" );
  914. stmt.executeUpdate( "INSERT INTO auction_records VALUES ( 'http://www.hudhomestore.com/', '" + anchorHref + "', '" + resultsContent + "' )" );
  915. } catch ( SQLException sqle ) { sqle.printStackTrace(); }
  916. }
  917. }
  918. }
  919. }
  920. }
  921. }
  922. }
  923. }
  924. } catch ( ParserException pe ) { pe.printStackTrace(); }
  925. } catch ( IOException ioe ) { ioe.printStackTrace(); }
  926. }
  927.  
  928. public static void GSAGov() {
  929. if ( searchState.equals("ALL" ) ) {
  930. return;
  931. }
  932.  
  933. String message = "https://extportal.pbs.gsa.gov/ResourceCenter/PRHomePage/searchProperty.do?state=5&amp;statename=" + stateMap.get(searchState).toUpperCase() + "&amp;propType=&amp;propTypeName=ALL&amp;searchType=S";
  934. try {
  935. URL ubl = new URL( message );
  936. String resp = "";
  937. BufferedReader reader = new BufferedReader(new InputStreamReader(ubl.openStream()));
  938. String line;
  939. while ((line = reader.readLine()) != null) {
  940. resp += line;
  941. }
  942. reader.close();
  943.  
  944. int startIdx = resp.indexOf("<div class=\"cmSectionText\">");
  945. int endIdx = resp.indexOf("<!-- div cmSectionText -->");
  946.  
  947. if ( startIdx > 0 && endIdx > startIdx ) {
  948. Parser p = new Parser();
  949. try {
  950. p.setInputHTML( resp.substring(startIdx,endIdx) );
  951. NodeList nList = p.parse(null);
  952.  
  953. for ( int i=0; i<nList.size(); i++ ) {
  954. if ( nList.elementAt(i).getText().startsWith("div class=\"cmSectionText\"") ) {
  955. NodeList divChildren = nList.elementAt(i).getChildren();
  956. for ( int j=0; j<divChildren.size(); j++ ) {
  957. if ( divChildren.elementAt(j).getText().contains("D1D1D1") ) {
  958. String resultsContent = divChildren.elementAt(j).toPlainTextString().trim();
  959. resultsContent = resultsContent.replaceAll("Click here for more information","");
  960. resultsContent = resultsContent.replaceAll("&nbsp;","");
  961. resultsContent = resultsContent.replaceAll("\n","");
  962. resultsContent = resultsContent.replaceAll("\t","");
  963. resultsContent = resultsContent.replaceAll("\"","");
  964. while ( resultsContent.contains(" ") ) {
  965. resultsContent = resultsContent.replaceAll(" "," ");
  966. }
  967. //System.out.println( resultsContent );
  968. String htm = divChildren.elementAt(j).toHtml();
  969. //System.out.println( htm );
  970. int hrefIdx = htm.indexOf("loadProperty.do");
  971. if ( hrefIdx > 0 ) {
  972. String[] pieces = htm.substring(hrefIdx,hrefIdx+34).split(">");
  973. if ( pieces.length > 0 ) {
  974. String anchorHref = pieces[0].substring(0,pieces[0].length()-1);
  975. anchorHref = "https://extportal.pbs.gsa.gov/ResourceCenter/PRHomePage/" + anchorHref;
  976. try {
  977. //System.out.println( "INSERT INTO auction_records VALUES ( 'https://propertydisposal.gsa.gov/', '" + anchorHref + "', '" + resultsContent + "' )" );
  978. stmt.executeUpdate( "INSERT INTO auction_records VALUES ( 'https://propertydisposal.gsa.gov/', '" + anchorHref + "', '" + resultsContent + "' )" );
  979. } catch ( SQLException sqle ) { sqle.printStackTrace(); }
  980. }
  981. }
  982. }
  983. }
  984. }
  985. }
  986. } catch ( ParserException pe ) { pe.printStackTrace(); }
  987. }
  988. } catch ( IOException ioe ) { ioe.printStackTrace(); }
  989. }
  990.  
  991. public static void GovSales() {
  992. String msg = "http://www.govsales.gov/fassys/fassys/?function=010000000000";
  993.  
  994. try {
  995. URL ubl = new URL( msg );
  996. String resp = "";
  997. BufferedReader reader = new BufferedReader(new InputStreamReader(ubl.openStream()));
  998. String line;
  999. while ((line = reader.readLine()) != null) {
  1000. resp += line;
  1001. }
  1002. reader.close();
  1003.  
  1004. String scParam = "";
  1005. int scParamIdx = resp.indexOf("name=scParam VALUE=\"");
  1006. if ( scParamIdx > 0 ) {
  1007. scParam = resp.substring(scParamIdx, scParamIdx+80);
  1008. String[] pieces = scParam.split("VALUE=\"");
  1009. if ( pieces.length > 1 ) {
  1010. String[] piecesII = pieces[1].split("\"");
  1011. if ( piecesII.length > 1 ) {
  1012. scParam = piecesII[0].trim();
  1013. }
  1014. }
  1015. }
  1016. String webPcm = "";
  1017. int webPcmIdx = resp.indexOf("WEBPCMTRANSID");
  1018. if ( webPcmIdx > 0 ) {
  1019. webPcm = resp.substring(webPcmIdx, webPcmIdx+40);
  1020. String[] pieces = webPcm.split("VALUE=");
  1021. if ( pieces.length > 1 ) {
  1022. String[] piecesII = pieces[1].split("\"");
  1023. if ( piecesII.length > 1 ) {
  1024. webPcm = piecesII[1].trim();
  1025. }
  1026. }
  1027. }
  1028.  
  1029. String msg2 = "scParam=" + URLEncoder.encode( scParam ) + "&scCurTabCat=" + URLEncoder.encode("010000000000") + "&scSelTabCat=" + URLEncoder.encode("010000000000") + "&scSelLink=" + URLEncoder.encode("GS") + "&scGSName=" + URLEncoder.encode( searchString.toUpperCase() ) + "&scGSOptn=" + URLEncoder.encode("1") + "&scSelState=&scSelBCCat=&scSelRow=&scSelCatList=&scSeeAllCat=&WEBPCMTRANSID=" + URLEncoder.encode( webPcm );
  1030.  
  1031. URL usl = new URL("http://www.govsales.gov/fassys/fasallcat/");
  1032. HttpURLConnection conn = (HttpURLConnection) usl.openConnection();
  1033. conn.setDoOutput(true);
  1034. conn.setRequestMethod("POST");
  1035. conn.setRequestProperty("Content-Type","application/x-www-form-urlencoded");
  1036. conn.setRequestProperty("User-Agent","Mozilla/5.0");
  1037. OutputStream os = conn.getOutputStream();
  1038. os.write(msg2.getBytes("UTF-8"));
  1039. os.close();
  1040.  
  1041. BufferedReader lecteur = new BufferedReader(new InputStreamReader(conn.getInputStream()));
  1042. String rayo;
  1043. String resp2 = "";
  1044. while ((rayo = lecteur.readLine())!= null) {
  1045. resp2 += rayo;
  1046. }
  1047. lecteur.close();
  1048.  
  1049. scParam = "";
  1050. scParamIdx = resp2.indexOf("name=scParam VALUE=\"");
  1051. if ( scParamIdx > 0 ) {
  1052. scParam = resp2.substring(scParamIdx, scParamIdx+80);
  1053. String[] pieces = scParam.split("VALUE=\"");
  1054. if ( pieces.length > 1 ) {
  1055. String[] piecesII = pieces[1].split("\"");
  1056. if ( piecesII.length > 1 ) {
  1057. scParam = piecesII[0].trim();
  1058. }
  1059. }
  1060. }
  1061. String scStoreLocal1 = "";
  1062. int scSL1Idx = resp2.indexOf("name=scStoreLocal1");
  1063. if ( scSL1Idx > 0 ) {
  1064. scStoreLocal1 = resp2.substring(scSL1Idx, scSL1Idx+160);
  1065. String[] pieces = scStoreLocal1.split("VALUE=\"");
  1066. if ( pieces.length > 1 ) {
  1067. String[] piecesII = pieces[1].split("\"");
  1068. if ( piecesII.length > 1 ) {
  1069. scStoreLocal1 = piecesII[0].trim();
  1070. }
  1071. }
  1072. }
  1073. String scStoreLocal2 = "";
  1074. int scSL2Idx = resp2.indexOf("name=scStoreLocal2");
  1075. if ( scSL1Idx > 0 ) {
  1076. scStoreLocal2 = resp2.substring(scSL2Idx, scSL2Idx+160);
  1077. String[] pieces = scStoreLocal2.split("VALUE=\"");
  1078. if ( pieces.length > 1 ) {
  1079. String[] piecesII = pieces[1].split("\"");
  1080. if ( piecesII.length > 1 ) {
  1081. scStoreLocal2 = piecesII[0];
  1082. }
  1083. }
  1084. }
  1085. String scStoreLocal3 = "";
  1086. int scSL3Idx = resp2.indexOf("name=scStoreLocal3");
  1087. if ( scSL3Idx > 0 ) {
  1088. scStoreLocal3 = resp2.substring(scSL3Idx, scSL3Idx+160);
  1089. String[] pieces = scStoreLocal3.split("VALUE=\"");
  1090. if ( pieces.length > 1 ) {
  1091. String[] piecesII = pieces[1].split("\"");
  1092. if ( piecesII.length > 1 ) {
  1093. scStoreLocal3 = piecesII[0];
  1094. }
  1095. }
  1096. }
  1097. String scStoreLocal5 = "";
  1098. int scSL5Idx = resp2.indexOf("name=scStoreLocal5");
  1099. if ( scSL5Idx > 0 ) {
  1100. scStoreLocal5 = resp2.substring(scSL5Idx, scSL5Idx+160);
  1101. String[] pieces = scStoreLocal5.split("VALUE=\"");
  1102. if ( pieces.length > 1 ) {
  1103. String[] piecesII = pieces[1].split("\"");
  1104. if ( piecesII.length > 1 ) {
  1105. scStoreLocal5 = piecesII[0];
  1106. }
  1107. }
  1108. }
  1109. webPcm = "";
  1110. webPcmIdx = resp2.indexOf("WEBPCMTRANSID");
  1111. if ( webPcmIdx > 0 ) {
  1112. webPcm = resp2.substring(webPcmIdx, webPcmIdx+40);
  1113. String[] pieces = webPcm.split("VALUE=");
  1114. if ( pieces.length > 1 ) {
  1115. String[] piecesII = pieces[1].split("\"");
  1116. if ( piecesII.length > 1 ) {
  1117. webPcm = piecesII[1].trim();
  1118. }
  1119. }
  1120. }
  1121.  
  1122. String msg3 = "scParam=" + URLEncoder.encode( scParam ) + "&scStoreLocal1=" + URLEncoder.encode( scStoreLocal1 ) + "&scStoreLocal2=" + URLEncoder.encode( scStoreLocal2 ) + "&scStoreLocal3=" + URLEncoder.encode( scStoreLocal3 ) + "&scStoreLocal4=&scStoreLocal5=" + URLEncoder.encode( scStoreLocal5 ) + "&scStoreLocal6=&scStoreLocal7=&scStoreLocal8=&scStoreLocal9=&scStoreLocal10=&scStoreLocal11=&scStoreLocal12=&scCurTabCat=&scSelTabCat=&scSelLink=&scGSName=&scGSOptn=" + URLEncoder.encode("1") + "&scSelState=&scSelRow=" + URLEncoder.encode("2G1WB58K381263163") + "&scSortOrder=&scPageNo=" + URLEncoder.encode("1") + "&WEBPCMTRANSID=" + URLEncoder.encode("0690484,2970121");
  1123.  
  1124. //System.out.println( msg3 );
  1125.  
  1126. int startIdx = resp2.indexOf("var tab=");
  1127. int endIdx = resp2.indexOf("bldTr();");
  1128.  
  1129. String dataBlock = resp2.substring(startIdx,endIdx).replaceAll(Pattern.quote("["),"").replaceAll(Pattern.quote("];"),"");
  1130. dataBlock = dataBlock.replaceAll("var tab=\"","");
  1131. dataBlock = dataBlock.replaceAll(" "," ");
  1132. //System.out.println( dataBlock );
  1133. String[] dataBlockPieces = dataBlock.split("\", \"");
  1134.  
  1135. ArrayList<String> resultsContentStrings = new ArrayList<String>();
  1136. ArrayList<String> urlIdList = new ArrayList<String>();
  1137.  
  1138. for ( int i=0; i<dataBlockPieces.length; i+=12 ) {
  1139. String content = "";
  1140. for ( int j=0; j<12; j++ ) {
  1141. content += dataBlockPieces[i+j] + " ";
  1142. }
  1143. resultsContentStrings.add( content );
  1144. urlIdList.add( dataBlockPieces[i+1] );
  1145. }
  1146.  
  1147. ArrayList<String> msgs = new ArrayList<String>();
  1148.  
  1149. for ( String urlId : urlIdList ) {
  1150. msgs.add( "scParam=" + URLEncoder.encode( scParam ) + "&scStoreLocal1=" + URLEncoder.encode( scStoreLocal1 ) + "&scStoreLocal2=" + URLEncoder.encode( scStoreLocal2 ) + "&scStoreLocal3=" + URLEncoder.encode( scStoreLocal3 ) + "&scStoreLocal4=&scStoreLocal5=" + URLEncoder.encode( scStoreLocal5 ) + "&scStoreLocal6=&scStoreLocal7=&scStoreLocal8=&scStoreLocal9=&scStoreLocal10=&scStoreLocal11=&scStoreLocal12=&scCurTabCat=&scSelTabCat=&scSelLink=&scGSName=&scGSOptn=" + URLEncoder.encode("1") + "&scSelState=&scSelRow=" + URLEncoder.encode( urlId ) + "&scSortOrder=&scPageNo=" + URLEncoder.encode("1") + "&WEBPCMTRANSID=" + URLEncoder.encode("0690484,2970121") );
  1151. }
  1152.  
  1153. ArrayList<String> urlList = new ArrayList<String>();
  1154.  
  1155. for ( String msg1 : msgs ) {
  1156. urlList.add( "javascript:jQuery.post( &quot;http://www.govsales.gov/fassys/fassrchlist/&quot;, &quot;" + msg1 + "&quot; )" );
  1157. }
  1158.  
  1159. for ( int i=0; i<resultsContentStrings.size()-1; i++ ) {
  1160. try {
  1161. stmt.executeUpdate( "INSERT INTO auction_records VALUES ( 'http://www.govsales.gov/', '" + urlList.get(i) + "', '" + resultsContentStrings.get(i) + "' )" );
  1162. } catch ( SQLException sqle ) { sqle.printStackTrace(); }
  1163. }
  1164.  
  1165. } catch ( IOException ioe ) { ioe.printStackTrace(); }
  1166.  
  1167. }
  1168.  
  1169. public static void LonestarOnline() {
  1170. String msg = "search_type=title_search&search_name=Title+%26+Description+Search&search_text=" + searchString + "&phrase_match=any&category=-1&search_span=title&search_limit=active&order_by=title&sort_order=ASC";
  1171.  
  1172. try {
  1173. URL theUrl = new URL( "http://www.lonestaronline.com/search/search_results.cfm?" + msg );
  1174. String resp = "";
  1175. BufferedReader reader = new BufferedReader(new InputStreamReader(theUrl.openStream()));
  1176. String line;
  1177. while ((line = reader.readLine()) != null) {
  1178. resp += line;
  1179. }
  1180. reader.close();
  1181.  
  1182. int startIdx = resp.indexOf("<table border=0 cellspacing=0 cellpadding=2 noshade width=780>");
  1183. int endIdx = resp.indexOf("<form name=\"blah\"");
  1184. if ( startIdx > 0 && endIdx > startIdx ) {
  1185. //System.out.println( startIdx + " " + endIdx );
  1186.  
  1187. Parser psr = new Parser();
  1188. try {
  1189. //System.out.println( resp.substring( startIdx, endIdx ) );
  1190. psr.setInputHTML( resp.substring(startIdx,endIdx) );
  1191. NodeList nList = psr.parse(null);
  1192.  
  1193. for ( int i=0; i<nList.size(); i++ ) {
  1194. if ( nList.elementAt(i).getText().equals("table border=0 cellspacing=0 cellpadding=2 noshade width=780") ) {
  1195. NodeList trList = nList.elementAt(i).getChildren();
  1196. for ( int j=0; j<trList.size(); j++ ) {
  1197. if ( trList.elementAt(j).getText().startsWith("tr") ) {
  1198. String resultsContent = trList.elementAt(j).toPlainTextString();
  1199. resultsContent = resultsContent.replaceAll("&nbsp;","");
  1200. resultsContent = resultsContent.replaceAll("\n","");
  1201. resultsContent = resultsContent.replaceAll("\t","");
  1202. resultsContent = resultsContent.replaceAll("\"","");
  1203. resultsContent = resultsContent.replaceAll("'","");
  1204. while ( resultsContent.contains(" ") ) {
  1205. resultsContent = resultsContent.replaceAll(" "," ");
  1206. }
  1207. NodeList tdList = trList.elementAt(j).getChildren();
  1208. for ( int k=0; k<tdList.size(); k++ ) {
  1209. if ( tdList.elementAt(k).getText().equals("td width=\"496\"") ) {
  1210. String tdContent = tdList.elementAt(k).toHtml();
  1211. String[] pieces = tdContent.split("href=\"");
  1212. if ( pieces.length > 1 ) {
  1213. String[] piecesII = pieces[1].split("\"");
  1214. String anchorHref = "http://www.lonestaronline.com" + piecesII[0].trim();
  1215. try {
  1216. stmt.executeUpdate( "INSERT INTO auction_records VALUES ( 'http://www.lonestaronline.com', '" + anchorHref + "', '" + resultsContent + "' )" );
  1217. } catch ( SQLException sqle ) { sqle.printStackTrace(); }
  1218. }
  1219. }
  1220. }
  1221. }
  1222. }
  1223. }
  1224. }
  1225. } catch ( ParserException pe ) { pe.printStackTrace(); }
  1226. }
  1227. } catch ( IOException ioe ) { ioe.printStackTrace(); }
  1228. }
  1229.  
  1230. public static void GovDeals() {
  1231. String msg = "";
  1232. if ( searchState.equals("ALL") ) {
  1233. msg = "fa=Main.AdvSearchResults&timing1=&mystate=&myseller=0&myselectbox=00&desc=" + searchString + "&inv_nbr=";
  1234. }
  1235. else {
  1236. msg = "fa=Main.AdvSearchResults&timing1=&mystate=" + searchState + "&myseller=0&myselectbox=00&desc=" + searchString + "&inv_nbr=";
  1237. }
  1238.  
  1239. try {
  1240. URL ubl = new URL( "http://www.govdeals.com/index.cfm?" + msg );
  1241. String resp = "";
  1242. BufferedReader reader = new BufferedReader(new InputStreamReader(ubl.openStream()));
  1243. String line;
  1244. while ((line = reader.readLine()) != null) {
  1245. resp += line;
  1246. }
  1247. reader.close();
  1248.  
  1249. int startIdx = resp.indexOf("<tr bgcolor=\"#CCCCCC\">");
  1250. int endIdx = resp.indexOf("<td colspan=\"9\" style=\"border-bottom:none\" scope=\"row\">");
  1251.  
  1252. if ( startIdx > 0 && endIdx > startIdx ) {
  1253. // System.out.println( startIdx + " " + endIdx );
  1254. Parser prse = new Parser();
  1255. try {
  1256. prse.setInputHTML( resp.substring( startIdx, endIdx ) );
  1257. NodeList nList = prse.parse(null);
  1258. for ( int i=0; i<nList.size(); i++ ) {
  1259. if ( nList.elementAt(i).getText().startsWith("tr valign=\"top\"") ) {
  1260. String resultsContent = nList.elementAt(i).toPlainTextString();
  1261. resultsContent = resultsContent.replaceAll("&nbsp;","");
  1262. resultsContent = resultsContent.replaceAll("\n","");
  1263. resultsContent = resultsContent.replaceAll("\t","");
  1264. resultsContent = resultsContent.replaceAll("\"","");
  1265. resultsContent = resultsContent.replaceAll("'","");
  1266. resultsContent = resultsContent.replaceAll("View by same:CategoryLocationMake/BrandModelProximity-------------Terms & ConditionsView this Item","");
  1267. while ( resultsContent.contains(" ") ) {
  1268. resultsContent = resultsContent.replaceAll(" "," ");
  1269. }
  1270. resultsContent = resultsContent.replaceAll("0Bids","0 Bids");
  1271. //System.out.println( resultsContent );
  1272. NodeList tdList = nList.elementAt(i).getChildren();
  1273. for ( int j=0; j<tdList.size(); j++ ) {
  1274. if ( tdList.elementAt(j).getText().equals("td valign=\"top\" nowrap=\"nowrap\"") ) {
  1275. String[] pieces = tdList.elementAt(j).toHtml().split("href=\"");
  1276. if ( pieces.length > 1 ) {
  1277. String[] piecesII = pieces[1].split("\"");
  1278. String anchorHref = "http://www.govdeals.com/" + piecesII[0];
  1279. try {
  1280. stmt.executeUpdate( "INSERT INTO auction_records VALUES ( 'http://www.govdeals.com/', '" + anchorHref + "', '" + resultsContent + "' )" );
  1281. } catch ( SQLException sqle ) { sqle.printStackTrace(); }
  1282. }
  1283. }
  1284. }
  1285. }
  1286. }
  1287. } catch ( ParserException pe ) { pe.printStackTrace(); }
  1288. }
  1289. } catch ( IOException ioe ) { ioe.printStackTrace(); }
  1290. }
  1291.  
  1292. public static void IllinoisIbid() {
  1293. String msg = "search=1&catid=&SearchStr=" + searchString + "&AllAnyExact=all&Region1=&Region2=&Region3=&Region4=&customs_criteria=1&cfs_txt_EqNum=&cfs_txt_VIN=&cfs_int_min_Odometer=&cfs_int_max_Odometer=&cfs_txt_make=&cfs_txt_model=&cfs_txt_modelyear=&PriceFrom=&PriceTo=&StartFrom=&StartTo=&EndFrom=&EndTo=&ExcludeStr=&OrderBy=end_asc&MaxResults=50&sbmtAdvSearch=Go#results";
  1294. try {
  1295. URL ubl = new URL( "http://ibid.illinois.gov/advancedsearch.asp?" + msg );
  1296. String resp = "";
  1297. BufferedReader lecteur = new BufferedReader(new InputStreamReader(ubl.openStream()));
  1298. String line;
  1299. while ((line = lecteur.readLine()) != null) {
  1300. resp += line;
  1301. }
  1302. lecteur.close();
  1303.  
  1304. int startIdx = resp.indexOf( "<div id=\"SearchResults\">");
  1305. int endIdx = resp.indexOf( "<div id=\"PageLinksS\">" );
  1306. //System.out.println( startIdx + " " + endIdx );
  1307. if ( startIdx > 0 && endIdx > startIdx ) {
  1308. Parser prs = new Parser();
  1309. try {
  1310. prs.setInputHTML( resp.substring(startIdx,endIdx) );
  1311. NodeList nList = prs.parse(null);
  1312.  
  1313. for ( int i=0; i<nList.size(); i++ ) {
  1314. if ( nList.elementAt(i).getText().equals("div id=\"SearchResults\"") ) {
  1315. NodeList tblList = nList.elementAt(i).getChildren();
  1316. for ( int j=0; j<tblList.size(); j++ ) {
  1317. if ( tblList.elementAt(j).getText().equals("table cellpadding=\"2\" cellspacing=\"1\"") ) {
  1318. NodeList trList = tblList.elementAt(j).getChildren();
  1319. for ( int k=0; k<trList.size(); k++ ) {
  1320. if ( trList.elementAt(k).getText().startsWith("tr class=\"Color") ) {
  1321. String resultsContent = trList.elementAt(k).toPlainTextString();
  1322. resultsContent = resultsContent.replaceAll("&nbsp;","");
  1323. resultsContent = resultsContent.replaceAll("\n","");
  1324. resultsContent = resultsContent.replaceAll("\t","");
  1325. resultsContent = resultsContent.replaceAll("\"","");
  1326. resultsContent = resultsContent.replaceAll("'","");
  1327. r

Report this snippet  

You need to login to post a comment.