Posted By

jerryvig on 10/18/11


Tagged

test groovy yelp


Versions (?)

YelpTest


 / Published in: Groovy
 

  import com.gargoylesoftware.htmlunit.WebClient
  import com.gargoylesoftware.htmlunit.BrowserVersion
  import com.gargoylesoftware.htmlunit.html.HtmlPage
  import java.util.regex.Pattern
  import org.json.JSONObject

  /*
   * YelpTest: pages through Yelp's search-snippet endpoint for "taxi" in
   * New York, NY and writes one CSV row (name, phone, URL) per business
   * result to ./YelpTaxis.csv. Each business name is also printed to stdout.
   *
   * Each response is JSON; one of its values is treated as an HTML fragment,
   * which is wrapped in <html><body> and written to ./output.html so that a
   * second WebClient can load it as a page and walk its <div> elements.
   */

  // Scratch file that holds the HTML fragment of the page currently being parsed.
  def fh = new File("./output.html")
  // CSV output; write() truncates any previous content and emits the header row.
  def fh2 = new File("./YelpTaxis.csv")
  fh2.write( "\"Name\",\"Phone\",\"URL\"\n" )

  // Quotes a CSV field, doubling any embedded double quote so that a value
  // such as: Joe's "Best" Taxi  does not terminate the field early.
  def csvField = { value -> "\"" + value.toString().replace( "\"", "\"\"" ) + "\"" }

  // Client used to fetch the remote JSON snippets.
  def wc = new WebClient( BrowserVersion.FIREFOX_3 )
  wc.setJavaScriptEnabled( false )

  // Second client used only to parse the locally written HTML file.
  def wcII = new WebClient( BrowserVersion.FIREFOX_3 )
  wcII.setJavaScriptEnabled( false )

  // Results requested per call (the rpp parameter) and the exclusive upper
  // bound on the start offset; the offset advances by one page per iteration.
  def pageSize = 40
  def maxResults = 800

  for ( def start = 0; start < maxResults; start += pageSize ) {
    // The URL must be a single-line literal: a double-quoted Groovy string cannot span lines.
    def urlString = "http://www.yelp.com/search/snippet?attrs=&cflt=&find_desc=taxi&find_loc=New+York+NY&mapsize=small&rpp=${pageSize}&show_filters=1&sortby=best_match&start=${start}"
    def page = wc.getPage( urlString )
    def jsonContent = page.getWebResponse().getContentAsString().trim()
    def jsonObj = new JSONObject( jsonContent )
    def nameArray = jsonObj.names()
    def valArray = jsonObj.toJSONArray( nameArray )
    // NOTE(review): this takes the value of whichever key names() lists first and
    // assumes it is the HTML results fragment - confirm against the actual response.
    fh.write( "<html><body>" + valArray.getString(0) + "</body></html>" )

    // Load the file that was just written, wherever the working directory is,
    // instead of relying on a hard-coded absolute path.
    def htmPage = wcII.getPage( fh.toURI().toString() )
    def divList = htmPage.getDocumentElement().getHtmlElementsByTagName("div")
    divList.each { div ->
      // Only divs whose class attribute is exactly this string are business results.
      if ( div.getAttribute("class").equals("businessresult clearfix") ) {
        def name = ""
        def phone = ""
        def url = ""
        def subDivs = div.getHtmlElementsByTagName("div")
        subDivs.each { sub ->
          if ( sub.getAttribute("class").equals("leftcol") ) {
            def aList = sub.getHtmlElementsByTagName("a")
            // Guard against a "leftcol" div without any link; name and url then
            // stay empty instead of the script dying on a null element.
            if ( !aList.isEmpty() ) {
              def link = aList[0]
              url = "http://www.yelp.com" + link.getAttribute("href").trim()
              name = link.asText().trim()
            }
          }
          if ( sub.getAttribute("class").equals("phone") ) {
            phone = sub.asText().trim()
          }
        }
        println name
        fh2.append( csvField( name ) + "," + csvField( phone ) + "," + csvField( url ) + "\n" )
      }
    }
  }

Report this snippet  

You need to login to post a comment.