Posted By

sukantahazra on 03/08/10


Tagged

html ruby xml parsing


Versions (?)

HTML processing using nokogiri


 / Published in: Ruby
 

  # nokogiri_test.rb
  #
  # Downloads a walmart.com search-results page for "batman" and prints the
  # page title, then the link text, price and href of every ".item" element.
  require 'rubygems'
  require 'nokogiri'
  require 'open-uri'

  url = "http://www.walmart.com/search/search-ng.do?search_constraint=0&ic=48_0&search_query=batman&Find.x=0&Find.y=0&Find=Find"

  # Open through the URI object (OpenURI::OpenRead#open) instead of handing the
  # URL string to Kernel#open: that form was deprecated in Ruby 2.7 and no
  # longer works in Ruby 3.0+. The block form also closes the IO afterwards.
  doc = URI.parse(url).open { |page| Nokogiri::HTML(page) }

  # at_css returns nil when nothing matches, so guard before calling #text.
  page_title = doc.at_css("title")
  puts page_title.text if page_title

  doc.css(".item").each do |item|
    link = item.at_css(".prodLink")
    next unless link # skip ".item" nodes that carry no product link

    price_node = item.at_css(".PriceCompare .BodyS, .PriceXLBold")
    # Keep only the "$12.34" part; nil when there is no price node or amount,
    # in which case the line below prints an empty price.
    price = price_node && price_node.text[/\$[0-9\.]+/]

    puts "#{link.text} - #{price}"
    puts link[:href]
  end
  15.  
  # lib/tasks/product_prices.rake
  #
  # Rake task: for every Product whose price is nil, search walmart.com for the
  # product's name and save the first price found on the results page.
  desc "Fetch product prices"
  task :fetch_prices => :environment do
    require 'nokogiri'
    require 'open-uri'
    require 'cgi' # CGI.escape is used to build the query string below

    Product.find_all_by_price(nil).each do |product|
      url = "http://www.walmart.com/search/search-ng.do?search_constraint=0&ic=48_0&search_query=#{CGI.escape(product.name)}&Find.x=0&Find.y=0&Find=Find"

      # Open through the URI object (OpenURI::OpenRead#open) instead of handing
      # the URL string to Kernel#open, which no longer works in Ruby 3.0+.
      # The block form closes the IO once the document has been parsed.
      doc = URI.parse(url).open { |page| Nokogiri::HTML(page) }

      # at_css returns nil when the page has no matching price element.
      price_node = doc.at_css(".PriceCompare .BodyS, .PriceXLBold")
      # First run of digits/dots in the node text (no "$" so it can be stored
      # as a number). NOTE(review): a value with a thousands separator such as
      # "1,299.00" would be cut at the comma - confirm the page's price format.
      price = price_node && price_node.text[/[0-9\.]+/]

      # Leave the product untouched (and keep processing the others) when no
      # price could be extracted, rather than aborting the whole task.
      next unless price

      product.update_attribute(:price, price)
    end
  end

Report this snippet  

You need to log in to post a comment.