Posted By

Abe on 10/01/10


Tagged

scraping scrap


Versions (?)

Scraping Links


 / Published in: PHP
 

URL: http://www.merchantos.com/makebeta/php/scraping-links-with-php/

  <?php

  /**
   * Fetch a single page with cURL and print the href of every link found
   * inside its <body>.
   *
   * Output is HTML: one "<br />Link: ..." line per link, or two
   * "<br />cURL error ..." lines when the request fails. Nothing is printed
   * for a page that downloads successfully but has an empty body.
   */

  $target_url = "http://www.merchantos.com/";
  $userAgent = 'Googlebot/2.1 (http://www.googlebot.com/bot.html)';

  // make the cURL request to $target_url
  $ch = curl_init();
  curl_setopt($ch, CURLOPT_USERAGENT, $userAgent);
  curl_setopt($ch, CURLOPT_URL, $target_url);
  curl_setopt($ch, CURLOPT_FAILONERROR, true);     // HTTP status >= 400 counts as a failure
  curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);  // follow redirects
  curl_setopt($ch, CURLOPT_AUTOREFERER, true);     // set Referer automatically on redirects
  curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);  // return the body instead of printing it
  curl_setopt($ch, CURLOPT_TIMEOUT, 10);           // give up after 10 seconds
  $html = curl_exec($ch);

  if ($html === false) {
      // Strict comparison: only a real cURL failure lands here. An empty or
      // "0" response body is a successful transfer, not an error.
      echo "<br />cURL error number:" . curl_errno($ch);
      echo "<br />cURL error:" . htmlspecialchars(curl_error($ch), ENT_QUOTES, 'UTF-8');
  } elseif ($html !== '') {
      // Only parse when there is something to parse: DOMDocument::loadHTML()
      // rejects an empty source.

      // parse the html into a DOMDocument; real-world markup is rarely valid,
      // so collect libxml's parse errors internally instead of silencing the
      // call with "@", then restore the previous setting.
      $previousLibxmlSetting = libxml_use_internal_errors(true);
      $dom = new DOMDocument();
      $dom->loadHTML($html);
      libxml_clear_errors();
      libxml_use_internal_errors($previousLibxmlSetting);

      // grab all the links on the page; anchors without an href attribute
      // (e.g. named anchors) are not links and are skipped
      $xpath = new DOMXPath($dom);
      $hrefs = $xpath->evaluate("/html/body//a[@href]");

      foreach ($hrefs as $href) {
          $url = $href->getAttribute('href');
          // The href comes from a remote page: escape it before echoing it
          // into our own HTML output.
          echo "<br />Link: " . htmlspecialchars($url, ENT_QUOTES, 'UTF-8');
      }
  }
  ?>

Report this snippet  

You need to log in to post a comment.