Posted By

danfsmith on 08/25/11


Tagged

test html content


Versions (?)

Check whether the pages linked from a document contain a given string


 / Published in: Windows PowerShell
 

  <#
  .SYNOPSIS
  Reports, for every link in a document, whether the link's destination
  contains a given string.

  .DESCRIPTION
  Downloads the document at $url, selects nodes with the $xpath query using
  HtmlAgilityPack, downloads the target of each node's href attribute and
  emits one object per link with two properties:
    Url     - the href exactly as written in the document
    IsFound - the string "true" or "false"
  A destination that cannot be downloaded is reported with IsFound = "false"
  and a warning, instead of reusing the result of the previous link.

  .PARAMETER url
  Address of the document whose links are checked.

  .PARAMETER tofind
  Text to look for in each destination. The match uses String.Contains, so it
  is case-sensitive.

  .PARAMETER agilitypackpath
  Path to HtmlAgilityPack.dll.

  .PARAMETER xpath
  XPath query selecting the nodes whose href attribute is followed.
  #>
  param ($url, $tofind, $agilitypackpath="f:\dan\tools\html-agility-pack\HtmlAgilityPack.dll", $xpath="//a")

  # Fail early with a clear message instead of an obscure .NET exception later.
  if (-not $url) { throw "The url parameter is required." }
  if (-not $tofind) { throw "The tofind parameter is required." }

  Add-Type -Path $agilitypackpath

  $client = New-Object System.Net.WebClient
  try
  {
      $contents = $client.DownloadString($url)

      # Let WebClient resolve relative hrefs against the document address.
      # BaseAddress only accepts an absolute URI; when $url is not one (for
      # example a local relative path) links are used as written.
      try
      {
          $client.BaseAddress = $url
      }
      catch
      {
          Write-Warning "Relative links will not be resolved: '$url' is not an absolute URI."
      }

      $doc = New-Object HtmlAgilityPack.HtmlDocument
      # LoadHtml returns nothing, so there is no result to keep.
      $doc.LoadHtml($contents)

      # SelectNodes returns $null (not an empty collection) when nothing
      # matches; guard so the loop body never runs with a $null node.
      $linknodes = $doc.DocumentNode.SelectNodes($xpath)
      if ($null -ne $linknodes)
      {
          foreach ($node in $linknodes)
          {
              $link = $node.GetAttributeValue("href", "")
              if (-not $link) { continue }

              # Reset per link so a failed download can never report the
              # previous destination's result.
              $isfound = "false"
              try
              {
                  $c = $client.DownloadString($link)
                  if ($c.Contains($tofind))
                  {
                      $isfound = "true"
                  }
              }
              catch
              {
                  Write-Warning "Could not check '$link': $($_.Exception.Message)"
              }

              New-Object PsObject -Property @{Url = $link; IsFound = $isfound;}
          }
      }
  }
  finally
  {
      # Release the underlying network resources even when a download throws.
      $client.Dispose()
  }

Report this snippet  

You need to login to post a comment.