Revision: 50500
Initial Code
Initial URL
Initial Description
Initial Title
Initial Tags
Initial Language
at August 25, 2011 00:15 by danfsmith
Initial Code
# Lists every <a> element whose href contains "pdf" (case-insensitive) in the
# *.htm / *.aspx files found recursively under $path.
#
# Parameters:
#   $path            - root folder to scan.
#   $urlpath         - text substituted for the $path prefix of each file's
#                      full name when building the reported FileName.
#   $agilityPackPath - location of HtmlAgilityPack.dll; defaults to the path
#                      this script has always used.
#
# Output: one object per matching link with the properties PdfLink, FileName
# and LineNumber, sorted by PdfLink. Progress messages go to the host only,
# so they never mix with the emitted objects.
param (
    $path,
    $urlpath,
    $agilityPackPath = 'f:\dan\tools\html-agility-pack\HtmlAgilityPack.dll'
)

Add-Type -Path $agilityPackPath

# FileInfo.FullName is always absolute, so a relative $path (or one that
# differs only in letter case) would never match it. Try the path exactly as
# given first, then its resolved absolute form(s).
$roots = @()
if ($path) {
    $roots += [string]$path
    $roots += @(Resolve-Path -Path $path -ErrorAction SilentlyContinue |
        ForEach-Object { $_.ProviderPath })
}

# @(...) guarantees an array, so an empty result iterates zero times.
# Directories whose names happen to match the patterns are skipped because
# they cannot be loaded as documents.
$files = @(Get-ChildItem -Include *.htm,*.aspx -Path $path -Recurse |
    Where-Object { -not $_.PSIsContainer })

$result = $files | ForEach-Object {
    $file = $_
    Write-Host "Checking $file"

    # Swap the root prefix for $urlpath, then switch to URL-style separators.
    $name = $file.FullName
    foreach ($root in $roots) {
        if ($name.StartsWith($root, [StringComparison]::OrdinalIgnoreCase)) {
            $name = "$urlpath" + $name.Substring($root.Length)
            break
        }
    }
    $name = $name.Replace('\', '/')

    # A fresh document per file; Load() fills it in place, so there is no
    # return value worth capturing.
    $doc = New-Object HtmlAgilityPack.HtmlDocument
    $doc.Load($file.FullName)

    $linknodes = $doc.DocumentNode.SelectNodes("//a")
    if ($null -ne $linknodes) {
        foreach ($node in $linknodes) {
            # Read the attribute once; "" stands in for a missing href.
            $href = $node.GetAttributeValue("href", "")
            if ($href.IndexOf("pdf", [StringComparison]::OrdinalIgnoreCase) -ge 0) {
                Write-Host "Found" $href
                New-Object PsObject -Property @{
                    PdfLink    = $href
                    FileName   = $name
                    LineNumber = $node.Line
                }
            }
        }
    }
}

# Full cmdlet name: the "Sort" alias is not defined on every platform.
$result | Sort-Object PdfLink
Initial URL
Initial Description
Initial Title
Get All PDF links from HTML files
Initial Tags
html
Initial Language
Windows PowerShell