# Published in: Windows PowerShell
# Expand | Embed | Plain Text
# Copy this code and paste it in your HTML
# Reports Name, Section, Title and meta description for every *.htm file
# found recursively under C:\Path\, sorted by Section and then Name.
# Requires the HtmlAgilityPack assembly at the path given to Add-Type.
Add-Type -Path c:\dan\tools\html-agility-pack\HtmlAgilityPack.dll

$files = Get-ChildItem -Filter *.htm -Path C:\Path\ -Recurse
$doc = New-Object HtmlAgilityPack.HtmlDocument

$result = $files | ForEach-Object {
    #Write-Host "Checking $_"

    # NOTE(review): the original snippet read $name without ever assigning it.
    # It is reconstructed here as the file's full path with forward slashes,
    # which makes index 3 of the split below the second folder under C:\Path\.
    # Confirm this matches the intended "URL" form.
    $name = $_.FullName.Replace("\", "/")

    # The original never loaded the file into $doc, so the XPath queries below
    # had nothing to search. Parse the current file before querying it.
    $doc.Load($_.FullName)

    # Get second folder of URL as "section".
    # Files that sit too shallow to have one (index 3 is missing, or is the
    # .htm file name itself) get an empty section instead of a null-method error.
    $sections = $name.Split("/")
    $section = ""
    if ($sections.Length -gt 3 -and -not $sections[3].Contains(".htm")) {
        $section = $sections[3]
    }

    $titlenode = $doc.DocumentNode.SelectSingleNode("//title")
    $descriptionnode = $doc.DocumentNode.SelectSingleNode("//meta[@name='description']")

    if ($descriptionnode) {
        $description = $descriptionnode.GetAttributeValue("content", "")
    }
    else {
        $description = ""
    }

    # Pages without a <title> element yield an empty title rather than failing
    # on a null node, mirroring the description handling above.
    if ($titlenode) {
        $title = $titlenode.InnerText
    }
    else {
        $title = ""
    }

    # Select-Object fixes the column order, which a hashtable does not guarantee.
    New-Object PsObject -Property @{
        Name        = $name
        Section     = $section
        Title       = $title
        Description = $description
    } | Select-Object Name, Section, Title, Description
}

$result | Sort-Object Section, Name