Revision: 49316
Updated Code
at July 19, 2011 22:10 by danfsmith
Updated Code
# Report the URL, section, <title> text and meta description of every *.htm
# file found under a folder, sorted by section and then by URL.
# Requires the HtmlAgilityPack assembly at the path given to Add-Type.
Add-Type -Path c:\dan\tools\html-agility-pack\HtmlAgilityPack.dll

$files = Get-ChildItem -Filter *.htm -Path C:\Path\ -Recurse

# One parser instance is reused; each Load() call below replaces its contents.
$doc = New-Object HtmlAgilityPack.HtmlDocument

$result = $files | ForEach-Object {
    #Write-Host "Checking $_"

    # Turn the local path into a URL-style name.
    # NOTE(review): "FILEPATH" and "WEBPATH" look like placeholders for the
    # local root folder and the site root URL - confirm before running.
    $name = $_.FullName.Replace("FILEPATH", "WEBPATH").Replace("\", "/")

    #Get second folder of URL as "section"
    # The section is the fourth "/"-separated segment of the name. It is left
    # empty when the name has fewer than four segments (previously a method
    # call on $null) or when that segment is itself an .htm file name.
    $sections = $name.Split("/")
    $section = ""
    if ($sections.Length -gt 3 -and -not $sections[3].Contains(".htm")) {
        $section = $sections[3]
    }

    # Load() fills $doc in place; anything it returns is discarded so that it
    # cannot leak into the pipeline output collected in $result.
    $null = $doc.Load($_.FullName)

    $titlenode = $doc.DocumentNode.SelectSingleNode("//title")
    $descriptionnode = $doc.DocumentNode.SelectSingleNode("//meta[@name='description']")

    # Either node may be absent; fall back to an empty string for both so the
    # report columns always hold strings.
    if ($null -ne $titlenode) {
        $title = $titlenode.InnerText
    } else {
        $title = ""
    }

    if ($null -ne $descriptionnode) {
        $description = $descriptionnode.GetAttributeValue("content", "")
    } else {
        $description = ""
    }

    # Select-Object pins the column order, which a hashtable does not guarantee.
    New-Object PsObject -Property @{
        Name        = $name
        Section     = $section
        Title       = $title
        Description = $description
    } | Select-Object Name, Section, Title, Description
}

$result | Sort-Object Section, Name
Revision: 49315
Initial Code
Initial URL
Initial Description
Initial Title
Initial Tags
Initial Language
at July 19, 2011 22:05 by danfsmith
Initial Code
# List every *.htm file under C:\WaldenWWW-HTM\ with its public URL, section,
# <title> text and meta description, sorted by section and then by URL.
# Requires the HtmlAgilityPack assembly at the path given to Add-Type.
Add-Type -Path c:\dan\tools\html-agility-pack\HtmlAgilityPack.dll

$files = Get-ChildItem -Filter *.htm -Path C:\WaldenWWW-HTM\ -Recurse

# A single HtmlDocument is reused for every file; Load() replaces its contents.
$doc = New-Object HtmlAgilityPack.HtmlDocument

$result = $files | ForEach-Object {
    #Write-Host "Checking $_"

    # Swap the local root folder for the site root and use URL separators.
    $name = $_.FullName.Replace("C:\WaldenWWW-HTM\", "http://www.waldenu.edu/").Replace("\", "/")

    # Splitting "http://host/a/b.htm" on "/" gives "http:", "", "host", "a", ...
    # so index 3 is the first path segment after the host. A file sitting
    # directly in the site root has its own file name there and therefore gets
    # no section; a name with fewer than four segments also gets none instead
    # of triggering a method call on $null.
    $sections = $name.Split("/")
    $section = ""
    if ($sections.Length -gt 3) {
        $candidate = $sections[3]
        if (-not $candidate.Contains(".htm")) {
            $section = $candidate
        }
    }

    # Load() fills $doc in place; discard anything it returns so nothing but
    # the report object reaches the pipeline.
    $null = $doc.Load($_.FullName)

    $titlenode = $doc.DocumentNode.SelectSingleNode("//title")
    $descriptionnode = $doc.DocumentNode.SelectSingleNode("//meta[@name='description']")

    # Pages without a <title> or a description meta tag report empty strings.
    $title = ""
    if ($null -ne $titlenode) {
        $title = $titlenode.InnerText
    }

    $description = ""
    if ($null -ne $descriptionnode) {
        $description = $descriptionnode.GetAttributeValue("content", "")
    }

    # Select-Object fixes the column order, which a hashtable does not guarantee.
    New-Object PsObject -Property @{
        Name        = $name
        Section     = $section
        Title       = $title
        Description = $description
    } | Select-Object Name, Section, Title, Description
}

$result | Sort-Object Section, Name
Initial URL
Initial Description
Initial Title
Get Title and MetaData from HTML files
Initial Tags
html
Initial Language
Windows PowerShell