Revision: 49316
Updated Code
at July 19, 2011 22:10 by danfsmith
Updated Code
# Reports, for every *.htm file found recursively under C:\Path\, its web-style
# name, its section, the text of its <title> element and the content of its
# description <meta> tag. Rows are sorted by Section, then Name.
Add-Type -Path c:\dan\tools\html-agility-pack\HtmlAgilityPack.dll
$files = Get-ChildItem -Filter *.htm -Path C:\Path\ -Recurse
$result = $files | ForEach-Object {
    #Write-Host "Checking $_"
    # Keep the file in its own variable: inside a catch block $_ is rebound to
    # the error record.
    $file = $_

    # NOTE(review): "FILEPATH" / "WEBPATH" look like placeholders for the local
    # root folder and the site root URL - confirm before running.
    $name = $file.FullName.Replace("FILEPATH","WEBPATH").Replace("\", "/")

    #Get second folder of URL as "section"
    # Segment 3 is a folder only when at least one more segment (the file name)
    # follows it. Checking the segment count avoids calling a method on $null
    # for short paths and no longer blanks folders whose name contains ".htm".
    $sections = $name.Split("/")
    if ($sections.Length -gt 4) {
        $section = $sections[3]
    }
    else {
        $section = ""
    }

    # Use a fresh document per file so that a failed Load cannot leave the
    # previous file's title/description behind to be reported for this one.
    $doc = New-Object HtmlAgilityPack.HtmlDocument
    try {
        $doc.Load($file.FullName)
    }
    catch {
        Write-Warning "Skipping $($file.FullName): $($_.Exception.Message)"
        return    # ends this iteration only; ForEach-Object moves to the next file
    }

    $titlenode = $doc.DocumentNode.SelectSingleNode("//title")
    if ($null -ne $titlenode) {
        $title = $titlenode.InnerText
    }
    else {
        $title = ""
    }

    $descriptionnode = $doc.DocumentNode.SelectSingleNode("//meta[@name='description']")
    if ($null -ne $descriptionnode) {
        $description = $descriptionnode.GetAttributeValue("content", "")
    }
    else {
        $description = ""
    }

    # Select-Object fixes the column order, which the hashtable does not guarantee.
    New-Object PSObject -Property @{
        Name        = $name
        Section     = $section
        Title       = $title
        Description = $description
    } | Select-Object -Property Name, Section, Title, Description
}
$result | Sort-Object -Property Section, Name
Revision: 49315
Initial Code
Initial URL
Initial Description
Initial Title
Initial Tags
Initial Language
at July 19, 2011 22:05 by danfsmith
Initial Code
# Walks every *.htm file under C:\WaldenWWW-HTM\ and lists its public URL, its
# section, its <title> text and its meta description, sorted by Section, Name.
Add-Type -Path c:\dan\tools\html-agility-pack\HtmlAgilityPack.dll
$doc = New-Object HtmlAgilityPack.HtmlDocument
$files = Get-ChildItem -Filter *.htm -Path C:\WaldenWWW-HTM\ -Recurse
$result = $files | ForEach-Object {
    #Write-Host "Checking $_"
    $localPath = $_.FullName

    # Swap the local root for the site root, then switch to URL separators.
    $url = $localPath.Replace("C:\WaldenWWW-HTM\","http://www.waldenu.edu/")
    $url = $url.Replace("\", "/")

    # Segment 3 of the "/"-split URL is taken as the section; it is blanked
    # when it contains ".htm".
    $section = $url.Split("/")[3]
    if ($section.Contains(".htm")) { $section = "" }

    # The same document object is reloaded for each file; Load's result is
    # not used.
    $null = $doc.Load($localPath)
    $root = $doc.DocumentNode

    # Description defaults to empty and is filled in only when the tag exists.
    $description = ""
    $metaNode = $root.SelectSingleNode("//meta[@name='description']")
    if ($metaNode) {
        $description = $metaNode.GetAttributeValue("content", "")
    }

    $title = $root.SelectSingleNode("//title").InnerText

    $row = New-Object PSObject -Property @{
        Name        = $url
        Section     = $section
        Title       = $title
        Description = $description
    }
    # Select-Object pins the column order of the emitted row.
    $row | Select-Object -Property Name, Section, Title, Description
}
$result | Sort-Object -Property Section, Name
Initial URL
Initial Description
Initial Title
Get Title and MetaData from HTML files
Initial Tags
html
Initial Language
Windows PowerShell