Revision: 60360
Initial Code
Initial URL
Initial Description
Initial Title
Initial Tags
Initial Language
at November 5, 2012 14:50 by denakitan
Initial Code
using System;
using System.Net;
using System.Collections.Generic;
using System.Linq;
using HtmlAgilityPack;
namespace PixarWebClient
{
    /// <summary>
    /// Console sample: downloads a Wikipedia page with <see cref="WebClient"/>,
    /// parses it with HtmlAgilityPack, and prints one line per table row in the
    /// form "&lt;second cell text&gt; - &lt;title&gt;".
    /// </summary>
    public class PixarWebClient
    {
        // Page that is downloaded and parsed.
        private const string PixarFilmsUrl = "http://en.wikipedia.org/wiki/List_of_Pixar_films";

        // Exact value of the class attribute that identifies the table to read.
        private const string FilmTableClass = "sortable wikitable";

        /// <summary>
        /// Program entry point. Fetches the page, locates the first table whose
        /// class attribute equals <c>"sortable wikitable"</c>, and writes one line
        /// to standard output for every non-header row that has at least two
        /// <c>td</c> cells.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        /// <exception cref="WebException">
        /// Propagated from <see cref="WebClient.DownloadString(string)"/> when the
        /// download fails.
        /// </exception>
        public static void Main(string[] args)
        {
            // The client is only needed for the download, so release it before parsing.
            string pixarHtml;
            using (WebClient client = new WebClient())
            {
                pixarHtml = client.DownloadString(PixarFilmsUrl);
            }

            HtmlDocument document = new HtmlDocument();
            document.LoadHtml(pixarHtml);

            // GetAttributeValue supplies a default, so tables that have no class
            // attribute are skipped instead of causing a NullReferenceException.
            HtmlNode pixarTable = document.DocumentNode
                .Descendants("table")
                .FirstOrDefault(t => t.GetAttributeValue("class", string.Empty) == FilmTableClass);

            if (pixarTable == null)
            {
                Console.Error.WriteLine(
                    "No table with class \"" + FilmTableClass + "\" was found at " + PixarFilmsUrl);
                return;
            }

            // Skip(1) drops the header row without mutating the document and is
            // safe when the table contains no rows at all.
            foreach (HtmlNode row in pixarTable.Descendants("tr").Skip(1))
            {
                // Only the first two cells are used: the title cell and the cell after it.
                List<HtmlNode> cells = row.Descendants("td").Take(2).ToList();
                if (cells.Count < 2)
                {
                    // Rows with fewer than two td cells produce no output.
                    continue;
                }

                string title = ExtractTitle(cells[0]);
                string detail = CleanText(cells[1].InnerText);
                Console.WriteLine(detail + " - " + title);
            }
        }

        // Returns the text of the <i><a> link inside the cell, falling back to the
        // <i> element or to the whole cell when that nesting is not present.
        private static string ExtractTitle(HtmlNode titleCell)
        {
            HtmlNode source = titleCell;

            HtmlNode italic = titleCell.Element("i");
            if (italic != null)
            {
                HtmlNode link = italic.Element("a");
                source = link ?? italic;
            }

            return CleanText(source.InnerText);
        }

        // Decodes HTML entities and trims surrounding whitespace so that each row
        // is printed as a single clean line.
        private static string CleanText(string rawText)
        {
            return HtmlEntity.DeEntitize(rawText ?? string.Empty).Trim();
        }
    }
}
Initial URL
http://htmlagilitypack.codeplex.com/
Initial Description
Shows how to use the WebClient class to retrieve HTML from a URL and then use HtmlAgilityPack to parse it.
Initial Title
.NET - C# - WebClient and HtmlAgilityPack - Fetching and Parsing HTML
Initial Tags
html, Net, c#
Initial Language
C#