Revision: 60360
Initial Code
Initial URL
Initial Description
Initial Title
Initial Tags
Initial Language
at November 5, 2012 14:50 by denakitan
Initial Code
using System;
using System.Collections.Generic;
using System.Linq;
using System.Net;
using System.Text;
using HtmlAgilityPack;

namespace PixarWebClient
{
    /// <summary>
    /// Console sample that downloads the Wikipedia "List of Pixar films" page with
    /// <see cref="WebClient"/> and parses it with HtmlAgilityPack, printing the
    /// second column of each film row followed by the film title.
    /// </summary>
    public class PixarWebClient
    {
        private const string PixarFilmsUrl = "http://en.wikipedia.org/wiki/List_of_Pixar_films";

        // Exact value of the class attribute that identifies the films table.
        private const string FilmTableClass = "sortable wikitable";

        // Wikimedia asks clients to identify themselves; requests without a
        // User-Agent may be rejected.
        private const string UserAgent = "PixarWebClient/1.0 (HtmlAgilityPack sample)";

        /// <summary>
        /// Program entry point: fetches the page, locates the films table and writes
        /// one line per film row to standard output in the form
        /// "&lt;second column&gt; - &lt;title&gt;".
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        public static void Main(string[] args)
        {
            // fetching HTML
            string pixarHtml;
            using (WebClient client = new WebClient())
            {
                // DownloadString decodes with WebClient.Encoding, which defaults to the
                // system code page; the page is UTF-8, so set it explicitly.
                client.Encoding = Encoding.UTF8;
                client.Headers.Add(HttpRequestHeader.UserAgent, UserAgent);
                pixarHtml = client.DownloadString(PixarFilmsUrl);
            }

            HtmlDocument document = new HtmlDocument();
            document.LoadHtml(pixarHtml);

            // GetAttributeValue returns the supplied default when the attribute is
            // missing, so tables without a class attribute no longer cause a
            // NullReferenceException.
            HtmlNode pixarTable = document.DocumentNode
                .Descendants("table")
                .FirstOrDefault(t => t.GetAttributeValue("class", string.Empty) == FilmTableClass);

            if (pixarTable == null)
            {
                Console.Error.WriteLine(
                    "Could not find a table with class \"" + FilmTableClass + "\" in the downloaded page.");
                return;
            }

            // Skip the first row (header information) instead of removing it from the
            // document: same rows are visited, without mutating the DOM.
            foreach (HtmlNode row in pixarTable.Descendants("tr").Skip(1))
            {
                // Only the first two cells are used: the title cell and the cell after it.
                List<HtmlNode> cells = row.Descendants("td").Take(2).ToList();
                if (cells.Count < 2)
                {
                    continue;
                }

                string title = ExtractTitle(cells[0]);
                Console.WriteLine(CleanText(cells[1].InnerText) + " - " + title);
            }
        }

        // Returns the text of the <i><a> link inside the cell, falling back to the
        // cell's own text when that markup is absent.
        private static string ExtractTitle(HtmlNode cell)
        {
            HtmlNode italic = cell.Element("i");
            HtmlNode link = italic != null ? italic.Element("a") : null;
            return CleanText((link ?? cell).InnerText);
        }

        // InnerText keeps HTML entities and surrounding whitespace; decode and trim
        // so the console output is readable.
        private static string CleanText(string text)
        {
            return WebUtility.HtmlDecode(text).Trim();
        }
    }
}
Initial URL
http://htmlagilitypack.codeplex.com/
Initial Description
Shows how to use the WebClient class to retrieve HTML from a URL and then use HtmlAgilityPack to parse it.
Initial Title
.NET - C# - WebClient and HtmlAgilityPack - Fetching and Parsing HTML
Initial Tags
html, Net, c#
Initial Language
C#