htmlagilitypackDemo
来源:互联网 发布:ubuntu终端怎么打开 编辑:程序博客网 时间:2024/05/16 08:02
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net;
using System.Text;
using HtmlAgilityPack;

namespace htmlagilitypackDemo
{
    /// <summary>
    /// Console demo: downloads a Sogou news search page, walks it with
    /// HtmlAgilityPack XPath queries and prints the link text and absolute URL
    /// of every matching result.
    /// </summary>
    class Program
    {
        // Key point: these XPath expressions are tied to the layout of the target
        // page; a different site needs its own paths.
        private const string CategoryListXPath = "./div[2]/div";
        private const string CategoryNameXPath = "./table[1]/tr[1]/td[2]/h3[1]";
        private const string ChooseXPath = "./a[1]";

        static void Main(string[] args)
        {
            Demo_1();

            // Keep the console window open until a key is pressed.
            Console.Read();
        }

        /// <summary>
        /// Downloads the search page, extracts the result links and writes each
        /// one to standard output as a "name:" line followed by a "url:" line.
        /// </summary>
        static void Demo_1()
        {
            Uri url = new Uri("http://news.sogou.com/news?query=%BA%A3%B6%FB&p=42230305&dp=1&_ast=1353663787&_asf=news.sogou.com&time=0&w=03009900&sort=1&mode=1&manual=");

            // The response body is decoded as gb2312.
            string pageHtml = DownloadPage(url, Encoding.GetEncoding("gb2312"));
            List<Category> list = ParseCategories(pageHtml, url);

            if (list.Count == 0)
            {
                // Diagnostics go to stderr so the normal output format is untouched.
                Console.Error.WriteLine("No matching entries were found in the downloaded page.");
                return;
            }

            foreach (Category category in list)
            {
                Console.WriteLine("name:{0}", category.Subject);
                Console.WriteLine("url:{0}", category.IndexUrl);
            }
        }

        /// <summary>
        /// Fetches <paramref name="url"/> and returns the response body decoded
        /// with <paramref name="encoding"/>.
        /// </summary>
        /// <param name="url">Address of the page to download.</param>
        /// <param name="encoding">Text encoding used to decode the response stream.</param>
        /// <returns>The complete response body as a string.</returns>
        /// <exception cref="WebException">Propagated from the request when it fails.</exception>
        private static string DownloadPage(Uri url, Encoding encoding)
        {
            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);

            // Dispose the response, stream and reader deterministically so the
            // underlying connection is released even when reading throws.
            using (WebResponse response = request.GetResponse())
            using (Stream stream = response.GetResponseStream())
            using (StreamReader reader = new StreamReader(stream, encoding))
            {
                return reader.ReadToEnd();
            }
        }

        /// <summary>
        /// Extracts one <see cref="Category"/> per result link found under the
        /// element with id "main".
        /// </summary>
        /// <param name="pageHtml">Raw HTML of the downloaded page.</param>
        /// <param name="baseUrl">Base address used to resolve relative href values.</param>
        /// <returns>
        /// The extracted entries in document order; empty when the page does not
        /// contain the expected structure.
        /// </returns>
        private static List<Category> ParseCategories(string pageHtml, Uri baseUrl)
        {
            var list = new List<Category>();

            HtmlDocument html = new HtmlDocument();
            html.LoadHtml(pageHtml);

            // GetElementbyId yields null when no element carries the id.
            HtmlNode rootNode = html.GetElementbyId("main");
            if (rootNode == null)
            {
                return list;
            }

            // SelectNodes yields null (not an empty collection) when nothing matches.
            HtmlNodeCollection categoryNodeList = rootNode.SelectNodes(CategoryListXPath);
            if (categoryNodeList == null)
            {
                return list;
            }

            foreach (HtmlNode categoryNode in categoryNodeList)
            {
                // Re-parse the fragment so the relative XPath is evaluated against
                // a standalone copy of this node, as the original demo did.
                HtmlNode detachedCategory = HtmlNode.CreateNode(categoryNode.OuterHtml);
                HtmlNodeCollection headingNodes = detachedCategory.SelectNodes(CategoryNameXPath);
                if (headingNodes == null)
                {
                    continue;
                }

                foreach (HtmlNode headingNode in headingNodes)
                {
                    HtmlNode detachedHeading = HtmlNode.CreateNode(headingNode.OuterHtml);
                    HtmlNode linkNode = detachedHeading.SelectSingleNode(ChooseXPath);
                    if (linkNode == null)
                    {
                        continue;
                    }

                    // Skip anchors without an href instead of dereferencing null.
                    HtmlAttribute href = linkNode.Attributes["href"];
                    if (href == null)
                    {
                        continue;
                    }

                    // Skip links that cannot be resolved against the page address.
                    Uri uriCategory;
                    if (!Uri.TryCreate(baseUrl, href.Value, out uriCategory))
                    {
                        continue;
                    }

                    Category category = new Category();
                    category.Subject = linkNode.InnerText;
                    category.IndexUrl = uriCategory.ToString();
                    list.Add(category);
                }
            }

            return list;
        }
    }

    /// <summary>
    /// One extracted entry: the link text and the absolute URL it points to.
    /// </summary>
    public class Category
    {
        /// <summary>Inner text of the result link.</summary>
        public string Subject { get; set; }

        /// <summary>Absolute URL of the result link, as a string.</summary>
        public string IndexUrl { get; set; }
    }
}