超简单NSoup版CSDN博客爬虫

来源:互联网 发布:库里总决赛数据 编辑:程序博客网 时间:2024/04/29 22:43

自己摸索,.NET程序员也有春天

using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net;
using System.Text;
using Microsoft.SqlServer.Management.Common;
using Microsoft.SqlServer.Management.Smo;
using NSoup.Select;

namespace NsoupDemo
{
    /// <summary>
    /// Minimal blog-list crawler: downloads the article list pages of one CSDN blog,
    /// extracts title / description / view-count text from every "list_item" element
    /// with NSoup, and appends the result to a text file.
    /// </summary>
    class Program
    {
        // Base URL of the paginated article list; the page number is appended to it.
        private const string ListUrl = "http://blog.csdn.net/WuLex/article/list/";

        // Output file that every scraped entry is appended to.
        private const string OutputPath = @"D:\Info.txt";

        // Pages FirstPage .. LastPage (inclusive) are downloaded.
        private const int FirstPage = 1;
        private const int LastPage = 2;

        /// <summary>
        /// Entry point: crawls the configured pages, writes each article entry to
        /// <c>D:\Info.txt</c>, then waits for a line on standard input before exiting.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // WebClient is IDisposable; the using block releases it even if a download throws.
            using (WebClient webClient = new WebClient())
            {
                for (int i = FirstPage; i <= LastPage; i++)
                {
                    // WebClient removes known headers (Host, Referer, User-Agent, ...) from its
                    // Headers collection while building a request, so they must be set again
                    // before every download, not just once before the loop.
                    ApplyRequestHeaders(webClient);

                    string htmlString = Encoding.UTF8.GetString(webClient.DownloadData(ListUrl + i));
                    NSoup.Nodes.Document doc = NSoup.NSoupClient.Parse(htmlString);
                    Elements elements = doc.GetElementsByClass("list_item");

                    foreach (var ele in elements)
                    {
                        string title = ele.GetElementsByClass("link_title").Text;
                        string descripe = ele.GetElementsByClass("article_description").Text;
                        string views = ele.GetElementsByClass("link_view").Text;
                        Write(OutputPath, "\r\n" + title + "\r\n" + descripe + "\r\n" + views + "\r\n");
                    }
                }
            }

            // Keep the console window open until the user presses Enter.
            Console.ReadLine();
        }

        /// <summary>
        /// Sets the browser-like request headers used for every page download.
        /// </summary>
        /// <param name="webClient">Client whose header collection is (re)populated.</param>
        private static void ApplyRequestHeaders(WebClient webClient)
        {
            // The indexer replaces any existing value instead of appending a duplicate.
            webClient.Headers["Host"] = "blog.csdn.net";
            webClient.Headers["Referer"] = "http://blog.csdn.net/WuLex/article/list";
            webClient.Headers["User-Agent"] = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36";
        }

        /// <summary>
        /// Appends <paramref name="content"/> to the file at <paramref name="path"/>,
        /// creating the file if it does not exist (<see cref="FileMode.Append"/>).
        /// </summary>
        /// <param name="path">Path of the file to append to.</param>
        /// <param name="content">Text to append.</param>
        public static void Write(string path, string content)
        {
            // using blocks flush and close both the writer and the stream even when
            // the write throws, so the file handle is never leaked.
            using (FileStream fs = new FileStream(path, FileMode.Append))
            using (StreamWriter sw = new StreamWriter(fs))
            {
                sw.Write(content);
            }
        }
    }
}
结果图:



0 0
原创粉丝点击