C# 爬虫的三种实现方式：WebClient、WebRequest、WebBrowser

来源:互联网 发布:cxax2013车床软件 编辑:程序博客网 时间:2024/06/16 20:15
// ----------------- Extracting web page content -----------------

// 1) WebClient class
// Download a page as a UTF-8 string and print every e-mail-like address found in it.
using (WebClient wc = new WebClient())
{
    wc.Encoding = Encoding.UTF8;

    // Download the whole page as one string.
    string str = wc.DownloadString("http://tieba.baidu.com/p/2314539885");

    // Group 1 = local part (before '@'), group 2 = domain part (after '@').
    MatchCollection mc = Regex.Matches(str, "([0-9a-zA-Z_.-]+)@([0-9a-zA-Z_.]+([.][a-zA-Z]+){1,2})");

    // Regex.Matches only yields successful matches, so no per-item Success check is needed.
    foreach (Match item in mc)
    {
        Console.WriteLine(item.Groups[1].Value + "===" + item.Groups[2].Value);
    }

    Console.WriteLine(mc.Count);
}
Console.ReadKey();

// Or: download the raw bytes and decode them yourself.
using (WebClient myWebClient = new WebClient())
{
    // Network credentials used to authenticate the request to the Internet resource.
    myWebClient.Credentials = CredentialCache.DefaultCredentials;

    // Download the raw bytes from the given site.
    byte[] pageData = myWebClient.DownloadData("http://www.163.com");

    // Use this line when the page is encoded as GB2312.
    // NOTE: Encoding.Default is the operating system's ANSI code page on .NET Framework,
    // so this only decodes GB2312 correctly on a machine whose ANSI code page is GB2312/GBK.
    string pageHtml = Encoding.Default.GetString(pageData);

    // Use this line instead when the page is encoded as UTF-8:
    // string pageHtml = Encoding.UTF8.GetString(pageData);

    // Write the downloaded content to the console.
    Console.WriteLine(pageHtml);
}

// 2) WebRequest class
/// <summary>
/// Downloads the page at <paramref name="url"/> and adds every link extracted from its
/// body (via <c>GetLink</c>) to the <c>todo</c> collection.
/// </summary>
/// <remarks>
/// When the server answers with an HTTP error status, the body of that error response is
/// still read and scanned for links.
/// </remarks>
/// <param name="url">Address of the page to download.</param>
/// <exception cref="WebException">
/// The request failed without producing an HTTP response (for example a DNS failure,
/// a timeout or a refused connection).
/// </exception>
public void RequestSite(string url)
{
    WebRequest req = WebRequest.Create(url);
    HttpWebResponse res;
    try
    {
        res = (HttpWebResponse)req.GetResponse();
    }
    catch (WebException ex)
    {
        // HTTP error statuses (404, 500, ...) still carry a response whose body can be read.
        res = ex.Response as HttpWebResponse;
        if (res == null)
        {
            // No response at all: surface the original failure instead of a
            // NullReferenceException further down.
            throw;
        }
    }

    // Dispose the response, stream and reader so the underlying connection is released.
    using (res)
    using (Stream st = res.GetResponseStream())
    using (StreamReader rdr = new StreamReader(st))
    {
        string s = rdr.ReadToEnd();
        todo.AddRange(GetLink(s));
    }
}

// 3) WebBrowser class
// Requires "using System.Windows.Forms;" - presumably intended for client (desktop) applications.
WebBrowser web = new WebBrowser();

// Subscribe before navigating so the completion event cannot be missed.
web.DocumentCompleted += new WebBrowserDocumentCompletedEventHandler(web_DocumentCompleted);
web.Navigate("http://www.xjflcp.com/ssc/");

/// <summary>
/// Runs when the browser control has finished loading a document: appends the inner text
/// of every "Table" element in the document to the file "Kaijiang_xj.txt".
/// </summary>
/// <param name="sender">The <see cref="WebBrowser"/> that raised the event.</param>
/// <param name="e">Event data; not used.</param>
void web_DocumentCompleted(object sender, WebBrowserDocumentCompletedEventArgs e)
{
    // Named "browser" so it does not clash with the outer "web" variable.
    WebBrowser browser = (WebBrowser)sender;
    HtmlElementCollection tables = browser.Document.GetElementsByTagName("Table");
    foreach (HtmlElement item in tables)
    {
        File.AppendAllText("Kaijiang_xj.txt", item.InnerText);
    }
}

原创粉丝点击