C#网页数据采集（三）HttpWebRequest

来源：互联网 | 发布：网络交易监管 | 编辑：程序博客网 | 时间：2024/06/04 17:57

<span style="font-family: Arial, Helvetica, sans-serif; background-color: rgb(255, 255, 255);">截取到的网页数据是 JS 加载完以后的内容。</span>

  // Load the page once with HtmlAgilityPack so its encoding can be read.
  string _url = "http://news.baidu.com/";
  HtmlWeb webClient = new HtmlWeb();
  HtmlAgilityPack.HtmlDocument html1 = webClient.Load(_url); // the URL you need to parse
  string end3 = html1.Encoding.BodyName; // get the page's encoding name

  // The encoding still has to be set once more to avoid garbled text;
  // this calls the GetHtmlSource method.
  System.Text.Encoding pageEncoding = System.Text.Encoding.GetEncoding(end3);
  string _htmlSource = GetHtmlSource(_url, pageEncoding);

/// <summary>
/// Downloads the body of <paramref name="url"/> and decodes it with
/// <paramref name="charset"/>.
/// </summary>
/// <param name="url">Address of the page to download.</param>
/// <param name="charset">
/// Encoding used to decode the response body. When null, UTF-8 is used.
/// </param>
/// <returns>
/// The decoded response body. If the server answered with an error status,
/// the body of that error response is returned. If no response is available
/// or any other failure occurs, the exception message is returned instead.
/// This method does not throw.
/// </returns>
public static string GetHtmlSource(string url, Encoding charset)
{
    // Resolve the encoding up front so the error path below can never fail on a null charset.
    Encoding encoding = charset ?? Encoding.UTF8;

    try
    {
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);

        // Dispose the response so the underlying connection is released.
        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        {
            return ReadResponseBody(response, encoding);
        }
    }
    catch (WebException ex)
    {
        // Response is null when the failure happened before any reply was
        // received (connection refused, DNS failure, timeout, ...).
        if (ex.Response == null)
        {
            return ex.Message;
        }

        try
        {
            using (WebResponse errorResponse = ex.Response)
            {
                return ReadResponseBody(errorResponse, encoding);
            }
        }
        catch (Exception readError)
        {
            // Reading the error page failed too; keep the "never throws" contract.
            return readError.Message;
        }
    }
    catch (Exception ex)
    {
        return ex.Message;
    }
}

/// <summary>
/// Reads the whole body of <paramref name="response"/> as text using
/// <paramref name="encoding"/>; returns an empty string when there is no stream.
/// </summary>
private static string ReadResponseBody(WebResponse response, Encoding encoding)
{
    using (Stream stream = response.GetResponseStream())
    {
        if (stream == null)
        {
            return string.Empty;
        }

        using (StreamReader reader = new StreamReader(stream, encoding))
        {
            return reader.ReadToEnd();
        }
    }
}

0 0