C#获取指定网页源码的几种方法

来源:互联网 发布:网络剧发行新方法 编辑:程序博客网 时间:2024/05/16 08:42
// WebClient
/// <summary>
/// Downloads the resource at <paramref name="url"/> through <see cref="WebClient"/>
/// and returns its body decoded as UTF-8.
/// </summary>
/// <param name="url">Address of the page to download.</param>
/// <returns>The response body as a string.</returns>
private string GetWebClient(string url)
{
    // using blocks release the client, stream and reader even when the read throws.
    using (WebClient myWebClient = new WebClient())
    using (Stream myStream = myWebClient.OpenRead(url))
    using (StreamReader sr = new StreamReader(myStream, System.Text.Encoding.GetEncoding("utf-8")))
    {
        return sr.ReadToEnd();
    }
}

// WebRequest
/// <summary>
/// Downloads the resource at <paramref name="url"/> through <see cref="WebRequest"/>
/// and returns its body decoded as gb2312.
/// </summary>
/// <param name="url">Address of the page to download; must be a valid absolute URI.</param>
/// <returns>The response body as a string.</returns>
private string GetWebRequest(string url)
{
    Uri uri = new Uri(url);
    WebRequest myReq = WebRequest.Create(uri);

    // Dispose the response, stream and reader on every path so the underlying
    // connection is returned to the pool even if reading fails.
    using (WebResponse result = myReq.GetResponse())
    using (Stream receviceStream = result.GetResponseStream())
    using (StreamReader readerOfStream = new StreamReader(receviceStream, System.Text.Encoding.GetEncoding("gb2312")))
    {
        return readerOfStream.ReadToEnd();
    }
}

// HttpWebRequest
/// <summary>
/// Downloads the resource at <paramref name="url"/> through <see cref="HttpWebRequest"/>,
/// sending browser-like User-Agent / Accept / Accept-Language headers, and returns
/// its body decoded as gb2312.
/// </summary>
/// <param name="url">Address of the page to download; must be a valid absolute URI.</param>
/// <returns>The response body as a string.</returns>
/// <exception cref="Exception">
/// Any failure is wrapped in a new <see cref="Exception"/>; the original error is
/// available through <see cref="Exception.InnerException"/>.
/// </exception>
private string GetHttpWebRequest(string url)
{
    try
    {
        Uri uri = new Uri(url);
        HttpWebRequest myReq = (HttpWebRequest)WebRequest.Create(uri);
        // The property supplies the header name itself, so the value must not
        // start with "User-Agent:"; the product comment also needs its closing ')'.
        myReq.UserAgent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2; .NET CLR 1.0.3705)";
        myReq.Accept = "*/*";
        myReq.KeepAlive = true;
        myReq.Headers.Add("Accept-Language", "zh-cn,en-us;q=0.5");

        using (HttpWebResponse result = (HttpWebResponse)myReq.GetResponse())
        using (Stream receviceStream = result.GetResponseStream())
        using (StreamReader readerOfStream = new StreamReader(receviceStream, System.Text.Encoding.GetEncoding("gb2312")))
        {
            return readerOfStream.ReadToEnd();
        }
    }
    catch (Exception ex)
    {
        // Keep the original exception as InnerException so its type and stack trace survive.
        throw new Exception("采集指定网址异常," + ex.Message, ex);
    }
}


// Fetches a page's source. If the server compresses the response (gzip/deflate), reading
// the raw bytes as text yields garbage, so the response is decompressed before decoding.
/// <summary>
/// Downloads the page at <paramref name="url"/> with a 30 second timeout, transparently
/// decompressing gzip- or deflate-encoded responses, and returns the body decoded with
/// <see cref="Encoding.Default"/>.
/// </summary>
/// <param name="url">Address of the page to download.</param>
/// <returns>The decompressed response body as a string.</returns>
private string GetHtmlCode(string url)
{
    HttpWebRequest webRequest = (System.Net.HttpWebRequest)System.Net.WebRequest.Create(url);
    webRequest.Timeout = 30000;
    webRequest.Method = "GET";
    webRequest.UserAgent = "Mozilla/4.0";
    // Let the framework advertise Accept-Encoding and decode the body. The previous
    // hand-written check asked for "gzip, deflate" but only decoded gzip, so a
    // deflate-encoded response came back as garbled text.
    webRequest.AutomaticDecompression =
        System.Net.DecompressionMethods.GZip | System.Net.DecompressionMethods.Deflate;

    // Dispose the response as well as the stream/reader so the connection is released
    // even when reading throws.
    using (HttpWebResponse webResponse = (System.Net.HttpWebResponse)webRequest.GetResponse())
    using (System.IO.Stream streamReceive = webResponse.GetResponseStream())
    using (System.IO.StreamReader sr = new System.IO.StreamReader(streamReceive, Encoding.Default))
    {
        return sr.ReadToEnd();
    }
}

原文地址

0 0