C#HttpHelper类---苏飞版

来源:互联网 发布:淘宝申诉场景怎么写 编辑:程序博客网 时间:2024/05/22 04:52
C#HttpHelper实现了C#HttpWebRequest抓取时无视编码、无视证书、无视Cookie,并且实现了代理的功能。使用它您可以进行Get和Post请求,可以很方便地设置Cookie、证书和代理;编码问题您不用管,因为类会自动为您识别网页的编码。


       这个类是我以前写百度和网络蜘蛛时使用的,是经过上千万个网站的测试、上万个网站抓取的例子总结出来的,中间的方法也是我实验了很久之后才确定的方案,所以大家可以放心使用。


        我不敢说100%,但是应该99%的网站都没有问题,都可以无视编码、证书和Cookie。如果你确实发现哪个网站在使用本类的过程中有问题,例如出现乱码、获取不了内容、不能带Cookie、不能带证书等,请与我联系。


// Description: HttpHelper class. Performs HTTP access via POST or GET, either directly
//              or with cookies / a client certificate, and can go through a proxy.
// Created:     2011-09-20
// Author:      Su Fei (苏飞)
// Contact:     361983679
// Updates:     http://www.sufeinet.com/thread-3-1-1.html
// Modified:    2013-02-07
using System;
using System.Collections.Generic;
using System.Text;
using System.Net;
using System.IO;
using System.Text.RegularExpressions;
using System.IO.Compression;
using System.Security.Cryptography.X509Certificates;
using System.Net.Security;

namespace DotNet.Utilities
{
    /// <summary>
    /// Helper for issuing a single HTTP request described by an <see cref="HttpItem"/>
    /// and collecting the response into an <see cref="HttpResult"/>.
    /// </summary>
    /// <remarks>
    /// The request, response and encoding are kept in instance fields that are overwritten
    /// by every <see cref="GetHtml"/> call, so one instance must not be shared by
    /// concurrent callers.
    /// </remarks>
    public class HttpHelper
    {
        // Chunk size used when draining a response stream into memory.
        private const int CopyBufferSize = 4096;

        // Encoding used to decode the response body; null means "detect from the response".
        private Encoding encoding = Encoding.Default;
        // Request object for the call currently being prepared/executed.
        private HttpWebRequest request = null;
        // Response object of the most recent call.
        private HttpWebResponse response = null;

        /// <summary>
        /// Executes the prepared request and converts the response into an <see cref="HttpResult"/>.
        /// </summary>
        /// <param name="objhttpitem">Request description; supplies ResultType and IsToLower.</param>
        /// <returns>
        /// The populated result. When a <see cref="WebException"/> occurs, Html is set to
        /// the literal "String Error" and the other members keep their defaults.
        /// </returns>
        private HttpResult GetHttpRequestData(HttpItem objhttpitem)
        {
            HttpResult result = new HttpResult();
            try
            {
                using (response = (HttpWebResponse)request.GetResponse())
                {
                    result.Header = response.Headers;
                    if (response.Cookies != null)
                    {
                        result.CookieCollection = response.Cookies;
                    }
                    if (response.Headers["set-cookie"] != null)
                    {
                        result.Cookie = response.Headers["set-cookie"];
                    }

                    byte[] rawResponse = ReadResponseBody(response);

                    // Raw bytes are only handed back when the caller asked for them.
                    if (objhttpitem.ResultType == ResultType.Byte)
                    {
                        result.ResultByte = rawResponse;
                    }

                    // No explicit encoding configured: work it out from the page / headers.
                    if (encoding == null)
                    {
                        encoding = DetectEncoding(rawResponse, response.CharacterSet);
                    }

                    result.Html = encoding.GetString(rawResponse);
                }
            }
            catch (WebException ex)
            {
                // Error marker returned to the caller when the request fails.
                result.Html = "String Error";
                response = ex.Response as HttpWebResponse;
                if (response != null)
                {
                    // Close the error response so the underlying connection is released.
                    response.Close();
                }
            }

            if (objhttpitem.IsToLower)
            {
                result.Html = result.Html.ToLower();
            }
            return result;
        }

        /// <summary>
        /// Reads the complete response body into memory, decompressing it when the
        /// Content-Encoding header is "gzip".
        /// </summary>
        /// <param name="webResponse">Response whose body is read.</param>
        /// <returns>The (decompressed) body bytes.</returns>
        private static byte[] ReadResponseBody(HttpWebResponse webResponse)
        {
            bool isGzip = webResponse.ContentEncoding != null
                && webResponse.ContentEncoding.Equals("gzip", StringComparison.InvariantCultureIgnoreCase);

            using (Stream body = webResponse.GetResponseStream())
            {
                if (!isGzip)
                {
                    using (MemoryStream plain = GetMemoryStream(body))
                    {
                        return plain.ToArray();
                    }
                }

                using (GZipStream gzip = new GZipStream(body, CompressionMode.Decompress))
                using (MemoryStream inflated = GetMemoryStream(gzip))
                {
                    return inflated.ToArray();
                }
            }
        }

        /// <summary>
        /// Chooses the encoding for a response body: first from a &lt;meta ... charset=...&gt;
        /// tag in the body, then from the response's character set, finally UTF-8.
        /// </summary>
        /// <param name="rawResponse">Undecoded response body.</param>
        /// <param name="headerCharset">Character set reported by the response; may be null.</param>
        /// <returns>The chosen encoding; never null.</returns>
        private static Encoding DetectEncoding(byte[] rawResponse, string headerCharset)
        {
            // Decode with the system default just to be able to search for the meta tag.
            string preview = Encoding.Default.GetString(rawResponse, 0, rawResponse.Length);
            Match meta = Regex.Match(preview, "<meta([^<]*)charset=([^<]*)[\"']", RegexOptions.IgnoreCase | RegexOptions.Multiline);
            string charter = (meta.Groups.Count > 2) ? meta.Groups[2].Value : string.Empty;
            charter = charter.Replace("\"", string.Empty).Replace("'", string.Empty).Replace(";", string.Empty).Trim();

            if (charter.Length > 0)
            {
                // The original author maps iso-8859-1 to gbk; that choice is preserved.
                Encoding fromMeta = TryGetEncoding(charter.ToLower().Replace("iso-8859-1", "gbk"));
                if (fromMeta != null)
                {
                    return fromMeta;
                }
                // The regex is greedy and can capture garbage; fall through to the header.
            }

            string charset = (headerCharset == null) ? string.Empty : headerCharset.Trim();
            if (charset.Length == 0)
            {
                return Encoding.UTF8;
            }
            if (string.Equals(charset, "iso-8859-1", StringComparison.OrdinalIgnoreCase))
            {
                charset = "gbk";
            }
            return TryGetEncoding(charset) ?? Encoding.UTF8;
        }

        /// <summary>
        /// Looks up an encoding by name without throwing for unknown names.
        /// </summary>
        /// <param name="name">Encoding name, e.g. "utf-8".</param>
        /// <returns>The encoding, or null when the name is not recognised or not supported.</returns>
        private static Encoding TryGetEncoding(string name)
        {
            try
            {
                return Encoding.GetEncoding(name);
            }
            catch (ArgumentException)
            {
                return null;
            }
            catch (NotSupportedException)
            {
                return null;
            }
        }

        /// <summary>
        /// Copies a stream into a new <see cref="MemoryStream"/> with a manual read loop
        /// (the original code targets .NET versions below 4.0, which lack Stream.CopyTo).
        /// </summary>
        /// <param name="streamResponse">Stream to drain.</param>
        /// <returns>A memory stream holding every byte read from <paramref name="streamResponse"/>.</returns>
        private static MemoryStream GetMemoryStream(Stream streamResponse)
        {
            MemoryStream copy = new MemoryStream();
            byte[] buffer = new byte[CopyBufferSize];
            int bytesRead;
            while ((bytesRead = streamResponse.Read(buffer, 0, buffer.Length)) > 0)
            {
                copy.Write(buffer, 0, bytesRead);
            }
            return copy;
        }

        /// <summary>
        /// Creates the request object and applies every setting of the item to it.
        /// </summary>
        /// <param name="objhttpItem">Request description.</param>
        private void SetRequest(HttpItem objhttpItem)
        {
            // Creates the request (and attaches the client certificate when configured).
            SetCer(objhttpItem);

            if (objhttpItem.Header != null)
            {
                request.Headers = objhttpItem.Header;
            }

            SetProxy(objhttpItem);

            request.Method = objhttpItem.Method;
            request.Timeout = objhttpItem.Timeout;
            request.ReadWriteTimeout = objhttpItem.ReadWriteTimeout;
            request.Accept = objhttpItem.Accept;
            request.ContentType = objhttpItem.ContentType;
            request.UserAgent = objhttpItem.UserAgent;

            SetEncoding(objhttpItem);
            SetCookie(objhttpItem);

            request.Referer = objhttpItem.Referer;
            request.AllowAutoRedirect = objhttpItem.Allowautoredirect;

            // Must come after Method/ContentType are set: this writes the request body.
            SetPostData(objhttpItem);

            if (objhttpItem.Connectionlimit > 0)
            {
                request.ServicePoint.ConnectionLimit = objhttpItem.Connectionlimit;
            }
        }

        /// <summary>
        /// Creates the request for the item's URL and, when CerPath is set, attaches the
        /// client certificate and installs the accept-all server certificate callback.
        /// </summary>
        /// <param name="objhttpItem">Request description.</param>
        private void SetCer(HttpItem objhttpItem)
        {
            bool useCertificate = !string.IsNullOrEmpty(objhttpItem.CerPath);
            if (useCertificate)
            {
                // Has to be registered before the request is created.
                // NOTE(review): this assigns a static ServicePointManager property and the callback
                // accepts every certificate, so server certificate validation is switched off
                // beyond this one request. Kept for compatibility.
                ServicePointManager.ServerCertificateValidationCallback = new System.Net.Security.RemoteCertificateValidationCallback(CheckValidationResult);
            }

            request = (HttpWebRequest)WebRequest.Create(GetUrl(objhttpItem.URL));

            if (useCertificate)
            {
                X509Certificate clientCertificate = new X509Certificate(objhttpItem.CerPath);
                request.ClientCertificates.Add(clientCertificate);
            }
        }

        /// <summary>
        /// Sets the decoding encoding from the item; an empty value or the text "null"
        /// selects automatic detection (the field is set to null).
        /// </summary>
        /// <param name="objhttpItem">Request description.</param>
        private void SetEncoding(HttpItem objhttpItem)
        {
            bool autoDetect = string.IsNullOrEmpty(objhttpItem.Encoding)
                || objhttpItem.Encoding.ToLower().Trim() == "null";

            encoding = autoDetect ? null : System.Text.Encoding.GetEncoding(objhttpItem.Encoding);
        }

        /// <summary>
        /// Applies the cookie header string and/or the cookie collection to the request.
        /// </summary>
        /// <param name="objhttpItem">Request description.</param>
        private void SetCookie(HttpItem objhttpItem)
        {
            if (!string.IsNullOrEmpty(objhttpItem.Cookie))
            {
                request.Headers[HttpRequestHeader.Cookie] = objhttpItem.Cookie;
            }

            if (objhttpItem.CookieCollection != null)
            {
                request.CookieContainer = new CookieContainer();
                request.CookieContainer.Add(objhttpItem.CookieCollection);
            }
        }

        /// <summary>
        /// Writes the request body when the method contains "post" (case-insensitive).
        /// The body comes from a byte array, a file path or a string, depending on PostDataType.
        /// </summary>
        /// <param name="objhttpItem">Request description.</param>
        private void SetPostData(HttpItem objhttpItem)
        {
            if (!request.Method.Trim().ToLower().Contains("post"))
            {
                return;
            }

            if (objhttpItem.PostDataType == PostDataType.Byte)
            {
                if (objhttpItem.PostdataByte != null && objhttpItem.PostdataByte.Length > 0)
                {
                    WriteRequestBody(objhttpItem.PostdataByte);
                }
            }
            else if (objhttpItem.PostDataType == PostDataType.FilePath)
            {
                // Postdata holds the file path here. The encoding field is null in
                // auto-detect mode, which StreamReader rejects, so fall back to the default.
                string fileContent;
                using (StreamReader reader = new StreamReader(objhttpItem.Postdata, encoding ?? Encoding.Default))
                {
                    fileContent = reader.ReadToEnd();
                }
                WriteRequestBody(Encoding.Default.GetBytes(fileContent));
            }
            else
            {
                if (!string.IsNullOrEmpty(objhttpItem.Postdata))
                {
                    WriteRequestBody(Encoding.Default.GetBytes(objhttpItem.Postdata));
                }
            }
        }

        /// <summary>
        /// Sets Content-Length and writes the payload, closing the request stream afterwards.
        /// </summary>
        /// <param name="payload">Bytes to send as the request body.</param>
        private void WriteRequestBody(byte[] payload)
        {
            request.ContentLength = payload.Length;
            using (Stream body = request.GetRequestStream())
            {
                body.Write(payload, 0, payload.Length);
            }
        }

        /// <summary>
        /// Configures the proxy when any of ProxyIp, ProxyUserName or ProxyPwd is set.
        /// </summary>
        /// <param name="objhttpItem">Request description.</param>
        private void SetProxy(HttpItem objhttpItem)
        {
            bool proxyConfigured = !string.IsNullOrEmpty(objhttpItem.ProxyUserName)
                || !string.IsNullOrEmpty(objhttpItem.ProxyPwd)
                || !string.IsNullOrEmpty(objhttpItem.ProxyIp);
            if (!proxyConfigured)
            {
                return;
            }

            WebProxy myProxy = new WebProxy(objhttpItem.ProxyIp, false);
            myProxy.Credentials = new NetworkCredential(objhttpItem.ProxyUserName, objhttpItem.ProxyPwd);
            request.Proxy = myProxy;
            request.Credentials = CredentialCache.DefaultNetworkCredentials;
        }

        /// <summary>
        /// Server certificate validation callback that accepts every certificate.
        /// </summary>
        /// <param name="sender">Callback sender.</param>
        /// <param name="certificate">Certificate presented by the server.</param>
        /// <param name="chain">Certificate chain.</param>
        /// <param name="errors">Policy errors found by the framework; ignored.</param>
        /// <returns>Always true.</returns>
        /// <remarks>
        /// SECURITY: because the result is always true, certificate errors are ignored
        /// wherever this callback is installed.
        /// </remarks>
        public bool CheckValidationResult(object sender, X509Certificate certificate, X509Chain chain, SslPolicyErrors errors)
        {
            return true;
        }

        /// <summary>
        /// Returns the URL with an "http://" prefix added when it does not already start
        /// with "http://" or "https://" (case-insensitive). Surrounding whitespace is removed.
        /// </summary>
        /// <param name="URL">URL with or without a scheme.</param>
        /// <returns>The URL, guaranteed to start with an http or https scheme.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="URL"/> is null.</exception>
        public static string GetUrl(string URL)
        {
            if (URL == null)
            {
                throw new ArgumentNullException("URL");
            }

            string trimmed = URL.Trim();
            // Prefix test, not Contains: a scheme inside the query string must not count.
            bool hasScheme = trimmed.StartsWith("http://", StringComparison.OrdinalIgnoreCase)
                || trimmed.StartsWith("https://", StringComparison.OrdinalIgnoreCase);

            return hasScheme ? trimmed : "http://" + trimmed;
        }

        /// <summary>
        /// Performs the request described by <paramref name="objhttpItem"/> and returns the response data.
        /// </summary>
        /// <param name="objhttpItem">Request description.</param>
        /// <returns>The response data.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="objhttpItem"/> is null.</exception>
        public HttpResult GetHtml(HttpItem objhttpItem)
        {
            if (objhttpItem == null)
            {
                throw new ArgumentNullException("objhttpItem");
            }

            SetRequest(objhttpItem);
            return GetHttpRequestData(objhttpItem);
        }
    }

    /// <summary>
    /// Describes one HTTP request for <see cref="HttpHelper"/>.
    /// </summary>
    public class HttpItem
    {
        string _URL;
        /// <summary>
        /// Request URL (required).
        /// </summary>
        public string URL
        {
            get { return _URL; }
            set { _URL = value; }
        }

        string _Method = "GET";
        /// <summary>
        /// HTTP method; defaults to "GET".
        /// </summary>
        public string Method
        {
            get { return _Method; }
            set { _Method = value; }
        }

        int _Timeout = 100000;
        /// <summary>
        /// Value assigned to the request's Timeout; defaults to 100000.
        /// </summary>
        public int Timeout
        {
            get { return _Timeout; }
            set { _Timeout = value; }
        }

        int _ReadWriteTimeout = 30000;
        /// <summary>
        /// Value assigned to the request's ReadWriteTimeout; defaults to 30000.
        /// </summary>
        public int ReadWriteTimeout
        {
            get { return _ReadWriteTimeout; }
            set { _ReadWriteTimeout = value; }
        }

        string _Accept = "text/html, application/xhtml+xml, */*";
        /// <summary>
        /// Accept header; defaults to "text/html, application/xhtml+xml, */*".
        /// </summary>
        public string Accept
        {
            get { return _Accept; }
            set { _Accept = value; }
        }

        string _ContentType = "text/html";
        /// <summary>
        /// Content-Type header of the request; defaults to "text/html".
        /// </summary>
        public string ContentType
        {
            get { return _ContentType; }
            set { _ContentType = value; }
        }

        string _UserAgent = "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)";
        /// <summary>
        /// User-Agent header; defaults to an MSIE 9.0 user agent string.
        /// </summary>
        public string UserAgent
        {
            get { return _UserAgent; }
            set { _UserAgent = value; }
        }

        string _Encoding = string.Empty;
        /// <summary>
        /// Name of the encoding used to decode the response. Empty (the default) or "null"
        /// lets the helper detect the encoding automatically.
        /// </summary>
        public string Encoding
        {
            get { return _Encoding; }
            set { _Encoding = value; }
        }

        private PostDataType _PostDataType = PostDataType.String;
        /// <summary>
        /// Kind of POST data to send; defaults to <see cref="DotNet.Utilities.PostDataType.String"/>.
        /// </summary>
        public PostDataType PostDataType
        {
            get { return _PostDataType; }
            set { _PostDataType = value; }
        }

        string _Postdata;
        /// <summary>
        /// POST data as a string; for PostDataType.FilePath this is the path of the file to send.
        /// </summary>
        public string Postdata
        {
            get { return _Postdata; }
            set { _Postdata = value; }
        }

        private byte[] _PostdataByte = null;
        /// <summary>
        /// POST data as bytes, used with PostDataType.Byte.
        /// </summary>
        public byte[] PostdataByte
        {
            get { return _PostdataByte; }
            set { _PostdataByte = value; }
        }

        CookieCollection cookiecollection = null;
        /// <summary>
        /// Cookies to send, as a collection.
        /// </summary>
        public CookieCollection CookieCollection
        {
            get { return cookiecollection; }
            set { cookiecollection = value; }
        }

        string _Cookie = string.Empty;
        /// <summary>
        /// Cookie header string to send with the request.
        /// </summary>
        public string Cookie
        {
            get { return _Cookie; }
            set { _Cookie = value; }
        }

        string _Referer = string.Empty;
        /// <summary>
        /// Referer header (the previously visited address).
        /// </summary>
        public string Referer
        {
            get { return _Referer; }
            set { _Referer = value; }
        }

        string _CerPath = string.Empty;
        /// <summary>
        /// Absolute path of the client certificate file; empty means no certificate.
        /// </summary>
        public string CerPath
        {
            get { return _CerPath; }
            set { _CerPath = value; }
        }

        private Boolean isToLower = true;
        /// <summary>
        /// Whether the returned Html is converted to lower case; defaults to true.
        /// </summary>
        public Boolean IsToLower
        {
            get { return isToLower; }
            set { isToLower = value; }
        }

        private Boolean allowautoredirect = true;
        /// <summary>
        /// Whether redirects are followed automatically; when true the result is the
        /// page reached after redirection. Defaults to true.
        /// </summary>
        public Boolean Allowautoredirect
        {
            get { return allowautoredirect; }
            set { allowautoredirect = value; }
        }

        private int connectionlimit = 1024;
        /// <summary>
        /// Connection limit applied to the request's ServicePoint when greater than 0; defaults to 1024.
        /// </summary>
        public int Connectionlimit
        {
            get { return connectionlimit; }
            set { connectionlimit = value; }
        }

        private string proxyusername = string.Empty;
        /// <summary>
        /// Proxy server user name.
        /// </summary>
        public string ProxyUserName
        {
            get { return proxyusername; }
            set { proxyusername = value; }
        }

        private string proxypwd = string.Empty;
        /// <summary>
        /// Proxy server password.
        /// </summary>
        public string ProxyPwd
        {
            get { return proxypwd; }
            set { proxypwd = value; }
        }

        private string proxyip = string.Empty;
        /// <summary>
        /// Proxy server address.
        /// </summary>
        public string ProxyIp
        {
            get { return proxyip; }
            set { proxyip = value; }
        }

        private ResultType resulttype = ResultType.String;
        /// <summary>
        /// Selects whether the raw bytes are returned in addition to the string; defaults to String.
        /// </summary>
        public ResultType ResultType
        {
            get { return resulttype; }
            set { resulttype = value; }
        }

        private WebHeaderCollection header = new WebHeaderCollection();
        /// <summary>
        /// Request headers; assigned to the request when not null.
        /// </summary>
        public WebHeaderCollection Header
        {
            get { return header; }
            set { header = value; }
        }
    }

    /// <summary>
    /// Result of an HTTP request made through <see cref="HttpHelper"/>.
    /// </summary>
    public class HttpResult
    {
        string _Cookie = string.Empty;
        /// <summary>
        /// Value of the response's "set-cookie" header, when present.
        /// </summary>
        public string Cookie
        {
            get { return _Cookie; }
            set { _Cookie = value; }
        }

        CookieCollection cookiecollection = null;
        /// <summary>
        /// Cookies of the response, as a collection.
        /// </summary>
        public CookieCollection CookieCollection
        {
            get { return cookiecollection; }
            set { cookiecollection = value; }
        }

        private string html = string.Empty;
        /// <summary>
        /// Response body decoded to a string, or "String Error" when the request failed
        /// with a WebException.
        /// </summary>
        public string Html
        {
            get { return html; }
            set { html = value; }
        }

        private byte[] resultbyte = null;
        /// <summary>
        /// Raw response bytes; only filled when the request used ResultType.Byte, otherwise null.
        /// </summary>
        public byte[] ResultByte
        {
            get { return resultbyte; }
            set { resultbyte = value; }
        }

        private WebHeaderCollection header = new WebHeaderCollection();
        /// <summary>
        /// Response headers.
        /// </summary>
        public WebHeaderCollection Header
        {
            get { return header; }
            set { header = value; }
        }
    }

    /// <summary>
    /// What the result should contain.
    /// </summary>
    public enum ResultType
    {
        String, // only the decoded string
        Byte    // the decoded string and the raw bytes
    }

    /// <summary>
    /// Format of the POST data; the default is String.
    /// </summary>
    public enum PostDataType
    {
        String,  // Postdata is the body text
        Byte,    // PostdataByte is the body
        FilePath // Postdata is the path of a file whose content is the body
    }
}

来源:http://www.sufeinet.com/thread-3-1-1.html


原创粉丝点击