注册了这么久,终于想好写什么了。

来源:互联网 发布:cool edit pro mac版 编辑:程序博客网 时间:2024/04/27 18:18

注册了这么久,终于想好写什么了。
  
  最近因公司需要,做了从网络上抓取数据的功能。
  
  网上的资料都不完整,特别是验证码登录这一部分。
  
  其实我这些代码都是从网上收集整理出来的,做得不是太好。
  
  使用了 WebClient 和 HttpWebRequest 这两个 .NET 类,分别做出一个静态类和一个动态类。
  
  静态类 GetHtml 支持一般的网页取数据和 POST 提交,但不支持验证码;可以自动识别网页编码,也可以手动指定网页编码。不过最好手动指定,这样程序可以少运行一些编码检测代码。
  
  System.Collections.Specialized.NameValueCollection PostVars =new System.Collections.Specialized.NameValueCollection();
  PostVars.Add("uid","name");
  PostVars.Add("pwd","123456");
  
  string tmphtml= GetStrHtmlPost(url,PostVars);
  
  动态类 PostWeb 支持验证码和用户身份验证;登录后会产生 Cookie 字符串,第二次运行程序时可以直接使用该 Cookie,而不用再次登录。
  
  PostWeb web=new PostWeb();
  
  web.GetCode(验证码地址);
  
  string tmplogin=web.LoginPost("http://www.mystand.com.cn/login/submit.jsp","userid=hgj0000&password=06045369","http://www.mystand.com.cn/");
  
  if(tmplogin.Contains(条件))
  
  {
  
  string cookie= web.cookieHeader;//保存到文件中,下次直接赋给类即可免登录
  
  web.GetPage("http://www.mystand.com.cn/", "http://www.mystand.com.cn/");
  
  }
  
  
  
  PostWeb web=new PostWeb();
  
  web.cookieHeader=cookie;//把文件中保存的 cookie 赋给类
  
  web.GetPage("http://www.mystand.com.cn/", "http://www.mystand.com.cn/");
  
  
  using System;
  using System.Collections.Generic;
  using System.Text;
  using System.Net;
  using System.Text.RegularExpressions;
  using System.IO;
  using System.IO.Compression;
  
  namespace Manager.Net.Html
  {
  
   /// <summary>
   /// HTML相关
   /// </summary>
   public class CHtml
   {
  
  
   public CHtml()
   {
  
   }
  
   ~CHtml()
   {
  
   }
   /// <summary>
   /// 过滤 Sql 语句字符串中的注入脚本
   /// </summary>
   /// <param name="source">传入的字符串</param>
   /// <returns></returns>
   public static string FilterSql(string source)
   {
   //单引号替换成两个单引号
   source = source.Replace("'", "''");
   source = source.Replace("/"", "“");
   source = source.Replace("|", "|");
   //半角封号替换为全角封号,防止多语句执行
   source = source.Replace(";", ";");
  
   //半角括号替换为全角括号
   source = source.Replace("(", "(");
   source = source.Replace(")", ")");
  
   /**/
   ///////////////要用正则表达式替换,防止字母大小写得情况////////////////////
  
   //去除执行存储过程的命令关键字
   source = source.Replace("Exec", "");
   source = source.Replace("Execute", "");
  
   //去除系统存储过程或扩展存储过程关键字
   source = source.Replace("xp_", "x p_");
   source = source.Replace("sp_", "s p_");
  
   //防止16进制注入
   source = source.Replace("0x", "0 x");
  
   return source;
   }
  
  
  
  
  
  
  
   /// <summary>
   /// 输出HTML
   /// </summary>
   /// <param name="Stream">流</param>
   /// <param name="Encod">编码</param>
   /// <returns></returns>
   public static string HtmlStr(System.IO.Stream Stream, Encoding Encod)
   {
  
   System.IO.StreamReader sr;
   if (Encod != null)
   {
   sr = new System.IO.StreamReader(Stream, Encod);
   return sr.ReadToEnd();
   }
   else
   {
   sr = new System.IO.StreamReader(Stream, Encoding.Default);
   return sr.ReadToEnd();
  
   }
  
   }
  
  
   /// <summary>
   /// 检验用户提交的URL参数字符里面是否有非法字符,如果有则返回True.防止SQL注入.
   /// </summary>
   /// <param name="str">(string)</param>
   /// <returns>bool</returns>
   public static bool VerifyString(string str)
   {
   string strTmp = str.ToUpper();
   if (strTmp.IndexOf("SELECT ") >= 0 || strTmp.IndexOf(" AND ") >= 0 || strTmp.IndexOf(" OR ") >= 0 ||
   strTmp.IndexOf("EXEC ") >= 0 || strTmp.IndexOf("CHAR(") >= 0)
   {
   return true;
   }
  
   strTmp.Replace("'", "'").Replace(";", ";");
   return false;
   }
  
  
   /// <summary>
   /// 匹配页面的图片地址
   /// </summary>
   /// <param name="HtmlCode"></param>
   /// <param name="imgHttp">要补充的http://路径信息</param>
   /// <returns></returns>
   public static string GetImgSrc(string HtmlCode, string imgHttp)
   {
   string MatchVale = "";
   string Reg = @"<img.+?>";
   foreach (Match m in Regex.Matches(HtmlCode.ToLower(), Reg))
   {
   MatchVale += GetImg((m.Value).ToLower().Trim(), imgHttp) + "|";
   }
  
   return MatchVale;
   }
  
   /// <summary>
   /// 匹配<img src=http://www.ASPcool.com/lanmu/"" />中的图片路径实际链接
   /// </summary>
   /// <param name="ImgString"><img src=http://www.aspcool.com/lanmu/"" />字符串</param>
   /// <returns></returns>
   public static string GetImg(string ImgString, string imgHttp)
   {
   string MatchVale = "";
   string Reg = @"src=http://www.aspcool.com/lanmu/.+/.(bmp|jpg|gif|png|)";
   foreach (Match m in Regex.Matches(ImgString.ToLower(), Reg))
   {
   MatchVale += (m.Value).ToLower().Trim().Replace("src=http://www.aspcool.com/lanmu/", "");
   }
   if (MatchVale.IndexOf(".net") != -1 || MatchVale.IndexOf(".com") != -1 || MatchVale.IndexOf(".org") != -1 || MatchVale.IndexOf(".cn") != -1 || MatchVale.IndexOf(".cc") != -1 || MatchVale.IndexOf(".info") != -1 || MatchVale.IndexOf(".biz") != -1 || MatchVale.IndexOf(".tv") != -1)
   return (MatchVale);
   else
   return (imgHttp + MatchVale);
   }
  
  
  
   /// <summary>
   /// 获取页面的链接正则
   /// </summary>
   /// <param name="HtmlCode"></param>
   /// <returns></returns>
   public static string GetHref(string HtmlCode)
   {
   string MatchVale = "";
   string Reg = @"(h|H)(r|R)(e|E)(f|F) *= *('|"")?((/w|//|//|/.|:|-|_)+)[/S]*";
   foreach (Match m in Regex.Matches(HtmlCode, Reg))
   {
   MatchVale += (m.Value).ToLower().Replace("href=http://www.aspcool.com/lanmu/", "").Trim() + "|";
   }
   return MatchVale;
   }
  
   /// <summary>
   /// 去HTML标记
   /// </summary>
   /// <param name="strhtml">HTML页面</param>
   /// <returns></returns>
   public static string RemoveHTML(string strhtml)
   {
   string stroutput = strhtml;
   Regex regex = new Regex(@"<[^>]+>|</[^>]+>");
   stroutput = regex.Replace(stroutput, "");
   return stroutput.Trim();
   }
  
   /// <summary>
   /// 取网页编码
   /// </summary>
   /// <param name="strHtml">HTML页面</param>
   /// <returns>返回编码</returns>
   public static Encoding GetEncoding(string strHtml)
   {
  
   string pattern = @"(?i)/bcharset=(?<charset>[-a-zA-Z_0-9]+)";
   string charset = Regex.Match(strHtml, pattern).Groups["charset"].Value;
   if (charset.Length <= 0)
   {
   if (strHtml.Contains("charset=/""))
   charset = Manager.Text.StringEx.GetStringMiddle(strHtml, "charset=/"", "/"");
  
   }
   if (charset.Length <= 0)
   {
   if (strHtml.Contains("charset="))
   charset = Manager.Text.StringEx.GetStringMiddle(strHtml, "charset=", "/"");
   }
   if (charset.Length <= 0)
   {
   charset = Encoding.UTF8.BodyName;
   }
   try
   {
   return Encoding.GetEncoding(charset);
   }
   catch (Exception)
   {
  
   return Encoding.Default;
   }
  
  
  
   }
  
  
   /// <summary>
   /// 取IE版本
   /// </summary>
   /// <returns></returns>
   public static string GetIEVersion()
   {
   using (Microsoft.Win32.RegistryKey versionKey = Microsoft.Win32.Registry.LocalMachine.OpenSubKey(@"Software/Microsoft/Internet Explorer"))
   {
   String version = versionKey.GetValue("Version").ToString();
   return version;
   }
   }
   }
  
   /// <summary>
   /// 模拟网页提交数据
   /// </summary>
   public class PostWeb
   {
   /// <summary>
   /// Cookie header text. The request methods of this class read it to send cookies
   /// and overwrite it with the cookies collected from the response.
   /// </summary>
   public string cookieHeader = "";
   /// <summary>
   /// Name of the page encoding. When non-empty, SetEncod passes it to
   /// Encoding.GetEncoding instead of the character set reported by the response.
   /// </summary>
   public string Encod = "";
  
   // NOTE(review): SetCookies and Method are not read by any method of this class
   // as shown in this file — confirm whether external callers rely on them.
   public bool SetCookies = false;
   public string Method = "POST";
   /// <summary>
   /// Whether a proxy should be used (checked by ProxySetting).
   /// </summary>
   public bool IsProxy = false;
  
   /// <summary>
   /// Proxy address, used by ProxySetting when the default proxy has no address.
   /// </summary>
   public string proxyaddress = "";
   /// <summary>
   /// User name for credential-based authentication.
   /// </summary>
   public string CredentialUserName = "";
   /// <summary>
   /// Password for credential-based authentication.
   /// </summary>
   public string CredentialPassword = "";
   /// <summary>
   /// Domain for credential-based authentication.
   /// </summary>
   public string CredentialDoMain = "";
  
  
   // Encoding chosen by SetEncod on first use and reused for later responses.
   Encoding tmpEncod;
   public PostWeb()
   {
  
   }
   ~PostWeb()
   {
  
   }
   /// <summary>
   /// 代理
   /// </summary>
   /// <param name="request"></param>
   private void ProxySetting(HttpWebRequest request)
   {
   if (IsProxy)
   {
   WebProxy proxy = WebProxy.GetDefaultProxy();//获取IE缺省设置
   //如果缺省设置为空,则有可能是根本不需要代理服务器,如果此时配置文件中也未配置则认为不需Proxy
   if (proxy.Address == null && !String.IsNullOrEmpty(proxyaddress))
   proxy.Address = new Uri(proxyaddress);//按配置文件创建Proxy 地置
   }
   }
  
   /// <summary>
   /// 身份验证
   /// </summary>
   /// <param name="request"></param>
   private void NetworkCredentialSetting(HttpWebRequest request)
   {
   if (!String.IsNullOrEmpty(CredentialUserName) && !String.IsNullOrEmpty(CredentialPassWord) && !String.IsNullOrEmpty(CredentialDoMain))
   {
   request.PreAuthenticate = true;
   NetworkCredential myCred = new NetworkCredential(CredentialUserName, CredentialPassWord, CredentialDoMain);
   request.Credentials = myCred;
   request.SendChunked = false;
   }
   }
  
   /// <summary>
   /// 验证码,并保存文件
   /// </summary>
   /// <param name="strURL">验证码地址</param>
   /// <param name="dir">目录</param>
   /// <param name="filename">文件</param>
   /// <param name="imageFormat">格式</param>
   public void GetCode(string strURL, string dir, string filename, System.Drawing.Imaging.ImageFormat imageFormat)
   {
   string path = "";
   if (!String.IsNullOrEmpty(dir))
   {
  
   System.IO.Directory.CreateDirectory(dir);
   path = dir + "//";
  
   }
   string FileNamePath = path + filename;
   System.Drawing.Image code = GetCode(strURL);
   code.Save(FileNamePath, imageFormat);
   code.Dispose();
   }
  
   /// <summary>
   /// 验证码,返回Bitmap
   /// </summary>
   /// <param name="strURL">验证码地址</param>
   /// <returns></returns>
   public System.Drawing.Image GetCode(string strURL)
   {
   HttpWebRequest myHttpWebRequest = (HttpWebRequest)WebRequest.Create(strURL);
   ProxySetting(myHttpWebRequest);
   NetworkCredentialSetting(myHttpWebRequest);
   myHttpWebRequest.Method = "GET";
   myHttpWebRequest.KeepAlive = true;
   CookieCollection myCookies = null;
   CookieContainer myCookieContainer = new CookieContainer();
   myHttpWebRequest.CookieContainer = myCookieContainer;
   using (HttpWebResponse response = (HttpWebResponse)myHttpWebRequest.GetResponse())
   {
   cookieHeader = myHttpWebRequest.CookieContainer.GetCookieHeader(new Uri(strURL));
   myCookies = response.Cookies;
   System.Drawing.Image code = System.Drawing.Image.FromStream(response.GetResponseStream(), false,false);
   return code;
  
   }
   }
  
   /// <summary>
   /// 功能描述:模拟登录页面,提交登录数据进行登录,并记录Header中的cookie
   /// </summary>
   /// <param name="strURL">登录数据提交的页面地址</param>
   /// <param name="strArgs">用户登录数据</param>
   /// <param name="strReferer">引用地址</param>
   /// <returns>可以返回页面内容或不返回</returns>
   public string LoginGet(string strURL, string strReferer)
   {
  
   HttpWebRequest myHttpWebRequest = (HttpWebRequest)WebRequest.Create(strURL);
   ProxySetting(myHttpWebRequest);
   NetworkCredentialSetting(myHttpWebRequest);
   myHttpWebRequest.AllowAutoRedirect = true;
   myHttpWebRequest.KeepAlive = true;
   myHttpWebRequest.Accept = " image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, application/x-shockwave-Flash, application/vnd.ms-Excel, application/vnd.ms-PowerPoint, application/msword, application/xaml+XML, application/vnd.ms-xpsdocument, application/x-ms-xbap, application/x-ms-application, application/QVOD, */*";
   myHttpWebRequest.Referer = strReferer;
   myHttpWebRequest.UserAgent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 2.0.50727; MAXTHON 2.0)";
   myHttpWebRequest.ContentType = "application/x-www-form-urlencoded";
   myHttpWebRequest.Method = "Get";
   myHttpWebRequest.Timeout = 3000;
  
   CookieCollection myCookies = null;
   CookieContainer myCookieContainer = new CookieContainer();
   if (cookieHeader.Length > 0)
   {
   myCookieContainer.SetCookies(new Uri(strURL), cookieHeader);
   myHttpWebRequest.CookieContainer = myCookieContainer;
   }
  
  
  
  
   using (HttpWebResponse response = (HttpWebResponse)myHttpWebRequest.GetResponse())
   {
   cookieHeader = myHttpWebRequest.CookieContainer.GetCookieHeader(new Uri(strURL));
   myCookies = response.Cookies;
  
  
   SetEncod(response.CharacterSet);
  
  
   return System.Web.HttpUtility.HtmlDecode(System.Web.HttpUtility.UrlDecode(CHtml.HtmlStr(response.GetResponseStream(), tmpEncod)));
   }
   }
  
   /// <summary>
   /// 设置网页编码
   /// </summary>
   void SetEncod(string cod)
   {
   if (tmpEncod == null)
   {
   if (Encod.Length > 0)
   tmpEncod = Encoding.GetEncoding(Encod);
   else
   tmpEncod = Encoding.GetEncoding(cod);
   }
  
  
  
   }
  
   /// <summary>
   /// 功能描述:模拟登录页面,提交登录数据进行登录,并记录Header中的cookie
   /// LoginPost("http://www.mystand.com.cn/login/submit.jsp","userid=hgj0000&password=06045369","http://www.mystand.com.cn/");
   /// </summary>
   /// <param name="strURL">登录数据提交的页面地址</param>
   /// <param name="strArgs">用户登录数据</param>
   /// <param name="strReferer">引用地址</param>
   /// <returns>可以返回页面内容或不返回</returns>
   public string LoginPost(string strURL, string strArgs, string strReferer)
   {
  
   HttpWebRequest myHttpWebRequest = (HttpWebRequest)WebRequest.Create(strURL);
   ProxySetting(myHttpWebRequest);
   NetworkCredentialSetting(myHttpWebRequest);
   myHttpWebRequest.AllowAutoRedirect = true;
   myHttpWebRequest.KeepAlive = true;
   myHttpWebRequest.Accept = " image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, application/x-shockwave-flash, application/vnd.ms-excel, application/vnd.ms-PowerPoint, application/msword, application/xaml+xml, application/vnd.ms-xpsdocument, application/x-ms-xbap, application/x-ms-application, application/QVOD, */*";
   myHttpWebRequest.Referer = strReferer;
   myHttpWebRequest.UserAgent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 2.0.50727; MAXTHON 2.0)";
   myHttpWebRequest.ContentType = "application/x-www-form-urlencoded";
   myHttpWebRequest.Method = "POST";
   myHttpWebRequest.Timeout = 3000;
  
   CookieCollection myCookies = null;
   CookieContainer myCookieContainer = new CookieContainer();
   if (cookieHeader.Length > 0)
   {
   myCookieContainer.SetCookies(new Uri(strURL), cookieHeader);
   myHttpWebRequest.CookieContainer = myCookieContainer;
   }
   Stream MyRequestStrearm = myHttpWebRequest.GetRequestStream();
   StreamWriter MyStreamWriter = new StreamWriter(MyRequestStrearm, Encoding.ASCII);
   //把数据写入HttpWebRequest的Request流
   MyStreamWriter.Write(strArgs);
   //关闭打开对象
   MyStreamWriter.Close();
   MyRequestStrearm.Close();
  
  
  
   using (HttpWebResponse response = (HttpWebResponse)myHttpWebRequest.GetResponse())
   {
   cookieHeader = myHttpWebRequest.CookieContainer.GetCookieHeader(new Uri(strURL));
   myCookies = response.Cookies;
  
  
   SetEncod(response.CharacterSet);
  
   return System.Web.HttpUtility.HtmlDecode(System.Web.HttpUtility.UrlDecode( CHtml.HtmlStr(response.GetResponseStream(), tmpEncod)));
   }
   }
  
   /// <summary>
   /// 功能描述:在PostLogin成功登录后记录下Headers中的cookie,然后获取此网站上其他页面的内容
   /// </summary>
   /// <param name="strURL">获取网站的某页面的地址</param>
   /// <param name="strReferer">引用的地址</param>
   /// <returns>返回页面内容</returns>
   public string GetPage(string strURL, string strReferer)
   {
  
   HttpWebRequest myHttpWebRequest = (HttpWebRequest)WebRequest.Create(strURL);
   ProxySetting(myHttpWebRequest);
   NetworkCredentialSetting(myHttpWebRequest);
   myHttpWebRequest.ContentType = "text/html";
   myHttpWebRequest.Method = "GET";
   // myHttpWebRequest.Timeout = 5000;
   if (!string.IsNullOrEmpty(strReferer))
   myHttpWebRequest.Referer = strReferer;
   myHttpWebRequest.UserAgent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 2.0.50727; MAXTHON 2.0)";
   myHttpWebRequest.Headers.Add("cookie:" + cookieHeader);
   CookieContainer myCookieContainer = new CookieContainer();
   Console.WriteLine(myHttpWebRequest.TransferEncoding);
   using (HttpWebResponse response = (HttpWebResponse)myHttpWebRequest.GetResponse())
   {
   SetEncod(response.CharacterSet);
  
   return System.Web.HttpUtility.HtmlDecode(System.Web.HttpUtility.UrlDecode(CHtml.HtmlStr(response.GetResponseStream(), tmpEncod)));
   }
   }
  
  
   }
  
   /// <summary>
   /// 取网页数据
   /// </summary>
   public class GetHtml
   {
   public GetHtml()
   {
  
   }
   ~GetHtml()
   {
  
   }
  
  
   /// <summary>
   /// 模拟提交表单POST
   /// System.Collections.Specialized.NameValueCollection PostVars =new System.Collections.Specialized.NameValueCollection()
   /// PostVars.Add("uid","name");
   /// PostVars.Add("pwd","123456");
   /// GetStrHtmlPost(url,PostVars)
   /// </summary>
   /// <param name="url">地址</param>
   /// <param name="PostVars">PostValue</param>
   /// <returns></returns>
   public static string GetStrHtmlPost(String url, System.Collections.Specialized.NameValueCollection PostVars)
   {
   if (PostVars == null)
   return "";
   System.Net.WebClient WebClientObj = new System.Net.WebClient();
  
   string html;
   try
   {
   byte[] buf = WebClientObj.UploadValues(url, "POST", PostVars);
   //下面都没用啦,就上面一句话就可以了
   html = System.Text.Encoding.Default.GetString(buf);
   Encoding encoding = CHtml.GetEncoding(html);
   if (encoding == Encoding.UTF8) return html;
   return encoding.GetString(buf);
  
   }
   catch
   {
  
   }
  
   return "";
  
   }
  
   /// <summary>
   /// 获取网页的HTML内容
   /// </summary>
   /// <param name="url">url</param>
   /// <returns></returns>
   public static string GetStrHtml(string url)
   {
   return GetStrHtml(url, null);
   }
   /// <summary>
   /// 获取网页的HTML内容
   /// </summary>
   /// <param name="url">URL</param>
   /// <param name="encoding">Encoding</param>
   /// <returns></returns>
   public static string GetStrHtml(string url, Encoding encoding)
   {
   byte[] buf = new WebClient().DownloadData(url);
   if (encoding != null) return encoding.GetString(buf);
   string html = Encoding.UTF8.GetString(buf);
   encoding = CHtml.GetEncoding(html);
   if (encoding == Encoding.UTF8) return html;
   return encoding.GetString(buf);
   }   
   }
  }