模拟web访问有登录且有验证码的登录后抓取数据

来源:互联网 发布:淘宝儿童女装14岁 编辑:程序博客网 时间:2024/05/01 08:04

模拟web访问有登录且有验证码的登录后抓取数据

1 取验证码

1 在窗体上放一个 PictureBox(imgValidate),用来显示获取到的验证码图片;
2 用 Firefox 浏览器的开发者工具(F12)分析出验证码图片的网址。
/// <summary>
/// Downloads the captcha image for the current <c>strValidCode</c> key, shows it in
/// <c>imgValidate</c>, and stores the cookies returned by the server in
/// <c>cookies</c> / <c>strCookies</c>.
/// </summary>
/// <remarks>
/// <c>strValidCode</c> must have been read from the login page before this is called.
/// </remarks>
private void GetValidateImage()
        {
            // Captcha page; the key is escaped so unexpected characters cannot break the query string.
            string strUrl = "http://www.xxx.com/ValidateCodePicture.aspx?Key=" + Uri.EscapeDataString(strValidCode ?? string.Empty);

            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(strUrl);

            // Request settings.
            request.Method = "GET";
            request.CookieContainer = new CookieContainer();
            request.KeepAlive = true;
            request.ContentType = "text/html";

            // Pretend to be Google Chrome.
            request.UserAgent =
                "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36";
            request.Headers.Add("x-requested-with:XMLHttpRequest");
            request.Headers.Add(HttpRequestHeader.AcceptLanguage, "zh-CN,zh;q=0.8,en;q=0.6,nl;q=0.4,zh-TW;q=0.2");
            request.Accept = "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8";
            request.AutomaticDecompression = DecompressionMethods.Deflate | DecompressionMethods.GZip;
            request.Headers.Add("Accept-Encoding", "gzip, deflate");

            // Copy the whole body instead of sizing a buffer from ContentLength:
            // ContentLength is -1 for chunked or automatically decompressed responses.
            // The MemoryStream is deliberately NOT disposed, because a Bitmap created
            // from a stream requires that stream to stay open for the Bitmap's lifetime.
            MemoryStream ms = new MemoryStream();
            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            using (Stream stream = response.GetResponseStream())
            {
                stream.CopyTo(ms);
            }
            ms.Position = 0;

            cookies = request.CookieContainer; // keep the cookies for later requests
            strCookies = request.CookieContainer.GetCookieHeader(request.RequestUri); // same cookies as a header string

            // Swap in the new captcha and release the GDI resources of the previous one.
            var previousImage = imgValidate.Image;
            imgValidate.Image = new Bitmap(ms);
            if (previousImage != null)
            {
                previousImage.Dispose();
            }
        }

2 取js赋值的内容

  有的网页通过"查看网页源代码"看不到控件的值(这些值是由 js 动态赋值的),需要用下面的方法:
  即用 C# 自带的 WebBrowser 控件来加载网页,再用 webBrowser1.Document 来取对应控件的值,如
  string strMsg2 = webBrowser1.Document.GetElementById("hdValidateCodeID").OuterHtml;

3 取得要提交的参数

  如果是 asp.net 的网页,还要提交"__EVENTTARGET"、"__EVENTARGUMENT"、"__VIEWSTATE"这三个参数,这些也可以在开发者工具-网络-参数里看到。
可以用 HttpWebRequest 先取得源代码再分析出这些值;
这里用的是 WebBrowser 控件里已经加载好的页面。
  
        private void GetViewState()
        {
            string strMsg = webBrowser1.Document.GetElementById("__VIEWSTATE").OuterHtml;
            //取viewstate value
            //<INPUT id=__VIEWSTATE type=hidden value=/wEPDwUKMTg0NTk3Mjg2N2Rk name=__VIEWSTATE>
            MatchCollection mc = Regex.Matches(strMsg, "id=__VIEWSTATE.*(?<viewstate>value[^>]*)", RegexOptions.IgnoreCase);

            if (mc.Count > 0)
            {
                foreach (Match m in mc)
                {
                    strViewState = m.Groups["viewstate"].Value.ToString().Trim();
                    if (strViewState.Length > 0)
                    {
                        strViewState = strViewState.Replace("value=", "").Replace("\"", "").Replace("\\", "").Replace("name=__VIEWSTATE","").Replace(" ","");
                    }
                }
            }

            //<INPUT id=hdValidateCodeID type=hidden value=c1b52d3a-1f8b-1dc4-0d44-32a4b46ef8af name=hdValidateCodeID>
            string strMsg2 = webBrowser1.Document.GetElementById("hdValidateCodeID").OuterHtml;
            MatchCollection mc2 = Regex.Matches(strMsg2, "id=hdValidateCodeID.*(?<validatecode>value[^>]*)", RegexOptions.IgnoreCase);

            if (mc2.Count > 0)
            {
                foreach (Match m in mc2)
                {
                    strValidCode = m.Groups["validatecode"].Value.ToString().Trim();
                    if (strValidCode.Length > 0)
                    {
                        strValidCode = strValidCode.Replace("value=", "").Replace("\"", "").Replace("\\", "").Replace("/", "").Replace("name=hdValidateCodeID","").Replace(" ","");
                    }
                }
            }
            txtValidCode.Text = strValidCode;
            txtViewState.Text = strViewState;

            //String 的Cookie 要转成 Cookie型的 并放入CookieContainer中  
            string cookieStr = webBrowser1.Document.Cookie;
            string[] cookstr = cookieStr.Split(';');

            foreach (string str in cookstr)
            {
                try
                {
                    string[] cookieNameValue = str.Split('=');
                    Cookie ck = new Cookie(cookieNameValue[0].Trim().ToString(), cookieNameValue[1].Trim().ToString());
                    ck.Domain = "XXX.com"; //必须写对  
                    myCookieContainer.Add(ck);
                }
                catch
                {
                }
            }  
        }
  

4 登录并且存取cookie

提交参数,并存下cookie,供后续用
/// <summary>
/// Posts the login form (user name, password, captcha text, captcha id and view state) to the
/// login page using the cookies in <c>myCookieContainer</c>, then keeps the resulting cookie
/// container in <c>cookies</c> so later requests can reuse the session.
/// </summary>
private void Login()
        {
            cookies = new CookieContainer();
            string strUrl = "http://www.xxx.com/Login.aspx";  // login page

            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(strUrl);

            // Request settings.
            request.Method = "POST";
            request.CookieContainer = myCookieContainer;
            request.KeepAlive = true;
            request.ContentType = "application/x-www-form-urlencoded";

            // Pretend to be Google Chrome.
            request.UserAgent =
                "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36";
            request.Headers.Add("x-requested-with:XMLHttpRequest");
            request.Headers.Add(HttpRequestHeader.AcceptLanguage, "zh-CN,zh;q=0.8,en;q=0.6,nl;q=0.4,zh-TW;q=0.2");
            request.Accept = "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8";
            request.AutomaticDecompression = DecompressionMethods.Deflate | DecompressionMethods.GZip;
            request.Headers.Add("Accept-Encoding", "gzip, deflate");

            // Build the form body. Field names follow the POST captured in the browser's
            // developer tools; adjust them for the real site. Every value is URL-encoded:
            // Base64 view state contains '+', '/' and '=', and an unencoded '+' would reach
            // the server as a space, while '&' or '=' in a password would corrupt the body.
            string postData = string.Format(
                "txtUserName={0}&txtPassword={1}&txtValidateCode={2}&hdValidateCodeID={3}&ddlLanguage=CN&btnLogin={4}&__EVENTTARGET=&__EVENTARGUMENT=&__VIEWSTATE={5}",
                EncodeFormValue(txtUserName.Text),
                EncodeFormValue(txtPassword.Text),
                EncodeFormValue(txtValidate.Text),
                EncodeFormValue(strValidCode),
                EncodeFormValue("登录"),
                EncodeFormValue(strViewState));
            byte[] postdatabyte = Encoding.UTF8.GetBytes(postData);

            request.ContentLength = postdatabyte.Length;

            using (Stream stream = request.GetRequestStream())
            {
                stream.Write(postdatabyte, 0, postdatabyte.Length);
            }

            // Read the response to the end and release the connection even if reading fails.
            // NOTE(review): the response body is not inspected, so success is assumed whenever
            // no exception is thrown - consider checking the returned page for a login error.
            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            using (StreamReader reader = new StreamReader(response.GetResponseStream(), Encoding.UTF8))
            {
                reader.ReadToEnd();
            }

            // Keep the cookies so later requests reuse this session without logging in again.
            cookies = request.CookieContainer;
            lbLogin.Text = "已登录";
            btnSearchResume.Enabled = true;

        }

        /// <summary>
        /// Percent-encodes a form field value; null is treated as an empty string.
        /// </summary>
        /// <remarks>
        /// The value is escaped in chunks because <see cref="Uri.EscapeDataString(string)"/>
        /// rejects very long inputs on .NET Framework, and view state can be large.
        /// </remarks>
        private static string EncodeFormValue(string value)
        {
            if (string.IsNullOrEmpty(value))
            {
                return string.Empty;
            }

            const int ChunkSize = 16000;
            StringBuilder encoded = new StringBuilder(value.Length + value.Length / 4);
            int start = 0;
            while (start < value.Length)
            {
                int length = Math.Min(ChunkSize, value.Length - start);

                // Never cut a surrogate pair in half; escaping a lone surrogate is invalid.
                if (start + length < value.Length && char.IsHighSurrogate(value[start + length - 1]))
                {
                    length--;
                }

                encoded.Append(Uri.EscapeDataString(value.Substring(start, length)));
                start += length;
            }

            return encoded.ToString();
        }

1 0
原创粉丝点击