模拟web访问有登录且有验证码的登录后抓取数据
来源:互联网 发布:淘宝儿童女装14岁 编辑:程序博客网 时间:2024/05/01 08:04
模拟web访问有登录且有验证码的登录后抓取数据
1 取验证码
1 在窗体上放一个 PictureBox(imgValidate)用来显示获取到的验证码图片;2 用浏览器的开发者工具(如 Firefox 按 F12)分析出验证码图片的网址
/// <summary>
/// Downloads the CAPTCHA image for the current validation key and displays it in <c>imgValidate</c>.
/// </summary>
/// <remarks>
/// The URL is built from the <c>strValidCode</c> field, so that field has to be populated before
/// this method is called. On success the request's cookie container is stored in <c>cookies</c>
/// and its header-string form in <c>strCookies</c>.
/// </remarks>
private void GetValidateImage()
{
    // Start from an empty container so a failed request does not leave old cookies behind.
    cookies = new CookieContainer();

    // CAPTCHA page; strValidCode is the random key that must have been fetched beforehand.
    string strUrl = "http://www.xxx.com/ValidateCodePicture.aspx?Key=" + strValidCode;

    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(strUrl);
    request.Method = "GET";
    request.CookieContainer = new CookieContainer();
    request.KeepAlive = true;
    // Present the request as coming from Google Chrome.
    request.UserAgent =
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36";
    request.Headers.Add("x-requested-with:XMLHttpRequest");
    request.Headers.Add(HttpRequestHeader.AcceptLanguage, "zh-CN,zh;q=0.8,en;q=0.6,nl;q=0.4,zh-TW;q=0.2");
    request.Accept = "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8";
    request.AutomaticDecompression = DecompressionMethods.Deflate | DecompressionMethods.GZip;
    request.Headers.Add("Accept-Encoding", "gzip, deflate");

    // Copy into a growable buffer instead of sizing an array from response.ContentLength:
    // ContentLength is -1 when the server does not report a length, which made the
    // original fixed-size allocation throw.
    MemoryStream imageData = new MemoryStream();
    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
    using (Stream body = response.GetResponseStream())
    {
        body.CopyTo(imageData);
    }
    imageData.Position = 0;

    // Keep the cookies issued with the CAPTCHA, both as a container and as a header string.
    cookies = request.CookieContainer;
    strCookies = request.CookieContainer.GetCookieHeader(request.RequestUri);

    // The Bitmap keeps reading from its source stream, so imageData is deliberately
    // left undisposed here.
    Image previousImage = imgValidate.Image;
    imgValidate.Image = new Bitmap(imageData);
    if (previousImage != null)
    {
        // Release the GDI+ handle of the CAPTCHA that was just replaced.
        previousImage.Dispose();
    }
}
2 取js赋值的内容
有的网页用查看网页源代码的方式看不到控件的值(这些值是由 JS 动态赋值的),需要用 C# 自带的 WebBrowser 控件来加载网页,再通过 webBrowser1.Document 取对应控件的值,如:
string strMsg2 = webBrowser1.Document.GetElementById("hdValidateCodeID").OuterHtml;
3 取得要提交的参数
如果是 ASP.NET 的网页,还要提交 "__EVENTTARGET"、"__EVENTARGUMENT"、"__VIEWSTATE" 这三个参数,它们可以在开发者工具-网络-参数里看到;也可以先用 HttpWebRequest 取得网页源代码,再从中分析出来。
这里用的是webbrowse里加载好的
/// <summary>
/// Reads the hidden <c>__VIEWSTATE</c> and <c>hdValidateCodeID</c> inputs from the page loaded in
/// <c>webBrowser1</c>, mirrors them into the matching text boxes, and copies the page's cookies
/// into <c>myCookieContainer</c>.
/// </summary>
/// <exception cref="InvalidOperationException">
/// Thrown when <c>webBrowser1</c> has no document loaded yet.
/// </exception>
private void GetViewState()
{
    if (webBrowser1.Document == null)
    {
        throw new InvalidOperationException("webBrowser1 has not finished loading a document.");
    }

    // Read the value attribute directly instead of regex-parsing OuterHtml: the regex
    // depended on attribute order and quoting.
    strViewState = ReadHiddenInputValue("__VIEWSTATE", strViewState);
    strValidCode = ReadHiddenInputValue("hdValidateCodeID", strValidCode);

    txtValidCode.Text = strValidCode;
    txtViewState.Text = strViewState;

    // Convert the "name=value; name=value" cookie string into Cookie objects.
    // NOTE(review): document.cookie normally hides HttpOnly cookies, so a session cookie
    // flagged HttpOnly would not be copied here - confirm against the target site.
    string cookieHeader = webBrowser1.Document.Cookie;
    if (string.IsNullOrEmpty(cookieHeader))
    {
        return;
    }

    foreach (string pair in cookieHeader.Split(';'))
    {
        // Split on the first '=' only, so values that themselves contain '=' stay intact.
        int separator = pair.IndexOf('=');
        if (separator <= 0)
        {
            continue;
        }

        string cookieName = pair.Substring(0, separator).Trim();
        string cookieValue = pair.Substring(separator + 1).Trim();
        try
        {
            Cookie ck = new Cookie(cookieName, cookieValue);
            ck.Domain = "XXX.com"; // must match the target site's domain exactly
            myCookieContainer.Add(ck);
        }
        catch (CookieException)
        {
            // Best effort: skip a cookie the container rejects and continue with the rest.
        }
    }
}

/// <summary>
/// Returns the <c>value</c> attribute of the element with the given id in the loaded document,
/// or <paramref name="fallback"/> when no such element exists.
/// </summary>
private string ReadHiddenInputValue(string elementId, string fallback)
{
    var element = webBrowser1.Document.GetElementById(elementId);
    if (element == null)
    {
        return fallback;
    }

    return element.GetAttribute("value");
}
4 登录并且存取cookie
提交参数,并存下cookie,供后续用
/// <summary>
/// Posts the login form (user name, password, CAPTCHA text, validation key and view state) to the
/// login page and stores the resulting cookie container in <c>cookies</c> for later requests.
/// </summary>
/// <remarks>
/// Uses <c>myCookieContainer</c> for the request, and the <c>strValidCode</c> and
/// <c>strViewState</c> fields for the hidden form values.
/// </remarks>
private void Login()
{
    cookies = new CookieContainer();
    string strUrl = "http://www.xxx.com/Login.aspx"; // login page

    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(strUrl);
    request.Method = "POST";
    request.CookieContainer = myCookieContainer;
    request.KeepAlive = true;
    request.ContentType = "application/x-www-form-urlencoded";
    // Present the request as coming from Google Chrome.
    request.UserAgent =
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36";
    request.Headers.Add("x-requested-with:XMLHttpRequest");
    request.Headers.Add(HttpRequestHeader.AcceptLanguage, "zh-CN,zh;q=0.8,en;q=0.6,nl;q=0.4,zh-TW;q=0.2");
    request.Accept = "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8";
    request.AutomaticDecompression = DecompressionMethods.Deflate | DecompressionMethods.GZip;
    request.Headers.Add("Accept-Encoding", "gzip, deflate");

    // Every value is URL-encoded: a base64 view state contains '+', '/' and '=', and a
    // password may contain '&'; sent raw they would corrupt the form body.
    // Adjust the field names to match the POST string seen in the browser's developer tools.
    string postData = string.Format(
        "txtUserName={0}&txtPassword={1}&txtValidateCode={2}&hdValidateCodeID={3}&ddlLanguage=CN&btnLogin={4}&__EVENTTARGET=&__EVENTARGUMENT=&__VIEWSTATE={5}",
        WebUtility.UrlEncode(txtUserName.Text),
        WebUtility.UrlEncode(txtPassword.Text),
        WebUtility.UrlEncode(txtValidate.Text),
        WebUtility.UrlEncode(strValidCode),
        WebUtility.UrlEncode("登录"),
        WebUtility.UrlEncode(strViewState));
    byte[] postdatabyte = Encoding.UTF8.GetBytes(postData);
    request.ContentLength = postdatabyte.Length;
    using (Stream stream = request.GetRequestStream())
    {
        stream.Write(postdatabyte, 0, postdatabyte.Length);
    }

    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
    using (StreamReader reader = new StreamReader(response.GetResponseStream(), Encoding.UTF8))
    {
        // NOTE(review): the response body is read but never inspected, so the UI below is
        // marked as logged in whenever the request completes without an exception.
        reader.ReadToEnd();
    }

    // Keep the cookies so later requests can reuse the session without logging in again.
    cookies = request.CookieContainer;
    lbLogin.Text = "已登录";
    btnSearchResume.Enabled = true;
}
1 0
- 模拟web访问有登录且有验证码的登录后抓取数据
- python模拟登录有验证码的网站记录
- HttpClient 模拟登录豆瓣网(有验证码)
- C# 利用 HttpWebRequest 和 HttpWebResponse 模拟登录有验证码的网站
- 记一次C#的web模拟登录抓取
- Python使用mechanize模拟登录、抓取数据的代码
- Python使用mechanize模拟登录、抓取数据的代码
- 网页数据抓取-接前文模拟登录
- PHP CURL模拟登录抓取数据
- 生成验证码且进行登录验证
- 基于验证码模拟登录的爬虫
- 模拟登录时的验证码功能
- 模拟登录抓取页面
- Android登录客户端,验证码的获取,网页数据抓取与解析,HttpWatch基本使用
- csdn的登录验证做的太有“准”啦
- 利用httpclient 模拟登录,获取登录后信息数据
- 利用httpclient 模拟登录,获取登录后信息数据
- 利用httpclient 模拟登录,获取登录后信息数据
- 如何写出好的代码?
- java高级编程的教程
- NineOldAndroids动画兼容库的使用
- 多态中成员函数的特点
- 分布式 Key-Value 存储系统:Cassandra 入门
- 模拟web访问有登录且有验证码的登录后抓取数据
- linux cut 命令详解
- iOS--常见的几种数据存储方式
- 23种设计模式详解
- CentOS 7 最小化安装之后安装Mysql
- Gulp.js-livereload 不用F5了,实时自动刷新页面来开发
- InnoDB锁问题
- iOS后台定位被拒
- 批处理的一些理解