模拟Form提交,得到结果页内容
来源:互联网 发布:php str contains 编辑:程序博客网 时间:2024/04/30 06:47
/// <summary>
/// Builds the URL of one page of forum search results for the current keyword.
/// </summary>
/// <remarks>
/// Posts the search form to bbs.tuniu.com, locates the pager block in the returned HTML,
/// takes the href of the first link inside it, and turns that href into a template ending
/// in "page=" to which <paramref name="pageIndex"/> is appended.
/// A new search request is issued on every call.
/// NOTE(review): the "page=2" replacement assumes the first pager link targets page 2 and
/// that "page=2" appears nowhere else in the href - confirm against the live markup.
/// </remarks>
/// <param name="pageIndex">Page number appended to the trailing "page=" of the pager URL.</param>
/// <returns>Absolute URL built from "http://bbs.tuniu.com/", the rewritten pager href and the page number.</returns>
/// <exception cref="System.InvalidOperationException">
/// The search response was empty, or no pager block / pager link / href could be found in it.
/// </exception>
protected override string createUrl(int pageIndex)
{
    NameValueCollection PostVars = new NameValueCollection();
    PostVars.Add("mod", "forum");
    PostVars.Add("srchtype", "title");
    PostVars.Add("srhfid", "0");
    PostVars.Add("srhlocality", "portal::index");
    PostVars.Add("srchtxt", keyWord);

    string sRemoteInfo = CommonFunction.getSearchResault(PostVars, "http://bbs.tuniu.com/search.php?searchsubmit=yes", true);
    if (string.IsNullOrEmpty(sRemoteInfo))
    {
        // getSearchResault reports every failure as an empty string, so say so explicitly
        // instead of failing later with an index-out-of-range error.
        throw new System.InvalidOperationException("The search request returned no content; cannot build the result page URL.");
    }

    // Pager container of the result page.
    Regex regexRtCt = new Regex(@"<div class=""pgs cl mbm""><div class=""pg"">[\s\S]+?</div></div>");
    Match pagerMatch = regexRtCt.Match(sRemoteInfo);
    if (!pagerMatch.Success)
    {
        throw new System.InvalidOperationException("No pager block was found in the search result page.");
    }

    // First anchor inside the pager.
    Regex regexRtCtHref = new Regex(@"<a[\s\S]+?>[\s\S]+?</a>");
    Match linkMatch = regexRtCtHref.Match(pagerMatch.Value);
    if (!linkMatch.Success)
    {
        throw new System.InvalidOperationException("The pager block of the search result page contains no link.");
    }

    // The href attribute of that anchor, including its surrounding 'href="' and '">'.
    Regex regexRtCtURL = new Regex(@"href=""[\s\S]+?"">");
    Match hrefMatch = regexRtCtURL.Match(linkMatch.Value);
    if (!hrefMatch.Success)
    {
        throw new System.InvalidOperationException("The first pager link of the search result page has no href attribute.");
    }

    // Strip the attribute wrapper and drop the page number so the requested one can be appended.
    string strUrlTemp = "http://bbs.tuniu.com/"
        + hrefMatch.Value.Replace("href=\"", "").Replace("\">", "").Replace("page=2", "page=");

    return strUrlTemp + pageIndex;
}
/// <summary>
/// Simulates an HTML form submission and returns the content of the result page.
/// </summary>
/// <param name="PostVars">Form fields sent as the body of the POST request.</param>
/// <param name="searchUrl">Address the form is posted to.</param>
/// <param name="UrlEncodeUtf8">
/// <c>true</c> to decode the response bytes as UTF-8; <c>false</c> to decode them with
/// <see cref="System.Text.Encoding.Default"/>.
/// </param>
/// <returns>
/// The decoded response text, or an empty string when the request or the decoding throws.
/// </returns>
public static string getSearchResault(NameValueCollection PostVars, string searchUrl, bool UrlEncodeUtf8)
{
    string sRemoteInfo = "";

    try
    {
        // WebClient is IDisposable; release it as soon as the upload has completed.
        using (WebClient WebClientObj = new WebClient())
        {
            byte[] byRemoteInfo = WebClientObj.UploadValues(searchUrl, "POST", PostVars);

            System.Text.Encoding responseEncoding = UrlEncodeUtf8
                ? System.Text.Encoding.UTF8
                : System.Text.Encoding.Default;

            sRemoteInfo = responseEncoding.GetString(byRemoteInfo);
        }
    }
    catch (System.Exception ex)
    {
        // Deliberately best-effort: callers receive an empty string on any failure.
        // Record the cause so the failure is no longer completely silent.
        System.Diagnostics.Trace.TraceWarning("getSearchResault: POST to {0} failed: {1}", searchUrl, ex.Message);
    }

    return sRemoteInfo;
}