取网页内容并分析

来源:互联网 发布:企业级大数据平台 编辑:程序博客网 时间:2024/04/30 01:23
C# code
/// <summary>
/// Logs in to the ASP.NET site at <c>http://IP/</c>, downloads the student
/// information page and returns its text-input fields as attributes of a
/// <c>StudentInfo</c> XML element.
/// </summary>
/// <param name="ID">Login ID substituted into the <c>PostLoginArgs</c> template.</param>
/// <param name="psw">Password substituted into the <c>PostLoginArgs</c> template.</param>
/// <param name="Photo">
/// Receives the open response stream of the student's photo. The caller owns
/// this stream and must dispose it; the underlying HTTP connection is only
/// released once the stream is closed.
/// </param>
/// <returns>
/// A <c>StudentInfo</c> element with one attribute per matched form field.
/// </returns>
/// <exception cref="System.InvalidOperationException">
/// A step that is expected to issue a cookie returned no usable
/// <c>Set-Cookie</c> header (for example, when the login was rejected).
/// </exception>
public XmlNode GetStudentInfo(string ID, string psw, out Stream Photo)
{
    // Number of leading characters stripped from each input's name attribute.
    // The "ctl00$ContentPlaceHolder1$" prefix is 26 characters; the remaining 2
    // are presumably a control-type prefix on the page -- TODO confirm.
    const int FieldNamePrefixLength = 28;

    XmlDocument xdoc = new XmlDocument();
    XmlNode StdInfo = xdoc.CreateElement("StudentInfo");
    XmlNode xtmp = xdoc.CreateElement("xtml");

    string __EVENTTARGET, __EVENTARGUMENT, __LASTFOCUS, __VIEWSTATE, __PREVIOUSPAGE, __EVENTVALIDATION;
    HttpWebRequest req;
    Cookie AUTH, ROLES;
    string RecData;

    // Step 1: fetch the login page to obtain the hidden WebForms state fields.
    req = (HttpWebRequest)WebRequest.Create(@"http://IP/home.aspx");
    RecData = ReadResponseText(req);
    GetField(RecData, out __EVENTTARGET, out __EVENTARGUMENT, out __LASTFOCUS, out __VIEWSTATE, out __PREVIOUSPAGE, out __EVENTVALIDATION);

    // Step 2: post the login form. Redirects are not followed so that the
    // Set-Cookie header of the immediate response can be read.
    req = (HttpWebRequest)WebRequest.Create(@"http://IP/home.aspx");
    req.Method = "POST";
    req.ContentType = "application/x-www-form-urlencoded";
    req.AllowAutoRedirect = false;
    // NOTE(review): ID and psw are inserted as-is; if the PostLoginArgs template
    // does not URL-encode them, values containing '&', '+' or '=' will corrupt
    // the form body -- verify against the resource string.
    string PostArgs = string.Format(Properties.Resources.PostLoginArgs, ID, psw);
    string PostData = string.Format(Properties.Resources.PostFrame, "", "", __LASTFOCUS, __VIEWSTATE, PostArgs, __PREVIOUSPAGE, __EVENTVALIDATION) + Properties.Resources.PostLoginButtonArg;
    byte[] buf = Encoding.UTF8.GetBytes(PostData);
    req.ContentLength = buf.Length;
    using (Stream body = req.GetRequestStream())
    {
        body.Write(buf, 0, buf.Length);
    }
    AUTH = ReadFirstCookie(req);

    // Step 3: the intermediate page issues a second cookie.
    req = (HttpWebRequest)WebRequest.Create(@"http://IP/middle.aspx");
    req.AllowAutoRedirect = false;
    req.CookieContainer = new CookieContainer();
    req.CookieContainer.Add(AUTH);
    ROLES = ReadFirstCookie(req);

    // Step 4: download the student information page.
    // The original author noted that this request was the one that timed out.
    // Every earlier response is now disposed, so its connection goes back to the
    // pool instead of exhausting the per-host connection limit.
    req = (HttpWebRequest)WebRequest.Create(@"http://IP/StudentUI/StuFunction/xj_infcheck.aspx");
    req.CookieContainer = new CookieContainer();
    req.CookieContainer.Add(AUTH);
    req.CookieContainer.Add(ROLES);
    RecData = ReadResponseText(req);

    // '$' is escaped: ASP.NET renders name attributes as "ctl00$ContentPlaceHolder1$...".
    // The previous "/$" form contained an end-of-input anchor and could never match.
    Regex rg = new Regex(@"<input name=""ctl00\$ContentPlaceHolder1\$.*?"" type=""text"" value="".*?"" .*?/>");
    foreach (Match item in rg.Matches(RecData))
    {
        // Parse the matched tag as XML to read its attributes.
        xtmp.InnerXml = item.Value;
        string fieldName = xtmp.FirstChild.Attributes["name"].Value;
        if (fieldName.Length <= FieldNamePrefixLength)
        {
            // Nothing would remain after stripping the prefix, and an empty
            // attribute name is not valid XML, so skip this input.
            continue;
        }

        XmlAttribute xatt = xdoc.CreateAttribute(fieldName.Substring(FieldNamePrefixLength));
        xatt.Value = xtmp.FirstChild.Attributes["value"].Value;
        StdInfo.Attributes.Append(xatt);
    }

    // Step 5: locate the photo URL (relative to the site root) and open it.
    rg = new Regex(@"(?<=<img id=""ctl00_ContentPlaceHolder1_imZP"" src=""../../).*?(?="" alt)");
    req = (HttpWebRequest)WebRequest.Create(@"http://IP/" + System.Web.HttpUtility.HtmlDecode(rg.Match(RecData).Value));
    req.CookieContainer = new CookieContainer();
    req.CookieContainer.Add(AUTH);
    req.CookieContainer.Add(ROLES);
    // Intentionally left open: ownership of the stream passes to the caller.
    Photo = req.GetResponse().GetResponseStream();

    return StdInfo;
}

/// <summary>
/// Executes <paramref name="request"/>, reads the whole response body as text
/// and disposes the response so its connection is released.
/// </summary>
private static string ReadResponseText(HttpWebRequest request)
{
    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
    using (StreamReader reader = new StreamReader(response.GetResponseStream()))
    {
        return reader.ReadToEnd();
    }
}

/// <summary>
/// Executes <paramref name="request"/> and builds a <see cref="Cookie"/> for
/// path "/" and domain "IP" from the first name=value pair of the response's
/// <c>Set-Cookie</c> header. The response is disposed before returning.
/// </summary>
/// <exception cref="System.InvalidOperationException">
/// The response has no <c>Set-Cookie</c> header, or its first segment is not
/// of the form name=value.
/// </exception>
private static Cookie ReadFirstCookie(HttpWebRequest request)
{
    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
    {
        string header = response.Headers["Set-Cookie"];
        if (string.IsNullOrEmpty(header))
        {
            throw new System.InvalidOperationException("The server response did not contain a Set-Cookie header.");
        }

        // Split on the first '=' only, so cookie values that themselves contain '=' stay intact.
        string[] pair = header.Split(';')[0].Split(new char[] { '=' }, 2);
        if (pair.Length != 2)
        {
            throw new System.InvalidOperationException("The Set-Cookie header is not in name=value form.");
        }

        return new Cookie(pair[0], pair[1], "/", "IP");
    }
}

/// <summary>
/// Extracts the hidden ASP.NET WebForms fields (names starting with "__") from
/// a page and returns each value URL-encoded. A field that is not present in
/// <paramref name="Data"/> is returned as <c>null</c>.
/// </summary>
/// <param name="Data">HTML text of the page to scan.</param>
private static void GetField(string Data, out string __EVENTTARGET, out string __EVENTARGUMENT, out string __LASTFOCUS, out string __VIEWSTATE, out string __PREVIOUSPAGE, out string __EVENTVALIDATION)
{
    __EVENTTARGET = null;
    __EVENTARGUMENT = null;
    __LASTFOCUS = null;
    __VIEWSTATE = null;
    __PREVIOUSPAGE = null;
    __EVENTVALIDATION = null;

    // Lazy quantifier: a greedy ".*" would merge several hidden inputs that
    // share a line into one match, which is not a well-formed XML fragment.
    Regex rg = new Regex(@"<input type=""hidden"" name=""__.*?/>");
    foreach (Match item in rg.Matches(Data))
    {
        // Each match is a self-closing tag, so it can be parsed as XML.
        XmlDocument xdoc = new XmlDocument();
        xdoc.InnerXml = item.Value;

        XmlAttribute nameAttribute = xdoc.FirstChild.Attributes["name"];
        XmlAttribute valueAttribute = xdoc.FirstChild.Attributes["value"];
        if (nameAttribute == null || valueAttribute == null)
        {
            // Hidden input without a name or value: nothing to capture.
            continue;
        }

        string value = valueAttribute.Value;
        switch (nameAttribute.Value)
        {
            case "__EVENTTARGET":
                __EVENTTARGET = value;
                break;
            case "__EVENTARGUMENT":
                __EVENTARGUMENT = value;
                break;
            case "__LASTFOCUS":
                // Previously missing, so this field was never captured.
                __LASTFOCUS = value;
                break;
            case "__VIEWSTATE":
                __VIEWSTATE = value;
                break;
            case "__PREVIOUSPAGE":
                __PREVIOUSPAGE = value;
                break;
            case "__EVENTVALIDATION":
                __EVENTVALIDATION = value;
                break;
        }
    }

    // The values are later placed into an application/x-www-form-urlencoded body.
    __EVENTTARGET = System.Web.HttpUtility.UrlEncode(__EVENTTARGET);
    __EVENTARGUMENT = System.Web.HttpUtility.UrlEncode(__EVENTARGUMENT);
    __LASTFOCUS = System.Web.HttpUtility.UrlEncode(__LASTFOCUS);
    __VIEWSTATE = System.Web.HttpUtility.UrlEncode(__VIEWSTATE);
    __PREVIOUSPAGE = System.Web.HttpUtility.UrlEncode(__PREVIOUSPAGE);
    __EVENTVALIDATION = System.Web.HttpUtility.UrlEncode(__EVENTVALIDATION);
}
} // closes the enclosing class, whose declaration lies outside this excerpt
原创粉丝点击