java利用url解析网页内容并模拟手动form提交数据

来源:互联网 发布:福富软件待遇 编辑:程序博客网 时间:2024/05/16 07:06
package com.test;import java.io.BufferedReader;import java.io.ByteArrayOutputStream;import java.io.DataInputStream;import java.io.DataOutputStream;import java.io.File;import java.io.FileOutputStream;import java.io.FileWriter;import java.io.IOException;import java.io.InputStream;import java.io.InputStreamReader;import java.io.OutputStream;import java.io.PrintWriter;import java.net.HttpURLConnection;import java.net.URL;import java.net.URLConnection;import java.net.URLEncoder;import java.nio.charset.Charset;import java.util.HashMap;import java.util.List;import java.util.Map;import java.util.StringTokenizer;/** *  *  * @author: ZhouQiShan * @since: 2011-06-09 */public class CookieUtil {public final static String CONTENT_TYPE = "Content-Type";public static void setProxy(String host, String port) {          System.setProperty("proxySet", "true");          System.setProperty("proxyHost", host);          System.setProperty("proxyPort", port);      }  public static Content curl(String method, String sUrl,Map<String, String> paramMap, Map<String, String> requestHeaderMap,boolean isOnlyReturnHeader,String path) {Content content = null;HttpURLConnection httpUrlConnection = null;InputStream in = null; setProxy("172.16.55.51", "808"); try {URL url = new URL(sUrl);boolean isPost = "POST".equals(method);if (method == null|| (!"GET".equalsIgnoreCase(method) && !"POST".equalsIgnoreCase(method))) {method = "POST";}URL resolvedURL = url;if ("GET".equals(method) && paramMap != null) {boolean firstParam = true;StringBuffer newUrlBuffer = new StringBuffer(url.toExternalForm());if (url.getQuery() == null) {newUrlBuffer.append("?");} else {newUrlBuffer.append("&");}for (Map.Entry<String, String> entry : paramMap.entrySet()) {String encName = URLEncoder.encode(entry.getKey(), "UTF-8");if (firstParam) {firstParam = false;} else {newUrlBuffer.append("&");}String encValue = URLEncoder.encode(entry.getValue(),"UTF-8");newUrlBuffer.append(encName);newUrlBuffer.append("=");newUrlBuffer.append(encValue);}resolvedURL = new java.net.URL(newUrlBuffer.toString());}URLConnection urlConnection = resolvedURL.openConnection();httpUrlConnection = (HttpURLConnection) urlConnection;httpUrlConnection.setRequestMethod(method);httpUrlConnection.setRequestProperty("Accept-Language", "zh-cn,zh;q=0.5");// Do not follow redirects, We will handle redirects ourselfhttpUrlConnection.setInstanceFollowRedirects(false);urlConnection.setDoOutput(true);urlConnection.setDoInput(true);urlConnection.setConnectTimeout(5000);urlConnection.setReadTimeout(5000);urlConnection.setUseCaches(false);urlConnection.setDefaultUseCaches(false);// set request headerif (requestHeaderMap != null) {for (Map.Entry<String, String> entry : requestHeaderMap.entrySet()) {String key = entry.getKey();String val = entry.getValue();if (key != null && val != null) {urlConnection.setRequestProperty(key, val);}}}if (isPost) {urlConnection.setDoOutput(true);ByteArrayOutputStream bufOut = new ByteArrayOutputStream();boolean firstParam = true;for (Map.Entry<String, String> entry : paramMap.entrySet()) {String encName = URLEncoder.encode(entry.getKey(), "UTF-8");if (firstParam) {firstParam = false;} else {bufOut.write((byte) '&');}String encValue = URLEncoder.encode(entry.getValue(),"UTF-8");bufOut.write(encName.getBytes("UTF-8"));bufOut.write((byte) '=');bufOut.write(encValue.getBytes("UTF-8"));}byte[] postContent = bufOut.toByteArray();if (urlConnection instanceof HttpURLConnection) {((HttpURLConnection) urlConnection).setFixedLengthStreamingMode(postContent.length);}OutputStream postOut = urlConnection.getOutputStream();postOut.write(postContent);postOut.flush();postOut.close();}httpUrlConnection.connect();int responseCode = httpUrlConnection.getResponseCode();// We handle redirects ourselfif (responseCode == HttpURLConnection.HTTP_MOVED_PERM|| responseCode == HttpURLConnection.HTTP_MOVED_TEMP) {String location = httpUrlConnection.getHeaderField("Location");URL newAction = new URL(url, location);// RecurseStringBuffer newUrlSb = new StringBuffer(newAction.getProtocol()+ "://" + newAction.getHost());if (newAction.getPort() != -1) {newUrlSb.append(":" + newAction.getPort());}if (newAction.getPath() != null) {newUrlSb.append(newAction.getPath());}if (newAction.getQuery() != null) {newUrlSb.append("?" + newAction.getQuery());}if (newAction.getRef() != null) {newUrlSb.append("#" + newAction.getRef());}return curl("POST", newUrlSb.toString(), paramMap, requestHeaderMap,isOnlyReturnHeader,path);} else if (responseCode == HttpURLConnection.HTTP_OK|| responseCode == HttpURLConnection.HTTP_CREATED) {byte[] bytes = new byte[0];if (!isOnlyReturnHeader) {if(isPost){in = httpUrlConnection.getInputStream();ByteArrayOutputStream bout = new ByteArrayOutputStream();byte[] buf = new byte[1024];while (true) {int rc = in.read(buf);if (rc <= 0) {break;} else {bout.write(buf, 0, rc);}}bytes = bout.toByteArray();in.close();}else{ DataInputStream ins = new DataInputStream(httpUrlConnection      .getInputStream()); //验证码的位置    DataOutputStream out = new DataOutputStream(new FileOutputStream(      path+"/code.bmp"));    byte[] buffer = new byte[4096];    int count = 0;    while ((count = ins.read(buffer)) > 0) {     out.write(buffer, 0, count);    }        out.close();    ins.close();}}// only fetch Content-Length and Last-Modified headerString encoding = null;if (encoding == null) {encoding = getEncodingFromContentType(httpUrlConnection.getHeaderField(CONTENT_TYPE));}content = new Content(sUrl, new String(bytes, encoding),httpUrlConnection.getHeaderFields());}} catch (Exception e) {return null;} finally {if (httpUrlConnection != null) {httpUrlConnection.disconnect();}}return content;}public static String getEncodingFromContentType(String contentType) {String encoding = null;if (contentType == null) {return null;}StringTokenizer tok = new StringTokenizer(contentType, ";");if (tok.hasMoreTokens()) {tok.nextToken();while (tok.hasMoreTokens()) {String assignment = tok.nextToken().trim();int eqIdx = assignment.indexOf('=');if (eqIdx != -1) {String varName = assignment.substring(0, eqIdx).trim();if ("charset".equalsIgnoreCase(varName)) {String varValue = assignment.substring(eqIdx + 1).trim();if (varValue.startsWith("\"")&& varValue.endsWith("\"")) {// substring works on indicesvarValue = varValue.substring(1,varValue.length() - 1);}if (Charset.isSupported(varValue)) {encoding = varValue;}}}}}if (encoding == null) {return "UTF-8";}return encoding;}public static void main(String[] args) {// login//System.out.println(getHtmlReadLine("http://www.zhaopin.com/"));//验证码的位置Content content = curl("GET", "http://www.haoshijia.com.cn/register/index/verify", null, null, false,"d:/");// build request headers & do rate of user reviewList<String> lsit = content.getHeaders().get("Set-Cookie");Map<String, String> resmap = new HashMap<String, String>();if (lsit != null) {StringBuffer sb = new StringBuffer();boolean isLast = false;int i = 0;for (String val : lsit) {i++;if (i == lsit.size()) {isLast = true;}int pos = val.indexOf("=");if (pos != -1) {String cookieName = val.substring(0, pos);String cookieVal = val.substring(pos + 1);System.out.println(cookieName+":"+cookieVal);cookieVal = cookieVal.split(";")[0];if (isLast) {sb.append(cookieName + "=" + cookieVal);} else {sb.append(cookieName + "=" + cookieVal + ";");}}}System.out.println(sb.toString());resmap.put("Cookie", sb.toString());}String a="";  System.out.print("请输入验证码:");  BufferedReader strin=new BufferedReader(new InputStreamReader(System.in));  try {a=strin.readLine();}  catch (IOException e) {// TODO Auto-generated catch blocke.printStackTrace();}  System.out.println("输入的数是:"+a); String email = "xsl0218";String pass = "zqs021823";String loginUrl = "http://www.haoshijia.com.cn/register/index/logincheck";String rateReviewUrl = "http://www.haoshijia.com.cn/member/index/index";Map<String, String> paramMap = new HashMap<String, String>();paramMap.put("login_name", email);paramMap.put("login_password", pass);//paramMap.put("login", "1");paramMap.put("login_code", a+"");content = curl("POST", loginUrl, paramMap, resmap, false,"");//System.out.println(content.getBody());// build request headers & do rate of user reviewparamMap = new HashMap<String, String>();content = curl("POST", rateReviewUrl, paramMap, resmap, false,"");inFile(content.getBody(), "D:/sss.txt");System.out.println(content.getBody());}// 这个是输出public static boolean inFile(String content, String path) {PrintWriter out = null;File file = new File(path);try {if (!file.exists()) {file.createNewFile();}out = new PrintWriter(new FileWriter(file));out.write(content);out.flush();return true;} catch (Exception e) {e.printStackTrace();} finally {out.close();}return false;}public static String getHtmlReadLine(String httpurl){String CurrentLine="";String TotalString="";InputStream urlStream;String content="";try {URL url = new URL(httpurl);HttpURLConnection connection = (HttpURLConnection)url.openConnection();connection.connect();System.out.println(connection.getResponseCode());urlStream = connection.getInputStream();BufferedReader reader = new BufferedReader(new InputStreamReader(urlStream,"utf-8"));while ((CurrentLine = reader.readLine()) != null) {TotalString += CurrentLine+"\n";}content = TotalString;} catch (Exception e) {}return content;}}class Content {private String url;private String body;private Map<String, List<String>> m_mHeaders = new HashMap<String, List<String>>();public Content(String url, String body, Map<String, List<String>> headers) {this.url = url;this.body = body;this.m_mHeaders = headers;}public String getUrl() {return url;}public String getBody() {return body;}public Map<String, List<String>> getHeaders() {return m_mHeaders;}}