java爬虫
来源:互联网 发布:linux mount命令详解 编辑:程序博客网 时间:2024/05/16 10:51
package com.elan.fenjainli.login;
import java.io.*;
import java.sql.*;
import java.util.*;
import org.apache.http.*;
import org.apache.http.client.*;
import org.apache.http.client.entity.*;
import org.apache.http.client.methods.*;
import org.apache.http.impl.client.*;
import org.apache.http.message.*;
import org.apache.http.util.*;
public class FenJianLiSimulate {
// Client instance shared by the login POST and the later GET requests in this class.
// The annotation silences the deprecation warning raised for DefaultHttpClient.
@SuppressWarnings("deprecation")
private static HttpClient httpClient = new DefaultHttpClient();
/**
 * Simulates a browser login by POSTing the credential form to the login URL
 * through the shared {@code httpClient}.
 *
 * <p>The outcome is reported on the console: a success message for any status
 * below 400, a failure message otherwise. No exception escapes this method;
 * failures are printed as stack traces.
 */
public static void fenResume() {
    // Login endpoint (the host is masked in this source).
    String login_src = "http://www.*.com/login/login.htm";
    HttpPost httpPost = new HttpPost(login_src);

    // Headers taken from a captured browser request so the call resembles a browser XHR.
    httpPost.addHeader(new BasicHeader("Host", "www.*.com"));
    httpPost.addHeader("Origin", "http://www.*.com");
    httpPost.addHeader("Referer", "http://www.*.com/login/home.htm");
    httpPost.addHeader("User-Agent",
            "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36");
    httpPost.addHeader("X-Requested-With", "XMLHttpRequest");

    // Form fields the login page submits (also taken from the captured request).
    List<NameValuePair> list = new ArrayList<NameValuePair>();
    list.add(new BasicNameValuePair("username", "用户名"));
    list.add(new BasicNameValuePair("password", "密码"));
    list.add(new BasicNameValuePair("rememberMe", "1"));

    HttpEntity entity = null;
    try {
        // Encode explicitly as UTF-8: the one-argument constructor falls back to
        // ISO-8859-1, which cannot represent non-Latin field values.
        httpPost.setEntity(new UrlEncodedFormEntity(list, "UTF-8"));
        HttpResponse response = httpClient.execute(httpPost);
        entity = response.getEntity();

        // Only claim success when the server did not answer with an error status.
        int status = response.getStatusLine().getStatusCode();
        if (status >= 400) {
            System.err.println("登陆失败, HTTP状态码: " + status);
        } else {
            System.out.println("登陆成功!");
        }
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        // Release the connection; the body itself is not needed. consume() accepts null.
        try {
            EntityUtils.consume(entity);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
/**
 * Fetches a URL with the shared client and returns its body as text.
 * Intended to be called after {@code fenResume()} has logged in; it can be
 * called for one URL or for many.
 *
 * @param redirectLocation URL to request
 * @return the response body (UTF-8 is passed to {@code EntityUtils.toString} as the
 *         charset argument) when the status is 200 and a body is present; otherwise
 *         the empty string
 * @throws ParseException if the response headers cannot be parsed while decoding
 * @throws IOException if reading or releasing the response body fails
 */
public static String gethtml(String redirectLocation) throws ParseException, IOException {
    HttpResponse response = getRawHtml(httpClient, redirectLocation);
    HttpEntity entity = response.getEntity();
    try {
        // getRawHtml hands back a body-less placeholder response when the request
        // fails, and EntityUtils.toString rejects a null entity, so guard for it.
        if (entity == null || response.getStatusLine().getStatusCode() != HttpStatus.SC_OK) {
            return "";
        }
        return EntityUtils.toString(entity, "UTF-8");
    } finally {
        // Always release the connection, including when decoding throws. consume() accepts null.
        EntityUtils.consume(entity);
    }
}
/**
 * Executes a GET request for the given URL.
 *
 * @param client
 *            client used to execute the request
 * @param personalUrl
 *            URL to fetch
 * @return the response returned by the client; when the request cannot be built or
 *         executed, the exception is printed and a body-less placeholder response with
 *         status 503 is returned, so callers do not mistake the failure for a 200
 */
public static HttpResponse getRawHtml(HttpClient client, String personalUrl) {
    try {
        // The HttpGet is built inside the try as well, so a malformed or null URL
        // is handled the same way as a failed execution.
        return client.execute(new HttpGet(personalUrl));
    } catch (Exception e) {
        e.printStackTrace();
        return new BasicHttpResponse(HttpVersion.HTTP_1_1, HttpStatus.SC_SERVICE_UNAVAILABLE, "Request failed");
    }
}
/**
 * Entry point: logs in, fetches one page and prints its HTML to standard output.
 *
 * @param args optional; {@code args[0]} is the URL to fetch after logging in, e.g. a
 *            search page such as
 *            {@code http://www.fenjianli.com/search/search.htm?keywords=...&rows=30&offset=0}.
 *            Without it the empty placeholder URL is requested, as before.
 */
@SuppressWarnings("deprecation")
public static void main(String[] args) throws ClientProtocolException, IOException, SQLException {
    String url = (args != null && args.length > 0) ? args[0] : "";
    try {
        // Log in first, then request the page through the same client.
        fenResume();
        String html = gethtml(url);
        System.out.println(html);
        // Parsing of the fetched page would go here: Parse.parseurl(html);
    } finally {
        // Release the connections held by the shared client before exiting.
        httpClient.getConnectionManager().shutdown();
    }
}
import java.io.*;
import java.sql.*;
import java.util.*;
import org.apache.http.*;
import org.apache.http.client.*;
import org.apache.http.client.entity.*;
import org.apache.http.client.methods.*;
import org.apache.http.impl.client.*;
import org.apache.http.message.*;
import org.apache.http.util.*;
/**
 * Simulates a browser login against a web site and then fetches pages through the
 * same HTTP client.
 *
 * <p>All requests go through one shared, static client instance.
 */
public class FenJianLiSimulate {

    // Client instance shared by the login POST and the later GET requests.
    // The annotation silences the deprecation warning raised for DefaultHttpClient.
    @SuppressWarnings("deprecation")
    private static HttpClient httpClient = new DefaultHttpClient();

    /**
     * Simulates a browser login by POSTing the credential form to the login URL
     * through the shared client.
     *
     * <p>The outcome is reported on the console: a success message for any status
     * below 400, a failure message otherwise. No exception escapes this method;
     * failures are printed as stack traces.
     */
    public static void fenResume() {
        // Login endpoint (the host is masked in this source).
        String login_src = "http://www.*.com/login/login.htm";
        HttpPost httpPost = new HttpPost(login_src);

        // Headers taken from a captured browser request so the call resembles a browser XHR.
        httpPost.addHeader(new BasicHeader("Host", "www.*.com"));
        httpPost.addHeader("Origin", "http://www.*.com");
        httpPost.addHeader("Referer", "http://www.*.com/login/home.htm");
        httpPost.addHeader("User-Agent",
                "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36");
        httpPost.addHeader("X-Requested-With", "XMLHttpRequest");

        // Form fields the login page submits (also taken from the captured request).
        List<NameValuePair> list = new ArrayList<NameValuePair>();
        list.add(new BasicNameValuePair("username", "用户名"));
        list.add(new BasicNameValuePair("password", "密码"));
        list.add(new BasicNameValuePair("rememberMe", "1"));

        HttpEntity entity = null;
        try {
            // Encode explicitly as UTF-8: the one-argument constructor falls back to
            // ISO-8859-1, which cannot represent non-Latin field values.
            httpPost.setEntity(new UrlEncodedFormEntity(list, "UTF-8"));
            HttpResponse response = httpClient.execute(httpPost);
            entity = response.getEntity();

            // Only claim success when the server did not answer with an error status.
            int status = response.getStatusLine().getStatusCode();
            if (status >= 400) {
                System.err.println("登陆失败, HTTP状态码: " + status);
            } else {
                System.out.println("登陆成功!");
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // Release the connection; the body itself is not needed. consume() accepts null.
            try {
                EntityUtils.consume(entity);
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Fetches a URL with the shared client and returns its body as text.
     * Intended to be called after {@link #fenResume()} has logged in; it can be
     * called for one URL or for many.
     *
     * @param redirectLocation URL to request
     * @return the response body (UTF-8 is passed to {@code EntityUtils.toString} as the
     *         charset argument) when the status is 200 and a body is present; otherwise
     *         the empty string
     * @throws ParseException if the response headers cannot be parsed while decoding
     * @throws IOException if reading or releasing the response body fails
     */
    public static String gethtml(String redirectLocation) throws ParseException, IOException {
        HttpResponse response = getRawHtml(httpClient, redirectLocation);
        HttpEntity entity = response.getEntity();
        try {
            // A failed request yields a body-less placeholder response, and
            // EntityUtils.toString rejects a null entity, so guard for it.
            if (entity == null || response.getStatusLine().getStatusCode() != HttpStatus.SC_OK) {
                return "";
            }
            return EntityUtils.toString(entity, "UTF-8");
        } finally {
            // Always release the connection, including when decoding throws. consume() accepts null.
            EntityUtils.consume(entity);
        }
    }

    /**
     * Executes a GET request for the given URL.
     *
     * @param client
     *            client used to execute the request
     * @param personalUrl
     *            URL to fetch
     * @return the response returned by the client; when the request cannot be built or
     *         executed, the exception is printed and a body-less placeholder response
     *         with status 503 is returned, so callers do not mistake the failure for a 200
     */
    public static HttpResponse getRawHtml(HttpClient client, String personalUrl) {
        try {
            // The HttpGet is built inside the try as well, so a malformed or null URL
            // is handled the same way as a failed execution.
            return client.execute(new HttpGet(personalUrl));
        } catch (Exception e) {
            e.printStackTrace();
            return new BasicHttpResponse(HttpVersion.HTTP_1_1, HttpStatus.SC_SERVICE_UNAVAILABLE, "Request failed");
        }
    }

    /**
     * Entry point: logs in, fetches one page and prints its HTML to standard output.
     *
     * @param args optional; {@code args[0]} is the URL to fetch after logging in, e.g. a
     *            search page such as
     *            {@code http://www.fenjianli.com/search/search.htm?keywords=...&rows=30&offset=0}.
     *            Without it the empty placeholder URL is requested, as before.
     */
    @SuppressWarnings("deprecation")
    public static void main(String[] args) throws ClientProtocolException, IOException, SQLException {
        String url = (args != null && args.length > 0) ? args[0] : "";
        try {
            // Log in first, then request the page through the same client.
            fenResume();
            String html = gethtml(url);
            System.out.println(html);
            // Parsing of the fetched page would go here: Parse.parseurl(html);
        } finally {
            // Release the connections held by the shared client before exiting.
            httpClient.getConnectionManager().shutdown();
        }
    }
}
阅读全文
1 0
- JS爬虫,Java爬虫
- JAVA爬虫
- Java 爬虫
- Java 爬虫
- Java 爬虫
- java爬虫
- java 爬虫
- Java爬虫
- java爬虫
- Java爬虫
- java 爬虫
- java爬虫
- java爬虫
- java 爬虫
- java爬虫
- java爬虫
- Java爬虫
- java爬虫
- 返回键
- 为什么析构函数常定义成虚函数
- 浮点数在内存中的存储形式
- 遍历List集合的三种方法
- MongoDB的聚合查询(两种方式)
- java爬虫
- 防止文本框自动获取焦点
- 用户注册
- 第五周 项目3
- 《数据结构与算法-Python语言描述》读书笔记(1)第1章绪论(关键词:数据结构/算法/Python/时间复杂度/空间复杂度)
- JAVA Freemarker(1)--入门示例
- commons双向Map和Bag包
- 最佳的75个安全工具
- MAC CPU-only Python3 安装 Caffe遇到的坑(弃疗)