java抓取网页源代码《转载》
来源:互联网 发布:开淘宝网店怎样找货源 编辑:程序博客网 时间:2024/05/22 06:07
/**
 * Downloads the resource behind {@code url} over HTTP and returns its body as text.
 *
 * <p>The request is sent with an Internet Explorer style {@code User-Agent} header and
 * 60-second connect and read timeouts. The body is read line by line; because
 * {@code readLine()} strips line terminators and nothing is appended in their place,
 * the returned text contains the lines joined without separators.
 *
 * @param url    the address to fetch; its connection must be an {@link HttpURLConnection}
 * @param encode name of the charset used to decode the response body
 * @return the response body, or {@code null} when no response code could be obtained,
 *         the response code is 400 or higher, or an {@link IOException} occurs
 *         (including an unsupported {@code encode})
 */
public static String getHtmlContent(URL url, String encode) {
    StringBuilder contentBuffer = new StringBuilder();
    HttpURLConnection con = null;
    try {
        con = (HttpURLConnection) url.openConnection();
        // Present ourselves as Internet Explorer when downloading.
        con.setRequestProperty("User-Agent", "Mozilla/4.0 (compatible; MSIE 5.0; Windows NT; DigExt)");
        con.setConnectTimeout(60000);
        con.setReadTimeout(60000);

        // Fetch the HTTP status code of the response.
        int responseCode = con.getResponseCode();
        if (responseCode == -1) {
            System.out.println(url.toString() + " : connection is failure...");
            return null;
        }
        if (responseCode >= 400) {
            // The request failed.
            System.out.println("请求失败:get response code: " + responseCode);
            return null;
        }

        InputStream inStr = con.getInputStream();
        try {
            BufferedReader reader = new BufferedReader(new InputStreamReader(inStr, encode));
            String line;
            while ((line = reader.readLine()) != null) {
                contentBuffer.append(line);
            }
        } finally {
            // Close the stream on the failure path as well, not only after a full read.
            inStr.close();
        }
    } catch (IOException e) {
        e.printStackTrace();
        System.out.println("error: " + url.toString());
        // Return here: falling through would dereference a discarded buffer.
        return null;
    } finally {
        // con is still null when openConnection() itself threw.
        if (con != null) {
            con.disconnect();
        }
    }
    return contentBuffer.toString();
}
/**
 * Convenience overload that accepts the address as a string.
 *
 * <p>When {@code url} carries neither an {@code http://} nor an {@code https://} prefix
 * (compared case-insensitively), {@code http://} is prepended before the address is
 * parsed. The actual download is delegated to {@code getHtmlContent(URL, String)}.
 *
 * @param url    the address to fetch, with or without a scheme prefix
 * @param encode name of the charset used to decode the response body
 * @return whatever the {@code URL} overload returns, or {@code null} when {@code url}
 *         is {@code null} or any exception is thrown while parsing or fetching
 */
public static String getHtmlContent(String url, String encode) {
    if (url == null) {
        // Every other failure is reported as null, so do the same for a missing address.
        return null;
    }
    // regionMatches(true, ...) compares case-insensitively without locale-dependent lowercasing.
    boolean hasHttp = url.regionMatches(true, 0, "http://", 0, 7);
    boolean hasHttps = url.regionMatches(true, 0, "https://", 0, 8);
    if (!hasHttp && !hasHttps) {
        // Leave https:// addresses alone: prefixing them would yield "http://https://...".
        url = "http://" + url;
    }
    try {
        URL rUrl = new URL(url);
        return getHtmlContent(rUrl, encode);
    } catch (Exception e) {
        e.printStackTrace();
        return null;
    }
}
// NOTE(review): the statements below repeat the body of getHtmlContent(URL, String)
// verbatim, but no method signature precedes them here, so `url` and `encode` are
// undeclared and the closing brace at the end has no matching opening brace.
// Presumably a copy/paste artifact of the page this was taken from -- confirm and
// restore the signature or remove the duplicate.
StringBuffer contentBuffer = new StringBuffer();
int responseCode = -1;
HttpURLConnection con = null;
try {
con = (HttpURLConnection) url.openConnection();
con.setRequestProperty("User-Agent", "Mozilla/4.0 (compatible; MSIE 5.0; Windows NT; DigExt)");// download while presenting an IE User-Agent
con.setConnectTimeout(60000);
con.setReadTimeout(60000);
// Get the HTTP response code of the page.
responseCode = con.getResponseCode();
if (responseCode == -1) {
System.out.println(url.toString() + " : connection is failure...");
con.disconnect();
return null;
}
if (responseCode >= 400) // request failed
{
System.out.println("请求失败:get response code: " + responseCode);
con.disconnect();
return null;
}
InputStream inStr = con.getInputStream();
InputStreamReader istreamReader = new InputStreamReader(inStr, encode);
BufferedReader buffStr = new BufferedReader(istreamReader);
String str = null;
// readLine() drops line terminators and nothing is appended in their place,
// so the lines end up joined without separators.
while ((str = buffStr.readLine()) != null)
contentBuffer.append(str);
inStr.close();
} catch (IOException e) {
e.printStackTrace();
// NOTE(review): the buffer is nulled here, yet the return statement after the
// try block calls contentBuffer.toString(), which then throws NullPointerException.
contentBuffer = null;
System.out.println("error: " + url.toString());
} finally {
// NOTE(review): con is still null if openConnection() threw, so this call can
// itself throw NullPointerException.
con.disconnect();
}
return contentBuffer.toString();
}
/**
 * Convenience overload that accepts the address as a string.
 *
 * <p>When {@code url} carries neither an {@code http://} nor an {@code https://} prefix
 * (compared case-insensitively), {@code http://} is prepended before the address is
 * parsed. The actual download is delegated to {@code getHtmlContent(URL, String)}.
 *
 * @param url    the address to fetch, with or without a scheme prefix
 * @param encode name of the charset used to decode the response body
 * @return whatever the {@code URL} overload returns, or {@code null} when {@code url}
 *         is {@code null} or any exception is thrown while parsing or fetching
 */
public static String getHtmlContent(String url, String encode) {
    if (url == null) {
        // Every other failure is reported as null, so do the same for a missing address.
        return null;
    }
    // regionMatches(true, ...) compares case-insensitively without locale-dependent lowercasing.
    boolean hasHttp = url.regionMatches(true, 0, "http://", 0, 7);
    boolean hasHttps = url.regionMatches(true, 0, "https://", 0, 8);
    if (!hasHttp && !hasHttps) {
        // Leave https:// addresses alone: prefixing them would yield "http://https://...".
        url = "http://" + url;
    }
    try {
        URL rUrl = new URL(url);
        return getHtmlContent(rUrl, encode);
    } catch (Exception e) {
        e.printStackTrace();
        return null;
    }
}
0 0
- java抓取网页源代码《转载》
- java简单抓取网页源代码
- 转载:如何用java实现抓取网页?
- C++抓取网页源代码
- c#信息抓取一:抓取网页源代码
- 使用Socket抓取网页源代码
- java网页数据抓取源代码(抓取电话和身份证信息为例)
- PHP抓取网页内容获得网页源代码
- PHP抓取网页内容获得网页源代码
- JAVA 抓取网页内容
- 【JAVA】 抓取网页内容
- java抓取网页
- JAVA 抓取网页内容
- java抓取网页数据
- Java网页抓取例子
- java 抓取网页内容
- JAVA抓取网页
- java网页抓取问题
- android设计的布局在阿拉伯语下界面错乱的解决方法
- Hibernate - DetachedCriteria
- Bridge模式详解--设计模式(7)
- JAVA WordVec 的一个实现
- 实现的radio图片选择按钮效果
- java抓取网页源代码《转载》
- 28. Implement strStr()
- 高精度乘法实例
- 100. Same Tree 树是否相同
- 递归
- 查看linux系统版本和系统位数
- Mapper.xml映射文件
- 分类器评价指标--ROC曲线及AUC值
- 读写sd卡的文件