利用HttpURLConnection抓取网页取名
来源:互联网 发布:java 提高开发效率 编辑:程序博客网 时间:2024/04/29 11:32
闲来无事,利用Java的HttpURLConnection,使用多线程来抓取网页,计算名字的分数。
仅供娱乐。
程序如下:
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import org.apache.log4j.Logger;
public class XingMing {
static final Logger log = Logger.getLogger(XingMing.class);
public static String read(String urlStr) {
try {
URL url = new URL(urlStr);
HttpURLConnection connection = (HttpURLConnection) url
.openConnection();
connection.connect();
InputStream in = connection.getInputStream();
BufferedReader read = new BufferedReader(new InputStreamReader(in));
StringBuffer buf = new StringBuffer();
String line = null;
while ((line = read.readLine()) != null) {
buf.append(line);
}
return buf.toString();
} catch (MalformedURLException e) {
return null;
} catch (IOException e) {
return null;
}
}
public static String find(String str, String beginStr, String endStr) {
final int length = beginStr.length();
int index = str.indexOf(beginStr);
String result = null;
if (index != -1) {
int index2 = str.indexOf(endStr, index + length);
if (index2 != -1) {
result = str.substring(index + beginStr.length(), index2);
}
}
return result;
}
public static String findName(String source, String name) {
// value=我的姓名『XX』的分析:
return find(source, "value=我的姓名『", "』的分析");
}
public static String findScore(String source, String name) {
// <font size=3>姓名评分:</font><font color=0000ff size=5FONT-SIZE: 10pt;">
// BT,楷体">99.5</font>
return find(
source,
"<font size=3>姓名评分:</font><font color=0000ff size=5 BT,楷体/">",
"</font>");
}
public static void main(String[] args) throws IOException {
final char firstChar = '一';
final char lastChar = '龥';
// 最大开启100个线程,可以加快查询速度.
int maxThread = 100;
int step = (lastChar - firstChar) / maxThread;
for (int i = 0; i < maxThread; i++) {
char start = (char) (firstChar + i * step);
char end = (char) (firstChar + i * step + step - 1);
System.out.println("开启" + (i + 1) + "处理:" + start + "-" + end
+ (char) (end + 1));
new CallThread(start, end).start();
}
}
static class CallThread extends Thread {
private charstart;
private charend;
private String info;
CallThread(char start, char end) {
this.start = start;
this.end = end;
this.info = this.start + "-" + this.end;
}
public void run() {
// 姓
final char youname1 = '赵';
final String url = "http://www.xingming.net/cmjg-mz.asp?sex=男&youname1="
+ youname1 + "&youname2=";
String youname2;
String webinfo = null;
for (char i = start; i <= end; i++, webinfo = null) {
// 名字规则自己取吧.
// youname2 = "良" + i;
// youname2 = "" + i + i;
youname2 = i + "敏";
for (int j = 0; j < 5 && webinfo == null; j++) {
webinfo = XingMing.read(url + youname2);
}
if (webinfo == null) {
log.warn("获取名字[" + youname1 + youname2 + "]失败");
continue;
}
String webName = XingMing.findName(webinfo, "[" + youname2
+ "]");
String webScore = XingMing.findScore(webinfo, "[" + youname2
+ "]");
try {
if (Float.parseFloat(webScore) >= 90) {
System.out.println(youname2 + ":" + webName + ":"
+ webScore);
}
} catch (Exception e) {
}
log.info(this.info + ":" + webName + ":" + webScore);
if ((i - start) % 100 == 0) {
System.out.println(this.info + "处理了" + (i - start) + "个");
}
}
System.out.println(this.info + "结束了.....");
}
}
}
最新程序:
最终版宝宝取名程序,java版,我家宝宝名字已经确定。
http://blog.csdn.net/z3h/archive/2008/01/16/2047420.aspx
- 利用HttpURLConnection抓取网页取名
- 用HttpUrlConnection抓取网页内容
- 用HttpUrlConnection抓取网页内容
- 用HttpUrlConnection抓取网页内容
- 利用httpclient抓取网页内容
- 利用htmlparser抓取网页内容
- 利用httpclient抓取网页内容
- 利用Java抓取网页数据
- 利用WebDriver抓取网页内容
- 通过代理抓取网页code方法 proxy httpurlconnection
- 利用htmlparser抓取网页内容(一)
- asp 利用 xmlhttp 抓取网页内容
- ASP利用XMLHTTP抓取网页内容
- 利用Python抓取和解析网页(下)
- 利用Python抓取和解析网页(1)
- 利用Python抓取和解析网页(2)
- 利用Python抓取和解析网页(3)
- 利用Python抓取和解析网页(4)
- 交换机和路由器各自实现的原理
- 用DLL方式封装MDI子窗体是一种常用的软件开发技术
- 一个简单的RMI入门例程(转贴)
- 超酷右下角浮出窗口《修订版》--关闭时缓慢下降并消失
- 一个“Spring轮子”引发的“血案”(6)
- 利用HttpURLConnection抓取网页取名
- ST的Smart Card IC[资料整理]
- Manually and automatically mount windows file system on Linux
- VS2005为Windows移动程序创建安装文件(转载)
- 郁闷啊!
- Windows Mobile 开发常见问题集(一)(转载)
- [转]dw的默认文档类型(识别ftl为html文档)
- sip 资源
- 正则表达式