获取网页源代码并解决字符乱码的问题

来源:互联网 发布:东莞plc编程培训机构 编辑:程序博客网 时间:2024/05/19 20:37

每当闲下来的时候,我都想看一下博客的访问次数,可每次都得手动刷新页面。作为一个程序员,当然不能这么 low。

于是写了一个程序来获取访问次数:

package com.lean.zzh;import java.io.ByteArrayOutputStream;import java.io.IOException;import java.io.InputStream;import java.net.HttpURLConnection;import java.net.MalformedURLException;import java.net.URL;public class VisitorVolume {/** * @param args * @throws IOException */public static void main(String[] args) throws IOException {// TODO Auto-generated method stubURL url = new URL("http://blog.csdn.net/yueloveme");System.out.println(getNum(get(url)));}/** * 获取网页上的源码 *  * @throws IOException */public static String get(URL url) throws IOException {String times = null;HttpURLConnection conn = (HttpURLConnection) url.openConnection();ByteArrayOutputStream outStream = new ByteArrayOutputStream();conn.setRequestMethod("GET");conn.setConnectTimeout(5 * 1000);InputStream read = conn.getInputStream();byte[] infor = new byte[1024];int len = 0;while ((len = read.read(infor)) != -1) {// 网页的编码格式是utf-8的,所以每一次转换都得指定格式才不会乱码times += new String(infor, 0, len, "utf-8");}read.close();return times;}/** * 获取访问量 */public static String getNum(String information) {String num = null;int start = information.indexOf("blog_rank");start = information.indexOf("<span>", start + 1);int end = information.indexOf("</span>", start);num = information.substring(start + 6, end);return num;}}