网站sickipedia 笑话抓取程序

来源:互联网 发布:茉莉机器人3.3源码 编辑:程序博客网 时间:2024/06/06 15:50

应宿舍一哥们邀请,写了一个简单的网站笑话抓取程序,这样就不用每看一个笑话都重刷整个网页了。(大笑)

发现网速卡的时候,有时用浏览器打不开网站,但是用程序还是可以拿得到内容,貌似 Java 很牛叉!

简单写了一下,没有考虑太多,别捡砖头啊!

 

package com.alec;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.HashMap;
import java.util.Map;

/**
 * Small scraper that repeatedly downloads a random-jokes page from sickipedia,
 * extracts each joke together with its id from the returned HTML, and finally
 * prints every collected joke to standard output.
 *
 * <p>The parser is line-oriented and keyed on two literal markers: a hidden
 * {@code <div>} whose text is the joke id, and a {@code <td>} that opens the
 * joke body. Jokes are stored in a map keyed by id, so fetching the same joke
 * twice keeps only one entry.
 */
public class Test {

    /** Number of times {@link #main(String[])} requests the random-jokes page. */
    private static final int FETCH_COUNT = 200;

    /** Prefix of the (trimmed) line that carries the joke id as its element text. */
    private static final String ID_MARKER = "<div style='display:none'>";

    /** Prefix of the (trimmed) line that opens a joke body cell. */
    private static final String BODY_MARKER = "<td style='color: #000000'>";

    /** Prefix of the (trimmed) line that closes a joke body cell. */
    private static final String BODY_END_MARKER = "</td>";

    /** All jokes collected so far, keyed by joke id. */
    private static final Map<String, String> jokes = new HashMap<String, String>();

    /**
     * Fetches the random-jokes page {@link #FETCH_COUNT} times and prints the
     * jokes that were collected.
     *
     * @param args ignored
     * @throws Exception declared for backward compatibility; fetch failures are
     *                   reported by {@link #getJokes(String)} and do not abort the run
     */
    public static void main(String[] args) throws Exception {
        Test t = new Test();
        for (int i = 0; i < FETCH_COUNT; i++) {
            t.getJokes("http://www.sickipedia.org/getjokes/random#");
        }
        t.printJokes();
    }

    /**
     * Downloads the page at {@code urlString}, decodes it as UTF-8 and adds every
     * joke found in it to the shared joke map. Failures are reported and
     * swallowed so that one bad fetch does not stop the caller's loop.
     *
     * @param urlString address of the page to download
     */
    public void getJokes(String urlString) {
        BufferedReader reader = null;
        try {
            URL url = new URL(urlString);
            reader = new BufferedReader(new InputStreamReader(url.openStream(), "utf-8"));
            parseJokes(reader);
        } catch (MalformedURLException e) {
            System.out.println("URL format not right.");
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Always release the connection; the original leaked one stream per fetch.
            closeQuietly(reader);
        }
    }

    /** Prints every collected joke as {@code joke <id> :<text>}, one per line. */
    public void printJokes() {
        for (Map.Entry<String, String> entry : jokes.entrySet()) {
            System.out.println("joke " + entry.getKey() + " :" + entry.getValue());
        }
    }

    /**
     * Scans the page line by line and stores each joke body under the id seen
     * most recently on the same page.
     */
    private static void parseJokes(BufferedReader reader) throws IOException {
        // The id is tracked per page so that a body without an id can never be
        // filed under (and overwrite) a joke from a previously fetched page.
        String currentId = null;
        String raw;
        while ((raw = reader.readLine()) != null) {
            String line = raw.trim();
            if (line.startsWith(ID_MARKER)) {
                currentId = extractId(line);
            } else if (line.startsWith(BODY_MARKER)) {
                String body = readJokeBody(reader);
                if (currentId != null && body != null) {
                    jokes.put(currentId, body.replace("<br />", " "));
                }
            }
        }
    }

    /**
     * Returns the text between the first {@code >} and the last {@code <} of the
     * id line, or {@code null} when the line has no closing tag after the
     * opening one.
     */
    private static String extractId(String line) {
        int start = line.indexOf(">") + 1;
        int end = line.lastIndexOf("<");
        if (end < start) {
            return null;
        }
        return line.substring(start, end);
    }

    /**
     * Reads the lines that follow a body marker and concatenates them (trimmed,
     * without separator) until a line starting with {@code </td>} or the end of
     * the stream. The first line after the marker is always taken as body text.
     * Returns {@code null} if the stream ends right after the marker.
     */
    private static String readJokeBody(BufferedReader reader) throws IOException {
        String first = reader.readLine();
        if (first == null) {
            return null;
        }
        StringBuilder body = new StringBuilder(first.trim());
        String next;
        // readLine() returns null at end of stream; stop instead of dereferencing it.
        while ((next = reader.readLine()) != null) {
            next = next.trim();
            if (next.startsWith(BODY_END_MARKER)) {
                break;
            }
            body.append(next);
        }
        return body.toString();
    }

    /** Closes the reader if it was opened, reporting but not propagating a close failure. */
    private static void closeQuietly(BufferedReader reader) {
        if (reader == null) {
            return;
        }
        try {
            reader.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}