基于HttpClient 多线程爬虫实践

来源:互联网 发布:有什么代练接单软件 编辑:程序博客网 时间:2024/05/29 04:34

1、首先添加Maven 依赖

        <!-- JUnit, test scope only: used to run QQUtil.mkdir() / rmdir() as test methods -->
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.11</version>
            <scope>test</scope>
        </dependency>
        <!-- Apache HttpComponents fluent facade; the crawler imports org.apache.http.* classes,
             presumably resolved through this artifact's transitive httpclient dependency -->
        <dependency>
            <groupId>org.apache.httpcomponents</groupId>
            <artifactId>fluent-hc</artifactId>
            <version>4.3.3</version>
        </dependency>

2、接着是 QQUtil 工具类:先用 JUnit 运行其中的 mkdir() 方法,创建保存头像的文件夹

package com.eliteams.index.study.httpclient;

import java.io.File;
import java.util.Random;

import org.junit.Test;

/**
 * Helper for the QQ avatar crawler: generates random QQ numbers, builds the avatar URL and the
 * local save path for a number, and creates / removes the local save folder.
 *
 * @author StarZou
 * @since May 26, 2014, 8:07:59 PM
 **/
public class QQUtil {

    /**
     * Avatar URL template; every {@code QQNumber} occurrence is replaced by the QQ number.
     */
    public static String QQ_LOGO_URL = "http://qlogo4.store.qq.com/qzone/QQNumber/QQNumber/100";

    /**
     * QQ profile-information URL (not used by the code in this class).
     */
    public static String QQ_INFO_URL = "http://r.cnc.qzone.qq.com/cgi-bin/user/cgi_personal_card?uin=954443045&fupdate=1&g_tk=2082931194&rd=1401115199";

    /**
     * Folder the avatars are saved into.
     */
    public static String FILE_PATH = "F:/qlogo/";

    /**
     * Extension appended to every saved avatar file.
     */
    public static String FILE_SUFFIX = ".jpg";

    /**
     * Random source used to generate QQ numbers.
     */
    public static Random qqNumRandom = new Random();

    /**
     * Smallest QQ number that can be generated (offset added to the random part).
     */
    public static int QQ_NUM_CARDINAL = 100000;

    /**
     * Exclusive upper bound of the random part of a generated QQ number.
     */
    public static int QQ_MAX_NUM = 999900000;

    /**
     * Generates a random QQ number in the range
     * {@code [QQ_NUM_CARDINAL, QQ_NUM_CARDINAL + QQ_MAX_NUM)}.
     *
     * @return the QQ number as a decimal string
     */
    public static String generateQQNum() {
        return String.valueOf(QQ_NUM_CARDINAL + qqNumRandom.nextInt(QQ_MAX_NUM));
    }

    /**
     * Builds the avatar URL for a QQ number.
     *
     * @param qq the QQ number
     * @return {@link #QQ_LOGO_URL} with every {@code QQNumber} placeholder replaced by {@code qq}
     */
    public static String generateLogoUrl(String qq) {
        // Literal replacement: replaceAll() would interpret '$' and '\' in qq as regex
        // replacement syntax.
        return QQ_LOGO_URL.replace("QQNumber", qq);
    }

    /**
     * Builds the local file path an avatar is saved to.
     *
     * @param qq the QQ number
     * @return {@link #FILE_PATH} + {@code qq} + {@link #FILE_SUFFIX}
     */
    public static String generatePath(String qq) {
        return new StringBuilder(FILE_PATH).append(qq).append(FILE_SUFFIX).toString();
    }

    /**
     * Creates the save folder (including missing parents) and prints whether it is available.
     */
    @Test
    public void mkdir() {
        File dir = new File(FILE_PATH);
        // mkdirs() returns false when the folder already exists or was created concurrently,
        // so the final state is checked with isDirectory() as well.
        boolean ready = dir.isDirectory() || dir.mkdirs() || dir.isDirectory();
        if (ready) {
            System.out.println("创建文件夹:" + FILE_PATH + "成功.");
        } else {
            System.out.println("创建文件夹:" + FILE_PATH + "失败.");
        }
    }

    /**
     * Deletes the files directly inside the save folder, then the folder itself, and prints
     * whether the folder is gone. A folder that does not exist counts as success.
     */
    @Test
    public void rmdir() {
        File dir = new File(FILE_PATH);
        boolean removed = true;
        if (dir.exists()) {
            // listFiles() returns null when the path is not a directory or cannot be read.
            File[] children = dir.listFiles();
            if (children != null) {
                for (File child : children) {
                    child.delete();
                }
            }
            // Fails if any child could not be deleted, so this result covers the children too.
            removed = dir.delete();
        }
        if (removed) {
            System.out.println("删除文件夹:" + FILE_PATH + "成功.");
        } else {
            System.out.println("删除文件夹:" + FILE_PATH + "失败.");
        }
    }
}


3、重头戏来了:FindBeautyTherad.java

package com.eliteams.index.study.httpclient;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;

import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;

/**
 * Worker that keeps generating random QQ numbers and downloads the avatar of each one into the
 * folder configured in {@link QQUtil}, skipping responses that look like the default avatar.
 *
 * @author StarZou
 * @since May 26, 2014, 8:06:32 PM
 **/
public class FindBeautyTherad implements Runnable {

    /** HTTP client shared by all worker threads. */
    public static CloseableHttpClient httpClient = HttpClients.createDefault();

    /** Content length that identifies the default (placeholder) avatar, which is not saved. */
    private static final long DEFAULT_LOGO_LENGTH = 2055;

    /** Only responses with this status code are saved. */
    private static final int HTTP_OK = 200;

    /** User-Agent header sent with every request (imitates the Google crawler). */
    private static final String USER_AGENT =
            "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)";

    /**
     * Downloads avatars for random QQ numbers until the running thread is interrupted.
     * A failed download is reported and the loop moves on to the next number.
     */
    public void run() {
        while (!Thread.currentThread().isInterrupted()) {
            final String qq = QQUtil.generateQQNum();
            try {
                if (download(qq)) {
                    System.out.println(qq + " 下载完成...");
                }
            } catch (ClientProtocolException e) {
                e.printStackTrace();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Requests the avatar of one QQ number and stores it locally when it is a real avatar.
     *
     * @param qq the QQ number
     * @return {@code true} if a file was saved; {@code false} if the response was skipped
     *         (no entity, non-200 status, or the default avatar)
     * @throws IOException if the request or the file write fails
     */
    private boolean download(String qq) throws IOException {
        HttpGet httpGet = new HttpGet(QQUtil.generateLogoUrl(qq));
        httpGet.setHeader("User-Agent", USER_AGENT);

        CloseableHttpResponse response = httpClient.execute(httpGet);
        try {
            HttpEntity entity = response.getEntity();
            try {
                boolean skip = entity == null
                        || response.getStatusLine().getStatusCode() != HTTP_OK
                        || entity.getContentLength() == DEFAULT_LOGO_LENGTH;
                if (skip) {
                    return false;
                }
                save(entity, new File(QQUtil.generatePath(qq)));
                return true;
            } finally {
                // Drain whatever is left (also for skipped responses) so the pooled
                // connection can be reused instead of being leaked.
                EntityUtils.consumeQuietly(entity);
            }
        } finally {
            response.close();
        }
    }

    /**
     * Copies the entity content into {@code target}; removes the file again if the copy fails.
     *
     * @param entity the response entity to read from
     * @param target the file to write
     * @throws IOException if reading the entity or writing the file fails
     */
    private static void save(HttpEntity entity, File target) throws IOException {
        boolean complete = false;
        InputStream in = entity.getContent();
        try {
            FileOutputStream out = new FileOutputStream(target);
            try {
                byte[] buffer = new byte[8192];
                int read;
                while ((read = in.read(buffer)) != -1) {
                    out.write(buffer, 0, read);
                }
                out.flush();
            } finally {
                out.close();
            }
            complete = true;
        } finally {
            try {
                in.close();
            } finally {
                if (!complete) {
                    // Do not leave a truncated image behind.
                    target.delete();
                }
            }
        }
    }

    /**
     * Starts ten threads that all run the same {@link FindBeautyTherad} instance.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // Single task instance shared by every thread
        FindBeautyTherad target = new FindBeautyTherad();

        // Threads wrapping the shared task
        List<Thread> proxy = new ArrayList<Thread>();
        for (int i = 0; i < 10; i++) {
            proxy.add(new Thread(target, "Thread-" + i));
        }

        // Start all threads
        for (Thread thread : proxy) {
            thread.start();
        }
    }
}

运行 main 方法,这时你就会看到 F:/qlogo/ 目录下的头像图片哗哗地被下载下来。


1 0
原创粉丝点击