Java 图片爬虫示例(Spider 类与 Demo 主方法)

来源:互联网 发布:ipad换壁纸软件 编辑:程序博客网 时间:2024/06/14 20:55

爬虫
`
import java.io.*;
import java.net.URL;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Scanner;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Fetches one HTML page, extracts the image links found in it and can
 * download those images to a local directory.
 *
 * <p>The page is fetched and scanned for images in the constructor.
 * {@link #downLoad()} saves the images and {@link #getPage()} yields the URL
 * of the following page, if the page links to one.
 */
public class Spider {
    /** Site root that is prepended to every link extracted from the page. */
    private static final String BASE = "http://meinv666.com";

    /** Directory prefix the downloaded images are written to. */
    private static final String TARGET_DIR = "D:\\Downloads\\pic\\";

    /** Matches an {@code <img>} tag whose {@code src} attribute is directly followed by {@code alt}. */
    private static final Pattern IMG_PATTERN =
            Pattern.compile("<img.*?src=\\\"([^\"]*?)\\\"\\s*alt=.*?>");

    /** Matches the anchor whose text is the {@code &raquo;} entity (the "next page" link). */
    private static final Pattern NEXT_PAGE_PATTERN =
            Pattern.compile("<a\\s*href=\\\"([^\"]*?)\\\">&raquo;</a>");

    /** Size of the copy buffer used while downloading. */
    private static final int BUFFER_SIZE = 8192;

    /** Number of images downloaded so far, shared by all instances. */
    private static int count = 0;

    /** URL of the page this spider works on; replaced by {@link #getPage()} when a next page exists. */
    private String sourceUrl;

    /** Text of the fetched page. */
    private StringBuilder html;

    /** Image URLs extracted from the page. */
    private final List<String> imgUrl = new ArrayList<>();

    /**
     * Creates a spider for the given page, fetching it and extracting its image links.
     *
     * @param sourceUrl URL of the page to process
     * @throws IOException if the page cannot be read
     */
    public Spider(String sourceUrl) throws IOException {
        this.sourceUrl = sourceUrl;
        getHtml();
        getImgURL();
    }

    /**
     * Reads the page at the current source URL into memory, one line at a time.
     *
     * @throws IOException if the URL is malformed or the page cannot be read
     */
    public void getHtml() throws IOException {
        URL url = new URL(sourceUrl);
        StringBuilder page = new StringBuilder();
        // try-with-resources closes the scanner and the underlying stream even on failure.
        try (Scanner scanner = new Scanner(url.openStream())) {
            while (scanner.hasNextLine()) {
                page.append(scanner.nextLine()).append('\n');
            }
            // Scanner swallows read errors and merely stops; surface them to the caller.
            IOException failure = scanner.ioException();
            if (failure != null) {
                throw failure;
            }
        }
        html = page;
    }

    /**
     * Extracts the image links from the fetched page.
     *
     * <p>The list is rebuilt on every call, so calling this method repeatedly
     * does not accumulate duplicate entries.
     *
     * @return the image URLs, each prefixed with the site root
     * @throws IOException declared for interface compatibility; not thrown by this implementation
     */
    public List<String> getImgURL() throws IOException {
        imgUrl.clear();
        Matcher m = IMG_PATTERN.matcher(html);
        while (m.find()) {
            imgUrl.add(BASE + m.group(1));
        }
        return imgUrl;
    }

    /**
     * Looks up the "next page" link in the fetched page.
     *
     * <p>When a link is found it also becomes this spider's source URL.
     *
     * @return the URL of the next page, or {@code null} when the page has no
     *         next-page link, so callers can stop instead of refetching the same page
     */
    public String getPage() {
        Matcher m = NEXT_PAGE_PATTERN.matcher(html);
        if (!m.find()) {
            return null;
        }
        sourceUrl = BASE + m.group(1);
        return sourceUrl;
    }

    /**
     * Downloads every extracted image into the target directory, naming each
     * file after the last path segment of its URL and pausing two seconds
     * between downloads.
     *
     * @throws IOException if an image cannot be fetched or written
     * @throws InterruptedException if the thread is interrupted during the pause
     */
    public void downLoad() throws IOException, InterruptedException {
        byte[] buffer = new byte[BUFFER_SIZE];
        for (String link : imgUrl) {
            String name = link.substring(link.lastIndexOf('/') + 1);
            URL url = new URL(link);
            // The remote stream is opened first so a failed request leaves no empty file behind;
            // both streams are closed even if the copy fails.
            try (InputStream in = url.openStream();
                 OutputStream out = new FileOutputStream(new File(TARGET_DIR + name))) {
                int read;
                while ((read = in.read(buffer)) != -1) {
                    out.write(buffer, 0, read);
                }
            }
            System.out.println("完成" + ++count + "张!");
            Thread.sleep(2000);
        }
    }
}
`

主方法
import java.io.IOException;

public class Demo {
public static void main(String[] args) throws IOException, InterruptedException {
String source = “http://meinv666.com/sex“;

    int count = 8;    while (count-- > 0 && source != null){        Spider spider = new Spider(source);        spider.downLoad();        source = spider.getPage();    }    System.out.println("完成惹。。");}

}

原创粉丝点击