Java爬虫(一)利用GET和POST发送请求,获取服务器返回信息

来源:互联网 发布:linux定时器函数 编辑:程序博客网 时间:2024/05/21 12:08

本人所使用软件

  • eclipse
  • Fiddler
  • UC浏览器

分析请求信息

以知乎(https://www.zhihu.com)为例,模拟登录请求,获取登录后的首页。第一步是分析请求信息。

在UC浏览器中按F12打开开发者工具,点击Network,再按F5刷新。使用自己的账号登录知乎后,点击www.zhihu.com这条请求,会出现以下界面

这里写图片描述 
在General中,可以看到请求方式是GET,因此在Fiddler的请求构造中,将方法选定为GET。 
向下滚动可以看到Request Header,将里面所有的内容复制下来,粘贴到Fiddler的请求构造里 
这里写图片描述

这里写图片描述

点击Execute,在Fiddler中点击刚才访问的网址,点击嗅探,再点击下方的TextView,发现并没有显示内容

这里写图片描述

删除部分无用的Request Header,点击Execute,发现返回数据成功!

这里写图片描述 
这里写图片描述

发送请求信息,获取数据

从以上的分析可以知道,访问知乎需要的Request Header,只需要有COOKIE就足够了,因此,我们将请求构造里帮我们格式化的内容,复制到txt文件内

文件名为(requestheader.txt)

接下来就是使用Java发送请求信息了。发送请求信息很简单,就直接贴出代码共同讨论。

package Main;

import java.io.IOException; 
import java.io.InputStream; 
import java.net.HttpURLConnection; 
import java.util.HashMap; 
import java.util.Map;

import Utils.HttpUtils; 
import Utils.HttpUtils.OnVisitingListener; 
import Utils.StreamUtils; 
import Utils.StreamUtils.OnGetStringListener;

public class Main { 
public static void main(String[] args) { 
// 获取网页数据 
getWebData(); 
// 设置参数 
// 得到返回数据 
}

private static void getWebData() {    HttpUtils httpUtils = HttpUtils.newInstance();    httpUtils.setOnVisitingListener(new OnVisitingListener() {        @Override        public void onSuccess(HttpURLConnection conn) {            try {                InputStream inputStream = conn.getInputStream();                String string = StreamUtils.getString(inputStream);                System.out.println(string);            } catch (IOException e) {                e.printStackTrace();            }        }        @Override        public void onSetDetails(HttpURLConnection conn, HttpUtils httpUtils) {            Map<String, String> map = new HashMap<String, String>();            StreamUtils.getString("requestheader.txt", new OnGetStringListener() {                @Override                public void onGeted() {                }                @Override                public void onGetString(String line) {                    System.out.println(line);                    String[] strings = line.split(":");                    map.put(strings[0], strings[1]);                }            });            httpUtils.setRequestProperties(map);        }        @Override        public void onFail(IOException e) {        }    }).startConnenction("https://www.zhihu.com/", "GET");}}
  • Utils封装工具类 
    package Utils;

import java.io.IOException; 
import java.io.PrintWriter; 
import java.net.HttpURLConnection; 
import java.net.URL; 
import java.util.Iterator; 
import java.util.Map; 
import java.util.Set;

/**
 * Small callback-driven wrapper around {@link HttpURLConnection}.
 *
 * <p>Usage: obtain an instance with {@link #newInstance()}, register an
 * {@link OnVisitingListener}, then call {@link #startConnenction(String, String)}.
 * Request headers and the request body must be set from
 * {@link OnVisitingListener#onSetDetails}, which runs before the connection is made.
 *
 * Created by admin on 2016/3/2.
 */
public class HttpUtils {
    /** Connect and read timeout, in milliseconds. */
    private static final int TIMEOUT_MILLIS = 5000;

    private HttpURLConnection conn;

    OnVisitingListener listener;

    /** Callbacks invoked while a request is prepared and executed. */
    public interface OnVisitingListener {
        /** Called when the server answered with HTTP 200. */
        void onSuccess(HttpURLConnection conn);

        /** Called after the connection object is created but before it connects. */
        void onSetDetails(HttpURLConnection conn, HttpUtils httpUtils);

        /** Called when the request failed or the response code was not 200. */
        void onFail(IOException e);
    }

    /** Creates a new, unconfigured instance. */
    public static HttpUtils newInstance() {
        return new HttpUtils();
    }

    /**
     * Registers the listener that receives the request callbacks.
     *
     * @return this instance, for chaining
     */
    public HttpUtils setOnVisitingListener(OnVisitingListener listener) {
        this.listener = listener;
        return this;
    }

    /**
     * Opens the connection object, lets the listener customize it, then connects.
     *
     * @param fileUrl an http or https URL
     * @param method  the HTTP method, e.g. {@code "GET"} or {@code "POST"}
     * @throws IOException           if the URL is malformed, is not an HTTP(S) URL,
     *                               the method is invalid, or connecting fails
     * @throws IllegalStateException if no listener has been registered
     */
    public void setConnection(String fileUrl, String method) throws IOException {
        if (listener == null) {
            throw new IllegalStateException(
                    "setOnVisitingListener must be called before starting a connection");
        }
        java.net.URLConnection opened = new URL(fileUrl).openConnection();
        if (!(opened instanceof HttpURLConnection)) {
            // e.g. file: or jar: URLs; report through the normal IOException path
            // instead of failing with a ClassCastException.
            throw new IOException("Not an HTTP(S) URL: " + fileUrl);
        }
        conn = (HttpURLConnection) opened;
        conn.setRequestMethod(method);
        conn.setConnectTimeout(TIMEOUT_MILLIS);
        conn.setReadTimeout(TIMEOUT_MILLIS);
        listener.onSetDetails(conn, this);
        conn.connect();
    }

    /**
     * Performs the request and dispatches the outcome to the listener:
     * {@code onSuccess} for HTTP 200, {@code onFail} for any {@link IOException}
     * or any other response code.
     *
     * <p>The connection is not disconnected here, so it is still usable after
     * the callbacks return.
     */
    public void startConnenction(String url, String method) {
        try {
            setConnection(url, method);
            int code = conn.getResponseCode();
            if (code == HttpURLConnection.HTTP_OK) {
                listener.onSuccess(conn);
            } else {
                listener.onFail(new IOException(
                        "Unexpected HTTP response code " + code + " for " + method + " " + url));
            }
        } catch (IOException e) {
            listener.onFail(e);
        }
    }

    /** Sets every entry of {@code map} as a request header on the current connection. */
    public void setRequestProperties(Map<String, String> map) {
        for (Map.Entry<String, String> header : map.entrySet()) {
            conn.setRequestProperty(header.getKey(), header.getValue());
        }
    }

    /**
     * Writes {@code body} as the request body, encoded as UTF-8.
     *
     * <p>Output is enabled on the connection first; without that,
     * {@code getOutputStream()} rejects the write. Failures are reported on
     * standard error and do not propagate.
     */
    public void setRequestBody(String body) {
        conn.setDoOutput(true);
        try (java.io.OutputStream out = conn.getOutputStream()) {
            out.write(body.getBytes("UTF-8"));
            out.flush();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** Sets a single request header on the current connection. */
    public void setRequestProperty(String type, String value) {
        conn.setRequestProperty(type, value);
    }
}

package Utils;

import java.util.ArrayList; 
import java.util.List; 
import java.util.regex.Matcher; 
import java.util.regex.Pattern;

/**
 * Regular-expression helpers that return {@code "Nothing!"} (single match)
 * or an empty list (all matches) when the pattern does not occur.
 */
public class RegexUtils {

    /**
     * Returns capture group {@code which} of the first match of
     * {@code patternStr} in {@code targetStr}.
     *
     * @param targetStr  text to search
     * @param patternStr regular expression
     * @param which      capture group index (0 is the whole match)
     * @return the group text, or {@code "Nothing!"} when there is no match
     */
    public static String RegexGroup(String targetStr, String patternStr, int which) {
        Pattern pattern = Pattern.compile(patternStr);
        Matcher matcher = pattern.matcher(targetStr);
        if (matcher.find()) {
            return matcher.group(which);
        }
        return "Nothing!";
    }

    /**
     * Returns capture group {@code which} of every match of
     * {@code patternStr} in {@code targetStr}, in order of occurrence.
     *
     * @return a list of group texts; empty when there is no match
     */
    public static List<String> RegexGroups(String targetStr, String patternStr, int which) {
        Pattern pattern = Pattern.compile(patternStr);
        Matcher matcher = pattern.matcher(targetStr);
        List<String> list = new ArrayList<String>();
        while (matcher.find()) {
            list.add(matcher.group(which));
        }
        return list;
    }

    /**
     * Returns the first full match of {@code patternStr} in {@code targetStr}.
     *
     * @return the matched text, or {@code "Nothing!"} when there is no match
     */
    public static String RegexString(String targetStr, String patternStr) {
        Pattern pattern = Pattern.compile(patternStr);
        Matcher matcher = pattern.matcher(targetStr);
        if (matcher.find()) {
            return matcher.group();
        }
        return "Nothing!";
    }
}

package Utils;

import java.io.BufferedReader; 
import java.io.BufferedWriter; 
import java.io.ByteArrayOutputStream; 
import java.io.File; 
import java.io.FileInputStream; 
import java.io.FileNotFoundException; 
import java.io.FileOutputStream; 
import java.io.FileReader; 
import java.io.FileWriter; 
import java.io.IOException; 
import java.io.InputStream; 
import java.io.InputStreamReader; 
import java.io.OutputStream; 
import java.io.OutputStreamWriter; 
import java.io.Reader; 
import java.io.UnsupportedEncodingException; 
import java.io.Writer;

/**
 * Helpers for creating readers/writers from loosely-typed sources and for
 * reading or writing whole strings.
 *
 * <p>Files opened from a path string, and all writers, use UTF-8. A
 * {@link File} passed to {@link #createBufferedReader(Object, String)} is read
 * through {@link FileReader}.
 *
 * Created by admin on 2016/2/18.
 */
public class StreamUtils {
    /** Charset name used for path-based readers and for all writers. */
    private static final String UTF_8 = "utf-8";

    /**
     * Reads the whole stream, decodes it as UTF-8 and closes the stream.
     *
     * @throws IOException if reading fails (the stream is closed in that case too)
     */
    public static String readFromStream(InputStream inputStream) throws IOException {
        try (InputStream in = inputStream) {
            ByteArrayOutputStream collected = new ByteArrayOutputStream();
            byte[] buffer = new byte[1024];
            int len;
            while ((len = in.read(buffer)) != -1) {
                collected.write(buffer, 0, len);
            }
            return collected.toString(UTF_8);
        }
    }

    /** Creates a {@link FileReader} for {@code file}. */
    public static FileReader createFileReader(File file) throws FileNotFoundException {
        return new FileReader(file);
    }

    /** Creates a {@link FileWriter} for {@code file}. */
    public static FileWriter createFileWriter(File file) throws IOException {
        return new FileWriter(file);
    }

    /**
     * Opens {@code obj} (a {@link File}) as a UTF-8 reader.
     *
     * @return the reader, or {@code null} when {@code obj} is not a file or
     *         the file cannot be opened (the failure is printed to standard error)
     */
    public static InputStreamReader createInputStreamReader(Object obj) {
        if (!(obj instanceof File)) {
            return null;
        }
        try {
            return openUtf8Reader((File) obj);
        } catch (IOException e) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Opens {@code obj} (a {@link File}) as a UTF-8 writer in append mode.
     *
     * @return the writer, or {@code null} when {@code obj} is not a file or
     *         the file cannot be opened (the failure is printed to standard error)
     */
    public static OutputStreamWriter createOutputStreamWriter(Object obj) {
        if (!(obj instanceof File)) {
            return null;
        }
        try {
            return openUtf8Appender((File) obj);
        } catch (IOException e) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Creates a buffered reader from a path string, {@link InputStream},
     * {@link File}, {@link Reader} or {@link BufferedReader}.
     *
     * @param obj the source
     * @param cd  charset name applied to an {@link InputStream} source;
     *            {@code null} selects the platform default. Ignored for other sources.
     * @return the reader ({@code obj} itself when it already is a
     *         {@link BufferedReader}), or {@code null} for unsupported types
     * @throws IOException if the underlying file cannot be opened or created
     */
    public static BufferedReader createBufferedReader(Object obj, String cd) throws IOException {
        // Must come before the generic Reader check, otherwise an already
        // buffered reader would be wrapped a second time.
        if (obj instanceof BufferedReader) {
            return (BufferedReader) obj;
        }
        if (obj instanceof String) {
            return new BufferedReader(openUtf8Reader(new File((String) obj)));
        }
        if (obj instanceof InputStream) {
            InputStream in = (InputStream) obj;
            InputStreamReader decoder =
                    (cd == null) ? new InputStreamReader(in) : new InputStreamReader(in, cd);
            return new BufferedReader(decoder);
        }
        if (obj instanceof File) {
            File file = (File) obj;
            if (!file.exists()) {
                file.createNewFile();
            }
            return new BufferedReader(createFileReader(file));
        }
        if (obj instanceof Reader) {
            return new BufferedReader((Reader) obj);
        }
        return null;
    }

    /**
     * Creates a buffered writer from a path string, {@link OutputStream},
     * {@link File}, {@link Writer} or {@link BufferedWriter}. Paths and files
     * are opened in append mode; byte-based targets are encoded as UTF-8.
     *
     * @return the writer ({@code obj} itself when it already is a
     *         {@link BufferedWriter}), or {@code null} for unsupported types
     * @throws IOException if the underlying file cannot be opened
     */
    public static BufferedWriter createBufferedWriter(Object obj) throws IOException {
        // Must come before the generic Writer check (see createBufferedReader).
        if (obj instanceof BufferedWriter) {
            return (BufferedWriter) obj;
        }
        if (obj instanceof String) {
            return new BufferedWriter(openUtf8Appender(new File((String) obj)));
        }
        if (obj instanceof OutputStream) {
            return new BufferedWriter(new OutputStreamWriter((OutputStream) obj, UTF_8));
        }
        if (obj instanceof File) {
            return new BufferedWriter(openUtf8Appender((File) obj));
        }
        if (obj instanceof Writer) {
            return new BufferedWriter((Writer) obj);
        }
        return null;
    }

    /** Receives the lines produced by {@link #getString(Object, OnGetStringListener)}. */
    public interface OnGetStringListener {
        /** Called once per line, without the line terminator. */
        void onGetString(String line);

        /** Called once after the last line has been delivered. */
        void onGeted();
    }

    /**
     * Reads {@code obj} line by line, passing each line to {@code listener},
     * then closes the reader. I/O failures are printed to standard error.
     */
    public static void getString(Object obj, OnGetStringListener listener) {
        try (BufferedReader br = createBufferedReader(obj, null)) {
            if (br == null) {
                return;
            }
            String line;
            while ((line = br.readLine()) != null) {
                listener.onGetString(line);
            }
            listener.onGeted();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Reads all of {@code obj} and returns it with every line terminated by
     * {@code "\n"}; the reader is closed afterwards. {@link InputStream}
     * sources are decoded as UTF-8.
     *
     * @return the text read so far; empty when the source is unsupported or
     *         cannot be opened (failures are printed to standard error)
     */
    public static String getString(Object obj) {
        StringBuilder text = new StringBuilder();
        try (BufferedReader br = createBufferedReader(obj, UTF_8)) {
            if (br != null) {
                String line;
                while ((line = br.readLine()) != null) {
                    text.append(line).append("\n");
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        return text.toString();
    }

    /**
     * Writes {@code str} to {@code obj} and closes the writer. I/O failures
     * are printed to standard error.
     */
    public static void writeString(Object obj, String str) {
        try (BufferedWriter bw = createBufferedWriter(obj)) {
            if (bw != null) {
                bw.write(str);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** Creates the parent directories of a file that does not exist yet. */
    private static void ensureParentDirs(File file) {
        if (!file.exists()) {
            // A relative path such as "requestheader.txt" has no parent.
            File parent = file.getParentFile();
            if (parent != null) {
                parent.mkdirs();
            }
        }
    }

    /** Opens {@code file} for reading as UTF-8. */
    private static InputStreamReader openUtf8Reader(File file) throws IOException {
        ensureParentDirs(file);
        return new InputStreamReader(new FileInputStream(file), UTF_8);
    }

    /** Opens {@code file} for appending as UTF-8. */
    private static OutputStreamWriter openUtf8Appender(File file) throws IOException {
        ensureParentDirs(file);
        return new OutputStreamWriter(new FileOutputStream(file, true), UTF_8);
    }
}

发送POST请求

发送POST请求和GET没有太大区别,只不过POST请求需要设置Request Body。在连接之前,先调用setDoOutput(true)允许输出,再得到输出流,写入Fiddler里面的Request Body数据就可以。

0 0