使用Socket抓取网页源代码

来源:互联网 发布:scilab矩阵运算 编辑:程序博客网 时间:2024/05/16 09:57
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.net.InetAddress;
import java.net.Socket;
import java.nio.charset.StandardCharsets;

/**
 * Fetches a web page over a raw {@link Socket} and prints the server's
 * response to standard output.
 *
 * <p>The response is printed exactly as read, line by line: status line,
 * headers and body. No HTTP decoding (chunked transfer, compression,
 * redirects) is performed.
 */
public class HttpDemo {

    /** Default port for plain-text HTTP. */
    private static final int HTTP_PORT = 80;

    /** HTTP header lines must end with CRLF regardless of the local platform. */
    private static final String CRLF = "\r\n";

    /**
     * Sends {@code GET /} to a fixed host and echoes the response to the console.
     *
     * @param args ignored
     * @throws IOException if the host cannot be resolved, the connection fails,
     *                     or the request cannot be written / the response read
     */
    public static void main(String[] args) throws IOException {
        // Host name of the page to fetch. A domain such as baidu.com serves many
        // hosts, e.g. tieba.baidu.com and map.baidu.com.
        String host = "map.baidu.com";

        // Resolve the host name to an IP address.
        InetAddress ip = InetAddress.getByName(host);

        // try-with-resources guarantees the socket and both streams are closed
        // even when resolution succeeded but reading or writing fails.
        try (Socket socket = new Socket(ip, HTTP_PORT);
             PrintWriter request = new PrintWriter(
                     new OutputStreamWriter(socket.getOutputStream(), StandardCharsets.US_ASCII));
             // NOTE(review): UTF-8 is assumed for the response; the real charset is
             // whatever the server declares in its Content-Type header.
             BufferedReader response = new BufferedReader(
                     new InputStreamReader(socket.getInputStream(), StandardCharsets.UTF_8))) {

            // "/" requests the root page.
            request.print("GET / HTTP/1.1" + CRLF);
            request.print("Host: " + host + CRLF);
            request.print("Accept: text/html" + CRLF);
            // Without this an HTTP/1.1 server may keep the connection open, and
            // the read loop below would block waiting for an end-of-stream
            // that never arrives.
            request.print("Connection: close" + CRLF);
            request.print(CRLF); // blank line terminates the header section
            request.flush();

            // PrintWriter never throws on write; surface a failed send explicitly.
            if (request.checkError()) {
                throw new IOException("Failed to send HTTP request to " + host);
            }

            // Echo the response to the console until the server closes the stream.
            String line;
            while ((line = response.readLine()) != null) {
                System.out.println(line);
            }
        }
    }
}
                                             
0 0
原创粉丝点击