Jsoup实现HelloWorld

来源:互联网 发布:淘宝二手可以退货吗 编辑:程序博客网 时间:2024/06/06 17:32


使用 HttpClient 抓取百度首页,再通过 Jsoup 提取页面标题




创建一个 Maven 项目

在 pom.xml 中添加以下依赖

  <!-- Apache HttpClient: used to send the HTTP GET request -->
  <dependency>
    <groupId>org.apache.httpcomponents</groupId>
    <artifactId>httpclient</artifactId>
    <version>4.5.3</version>
  </dependency>

  <!-- jsoup: used to parse the downloaded HTML -->
  <dependency>
    <groupId>org.jsoup</groupId>
    <artifactId>jsoup</artifactId>
    <version>1.9.2</version>
  </dependency>


案例


package com.gcx.test;import org.apache.http.HttpEntity;import org.apache.http.client.methods.CloseableHttpResponse;import org.apache.http.client.methods.HttpGet;import org.apache.http.impl.client.CloseableHttpClient;import org.apache.http.impl.client.HttpClients;import org.apache.http.util.EntityUtils;import org.jsoup.Jsoup;import org.jsoup.nodes.Document;import org.jsoup.nodes.Element;import org.jsoup.select.Elements;/** * Hello world! * */public class App {    public static void main(String[] args) throws Exception{        CloseableHttpClient httpclient = HttpClients.createDefault(); // 创建httpclient实例        HttpGet httpget = new HttpGet("https://www.baidu.com"); // 创建httpget实例        CloseableHttpResponse response = httpclient.execute(httpget); // 执行get请求        HttpEntity entity=response.getEntity(); // 获取返回实体        String webContent= EntityUtils.toString(entity, "utf-8");        // System.out.println("网页内容:"+webContent); // 指定编码打印网页内容        response.close(); // 关闭流和释放系统资源        Document doc= Jsoup.parse(webContent);        Elements elements=doc.getElementsByTag("title");        Element element=elements.get(0);        String title=element.text();        System.out.println("网页标题是:"+title);    }}


运行输出: