bootstrap+jsoup+jsp智联页面抓取系统

来源:互联网 发布:linux mysql 日志 编辑:程序博客网 时间:2024/06/04 19:15

bootstrap+jsoup+jsp智联页面抓取系统

1.文件列表


2.项目步骤:

1.准备所需jar包和文件

所需jar包:jsoup-1.8.1.jar
将 bootstrap 发行包中的 dist 文件夹整体复制到项目的 WebRoot 目录下

2.需要编写的文件源码:
Down.java:
package com.tzxy.download;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/**
 * Downloads a web page and extracts job-listing rows from it with jsoup.
 *
 * <p>The CSS class names used for extraction ("newlist", "gsmc", "zwmc", "gzdd",
 * "zwyx", "gxsj") are presumably those of the Zhaopin search-result page this
 * tool targets; verify them against the live page before relying on the output.
 */
public class Down {

    private static final Logger LOGGER = Logger.getLogger(Down.class.getName());

    /** Connect/read timeout; without one an unresponsive host blocks the caller forever. */
    private static final int TIMEOUT_MILLIS = 10000;

    /** Charset used when the caller passes no encoding. */
    private static final String DEFAULT_ENCODING = "UTF-8";

    /**
     * Fetches the content behind {@code url} as text.
     *
     * <p>Lines are joined with {@code "\n"} regardless of the line terminator the
     * server sent. Failures are logged, never thrown: a malformed URL yields an
     * empty string, and an I/O error yields whatever was read before it occurred.
     *
     * @param url      address to fetch; {@code null} or blank yields an empty string
     * @param encoding charset name used to decode the response; {@code null} or
     *                 empty falls back to UTF-8
     * @return the decoded text, possibly empty, never {@code null}
     */
    public static String getHtml(String url, String encoding) {
        StringBuilder content = new StringBuilder();
        if (url == null || url.trim().isEmpty()) {
            return content.toString();
        }
        String charset = (encoding == null || encoding.isEmpty()) ? DEFAULT_ENCODING : encoding;
        try {
            URLConnection connection = new URL(url.trim()).openConnection();
            connection.setConnectTimeout(TIMEOUT_MILLIS);
            connection.setReadTimeout(TIMEOUT_MILLIS);
            // The raw stream is declared as its own resource so that it is still
            // closed when the reader cannot be built (e.g. unsupported charset name).
            try (InputStream in = connection.getInputStream();
                    BufferedReader reader = new BufferedReader(new InputStreamReader(in, charset))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    content.append(line).append('\n');
                }
            }
        } catch (MalformedURLException e) {
            LOGGER.log(Level.WARNING, "Malformed URL: " + url, e);
        } catch (IOException e) {
            LOGGER.log(Level.WARNING, "Failed to read " + url, e);
        }
        return content.toString();
    }

    /**
     * Downloads the page at {@code url} and extracts one map per job row.
     *
     * <p>Every element with class "newlist" becomes one map holding the text of
     * its descendants, keyed as follows: "textTitle" (class "gsmc"), "jobName"
     * (class "zwmc"), "address" (class "gzdd"), "money" (class "zwyx") and
     * "date" (class "gxsj"). A missing descendant produces an empty string.
     *
     * @param url      address of the listing page
     * @param encoding charset name used to decode the page
     * @return the extracted rows in document order; empty when the page could not
     *         be fetched or contains no matching elements
     */
    public static List<HashMap<String, String>> getJobInfo(String url, String encoding) {
        Document page = Jsoup.parse(getHtml(url, encoding));
        // Each element carrying the "newlist" class is treated as one job row.
        Elements rows = page.getElementsByClass("newlist");
        List<HashMap<String, String>> jobs = new ArrayList<HashMap<String, String>>();
        for (Element row : rows) {
            HashMap<String, String> job = new HashMap<String, String>();
            job.put("textTitle", row.getElementsByClass("gsmc").text());
            job.put("jobName", row.getElementsByClass("zwmc").text());
            job.put("address", row.getElementsByClass("gzdd").text());
            job.put("money", row.getElementsByClass("zwyx").text());
            job.put("date", row.getElementsByClass("gxsj").text());
            jobs.add(job);
        }
        return jobs;
    }

    /**
     * Intentionally empty entry point; the class is used through its static methods.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
    }
}

index.jsp:
<%@ page language="java" import="java.util.*,com.tzxy.download.*" pageEncoding="UTF-8"%>
<%@taglib prefix="c"  uri="http://java.sun.com/jstl/core_rt"%>
<%--
  Search page of the scraper: reads the "url" request parameter, asks
  Down.getJobInfo to scrape it, and renders the resulting rows in a table.
  Results ("jobList") and the validation message ("msg") are passed to the
  markup below through session attributes.
--%>
<%
String path = request.getContextPath();
String basePath = request.getScheme()+"://"+request.getServerName()+":"+request.getServerPort()+path+"/";

String url = request.getParameter("url");
// Compare string contents, not references: the parameter is null on first
// load and "" when the form is submitted empty.
String target = (url == null) ? "" : url.trim();
String scheme = target.toLowerCase(Locale.ROOT);
if (target.isEmpty()) {
    session.removeAttribute("jobList");
    session.setAttribute("msg","请输入网址");
} else if (!scheme.startsWith("http://") && !scheme.startsWith("https://")) {
    // The URL is untrusted input; other schemes (e.g. file:) would let a visitor
    // read server-side resources. NOTE(review): internal http hosts remain
    // reachable - add a host allow-list if this is ever exposed publicly.
    session.removeAttribute("jobList");
    session.setAttribute("msg","请输入以 http:// 或 https:// 开头的网址");
} else {
    List<HashMap<String,String>> list=Down.getJobInfo(target,"utf-8");
    session.setAttribute("jobList",list);
}
%>
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
  <head>
    <base href="<%=basePath%>">
    <title>My JSP 'index.jsp' starting page</title>
    <meta http-equiv="pragma" content="no-cache">
    <meta http-equiv="cache-control" content="no-cache">
    <meta http-equiv="expires" content="0">
    <meta http-equiv="keywords" content="keyword1,keyword2,keyword3">
    <meta http-equiv="description" content="This is my page">
    <!--
    <link rel="stylesheet" type="text/css" href="styles.css">
    -->
    <link rel="stylesheet" href="dist/css/bootstrap.min.css" type="text/css"></link>
    <style type="text/css">
      .search{
        width:100%;
        height:200px;
        border:1px red solid;
      }
      .h{
        color: red;
        text-align: center;
      }
      .con{
        width:700px;
        height:50px;
        margin: auto;
      }
      .con .text{
        height:35px;
        width: 400px;
        padding-left: 10px;
      }
      .con .sub{
        width: 50px;
        height:35px;
      }
      .lab{
        color:red;
      }
      .con .text:focus {
        box-shadow:1px 1px 2px green;
        animation:shadow 3s linear infinite;
      }
      @keyframes shadow{
        from{ box-shadow:1px 1px 2px green,-1px -1px 2px green;}
        50%{box-shadow:0px 0px 0px green,-0px -0px 0px green;}
        to{box-shadow:1px 1px 2px green,-1px -1px 2px green;}
      }
    </style>
  </head>
  <body>
    <div class="search">
      <form action="index.jsp">
        <P align="center" style="font-size: 30px">智联网站抓取系统</P>
        <div class="con">请输入网址:<input name="url" type="text"  class="text"/>
          <input type="submit" class="sub"/>
          <label class="lab"><c:out value="${msg}"/></label>
        </div>
      </form>
    </div>
    <h2 class="h">搜索信息如下:</h2>
    <table class="table table-striped table-bordered table-hover">
      <%-- Scraped text is untrusted, so every cell is HTML-escaped with c:out. --%>
      <c:forEach items="${jobList}" var="a">
        <tr>
          <td><c:out value="${a.textTitle}"/></td>
          <td><c:out value="${a.jobName}"/></td>
          <td><c:out value="${a.money}"/></td>
          <td><c:out value="${a.address}"/></td>
          <td><c:out value="${a.date}"/></td>
        </tr>
      </c:forEach>
      <%-- The message is one-shot: drop it so it does not reappear on the next request. --%>
      <c:remove var="msg" scope="session"/>
    </table>
  </body>
</html>





0 0