Jsoup解析HTML

来源:互联网 发布:rt809f编程器怎么安装 编辑:程序博客网 时间:2024/05/23 22:17

1. 在解析HTML之前,需要先在项目中导入 jsoup-1.10.2.jar;示例代码还使用 log4j 记录日志,因此也需要导入 log4j 的 jar 包,并准备好 log4j.properties 配置文件。

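2. 解析HTML,代码如下(代码中用到的 Weather 是一个需要自行编写的实体类,本文没有给出;它需要提供 area、airTemperature、rainFall、relativeWet、windPower、windDirection、date 这几个属性对应的 getter/setter 方法):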

package com.od.cn;

import java.io.BufferedWriter;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.nio.charset.StandardCharsets;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;

import org.apache.log4j.Logger;
import org.apache.log4j.PropertyConfigurator;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/**
 * Downloads a weather page with jsoup, extracts one {@code Weather} record per
 * collapsible section, and writes the records to a text file as JSON.
 */
public class JsoupParserHtml {
    private static final Logger LOGGER = Logger.getLogger(JsoupParserHtml.class);

    /** Destination of the generated JSON document. */
    private static final String OUTPUT_FILE = "d:\\weather.txt";

    /** Highest cell index read from a section is 11, so at least 12 cells are needed. */
    private static final int REQUIRED_CELLS = 12;

    /**
     * Fetches {@code url} and converts every element of class {@code part_se}
     * whose {@code data-role} attribute is {@code collapsible} into a
     * {@code Weather} record.
     *
     * <p>The area name is the text of the section's {@code h1}; the remaining
     * fields are read from the {@code td} cells at indexes 1, 3, 5, 7, 9 and 11.
     * NOTE(review): presumably the even-indexed cells hold the field labels;
     * confirm against the live page markup.
     *
     * @param url address of the page to download
     * @return the parsed records; empty (never {@code null}) when the download
     *         fails or no section matches
     */
    private List<Weather> parserHtmlByHttp(String url) {
        List<Weather> weathers = new ArrayList<Weather>();
        try {
            Document document = Jsoup.connect(url).get();
            for (Element section : document.getElementsByClass("part_se")) {
                if (!"collapsible".equals(section.attr("data-role"))) {
                    continue;
                }
                String area = section.select("h1").text();
                Elements cells = section.select("td");
                if (cells.size() < REQUIRED_CELLS) {
                    // A malformed section must not abort the whole run with an
                    // IndexOutOfBoundsException; skip it and keep the rest.
                    LOGGER.warn("跳过单元格数量不足的区域:" + area);
                    continue;
                }
                Weather weather = new Weather();
                weather.setArea(area);
                weather.setAirTemperature(cells.get(1).text());
                weather.setRainFall(cells.get(3).text());
                weather.setRelativeWet(cells.get(5).text());
                weather.setWindPower(cells.get(7).text());
                weather.setWindDirection(cells.get(9).text());
                weather.setDate(cells.get(11).text());
                weathers.add(weather);
            }
            // Only report success when the download and parse really succeeded.
            LOGGER.info("成功获取网页数据");
        } catch (IOException e) {
            // Pass the exception itself so the stack trace is not lost.
            LOGGER.error("解析网页异常:" + e.getMessage(), e);
        }
        return weathers;
    }

    /**
     * Serializes {@code weathers} as a JSON document and writes it to
     * {@link #OUTPUT_FILE} encoded as UTF-8. Does nothing when
     * {@code weathers} is {@code null}. I/O failures are logged, not thrown.
     *
     * @param weathers records to save; may be {@code null}
     */
    private void saveFile(List<Weather> weathers) {
        if (weathers == null) {
            return;
        }
        String json = toJson(weathers);
        // An explicit charset keeps the Chinese text readable regardless of the
        // platform default encoding.
        try (BufferedWriter writer = new BufferedWriter(
                new OutputStreamWriter(new FileOutputStream(OUTPUT_FILE), StandardCharsets.UTF_8))) {
            writer.write(json);
        } catch (IOException e) {
            LOGGER.error("保存文件异常:" + e.getMessage(), e);
            return;
        }
        // Logged after the writer is closed, i.e. once the data has been flushed.
        LOGGER.info("已保存文件");
    }

    /**
     * Builds {@code {"date":"yyyy-MM-dd","data":[{...},...]}} where the date is
     * today's date and each array element holds the fields of one record.
     */
    private static String toJson(List<Weather> weathers) {
        StringBuilder json = new StringBuilder();
        json.append("{\"date\":");
        appendJsonString(json, new SimpleDateFormat("yyyy-MM-dd").format(new Date()));
        json.append(",\"data\":[");
        for (int i = 0; i < weathers.size(); i++) {
            if (i > 0) {
                json.append(',');
            }
            Weather weather = weathers.get(i);
            json.append('{');
            appendField(json, "area", weather.getArea());
            json.append(',');
            appendField(json, "airTemperature", weather.getAirTemperature());
            json.append(',');
            appendField(json, "rainFall", weather.getRainFall());
            json.append(',');
            appendField(json, "relativeWet", weather.getRelativeWet());
            json.append(',');
            appendField(json, "windPower", weather.getWindPower());
            json.append(',');
            appendField(json, "windDirection", weather.getWindDirection());
            json.append(',');
            appendField(json, "dateTime", weather.getDate());
            json.append('}');
        }
        json.append("]}");
        return json.toString();
    }

    /** Appends {@code "name":"value"}; a {@code null} value is written as the text {@code null}. */
    private static void appendField(StringBuilder out, String name, Object value) {
        appendJsonString(out, name);
        out.append(':');
        appendJsonString(out, String.valueOf(value));
    }

    /** Appends {@code value} as a quoted JSON string, escaping quotes, backslashes and control characters. */
    private static void appendJsonString(StringBuilder out, String value) {
        out.append('"');
        for (int i = 0; i < value.length(); i++) {
            char c = value.charAt(i);
            switch (c) {
                case '"':
                    out.append("\\\"");
                    break;
                case '\\':
                    out.append("\\\\");
                    break;
                case '\n':
                    out.append("\\n");
                    break;
                case '\r':
                    out.append("\\r");
                    break;
                case '\t':
                    out.append("\\t");
                    break;
                default:
                    if (c < 0x20) {
                        out.append(String.format("\\u%04x", (int) c));
                    } else {
                        out.append(c);
                    }
            }
        }
        out.append('"');
    }

    /**
     * Configures log4j, downloads the weather page and saves the parsed
     * records to {@link #OUTPUT_FILE}.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        PropertyConfigurator.configure("WebRoot/conf/log4j.properties");
        LOGGER.info("启动程序");
        JsoupParserHtml jph = new JsoupParserHtml();
        List<Weather> weathers = jph.parserHtmlByHttp(
                "http://www.zhpmsc.org.cn/WeChat/monitorController/zoneSk?winzoom=1#");
        jph.saveFile(weathers);
        LOGGER.info("程序结束");
    }
}


0 0