抓取车辆信息

来源:互联网 发布:钱钟书围城赏析 知乎 编辑:程序博客网 时间:2024/04/29 13:56

抓取汽车之家的车辆信息和车辆图片


CarBrands.java

/* * @author : TF-BJ-C064 * @creation : 2014-8-19 上午9:57:38 * @description :  * */package com.car;import java.util.ArrayList;import java.util.List;import java.util.regex.Matcher;import java.util.regex.Pattern;public class CarBrands {private String name;private String url;private String bid;private List<CarSerie> series = new ArrayList<CarSerie>();public void add(CarSerie cb ){series.add(cb);}public CarBrands(){}public CarBrands(String name, String href) {this.name = name;this.setUrl(href);}public String getName() {return name;}public void setName(String name) {this.name = name;}public String getUrl() {return url;}//e.g: http://car.autohome.com.cn/price/brand-22.html ==> bid = b22 public void setUrl(String url) {this.url = url;if( url != null ){String regex = "-(\\d+).html";Pattern pattern = Pattern.compile(regex);Matcher matcher = pattern.matcher(url); if (matcher.find()) {String group = matcher.group(1);this.setBid(group);}else {//System.out.println("no matches!!");} }}public String getBid() {return bid;}public void setBid(String bid) {this.bid = bid;}public List<CarSerie> getSeries() {return series;}public void setSeries(List<CarSerie> series) {this.series = series;}}


CarModels.java

/** @author : TF-BJ-C064* @creation : 2014-8-19 下午2:26:13* @description : **/package com.car;import java.util.ArrayList;import java.util.List;import java.util.regex.Matcher;import java.util.regex.Pattern;//车辆类型public class CarModels {private String name;private String url;private String price;//指导价格private String level;//级别private String structure;//车身结构private String Engine;//发动机private String Transmission;//变速箱private String size;//车身尺寸private String imageurl;private List<CarSerieImage> images = new ArrayList<CarSerieImage>();public boolean add(CarSerieImage imageurl){return images.add(imageurl);}public List<CarSerieImage> getImages() {return images;}public void setImages(List<CarSerieImage> images) {this.images = images;}public String getName() {return name;}public void setName(String name) {this.name = name;}//e.g parurl =  http://www.autohome.com.cn/buycar.html?specid=19460&#pvareaid=101622public void parseAsetUrl(String parurl){if(parurl==null)return ;String regex = "specid=(\\d+)&";Pattern pattern = Pattern.compile(regex);Matcher matcher = pattern.matcher(parurl); int pagesum = 0;if (matcher.find()) {String group = matcher.group(1);this.setUrl(group);}}public String getUrl() {return url;}public void setUrl(String url) {this.url = url;}public String getPrice() {return price;}public void setPrice(String price) {this.price = price;}public String getLevel() {return level;}public void setLevel(String level) {this.level = level;}public String getStructure() {return structure;}public void setStructure(String structure) {this.structure = structure;}public String getEngine() {return Engine;}public void setEngine(String engine) {Engine = engine;}public String getTransmission() {return Transmission;}public void setTransmission(String transmission) {Transmission = transmission;}public String getImageurl() {return imageurl;}public void setImageurl(String imageurl) {this.imageurl = imageurl;}public String getSize() {return size;}public void setSize(String size) {this.size 
= size;}}


CarSerie.java

/* * @author : TF-BJ-C064 * @creation : 2014-8-19 上午11:48:14 * @description :  * */package com.car;import java.util.ArrayList;import java.util.List;public class CarSerie {private String name;private String url;private String price;//指导价格private String level;//级别private String structure;//车身结构private String Engine;//发动机private String Transmission;//变速箱private String extInfo="";private String extInfoHtml;private List<CarYear> carYearList = new ArrayList<CarYear>();public boolean add(CarYear cy){return this.carYearList.add(cy);}public CarSerie(){}public CarSerie(String name, String href){this.name = name;// http://car.autohome.com.cn/pic/series/66.html;int index = href.lastIndexOf(".html");this.url = href ; //href.substring(0, index) + "-1.html";}public String getName() {return name;}public void setName(String name) {this.name = name;}public String getUrl() {return url;}public void setUrl(String url) {this.url = url;}public List<CarYear> getCarYearList() {return carYearList;}public void setCarYearList(List<CarYear> carYearList) {this.carYearList = carYearList;}public String getPrice() {return price;}public void setPrice(String price) {this.price = price;}public String getLevel() {return level;}public void setLevel(String level) {this.level = level;}public String getStructure() {return structure;}public void setStructure(String structure) {this.structure = structure;}public String getEngine() {return Engine;}public void setEngine(String engine) {Engine = engine;}public String getTransmission() {return Transmission;}public void setTransmission(String transmission) {Transmission = transmission;}public String getExtInfo() {return extInfo;}public void addExtInfo(String extInfoIn) {if(extInfoIn==null)return ;if(this.extInfo!=null && !extInfo.trim().isEmpty())this.extInfo += ", ";this.extInfo += extInfoIn;}public void setExtInfo(String extInfoIn) {if(extInfoIn!=null)this.extInfo = extInfo;}public String getExtInfoHtml() {return extInfoHtml;}public void setExtInfoHtml(String 
extInfoHtml) {this.extInfoHtml = extInfoHtml;}}


CarSerieImage.java

/** @author : TF-BJ-C064* @creation : 2014-8-19 上午11:52:54* @description : **/package com.car;public class CarSerieImage {private String title ;private String src ;public CarSerieImage(){}public CarSerieImage(String title, String src){this.title = title;this.src = src;}public String getTitle() {if(title==null || title.trim().isEmpty())return ""+System.currentTimeMillis();return title;}public void setTitle(String title) {this.title = title;}public String getSrc() {return src;}public void setSrc(String src) {this.src = src;}}


CarTree.java

/*
 * @author      : TF-BJ-C064
 * @creation    : 2014-8-19 9:59:06 AM
 * @description :
 */
package com.car;

import java.util.ArrayList;
import java.util.List;

/**
 * Container for the list of {@link CarBrands} entries.
 */
public class CarTree {

    private List<CarBrands> tree = new ArrayList<CarBrands>();

    public List<CarBrands> getTree() {
        return this.tree;
    }

    public void setTree(List<CarBrands> tree) {
        this.tree = tree;
    }

    /**
     * Appends one brand to the list.
     *
     * @return the result of {@link List#add(Object)}
     */
    public boolean add(CarBrands carbs) {
        boolean added = this.tree.add(carbs);
        return added;
    }
}


CarYear.java

/** @author : TF-BJ-C064* @creation : 2014-8-19 下午2:48:56* @description : **/package com.car;import java.util.ArrayList;import java.util.List;// 车辆款式,如2013款public class CarYear {private String name;private List<CarModels> carModels = new ArrayList<CarModels>();public boolean add(CarModels cm){return this.carModels.add(cm);}public CarYear(){};public CarYear(String name){this.name = name;}public String getName() {return name;}public void setName(String name) {this.name = name;}public List<CarModels> getCarModels() {return carModels;}public void setCarModels(List<CarModels> carModels) {this.carModels = carModels;}}


QCZJmain.java

/* * @author : TF-BJ-C064 * @creation : 2014-8-19 上午9:31:38 * @description :  * */package com.car;import java.io.BufferedReader;import java.io.File;import java.io.FileInputStream;import java.io.FileNotFoundException;import java.io.FileOutputStream;import java.io.IOException;import java.io.InputStream;import java.io.InputStreamReader;import java.text.SimpleDateFormat;import java.util.Date;import org.apache.commons.httpclient.params.HttpMethodParams;import org.apache.http.HttpResponse;import org.apache.http.client.ClientProtocolException;import org.apache.http.client.HttpClient;import org.apache.http.client.methods.HttpGet;import org.apache.http.impl.client.DefaultHttpClient;import org.apache.http.params.CoreConnectionPNames;import org.apache.http.util.EntityUtils;import org.jsoup.Jsoup;import org.jsoup.nodes.Document;import org.jsoup.nodes.Element;import org.jsoup.select.Elements;import org.xvolks.jnative.exceptions.NativeException;import sun.audio.AudioPlayer;import sun.audio.AudioStream;/** * 抓取汽车之家汽车数据 * <p>Title: QCZJmain</p> * <p>Description:  * args: * args[0]: 车辆信息存储根路径 * args[1]: 是否下载图片,默认false * args[2]:是否抓取车辆款式,default=true *  e.g: *   run.bat  echo off color 0ajava -jar CarInfoCrawl.jar D:/craw/car20140821 truepause * </p> * <p>Company: </p>  * @author  * @date2014-8-21 */public class QCZJmain {public static boolean debug = false;public static int MAX_DEBUG_LINE = 5;private String DIR_ROOT = "data/cars";public static String BASE_URL = "http://car.autohome.com.cn";public static String CAR_SPEC_URL = "http://www.autohome.com.cn/spec/"; // http://www.autohome.com.cn/spec/19390public static String Item_URL = BASE_URL+"/AsLeftMenu/As_LeftListNew.ashx?"; // AsLeftMenu/As_LeftListNew.ashx?typeId=1&brandId=34&fctId=0&seriesId=0public String carhtml = "<div class='cartree-letter'>A</div><ul><li  id='b34'><h3>" +"<a href='/price/brand-34.html'><i class='icon10 icon10-sjr'></i>阿尔法罗密欧<em>(3)</em></a></h3></li><li id='b35'><h3><a href='/price/brand-35.html'><i 
class='icon10 icon10-sjr'></i>阿斯顿·马丁<em>(38)</em></a></h3></li><li id='b33'><h3><a href='/price/brand-33.html'><i class='icon10 icon10-sjr'></i>奥迪<em>(523)</em></a></h3></li></ul><div class='cartree-letter'>B</div><ul><li  id='b140'><h3><a href='/price/brand-140.html'><i class='icon10 icon10-sjr'></i>巴博斯<em>(9)</em></a></h3></li><li id='b120'><h3><a href='/price/brand-120.html'><i class='icon10 icon10-sjr'></i>宝骏<em>(82)</em></a></h3></li><li id='b15'><h3><a href='/price/brand-15.html'><i class='icon10 icon10-sjr'></i>宝马<em>(537)</em></a></h3></li><li id='b40'><h3><a href='/price/brand-40.html'><i class='icon10 icon10-sjr'></i>保时捷<em>(148)</em></a></h3></li><li id='b27'><h3><a href='/price/brand-27.html'><i class='icon10 icon10-sjr'></i>北京汽车<em>(20)</em></a></h3></li><li id='b203'><h3><a href='/price/brand-203.html'><i class='icon10 icon10-sjr'></i>北汽幻速<em>(8)</em></a></h3></li><li id='b143'><h3><a href='/price/brand-143.html'><i class='icon10 icon10-sjr'></i>北汽威旺<em>(50)</em></a></h3></li><li id='b208'><h3><a href='/price/brand-208.html'><i class='icon10 icon10-sjr'></i>北汽新能源<em>(3)</em></a></h3></li><li id='b154'><h3><a href='/price/brand-154.html'><i class='icon10 icon10-sjr'></i>北汽制造<em>(29)</em></a></h3></li><li id='b36'><h3><a href='/price/brand-36.html'><i class='icon10 icon10-sjr'></i>奔驰<em>(399)</em></a></h3></li><li id='b95'><h3><a href='/price/brand-95.html'><i class='icon10 icon10-sjr'></i>奔腾<em>(103)</em></a></h3></li><li id='b14'><h3><a href='/price/brand-14.html'><i class='icon10 icon10-sjr'></i>本田<em>(265)</em></a></h3></li><li id='b75'><h3><a href='/price/brand-75.html'><i class='icon10 icon10-sjr'></i>比亚迪<em>(251)</em></a></h3></li><li id='b13'><h3><a href='/price/brand-13.html'><i class='icon10 icon10-sjr'></i>标致<em>(292)</em></a></h3></li><li id='b38'><h3><a href='/price/brand-38.html'><i class='icon10 icon10-sjr'></i>别克<em>(266)</em></a></h3></li><li id='b39'><h3><a href='/price/brand-39.html'><i class='icon10 
icon10-sjr'></i>宾利<em>(36)</em></a></h3></li><li id='b37'><h3><a href='/price/brand-37.html'><i class='icon10 icon10-sjr'></i>布加迪<em>(3)</em></a></h3></li></ul><div class='cartree-letter'>C</div><ul><li  id='b79'><h3><a href='/price/brand-79.html'><i class='icon10 icon10-sjr'></i>昌河<em>(49)</em></a></h3></li><li id='b76'><h3><a href='/price/brand-76.html'><i class='icon10 icon10-sjr'></i>长安<em>(196)</em></a></h3></li><li id='b163'><h3><a href='/price/brand-163.html'><i class='icon10 icon10-sjr'></i>长安商用<em>(101)</em></a></h3></li><li id='b77'><h3><a href='/price/brand-77.html'><i class='icon10 icon10-sjr'></i>长城<em>(273)</em></a></h3></li><li id='b196'><h3><a href='/price/brand-196.html'><i class='icon10 icon10-sjr'></i>成功汽车<em>(7)</em></a></h3></li></ul><div class='cartree-letter'>D</div><ul><li  id='b169'><h3><a href='/price/brand-169.html'><i class='icon10 icon10-sjr'></i>DS<em>(29)</em></a></h3></li><li id='b92'><h3><a href='/price/brand-92.html'><i class='icon10 icon10-sjr'></i>大发<em>(13)</em></a></h3></li><li id='b1'><h3><a href='/price/brand-1.html'><i class='icon10 icon10-sjr'></i>大众<em>(863)</em></a></h3></li><li id='b41'><h3><a href='/price/brand-41.html'><i class='icon10 icon10-sjr'></i>道奇<em>(21)</em></a></h3></li><li id='b32'><h3><a href='/price/brand-32.html'><i class='icon10 icon10-sjr'></i>东风<em>(33)</em></a></h3></li><li id='b187'><h3><a href='/price/brand-187.html'><i class='icon10 icon10-sjr'></i>东风风度<em>(114)</em></a></h3></li><li id='b113'><h3><a href='/price/brand-113.html'><i class='icon10 icon10-sjr'></i>东风风神<em>(86)</em></a></h3></li><li id='b165'><h3><a href='/price/brand-165.html'><i class='icon10 icon10-sjr'></i>东风风行<em>(253)</em></a></h3></li><li id='b142'><h3><a href='/price/brand-142.html'><i class='icon10 icon10-sjr'></i>东风小康<em>(71)</em></a></h3></li><li id='b81'><h3><a href='/price/brand-81.html'><i class='icon10 icon10-sjr'></i>东南<em>(133)</em></a></h3></li></ul><div class='cartree-letter'>F</div><ul><li  id='b42'><h3><a 
href='/price/brand-42.html'><i class='icon10 icon10-sjr'></i>法拉利<em>(18)</em></a></h3></li><li id='b11'><h3><a href='/price/brand-11.html'><i class='icon10 icon10-sjr'></i>菲亚特<em>(89)</em></a></h3></li><li id='b3'><h3><a href='/price/brand-3.html'><i class='icon10 icon10-sjr'></i>丰田<em>(547)</em></a></h3></li><li id='b141'><h3><a href='/price/brand-141.html'><i class='icon10 icon10-sjr'></i>福迪<em>(11)</em></a></h3></li><li id='b197'><h3><a href='/price/brand-197.html'><i class='icon10 icon10-sjr'></i>福汽启腾<em>(4)</em></a></h3></li><li id='b8'><h3><a href='/price/brand-8.html'><i class='icon10 icon10-sjr'></i>福特<em>(312)</em></a></h3></li><li id='b96'><h3><a href='/price/brand-96.html'><i class='icon10 icon10-sjr'></i>福田<em>(276)</em></a></h3></li></ul><div class='cartree-letter'>G</div><ul><li  id='b112'><h3><a href='/price/brand-112.html'><i class='icon10 icon10-sjr'></i>GMC<em>(35)</em></a></h3></li><li id='b152'><h3><a href='/price/brand-152.html'><i class='icon10 icon10-sjr'></i>观致<em>(10)</em></a></h3></li><li id='b116'><h3><a href='/price/brand-116.html'><i class='icon10 icon10-sjr'></i>光冈<em>(3)</em></a></h3></li><li id='b82'><h3><a href='/price/brand-82.html'><i class='icon10 icon10-sjr'></i>广汽传祺<em>(69)</em></a></h3></li><li id='b108'><h3><a href='/price/brand-108.html'><i class='icon10 icon10-sjr'></i>广汽吉奥<em>(136)</em></a></h3></li></ul><div class='cartree-letter'>H</div><ul><li  id='b24'><h3><a href='/price/brand-24.html'><i class='icon10 icon10-sjr'></i>哈飞<em>(75)</em></a></h3></li><li id='b181'><h3><a href='/price/brand-181.html'><i class='icon10 icon10-sjr'></i>哈弗<em>(210)</em></a></h3></li><li id='b150'><h3><a href='/price/brand-150.html'><i class='icon10 icon10-sjr'></i>海格<em>(35)</em></a></h3></li><li id='b86'><h3><a href='/price/brand-86.html'><i class='icon10 icon10-sjr'></i>海马<em>(245)</em></a></h3></li><li id='b43'><h3><a href='/price/brand-43.html'><i class='icon10 icon10-sjr'></i>悍马<em>(5)</em></a></h3></li><li id='b164'><h3><a 
href='/price/brand-164.html'><i class='icon10 icon10-sjr'></i>恒天<em>(15)</em></a></h3></li><li id='b91'><h3><a href='/price/brand-91.html'><i class='icon10 icon10-sjr'></i>红旗<em>(11)</em></a></h3></li><li id='b85'><h3><a href='/price/brand-85.html'><i class='icon10 icon10-sjr'></i>华普<em>(41)</em></a></h3></li><li id='b87'><h3><a href='/price/brand-87.html'><i class='icon10 icon10-sjr'></i>华泰<em>(108)</em></a></h3></li><li id='b97'><h3><a href='/price/brand-97.html'><i class='icon10 icon10-sjr'></i>黄海<em>(54)</em></a></h3></li></ul><div class='cartree-letter'>J</div><ul><li  id='b46'><h3><a href='/price/brand-46.html'><i class='icon10 icon10-sjr'></i>Jeep<em>(155)</em></a></h3></li><li id='b25'><h3><a href='/price/brand-25.html'><i class='icon10 icon10-sjr'></i>吉利汽车<em>(437)</em></a></h3></li><li id='b84'><h3><a href='/price/brand-84.html'><i class='icon10 icon10-sjr'></i>江淮<em>(411)</em></a></h3></li><li id='b119'><h3><a href='/price/brand-119.html'><i class='icon10 icon10-sjr'></i>江铃<em>(79)</em></a></h3></li><li id='b210'><h3><a href='/price/brand-210.html'><i class='icon10 icon10-sjr'></i>江铃集团轻汽<em>(12)</em></a></h3></li><li id='b44'><h3><a href='/price/brand-44.html'><i class='icon10 icon10-sjr'></i>捷豹<em>(86)</em></a></h3></li><li id='b83'><h3><a href='/price/brand-83.html'><i class='icon10 icon10-sjr'></i>金杯<em>(198)</em></a></h3></li><li id='b145'><h3><a href='/price/brand-145.html'><i class='icon10 icon10-sjr'></i>金龙<em>(56)</em></a></h3></li><li id='b175'><h3><a href='/price/brand-175.html'><i class='icon10 icon10-sjr'></i>金旅<em>(7)</em></a></h3></li><li id='b151'><h3><a href='/price/brand-151.html'><i class='icon10 icon10-sjr'></i>九龙<em>(16)</em></a></h3></li></ul><div class='cartree-letter'>K</div><ul><li  id='b109'><h3><a href='/price/brand-109.html'><i class='icon10 icon10-sjr'></i>KTM<em>(1)</em></a></h3></li><li id='b156'><h3><a href='/price/brand-156.html'><i class='icon10 icon10-sjr'></i>卡尔森<em>(4)</em></a></h3></li><li id='b199'><h3><a 
href='/price/brand-199.html'><i class='icon10 icon10-sjr'></i>卡威<em>(4)</em></a></h3></li><li id='b101'><h3><a href='/price/brand-101.html'><i class='icon10 icon10-sjr'></i>开瑞<em>(98)</em></a></h3></li><li id='b47'><h3><a href='/price/brand-47.html'><i class='icon10 icon10-sjr'></i>凯迪拉克<em>(114)</em></a></h3></li><li id='b100'><h3><a href='/price/brand-100.html'><i class='icon10 icon10-sjr'></i>科尼赛克<em>(4)</em></a></h3></li><li id='b9'><h3><a href='/price/brand-9.html'><i class='icon10 icon10-sjr'></i>克莱斯勒<em>(27)</em></a></h3></li></ul><div class='cartree-letter'>L</div><ul><li  id='b48'><h3><a href='/price/brand-48.html'><i class='icon10 icon10-sjr'></i>兰博基尼<em>(21)</em></a></h3></li><li id='b118'><h3><a href='/price/brand-118.html'><i class='icon10 icon10-sjr'></i>劳伦士<em>(6)</em></a></h3></li><li id='b54'><h3><a href='/price/brand-54.html'><i class='icon10 icon10-sjr'></i>劳斯莱斯<em>(17)</em></a></h3></li><li id='b215'><h3><a href='/price/brand-215.html'><i class='icon10 icon10-sjr'></i>雷丁<em>(5)</em></a></h3></li><li id='b52'><h3><a href='/price/brand-52.html'><i class='icon10 icon10-sjr'></i>雷克萨斯<em>(148)</em></a></h3></li><li id='b10'><h3><a href='/price/brand-10.html'><i class='icon10 icon10-sjr'></i>雷诺<em>(99)</em></a></h3></li><li id='b124'><h3><a href='/price/brand-124.html'><i class='icon10 icon10-sjr'></i>理念<em>(15)</em></a></h3></li><li id='b80'><h3><a href='/price/brand-80.html'><i class='icon10 icon10-sjr'></i>力帆<em>(154)</em></a></h3></li><li id='b89'><h3><a href='/price/brand-89.html'><i class='icon10 icon10-sjr'></i>莲花汽车<em>(60)</em></a></h3></li><li id='b78'><h3><a href='/price/brand-78.html'><i class='icon10 icon10-sjr'></i>猎豹汽车<em>(90)</em></a></h3></li><li id='b51'><h3><a href='/price/brand-51.html'><i class='icon10 icon10-sjr'></i>林肯<em>(15)</em></a></h3></li><li id='b53'><h3><a href='/price/brand-53.html'><i class='icon10 icon10-sjr'></i>铃木<em>(359)</em></a></h3></li><li id='b204'><h3><a href='/price/brand-204.html'><i class='icon10 
icon10-sjr'></i>陆地方舟<em>(7)</em></a></h3></li><li id='b88'><h3><a href='/price/brand-88.html'><i class='icon10 icon10-sjr'></i>陆风<em>(131)</em></a></h3></li><li id='b49'><h3><a href='/price/brand-49.html'><i class='icon10 icon10-sjr'></i>路虎<em>(145)</em></a></h3></li><li id='b50'><h3><a href='/price/brand-50.html'><i class='icon10 icon10-sjr'></i>路特斯<em>(11)</em></a></h3></li></ul><div class='cartree-letter'>M</div><ul><li  id='b20'><h3><a href='/price/brand-20.html'><i class='icon10 icon10-sjr'></i>MG<em>(120)</em></a></h3></li><li id='b56'><h3><a href='/price/brand-56.html'><i class='icon10 icon10-sjr'></i>MINI<em>(75)</em></a></h3></li><li id='b58'><h3><a href='/price/brand-58.html'><i class='icon10 icon10-sjr'></i>马自达<em>(176)</em></a></h3></li><li id='b57'><h3><a href='/price/brand-57.html'><i class='icon10 icon10-sjr'></i>玛莎拉蒂<em>(27)</em></a></h3></li><li id='b55'><h3><a href='/price/brand-55.html'><i class='icon10 icon10-sjr'></i>迈巴赫<em>(4)</em></a></h3></li><li id='b129'><h3><a href='/price/brand-129.html'><i class='icon10 icon10-sjr'></i>迈凯伦<em>(8)</em></a></h3></li><li id='b168'><h3><a href='/price/brand-168.html'><i class='icon10 icon10-sjr'></i>摩根<em>(11)</em></a></h3></li></ul><div class='cartree-letter'>N</div><ul><li  id='b130'><h3><a href='/price/brand-130.html'><i class='icon10 icon10-sjr'></i>纳智捷<em>(44)</em></a></h3></li><li id='b213'><h3><a href='/price/brand-213.html'><i class='icon10 icon10-sjr'></i>南京金龙<em>(5)</em></a></h3></li></ul><div class='cartree-letter'>O</div><ul><li  id='b60'><h3><a href='/price/brand-60.html'><i class='icon10 icon10-sjr'></i>讴歌<em>(27)</em></a></h3></li><li id='b59'><h3><a href='/price/brand-59.html'><i class='icon10 icon10-sjr'></i>欧宝<em>(48)</em></a></h3></li><li id='b146'><h3><a href='/price/brand-146.html'><i class='icon10 icon10-sjr'></i>欧朗<em>(10)</em></a></h3></li></ul><div class='cartree-letter'>Q</div><ul><li  id='b26'><h3><a href='/price/brand-26.html'><i class='icon10 
icon10-sjr'></i>奇瑞<em>(429)</em></a></h3></li><li id='b122'><h3><a href='/price/brand-122.html'><i class='icon10 icon10-sjr'></i>启辰<em>(32)</em></a></h3></li><li id='b62'><h3><a href='/price/brand-62.html'><i class='icon10 icon10-sjr'></i>起亚<em>(407)</em></a></h3></li></ul><div class='cartree-letter'>R</div><ul><li  id='b63'><h3><a href='/price/brand-63.html'><i class='icon10 icon10-sjr'></i>日产<em>(423)</em></a></h3></li><li id='b19'><h3><a href='/price/brand-19.html'><i class='icon10 icon10-sjr'></i>荣威<em>(126)</em></a></h3></li><li id='b174'><h3><a href='/price/brand-174.html'><i class='icon10 icon10-sjr'></i>如虎<em>(2)</em></a></h3></li><li id='b103'><h3><a href='/price/brand-103.html'><i class='icon10 icon10-sjr'></i>瑞麒<em>(50)</em></a></h3></li></ul><div class='cartree-letter'>S</div><ul><li  id='b45'><h3><a href='/price/brand-45.html'><i class='icon10 icon10-sjr'></i>smart<em>(57)</em></a></h3></li><li id='b64'><h3><a href='/price/brand-64.html'><i class='icon10 icon10-sjr'></i>萨博<em>(16)</em></a></h3></li><li id='b68'><h3><a href='/price/brand-68.html'><i class='icon10 icon10-sjr'></i>三菱<em>(249)</em></a></h3></li><li id='b149'><h3><a href='/price/brand-149.html'><i class='icon10 icon10-sjr'></i>陕汽通家<em>(26)</em></a></h3></li><li id='b155'><h3><a href='/price/brand-155.html'><i class='icon10 icon10-sjr'></i>上汽大通<em>(60)</em></a></h3></li><li id='b173'><h3><a href='/price/brand-173.html'><i class='icon10 icon10-sjr'></i>绅宝<em>(14)</em></a></h3></li><li id='b66'><h3><a href='/price/brand-66.html'><i class='icon10 icon10-sjr'></i>世爵<em>(1)</em></a></h3></li><li id='b90'><h3><a href='/price/brand-90.html'><i class='icon10 icon10-sjr'></i>双环<em>(82)</em></a></h3></li><li id='b69'><h3><a href='/price/brand-69.html'><i class='icon10 icon10-sjr'></i>双龙<em>(111)</em></a></h3></li><li id='b162'><h3><a href='/price/brand-162.html'><i class='icon10 icon10-sjr'></i>思铭<em>(2)</em></a></h3></li><li id='b65'><h3><a href='/price/brand-65.html'><i class='icon10 
icon10-sjr'></i>斯巴鲁<em>(167)</em></a></h3></li><li id='b67'><h3><a href='/price/brand-67.html'><i class='icon10 icon10-sjr'></i>斯柯达<em>(198)</em></a></h3></li></ul><div class='cartree-letter'>T</div><ul><li  id='b202'><h3><a href='/price/brand-202.html'><i class='icon10 icon10-sjr'></i>泰卡特<em>(7)</em></a></h3></li><li id='b133'><h3><a href='/price/brand-133.html'><i class='icon10 icon10-sjr'></i>特斯拉<em>(3)</em></a></h3></li><li id='b161'><h3><a href='/price/brand-161.html'><i class='icon10 icon10-sjr'></i>腾势<em>(2)</em></a></h3></li></ul><div class='cartree-letter'>W</div><ul><li  id='b102'><h3><a href='/price/brand-102.html'><i class='icon10 icon10-sjr'></i>威麟<em>(27)</em></a></h3></li><li id='b99'><h3><a href='/price/brand-99.html'><i class='icon10 icon10-sjr'></i>威兹曼<em>(7)</em></a></h3></li><li id='b70'><h3><a href='/price/brand-70.html'><i class='icon10 icon10-sjr'></i>沃尔沃<em>(241)</em></a></h3></li><li id='b114'><h3><a href='/price/brand-114.html'><i class='icon10 icon10-sjr'></i>五菱汽车<em>(69)</em></a></h3></li><li id='b167'><h3><a href='/price/brand-167.html'><i class='icon10 icon10-sjr'></i>五十铃<em>(12)</em></a></h3></li></ul><div class='cartree-letter'>X</div><ul><li  id='b98'><h3><a href='/price/brand-98.html'><i class='icon10 icon10-sjr'></i>西雅特<em>(13)</em></a></h3></li><li id='b12'><h3><a href='/price/brand-12.html'><i class='icon10 icon10-sjr'></i>现代<em>(418)</em></a></h3></li><li id='b185'><h3><a href='/price/brand-185.html'><i class='icon10 icon10-sjr'></i>新凯<em>(3)</em></a></h3></li><li id='b71'><h3><a href='/price/brand-71.html'><i class='icon10 icon10-sjr'></i>雪佛兰<em>(263)</em></a></h3></li><li id='b72'><h3><a href='/price/brand-72.html'><i class='icon10 icon10-sjr'></i>雪铁龙<em>(289)</em></a></h3></li></ul><div class='cartree-letter'>Y</div><ul><li  id='b111'><h3><a href='/price/brand-111.html'><i class='icon10 icon10-sjr'></i>野马汽车<em>(20)</em></a></h3></li><li id='b110'><h3><a href='/price/brand-110.html'><i class='icon10 
icon10-sjr'></i>一汽<em>(218)</em></a></h3></li><li id='b144'><h3><a href='/price/brand-144.html'><i class='icon10 icon10-sjr'></i>依维柯<em>(19)</em></a></h3></li><li id='b73'><h3><a href='/price/brand-73.html'><i class='icon10 icon10-sjr'></i>英菲尼迪<em>(109)</em></a></h3></li><li id='b192'><h3><a href='/price/brand-192.html'><i class='icon10 icon10-sjr'></i>英致<em>(6)</em></a></h3></li><li id='b93'><h3><a href='/price/brand-93.html'><i class='icon10 icon10-sjr'></i>永源<em>(70)</em></a></h3></li></ul><div class='cartree-letter'>Z</div><ul><li  id='b206'><h3><a href='/price/brand-206.html'><i class='icon10 icon10-sjr'></i>知豆<em>(1)</em></a></h3></li><li id='b22'><h3><a href='/price/brand-22.html'><i class='icon10 icon10-sjr'></i>中华<em>(190)</em></a></h3></li><li id='b74'><h3><a href='/price/brand-74.html'><i class='icon10 icon10-sjr'></i>中兴<em>(66)</em></a></h3></li><li id='b94'><h3><a href='/price/brand-94.html'><i class='icon10 icon10-sjr'></i>众泰<em>(117)</em></a></h3></li></ul>";private CarTree cartree = new CarTree(); private boolean bDownloadImage = false;//是否下载图片,默认falseprivate boolean bGetModelDetail = true;public static void main(String[] args) {QCZJmain q = new QCZJmain();if(args!=null){if(args.length >=1 ){q.setDIR_ROOT(args[0]);System.out.println("reset DIR_ROOT : "+q.getDIR_ROOT());}if(args.length >=2 ){if(args[1].equals("true"))q.setbDownloadImage(true);elseq.setbDownloadImage(false);}if(args.length >=3 ){if(args[1].equals("true"))q.setbGetModelDetail(true);elseq.setbGetModelDetail(false);}}q.init();File ftemp = new File(q.getDIR_ROOT());System.out.println("DIR_ROOT : " + ftemp.getAbsolutePath());boolean bsucces = false;while(!bsucces){q.readConfig();//q.play();for(int i=5; i>0; i--){try {System.out.println("wait "+i+" s");Thread.sleep(1000);} catch (InterruptedException e1) {e1.printStackTrace();}}try {bsucces = q.start();} catch (Exception e) {e.printStackTrace();}if(!bsucces){println("Retry.. 
in 5 Minutes ");try {Thread.sleep(5000);} catch (InterruptedException e) {e.printStackTrace();}}}println("=============== Finish ==============");}public void play(){try {InputStream is = this.getClass().getResourceAsStream("/[000279].wav");AudioStream as=new AudioStream(is);AudioPlayer.player.start(as);}catch (Exception e) {e.printStackTrace();}}//private  int carBrandsNumber = 0;private  int carSerieNumber = 0;private  int carYearNumber = 0;private  int carImagesNumber = 0;private  int intTemp = 0;private String configFilePath = this.DIR_ROOT + "/config.ini";private String exceptionLogFilePath = this.DIR_ROOT + "/error.log";public void init(){File froot = new File(DIR_ROOT);if(!froot.exists())froot.mkdirs();configFilePath = this.DIR_ROOT + "/config.ini";carserielistPath = this.DIR_ROOT + "/carserielist.txt";exceptionLogFilePath = this.DIR_ROOT + "/error.log";}public void readConfig(){//read  config.iniFile configfile = new File(this.configFilePath);if(configfile.exists()){try {FileInputStream fis = new FileInputStream(configfile);BufferedReader dr=new BufferedReader(new InputStreamReader(fis));while(true){try {String line =  dr.readLine();if(line==null)break ;if(line!=null && !line.trim().isEmpty()){String items[] = line.split(",");if(items!=null && items.length >=4){intTemp =  Integer.parseInt(items[0]);carBrandsNumber = intTemp;intTemp =  Integer.parseInt(items[1]);carSerieNumber = intTemp;intTemp =  Integer.parseInt(items[2]);carImagesNumber = intTemp;intTemp =  Integer.parseInt(items[3]);carYearNumber = intTemp;}}} catch (IOException e) {e.printStackTrace();}}} catch (FileNotFoundException e) {e.printStackTrace();}}//read carserielist.txtSystem.out.println("readConfig  "+configfile.getAbsolutePath());System.out.println("carBrandsNumber carSerieNumber   carImagesNumber  carYearNumber");System.out.println(carBrandsNumber +" \t    "+ carSerieNumber +" \t    "+ carImagesNumber +"    \t "+ carYearNumber);}public void saveConfig(int sum1, int sum2, int sum4, int 
sum3, FileOutputStream cofigout){try {String str = sum1 + "," + sum2 +"," +sum4+ ","+sum3 + ", "+sdf.format(new Date())+"\r\n";cofigout.write( str.getBytes() );} catch (IOException e2) {e2.printStackTrace();}}public static void print(String str){System.out.print(str);}public static void println(String str){System.out.println(str);}// get root car, 获取品牌列表CarBrandspublic void Step1(){println("=======  Step 1 ======");Document html = Jsoup.parse(carhtml);Elements emItem = html.select("ul li em");if(emItem!=null)emItem.remove();Elements items = html.select("ul li a");for(Element em : items){if(em!=null){String href = BASE_URL + em.attr("href");String name = em.text();cartree.add(new CarBrands(name, href));}}println("cartree.size=" + cartree.getTree().size());}private String carserielistPath = this.DIR_ROOT + "/carserielist.txt";//get series 获取各个品牌CarBrands的各个系列CarSeriepublic void Step2() throws ClientProtocolException, IOException{println("=======  Step 2 ======");HttpClient httpclient = new DefaultHttpClient();httpclient.getParams().setParameter(HttpMethodParams.USER_AGENT, "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36");HttpGet get = null;HttpResponse httpResponse = null;File carserielistFile = new File(carserielistPath);FileOutputStream out = new FileOutputStream(carserielistFile);int sum = 0;String str;for(CarBrands carbs : cartree.getTree()){sum ++ ;if(debug){if(sum > this.MAX_DEBUG_LINE)break;}str = "Brand="+carbs.getName() + " \r\n";out.write(str.getBytes());//get series url with price urlget = new HttpGet( Item_URL + "typeId=1&brandId="+carbs.getBid()+"&fctId=0&seriesId=0");httpResponse = httpclient.execute(get);String htmlstring = EntityUtils.toString(httpResponse.getEntity());Document html = Jsoup.parse(htmlstring);Element curli = html.getElementById("b"+carbs.getBid());if(curli!=null){Elements aem = curli.select("dl dd a em");if(aem!=null)aem.remove();Elements series = curli.select("dl dd 
a");//系列println(carbs.getName() + " ");for(Element serie : series){String href = this.BASE_URL + serie.attr("href");String name = serie.text();CarSerie serieCarbrands= new CarSerie(name, href);//ADD SERIEcarbs.add(serieCarbrands);str = "    serie="+serieCarbrands.getName()+" = " + serieCarbrands.getUrl() + "\r\n";print( str );out.write(str.getBytes());}}}out.close();get.releaseConnection();}SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMddHHmmss");//获取各个品牌CarBrands的各个系列CarSerie的各个车型CarYear/配置Modelspublic boolean Step3() throws IOException {boolean bok = true;println("=======  Step 3 ======");httpclient.getParams().setParameter(CoreConnectionPNames.CONNECTION_TIMEOUT,  20000);//连接时间20shttpclient.getParams().setParameter(CoreConnectionPNames.SO_TIMEOUT,  60000);httpclient.getParams().setParameter(HttpMethodParams.USER_AGENT, "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36");File configFile = new File(configFilePath);FileOutputStream cofigout = new FileOutputStream(configFile, true);File carlistfile = new File( this.DIR_ROOT+"/carlist.txt"); // "/carlist_"+sdf.format(new Date())+".txt"FileOutputStream out = new FileOutputStream(carlistfile, true);File exceptionLogFile = new File( exceptionLogFilePath ); // "/carlist_"+sdf.format(new Date())+".txt"FileOutputStream exceptionout = new FileOutputStream(exceptionLogFile, true);String tempstr="";String str = "";int sumcars = 0;int sumserie = 0;int sumcaryear = 0;int sum1 = 0;int sum2 = 0;int sum3 = 0;int sum4 = 0;boolean bcontinue = true;for(CarBrands carbs : cartree.getTree()){ //CarBrandsif(debug && sum1 > this.MAX_DEBUG_LINE){break;}if( carBrandsNumber>0 && sum1 < this.carBrandsNumber){println("carBrandsNumber: "+ sum1 +" < "+carBrandsNumber );sum1 ++;continue;}else{carBrandsNumber = 0;}this.saveConfig(sum1, sum2, sum4, sum3, cofigout);println(sum1+" :   "+ carbs.getName());//品牌名称str = "brand=" + carbs.getName() + "\r\n" ;try {out.write( str.getBytes() 
);} catch (IOException e1) {tempstr = sdf.format(new Date())+"\r\n"+e1.getMessage();exceptionout.write( tempstr.getBytes());e1.printStackTrace();}File fcar = new File(DIR_ROOT + "/"+ carbs.getName());if(!fcar.exists())fcar.mkdirs();sum2 = 0;for(CarSerie serie : carbs.getSeries()){ // CarSerieif(debug && sum2 > MAX_DEBUG_LINE){break;}if( carSerieNumber>0 && sum2 < this.carSerieNumber){println("    carSerieNumber: "+ sum2 +" < "+carSerieNumber );sum2 ++;continue;}else{carSerieNumber = 0;}this.saveConfig(sum1, sum2, -1, -1, cofigout);print( "    serie=" +serie.getName() );//系列名称str = "        serie="+serie.getName() + " " ;try {out.write( str.getBytes() );} catch (IOException e1) {tempstr = sdf.format(new Date())+"\r\n"+e1.getMessage()  + e1.getLocalizedMessage();exceptionout.write( tempstr.getBytes());e1.printStackTrace();}File fcarserie = new File( fcar.getAbsolutePath()+ "/"+ serie.getName());if(!fcarserie.exists())fcarserie.mkdirs();try {get = new HttpGet( serie.getUrl() );//点击系列链接,获取该系列详情httpResponse = httpclient.execute(get);String htmlstring = EntityUtils.toString( httpResponse.getEntity() );Document html = Jsoup.parse(htmlstring);//获取该系列的级别、车身结构、指导价、发动机、变速箱信息GetDetailSerie(serie, html);str = " [指导价:  "+ serie.getPrice() + "," + serie.getExtInfo()+" ]";println(str);try{out.write( str.getBytes() );}catch(Exception e){tempstr = sdf.format(new Date())+"\r\n"+e.getMessage()  + e.getLocalizedMessage();exceptionout.write( tempstr.getBytes());e.printStackTrace();}if(this.bGetModelDetail){//获取该系列的在售、停手、预售各个款式Elements brandtabs = html.select(".row .brandtab-cont .tab-nav ul li a"); //获取在售、停手、预售链接if(brandtabs!=null){for(Element brandtabItem : brandtabs){if(brandtabItem!=null){String brandtabhref = brandtabItem.attr("href");//在售、停手、预售链接println("           "+ brandtabItem.text());if(brandtabhref!=null && !brandtabhref.trim().isEmpty()){get = new HttpGet( this.BASE_URL + brandtabhref );//获取在售、停手、预售车辆信息httpResponse = httpclient.execute(get);String htmlstringBrand = 
EntityUtils.toString( httpResponse.getEntity() );Document htmlBrand = Jsoup.parse(htmlstringBrand);{Elements interval01List = htmlBrand.select("div.intervalcont .interval01 .interval01-list li ");sum3 = 0;for(Element interval : interval01List){Elements carsinfo = interval.select(".interval01-list-cars .infor-title a");if(carsinfo != null){CarYear caryear = new CarYear( carsinfo.text() );//款式名称if(debug && sum3 > MAX_DEBUG_LINE){break;}if( carYearNumber>0 && sum3 < this.carYearNumber){println("       carYearNumber: "+ sum3  +" < "+carYearNumber );sum3 ++;continue;}else{carYearNumber = 0;}print( "        " +caryear.getName() );str = "        "+caryear.getName() + " " ;try {out.write( str.getBytes() );//款式名称} catch (IOException e1) {tempstr = sdf.format(new Date())+"\r\n"+e1.getMessage()   + e1.getLocalizedMessage();exceptionout.write( tempstr.getBytes());e1.printStackTrace();}CarModels carModel = new CarModels();{carModel.setName(caryear.getName());//款式名称//1Elements guidance = interval.select(".interval01-list-guidance .guidance-price");//指导价if(guidance != null)carModel.setPrice(guidance.text());//指导价//2carModel.setUrl(carsinfo.attr("href"));//根据此url获取配置信息GetDetailModel(carModel);//3 Elements related = interval.select(".interval01-list-related a[href^=/pic]");//图片if(related!=null){carModel.setImageurl( this.BASE_URL + related.attr("href") );println( "        imageurl = "+carModel.getImageurl() );//load images File fcaryear= new File( fcarserie.getAbsolutePath()+ "/"+ caryear.getName());if(!fcaryear.exists())fcaryear.mkdirs();if(this.bDownloadImage){sum4 = SetImagesList(carModel.getImageurl(), carModel, fcaryear, cofigout, sum1, sum2, sum3);}else{this.saveConfig(sum1, sum2, 0, sum3, cofigout);}}str = ", 指导价="+carModel.getPrice() + " ,  车身结构="+carModel.getStructure()+", 发动机="+carModel.getEngine()+", 变速箱="+carModel.getTransmission()+", imagepageurl="+carModel.getImageurl()+"  \r\n" ; //级别="+carModel.getLevel()+",try{out.write( str.getBytes() );}catch(Exception e){tempstr 
= sdf.format(new Date())+"\r\n"+e.getMessage()   + e.getLocalizedMessage();exceptionout.write( tempstr.getBytes());e.printStackTrace();}}caryear.add(carModel);serie.add(caryear);this.saveConfig(sum1, sum2, sum4, sum3, cofigout);}sum3 ++ ;}//end of for(Element interval : interval01List)}}}}}}} catch (ClientProtocolException e) {if(get!=null)get.releaseConnection();e.printStackTrace();tempstr = sdf.format(new Date())+"\r\n"+e.getMessage();exceptionout.write( tempstr.getBytes());return false;} catch (IOException e) {if(get!=null)get.releaseConnection();e.printStackTrace();tempstr = sdf.format(new Date())+"\r\n"+e.getMessage();exceptionout.write( tempstr.getBytes());return false;}sum2 ++;}sum1 ++;}if(get!=null)get.releaseConnection();try {cofigout.close();out.close();exceptionout.close();} catch (IOException e) {e.printStackTrace();return false;}return true;}//获取该系列的级别、车身结构、发动机、变速箱、指导价信息public void GetDetailSerie(CarSerie serie, Document html){Elements carCont = html.select(".car-cont .list-cont-main .main-lever");if(carCont!=null){Elements priceItem = carCont.select(".main-lever-right .lever-price");//指导价if(priceItem!=null)serie.setPrice( priceItem.text() ) ;//指导价Elements carcolors = carCont.select(".main-lever-left ul.lever-ul .lever-ul-color");//车身颜色if(carcolors!=null)carcolors.remove();Elements carExt= carCont.select(".main-lever-left ul.lever-ul li");//获取该系列的级别、车身结构、发动机、变速箱信息if(carExt!=null){serie.setExtInfoHtml(carExt.html());for(Element item : carExt){serie.addExtInfo(item.text());}}}}//根据此url获取配置信息public void GetDetailModel( CarModels carModel){if(carModel.getUrl()==null)return ;get = new HttpGet( carModel.getUrl() );try{httpResponse = httpclient.execute(get);String htmlstring = EntityUtils.toString(httpResponse.getEntity());Document html = Jsoup.parse(htmlstring);Elements cardetails = html.select(".cardetail-infor .cardetail-infor-car li");for(Element cardetail : cardetails){Elements em = cardetail.getElementsContainingText("车身尺寸");if(em!=null && 
em.size()>0){cardetail.select("span").remove();carModel.setSize(cardetail.text());continue;}em = cardetail.getElementsContainingText("车身结构");if(em!=null && em.size()>0){cardetail.select("span").remove();carModel.setStructure(cardetail.text());continue;}em = cardetail.getElementsContainingText("机");if(em!=null && em.size()>0){cardetail.select("span").remove();carModel.setEngine(cardetail.text());continue;}em = cardetail.getElementsContainingText("箱");if(em!=null && em.size()>0){cardetail.select("span").remove();carModel.setTransmission(cardetail.text());continue;}}}catch(Exception e){}}public int SetImagesList(String imagepage, CarModels carModel, File fcaryear, FileOutputStream cofigout,int sum1, int sum2, int sum3){int sum4 = 0;try{get = new HttpGet( imagepage );httpResponse = httpclient.execute(get);String htmlstring = EntityUtils.toString(httpResponse.getEntity());Document html = Jsoup.parse(htmlstring);Elements imagesElements = html.select(".row .column .uibox .uibox-con ul li>a"); //图片列表sum4 = 0;for(Element em : imagesElements){//遍历图片列表 liif(debug && sum4 > MAX_DEBUG_LINE){break;}if(this.carImagesNumber>0 && sum4 < this.carImagesNumber){println("           carImagesNumber: "+sum4 +" < "+carImagesNumber );sum4 ++;continue;}else{carImagesNumber = 0;}String imageName = carModel.getName()+"_"+sum4+".jpg";File storeFile = new File( fcaryear.getAbsolutePath() + "/" + imageName );if(storeFile.exists()){println("ignore exist file @ "+storeFile.getAbsolutePath());continue;}if(em!=null){String href = this.BASE_URL + em.attr("href");//获取图片浏览页面链接try{get = new HttpGet( href );httpResponse = httpclient.execute(get);//打开图片浏览页面String htmlstring2 = EntityUtils.toString(httpResponse.getEntity());Document html2 = Jsoup.parse(htmlstring2);Element img = html2.getElementById("img");//获取大图链接if(img!=null){CarSerieImage  im = new CarSerieImage( carModel.getName(), img.attr("src"));carModel.add(im);print( "        " +im.getTitle()+"  img = "+im.getSrc() );downloadPhotos(im.getSrc(), 
fcaryear.getAbsolutePath(), imageName);//下载图片println("");}}catch(Exception e){e.printStackTrace();}}sum4 ++ ;this.saveConfig(sum1, sum2, sum4, sum3, cofigout);}}catch(Exception e){e.printStackTrace();}return sum4;}HttpClient httpclient = new DefaultHttpClient();HttpGet get = null;HttpResponse httpResponse = null;public void downloadPhotos (String url, String savePath, String saveNamge){httpclient.getParams().setParameter(HttpMethodParams.USER_AGENT, "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36");try {get = new HttpGet( url );httpResponse = httpclient.execute(get);File storeFile = new File( savePath + "/" + saveNamge );  FileOutputStream output = new FileOutputStream(storeFile);  //得到网络资源的字节数组,并写入文件  output.write( EntityUtils.toByteArray(httpResponse.getEntity()) );  output.close();  print( "    saved image @ "+storeFile.getAbsolutePath() );} catch (Exception e) {e.printStackTrace();}}//save nams 2 carlist.txtpublic void Step4() throws ClientProtocolException, IOException{println("=======  Step 4  save picture data ======");File froot = new File(DIR_ROOT);if(!froot.exists())froot.mkdirs();File carlistfile = new File(froot.getAbsolutePath()+"/carlist.txt");FileOutputStream out = new FileOutputStream(carlistfile);String str = "";int sumcars = 0;int sumserie = 0;int sumcaryear = 0;for(CarBrands carbs : cartree.getTree()){sumcars ++;str = carbs.getName() + "\r\n" ;out.write( str.getBytes() );for(CarSerie serie : carbs.getSeries()){sumserie ++;str = "    "+serie.getName() + "\r\n" ;out.write( str.getBytes() );for(CarYear caryear : serie.getCarYearList()){sumcaryear ++;str = "        "+caryear.getName() + "\r\n" ;out.write( str.getBytes() );for(CarModels carModel : caryear.getCarModels()){str = ",  指导价="+carModel.getPrice() + " ,  车身结构="+carModel.getStructure()+", 发动机="+carModel.getEngine()+", 变速箱="+carModel.getTransmission()+", imagepageurl="+carModel.getImageurl()+" \r\n" ; 
//级别="+carModel.getLevel()+",out.write( str.getBytes() );}}}}out.close();println("sumcars = "+sumcars + "  sumserie = "+sumserie);try {Thread.sleep(3000);} catch (InterruptedException e) {e.printStackTrace();}}boolean bstarted = false;public boolean start() throws ClientProtocolException, IOException{boolean bsuccess = false;long t1 = System.currentTimeMillis();if(bstarted==false){this.Step1();//获取品牌列表CarBrandsthis.Step2();//获取各个品牌CarBrands的各个系列CarSerie}bstarted = true;bsuccess = this.Step3();//获取各个品牌CarBrands的各个系列CarSerie的各个车型CarYear/配置Models//this.Step4();//存储文本数据和图片//this.Step5();//获取图片并存储图片long t2 = System.currentTimeMillis();long diff = (t2 -t1)/1000;long hour = diff/3600;long minite = (diff - hour*3600)/(60);long sec = diff % 60;println("start at "+sdf.format(new Date(t1)));println("end at "+sdf.format(new Date(t2)));println("it takes "+hour+" h "+minite+" m "+sec+" s ." );return bsuccess;}//get pictrue urlpublic void Step5(){println("=======  Step 5 ======");HttpClient httpclient = new DefaultHttpClient();HttpGet get = null;HttpResponse httpResponse = null;httpclient.getParams().setParameter(HttpMethodParams.USER_AGENT, "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36");File froot = new File(DIR_ROOT);if(!froot.exists())froot.mkdirs();int sum = 0;for(CarBrands carbs : cartree.getTree()){println(carbs.getName());File fcar = new File(DIR_ROOT + "/"+ carbs.getName());if(!fcar.exists())fcar.mkdirs();sum ++ ;if(debug && sum > MAX_DEBUG_LINE){break;}int sum2 = 0;for(CarSerie serie : carbs.getSeries()){File fcarserie = new File( fcar.getAbsolutePath()+ "/"+ serie.getName());if(!fcarserie.exists())fcarserie.mkdirs();sum2++;if(debug && sum2 > MAX_DEBUG_LINE){break;}println( "    " +serie.getName() );get = new HttpGet( serie.getUrl() );try{httpResponse = httpclient.execute(get);String htmlstring = EntityUtils.toString(httpResponse.getEntity());Document html = Jsoup.parse(htmlstring);Elements imagesElements 
= html.select(".row .column .uibox .uibox-con ul li>a"); //图片列表int sum3 = 0;for(Element em : imagesElements){sum3 ++ ;if(debug && sum3 > MAX_DEBUG_LINE){break;}if(em!=null){String href = this.BASE_URL + em.attr("href");get = new HttpGet( href );httpResponse = httpclient.execute(get);String htmlstring2 = EntityUtils.toString(httpResponse.getEntity());Document html2 = Jsoup.parse(htmlstring2);Element img = html2.getElementById("img");if(img!=null){CarSerieImage  im = new CarSerieImage(em.attr("title"), img.attr("src"));//serie.add(im);print( "        " +im.getTitle()+"  img = "+im.getSrc() );downloadPhotos(im.getSrc(), fcarserie.getAbsolutePath(), im.getTitle()+"_"+sum3+".jpg");println("");}}}}catch(Exception e){e.printStackTrace();}}}get.releaseConnection();}public String getDIR_ROOT() {return DIR_ROOT;}public void setDIR_ROOT(String dIR_ROOT) {DIR_ROOT = dIR_ROOT;}public boolean isbDownloadImage() {return bDownloadImage;}public void setbDownloadImage(boolean bDownloadImage) {this.bDownloadImage = bDownloadImage;}public boolean isbGetModelDetail() {return bGetModelDetail;}public void setbGetModelDetail(boolean bGetModelDetail) {this.bGetModelDetail = bGetModelDetail;}}


0 0
原创粉丝点击