webmagic在tomcat上https报错问题

来源:互联网 发布:淘宝店铺里的图片变形 编辑:程序博客网 时间:2024/06/06 08:53

我的做法是：在 process 方法中不再使用 page.getHtml()，而是直接替换为自己通过 HTTPS 请求获取到的页面 HTML，代码如下：

import java.io.BufferedReader;import java.io.InputStreamReader;import java.net.HttpURLConnection;import java.net.URL;import java.security.SecureRandom;import java.security.cert.CertificateException;import java.security.cert.X509Certificate;import javax.net.ssl.HostnameVerifier;import javax.net.ssl.HttpsURLConnection;import javax.net.ssl.SSLContext;import javax.net.ssl.SSLSession;import javax.net.ssl.SSLSocketFactory;import javax.net.ssl.TrustManager;import javax.net.ssl.X509TrustManager;import us.codecraft.webmagic.Page;import us.codecraft.webmagic.Site;import us.codecraft.webmagic.Spider;import us.codecraft.webmagic.processor.PageProcessor;import us.codecraft.webmagic.selector.Html;public class TestWebMagic implements PageProcessor{private static Html html;private static String baseUrl;@Overridepublic void process(Page page) {//https协议得到页面htmlhtml = new Html(getHtmlByHttps(baseUrl, "UTF-8"));System.out.println(html);page.putField("imgs", html.$("img", "src-medium").regex(".*800x800.jpg.*").all());}@Overridepublic Site getSite() {return Site.me().setRetryTimes(3).setSleepTime(1000);}    public static String getHtmlByHttps(String u, String encoding){    try {        SSLContext sc = SSLContext.getInstance("SSL", "SunJSSE");    sc.init(null, new TrustManager[] { new X509TrustManager() {              @Override              public void checkClientTrusted(X509Certificate[] chain, String authType) throws CertificateException {                        }              @Override              public void checkServerTrusted(X509Certificate[] chain, String authType) throws CertificateException {                        }              @Override              public X509Certificate[] getAcceptedIssuers() {                  return null;              }          } }, new SecureRandom());    HttpsURLConnection.setDefaultSSLSocketFactory(sc.getSocketFactory());          HttpsURLConnection.setDefaultHostnameVerifier(new HostnameVerifier() {              @Override              public 
boolean verify(String arg0, SSLSession arg1) {                  return true;              }          });                SSLSocketFactory ssf = sc.getSocketFactory();        URL url = new URL(null, u, new sun.net.www.protocol.https.Handler());        HttpsURLConnection conn = (HttpsURLConnection) url.openConnection();        conn.setSSLSocketFactory(ssf);        conn.setDoOutput(true);          conn.setDoInput(true);        conn.setUseCaches(false);        conn.setRequestMethod("GET");        conn.connect();          BufferedReader br = new BufferedReader(new InputStreamReader(conn.getInputStream(), encoding));        StringBuffer sb = new StringBuffer();        String line;          while ((line = br.readLine()) != null)              sb.append(line);          return sb.toString();    } catch (Exception e) {          e.printStackTrace();      }      return null;    }public static void main(String[] args) {baseUrl = "https://product.suning.com/0070137013/149868717.html";// 创建默认的httpClient实例        Spider.create(new TestWebMagic())                .addUrl("http://fanyi.baidu.com")//随便写个html协议可以得到的链接                //启动爬虫                .run();    }}

阅读全文
0 0
原创粉丝点击