Java抓取网页源代码《转载》

来源:互联网 发布:开淘宝网店怎样找货源 编辑:程序博客网 时间:2024/05/22 06:07
public static String getHtmlContent(URL url, String encode) { 
          StringBuffer contentBuffer = new StringBuffer(); 
   
          int responseCode = -1; 
          HttpURLConnection con = null; 
          try { 
              con = (HttpURLConnection) url.openConnection(); 
              con.setRequestProperty("User-Agent", "Mozilla/4.0 (compatible; MSIE 5.0; Windows NT; DigExt)");// IE代理进行下载 
              con.setConnectTimeout(60000); 
              con.setReadTimeout(60000); 
              // 获得网页返回信息码 
              responseCode = con.getResponseCode(); 
              if (responseCode == -1) { 
                  System.out.println(url.toString() + " : connection is failure..."); 
                 con.disconnect(); 
                  return null; 
              } 
              if (responseCode >= 400) // 请求失败 
              { 
                  System.out.println("请求失败:get response code: " + responseCode); 
                  con.disconnect(); 
                  return null; 
              } 
  
              InputStream inStr = con.getInputStream(); 
              InputStreamReader istreamReader = new InputStreamReader(inStr, encode); 
              BufferedReader buffStr = new BufferedReader(istreamReader); 
   
              String str = null; 
              while ((str = buffStr.readLine()) != null) 
                contentBuffer.append(str); 
              inStr.close(); 
          } catch (IOException e) { 
              e.printStackTrace(); 
              contentBuffer = null; 
              System.out.println("error: " + url.toString()); 
          } finally { 
              con.disconnect(); 
         } 
          return contentBuffer.toString(); 
      } 
  
      public static String getHtmlContent(String url, String encode) { 
          if (!url.toLowerCase().startsWith("http://")) { 
             url = "http://" + url; 
          } 
          try { 
              URL rUrl = new URL(url); 
              return getHtmlContent(rUrl, encode); 
          } catch (Exception e) { 
              e.printStackTrace(); 
              return null; 
          } 
      } 
0 0
原创粉丝点击