java下载html页面---把网页内容保存成本地html

来源:互联网 发布:设置淘宝子账号 编辑:程序博客网 时间:2024/05/22 03:31


前面讲到用httpclient抓取网页内容时,我们通常是获取页面的源代码(content)并存入数据库。

详见下文:

HTTPClient模块的HttpGet和HttpPost

httpclient常用基本抓取类

那么如果我们除了获得页面源代码之外 还想把页面保存到本地存成html应该怎么做呢?


其实很简单 我们先来看访问页面获取content的代码

private static String getUrlContent(DefaultHttpClient httpPostClient,String urlString) throws IOException, ClientProtocolException {HttpGet httpGet = new HttpGet(urlString);HttpResponse httpGetResponse = httpPostClient.execute(httpGet);// 其中HttpGet是HttpUriRequst的子类httpPostClient.getParams().setParameter(CoreConnectionPNames.CONNECTION_TIMEOUT, 10000);// 连接时间20shttpPostClient.getParams().setParameter(CoreConnectionPNames.SO_TIMEOUT, 8000);// 数据传输时间60sif (httpGetResponse.getStatusLine().getStatusCode() == 200) {HttpEntity httpEntity = httpGetResponse.getEntity();if (httpEntity.getContentEncoding() != null) {if ("gzip".equalsIgnoreCase(httpEntity.getContentEncoding().getValue())) {httpEntity = new GzipDecompressingEntity(httpEntity);} else if ("deflate".equalsIgnoreCase(httpEntity.getContentEncoding().getValue())) {httpEntity = new DeflateDecompressingEntity(httpEntity);}}String result = enCodetoString(httpEntity, encode);// 取出应答字符串// System.out.println(result);return result;}return "";}

public static String enCodetoStringDo(final HttpEntity entity,Charset defaultCharset) throws IOException, ParseException {if (entity == null) {throw new IllegalArgumentException("HTTP entity may not be null");}InputStream instream = entity.getContent();if (instream == null) {return null;}try {if (entity.getContentLength() > Integer.MAX_VALUE) {throw new IllegalArgumentException("HTTP entity too large to be buffered in memory");}int i = (int) entity.getContentLength();if (i < 0) {i = 4096;}Charset charset = null;try {// ContentType contentType = ContentType.get(entity);// if (contentType != null) {// charset = contentType.getCharset();// }} catch (final UnsupportedCharsetException ex) {throw new UnsupportedEncodingException(ex.getMessage());}if (charset == null) {charset = defaultCharset;}if (charset == null) {charset = HTTP.DEF_CONTENT_CHARSET;}Reader reader = new InputStreamReader(instream, charset);CharArrayBuffer buffer = new CharArrayBuffer(i);char[] tmp = new char[1024];int l;while ((l = reader.read(tmp)) != -1) {buffer.append(tmp, 0, l);}return buffer.toString();} finally {instream.close();}}

我们得到content之后,就可以直接把它存成本地文件了。

我们可以参考

 

java读写txt


把txt后缀改成html即可

  1. public static void writeToFile(String fileName, String content) {  
  2.         String time = DATE_FORMAT.format(Calendar.getInstance().getTime());  
  3.           
  4.         File dirFile = null;  
  5.         try {  
  6.             dirFile = new File("e:\\" + time);  
  7.             if (!(dirFile.exists()) && !(dirFile.isDirectory())) {  
  8.                 boolean creadok = dirFile.mkdirs();  
  9.                 if (creadok) {  
  10.                     System.out.println(" ok:创建文件夹成功! ");  
  11.                 } else {  
  12.                     System.out.println(" err:创建文件夹失败! ");  
  13.                 }  
  14.             }  
  15.         } catch (Exception e) {  
  16.             e.printStackTrace();  
  17.         }  
  18.         String fullPath = dirFile + "/" + fileName + ".txt";  
  19.         write(fullPath, content);  
  20.     }  
  21.   
  22.     /** 
  23.      * 写文件 
  24.      *  
  25.      * @param path 
  26.      * @param content 
  27.      */  
  28.     public static boolean write(String path, String content) {  
  29.         String s = new String();  
  30.         String s1 = new String();  
  31.         BufferedWriter output = null;  
  32.         try {  
  33.             File f = new File(path);  
  34.             if (f.exists()) {  
  35.             } else {  
  36.                 System.out.println("文件不存在,正在创建...");  
  37.                 if (f.createNewFile()) {  
  38.                     System.out.println("文件创建成功!");  
  39.                 } else {  
  40.                     System.out.println("文件创建失败!");  
  41.                 }  
  42.             }  
  43.             BufferedReader input = new BufferedReader(new FileReader(f));  
  44.             while ((s = input.readLine()) != null) {  
  45.                 s1 += s + "\n";  
  46.             }  
  47.             System.out.println("原文件内容:" + s1);  
  48.             input.close();  
  49.             s1 += content;  
  50.             output = new BufferedWriter(new FileWriter(f));  
  51.             output.write(s1);  
  52.             output.flush();  
  53.             return true;  
  54.         } catch (Exception e) {  
  55.             e.printStackTrace();  
  56.             return false;  
  57.         } finally {  
  58.             if (output != null) {  
  59.                 try {  
  60.                     output.close();  
  61.                 } catch (IOException e) {  
  62.                     e.printStackTrace();  
  63.                 }  
  64.             }  
  65.         }  
  66.     }  



0 0
原创粉丝点击