java自动根据文件内容的编码来读取避免乱码

来源:互联网 发布:淘宝卖家层级金额 编辑:程序博客网 时间:2024/05/23 01:18

通过 cpdetector 这个开源的 jar 包,可以自动检测文件内容的编码,从而在读取时选用正确的编码,避免出现乱码。

 原创不易,转载请注明出处:java自动根据文件内容的编码来读取避免乱码

测试结果,提供截图:

GBK文件内容

UTF8文件内容

 

运行结果:

package com.zuidaima.test;

import info.monitorenter.cpdetector.io.ASCIIDetector;
import info.monitorenter.cpdetector.io.CodepageDetectorProxy;
import info.monitorenter.cpdetector.io.JChardetFacade;
import info.monitorenter.cpdetector.io.ParsingDetector;
import info.monitorenter.cpdetector.io.UnicodeDetector;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStreamReader;

/**
 * Reads text files using a character encoding detected from the file content
 * (via the cpdetector library) so that files in different encodings do not
 * come out garbled.
 */
public class Main {

    /** Charset name used when detection fails or yields an unusable charset. */
    private static final String FALLBACK_CHARSET = "UTF-8";

    /**
     * Shared detector proxy, configured exactly once.
     *
     * <p>{@code CodepageDetectorProxy.getInstance()} hands out a shared instance, so
     * registering the detectors on every call would keep adding to that same proxy.
     */
    private static final CodepageDetectorProxy DETECTOR = createDetector();

    /**
     * Returns the text content of the file at {@code path}, decoded with the
     * charset detected from the file itself.
     *
     * <p>Every line of the file is followed by a single {@code "\n"} in the
     * result, regardless of the line terminator used in the file.
     *
     * @param path path of the file to read
     * @return the decoded content, one {@code "\n"} appended after each line
     * @throws Exception if the file cannot be opened or read
     */
    public static String getContent(String path) throws Exception {
        File file = new File(path);
        String charsetName = detectCharsetName(file);

        StringBuilder lines = new StringBuilder();
        // try-with-resources closes the stream even when decoding or reading fails;
        // the FileInputStream is declared separately so it is closed even if the
        // InputStreamReader constructor rejects the charset name.
        try (FileInputStream in = new FileInputStream(file);
                BufferedReader reader =
                        new BufferedReader(new InputStreamReader(in, charsetName))) {
            String line;
            while ((line = reader.readLine()) != null) {
                lines.append(line).append('\n');
            }
        }
        return lines.toString();
    }

    /** Builds the detector chain in the same order the detectors were originally registered. */
    private static CodepageDetectorProxy createDetector() {
        CodepageDetectorProxy detector = CodepageDetectorProxy.getInstance();
        detector.add(new ParsingDetector(false));
        detector.add(JChardetFacade.getInstance());
        detector.add(ASCIIDetector.getInstance());
        detector.add(UnicodeDetector.getInstance());
        return detector;
    }

    /**
     * Detects the charset of {@code file}, falling back to UTF-8 when detection
     * fails or the detected charset cannot be used by this JVM.
     */
    private static String detectCharsetName(File file) {
        java.nio.charset.Charset charset = null;
        try {
            charset = DETECTOR.detectCodepage(file.toURI().toURL());
        } catch (Exception ex) {
            // Detection is best-effort: report the problem and fall back to UTF-8.
            ex.printStackTrace();
        }
        if (charset == null) {
            return FALLBACK_CHARSET;
        }
        String name = charset.name();
        // NOTE(review): cpdetector appears to return a placeholder charset (not a real
        // JVM charset) when no detector can decide - confirm against the library docs.
        // Only use the detected name if the JVM can actually decode with it.
        try {
            if (java.nio.charset.Charset.isSupported(name)) {
                return name;
            }
        } catch (IllegalArgumentException ignored) {
            // Name is not a legal charset name; treat it like an undetected charset.
        }
        return FALLBACK_CHARSET;
    }

    /** Prints the content of a GBK-encoded and a UTF-8-encoded sample file. */
    public static void main(String[] args) throws Exception {
        System.out.println(getContent("bin/gbk.txt"));
        System.out.println(getContent("bin/utf8.txt"));
    }
}

代码下载地址:http://www.zuidaima.com/share/1550463235574784.htm

0 1