自动识别文件编码

来源:互联网 发布:非主流伤感的网络歌曲 编辑:程序博客网 时间:2024/05/19 03:44

public static Reader readTranslateFile(File bsFile) {
        final int BOM_SIZE = 4;
        byte[] bom = new byte[BOM_SIZE];
        BufferedReader reader = null;
        PushbackInputStream pushbackInputStream = null;
        try {
            // 获取编码
            String sourceEncoding = CpdetectorUtil.perceiveCode(bsFile);
            // 读取BOM
            pushbackInputStream = new PushbackInputStream(new FileInputStream(
                bsFile), BOM_SIZE);
            int n = pushbackInputStream.read(bom, 0, bom.length);
            int unread = n;
            // 判断是否有BOM
            unread = ishasBOM(bom, n, unread);
            // 读取偏移
            pushbackInputStream.unread(bom, (n - unread), unread);
            reader = new BufferedReader(new InputStreamReader(
                pushbackInputStream, sourceEncoding));
            return reader;
        } catch (MalformedURLException e) {
            Logger.getLogger().error(FileUtil.class, e);
        } catch (IOException e) {
            Logger.getLogger().error(FileUtil.class, e);
        }
        return null;
    }
 
 private static final CodepageDetectorProxy detector = CodepageDetectorProxy
        .getInstance();

    static {
        detector.add(new ParsingDetector(false));
        detector.add(JChardetFacade.getInstance());
        detector.add(UnicodeDetector.getInstance());
        detector.add(ASCIIDetector.getInstance());
    }

    /** Private so that the class cannot be instantiated from outside. */
    private CpdetectorUtil() {
    }

    /**
     *
     * 感知文件编码方式。
     *
     * @param 待探测文件
     * @return 编码字符串,为空表示没有找到该编码
     * @throws IOException
     * @throws MalformedURLException
     */
    public static String perceiveCode(File file) throws IOException {
        if (null == file || !file.exists()) {
            return null;
        }
        Charset charset = null;
        charset = detector.detectCodepage(file.toURI().toURL());

        if (charset != null) {
            return charset.name();
        }
        return null;
    }

以上代码依赖以下 jar 包:

cpdetector_1.0.10.jar

chardet-1.0.jar

antlr-2.7.4.jar

原创粉丝点击