正则表达式检查工程中的中文

来源：互联网　发布：linux 查看文件夹数量　编辑：程序博客网　时间：2024/06/05 03:11

WEB项目通常需要国际化，但由于开发进度紧张或开发经验不足，开发人员经常会忘记做国际化处理，导致项目中留下许多中文硬编码。参考网上的一些方法，我写了一个工具类来检查工程（js、jsp 文件）中的中文，并把所在文件和行号输出到结果文件中。缺点是目前只屏蔽了“//”形式的注释，块注释中的中文仍然会被检查出来，后面有时间再完善吧。

import java.io.BufferedReader;import java.io.File;import java.io.FileInputStream;import java.io.FileOutputStream;import java.io.IOException;import java.io.InputStreamReader;import java.io.OutputStreamWriter;import java.util.regex.Matcher;import java.util.regex.Pattern;import cpdetector.io.ASCIIDetector;import cpdetector.io.CodepageDetectorProxy;import cpdetector.io.JChardetFacade;import cpdetector.io.ParsingDetector;import cpdetector.io.UnicodeDetector;public class CheckCN{    static int count = 0;//含有中文字符的文件数    static String regEx = "[\\u4e00-\\u9fa5]";    static Pattern pat = Pattern.compile(regEx);    static FileOutputStream fos = null;    static OutputStreamWriter osw = null;    private static final String FILE_PATH = "D:\\project\\myeclips6.5proj_for_xg\\XXX\\WebRoot\\pages\\xxx\\page";    private static final String OUTPUT_PATH = "C:\\Users\\YangXiang\\Desktop\\cnFile.txt";    public static void main(String[] args)    {        try        {            //打开输出流            fos = new FileOutputStream(new File(OUTPUT_PATH), true);            osw = new OutputStreamWriter(fos, "UTF-8");            checkFileExists(OUTPUT_PATH);            //开始检查文件            refreshFileList(FILE_PATH);        }        catch (IOException e)        {            e.printStackTrace();        }        finally        {            try            {                fos.close();                osw.close();            }            catch (IOException e)            {                e.printStackTrace();            }        }        //console输出检查结果        System.out.println(count + " files containing the Chinese ,please check:" + OUTPUT_PATH);    }    /**     * 检查文件    * @Title: refreshFileList     * @Description: TODO(这里用一句话描述这个方法的作用)     * @param strPath    * @throws IOException    设定文件     * @return void    返回类型      */    private static void refreshFileList(String strPath) throws IOException    {        File dir = new File(strPath);        File[] files = dir.listFiles();        if (files == null) 
           return;        for (int i = 0; i < files.length; i++)        {            int flag = 0;            if (files[i].isDirectory())            {                refreshFileList(files[i].getAbsolutePath());            }            else            {                String strFileName = files[i].getAbsolutePath().toLowerCase();                //System.out.println(getFileEncode(files[i].getAbsolutePath())+" ----" +files[i].getName());                //截取文件格式                String fileName = strFileName.substring(strFileName.lastIndexOf(".") + 1, strFileName.length());                //此处排除掉这class文件和jar文件不参与判断                if ("js".equals(fileName.toLowerCase()) || "jsp".equals(fileName.toLowerCase()))                {                    //开始输入文件流，检查文件                    String enCode = getFileEncode(files[i].getAbsolutePath());                    if ("void".equals(enCode))                    {                        enCode = "UTF-8";                    }                    if ("windows-1252".equals(enCode))                    {                        enCode = "GBK";                    }                    FileInputStream fis = new FileInputStream(files[i].getAbsolutePath());                    InputStreamReader in = new InputStreamReader(fis, enCode);                    BufferedReader br = new BufferedReader(in);                    //用于记录行数  确定文件哪一行有中文                    int lineCount = 0;                    String line = null;                    //逐行检查文件                    while ((line = br.readLine()) != null)                    {                        /////使用正则表达式进行判断                        lineCount++;                        Matcher matcher = pat.matcher(line.trim());                        if (matcher.find() && !line.trim().contains("//"))                        { //将含有中文的文件名称和中文所在行数写入文件夹                            osw.write(files[i].getAbsolutePath() + " ----@line " + lineCount + ": " + line.trim()                                    + "\r\n");              
              osw.flush();                            System.out.println(files[i].getAbsolutePath() + "have chinese");                            flag++;                        }                    }                    //flag!=0 说明该文件中含有中文                    if (flag != 0)                        count++;                    br.close();                    in.close();                    fis.close();                }            }        }    }    /**     *     * @Title: getFileEncode     * @Description: TODO(这里用一句话描述这个方法的作用)     * @param path    * @return    设定文件     * @return String    返回类型      */    public static String getFileEncode(String path)    {        /*         * detector是探测器，它把探测任务交给具体的探测实现类的实例完成。         * cpDetector内置了一些常用的探测实现类，这些探测实现类的实例可以通过add方法 加进来，如ParsingDetector、         * JChardetFacade、ASCIIDetector、UnicodeDetector。         * detector按照“谁最先返回非空的探测结果，就以该结果为准”的原则返回探测到的         * 字符集编码。使用需要用到三个第三方JAR包：antlr.jar、chardet.jar和cpdetector.jar         * cpDetector是基于统计学原理的，不保证完全正确。         */        CodepageDetectorProxy detector = CodepageDetectorProxy.getInstance();        /*         * ParsingDetector可用于检查HTML、XML等文件或字符流的编码,构造方法中的参数用于         * 指示是否显示探测过程的详细信息，为false不显示。         */        detector.add(new ParsingDetector(false));        /*         * JChardetFacade封装了由Mozilla组织提供的JChardet，它可以完成大多数文件的编码         * 测定。所以，一般有了这个探测器就可满足大多数项目的要求，如果你还不放心，可以         * 再多加几个探测器，比如下面的ASCIIDetector、UnicodeDetector等。         */        detector.add(JChardetFacade.getInstance());// 用到antlr.jar、chardet.jar        // ASCIIDetector用于ASCII编码测定        detector.add(ASCIIDetector.getInstance());        // UnicodeDetector用于Unicode家族编码的测定        detector.add(UnicodeDetector.getInstance());        java.nio.charset.Charset charset = null;        File f = new File(path);        try        {            charset = detector.detectCodepage(f.toURI().toURL());        }        catch (Exception ex)        {            ex.printStackTrace();        }        if (charset != null)            
return charset.name();        else            return null;    }    /**     * 检查结果文件，如果已存在就删除    * @Title: checkFileExists     * @Description: TODO(这里用一句话描述这个方法的作用)     * @param path    设定文件     * @return void    返回类型      */    public static void checkFileExists(String path)    {        File file = new File(path);        if (file.isFile() && file.exists())        {            file.delete();        }    }}

结果示例：

D:\project\myeclips6.5proj_for_xg\XXX\WebRoot\pages\xxx\page\vmware\host\host.js ----@line 2440: fieldLabel: 'NTP服务状态',
D:\project\myeclips6.5proj_for_xg\XXX\WebRoot\pages\xxx\page\vmware\host\host.js ----@line 2443: {boxLabel: '关闭', name: 'zxveHost.extra.ntpStatus', inputValue: '0'},
D:\project\myeclips6.5proj_for_xg\XXX\WebRoot\pages\xxx\page\vmware\host\host.js ----@line 2444: {boxLabel: '开启', name: 'zxveHost.extra.ntpStatus', inputValue: '1' }
D:\project\myeclips6.5proj_for_xg\XXX\WebRoot\pages\xxx\page\vmware\host\host.js ----@line 2480: fieldLabel: 'NTP服务器',
D:\project\myeclips6.5proj_for_xg\XXX\WebRoot\pages\xxx\page\vmware\host\host.js ----@line 2488: title: 'NTP服务设置',

0 0