用shell脚本配合awk文本处理工具进行文件处理

来源:互联网 发布:淘宝电子商务运营模式 编辑:程序博客网 时间:2024/05/19 18:41

由于项目运行在 Linux 环境下,需要实现统计文件中记录金额总和等功能,于是提供了一个通过 shell 脚本(awk、sort 等命令)处理文件的工具类:

package com.lancy.common.util;

import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.HashMap;
import java.util.Map;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

/**
 * File-processing helpers that delegate the heavy lifting to shell tools
 * (awk, sort and project shell scripts) through {@code /bin/sh -c}.
 *
 * <p>
 * NOTE(review): every path or argument passed to these methods is
 * concatenated into the shell command line verbatim (no quoting or escaping).
 * Callers must only pass trusted values; values containing spaces or shell
 * metacharacters are interpreted by the shell.
 */
public class ShellUtil {

    private static final Logger logger = LogManager.getLogger(ShellUtil.class);

    /**
     * Marker appended to the captured output when starting, reading or waiting
     * for the shell command fails. Kept for backward compatibility with callers
     * that inspect the returned text.
     */
    private static final String FAILURE_MARKER = "999999999999";

    /** Common prefix of the MongoDB data import command line. */
    private static final String MONGO_IMPORT_PREFIX =
            "source /etc/profile && sh ${PROG_ROOT}/shell/script/mongo_data_import.sh ";

    /**
     * Sums one column of a tab-separated file using awk.
     *
     * @param file
     *            path of the file
     * @param colNum
     *            column number, starting from 1
     * @return the column total as printed by awk with three decimals, or 0 when
     *         the command produced no output. If the command could not be
     *         executed, the failure marker 999999999999 is what gets parsed.
     */
    public static double sumColumn(String file, int colNum) {
        String cmds = "awk -F '\t' '{a=a+$" + colNum + "}END{printf(\"%.3f\\n\", a)}' " + file;
        String result = execWithChannel(cmds).trim();
        return result.isEmpty() ? 0 : Double.parseDouble(result);
    }

    /**
     * Counts the lines of a file using awk.
     *
     * @param file
     *            path of the file
     * @return the number of lines, or 0 when the command produced no output
     * @throws NumberFormatException
     *             if the command output is not an int (this includes the
     *             failure marker appended when the command cannot be executed)
     * @deprecated the original author notes this is slower than counting lines
     *             directly in Java and advises against using it.
     */
    @Deprecated
    public static int countLines(String file) {
        String cmd = "awk 'END{print NR}' " + file;
        // The captured output always ends with a newline; Integer.parseInt does
        // not tolerate surrounding whitespace, so it must be trimmed first.
        String result = execWithChannel(cmd).trim();
        return result.isEmpty() ? 0 : Integer.parseInt(result);
    }

    /**
     * Runs the MongoDB data import script.
     *
     * @param filePath
     *            path of the file, passed as the first script argument
     * @param collection
     *            passed as the second script argument
     * @param field
     *            passed as the third script argument
     * @return the captured standard output of the script
     */
    public static String execMongoImportScript(String filePath, String collection, String field) {
        return execWithChannel(mongoImportCommand(filePath, collection, field));
    }

    /**
     * Runs the MongoDB data import script with an optional card type.
     *
     * @param filePath
     *            path of the file, passed as the first script argument
     * @param collection
     *            passed as the second script argument
     * @param field
     *            passed as the third script argument
     * @param cardType
     *            passed as the fourth script argument; when null or empty the
     *            three-argument form of the command is executed instead
     * @return the captured standard output of the script
     */
    public static String execMongoImportScript(String filePath, String collection, String field, String cardType) {
        if (cardType == null || cardType.length() <= 0) {
            return execMongoImportScript(filePath, collection, field);
        }
        return execWithChannel(mongoImportCommand(filePath, collection, field) + " " + cardType);
    }

    /**
     * Runs the MongoDB card export / de-duplicate / import script.
     *
     * @param fileDir
     *            temporary output directory for the card data
     * @param host
     *            passed as the second script argument
     * @param db
     *            passed as the third script argument
     * @return the captured standard output of the script
     */
    public static String execMongoCardScript(String fileDir, String host, String db) {
        String cmds = "sh /home/lnt/shell/script/mongo_card_import.sh " + fileDir + " " + host + " " + db;
        return execWithChannel(cmds);
    }

    /**
     * Executes an arbitrary shell command line.
     *
     * @param cmds
     *            the shell command line
     * @return the captured standard output, one trimmed line per row, each
     *         followed by a newline
     */
    public static String execute(String cmds) {
        return execWithChannel(cmds);
    }

    /**
     * Checks a file for duplicated records with awk.
     *
     * <p>
     * The selected fields are combined with awk's multi-subscript form
     * ({@code record[$1,$2]}) so that the field boundaries are part of the key;
     * plain concatenation would treat ("ab","c") and ("a","bc") as the same key.
     *
     * @param filePath
     *            path of the original file
     * @param fields
     *            awk field numbers forming the duplicate key, e.g.
     *            {@code new String[]{"1","2"}}
     * @return the result file ({@code filePath + ".TMP"}); every original line
     *         is prefixed with a flag, 0 for first occurrence and 1 for a
     *         repetition, separated from the record by {@code &}
     * @throws IllegalArgumentException
     *             if {@code fields} is empty, because no valid awk key can be
     *             built
     */
    public static File repetitionCheck(String filePath, String[] fields) {
        if (fields.length == 0) {
            throw new IllegalArgumentException("fields must contain at least one field number");
        }
        StringBuilder key = new StringBuilder();
        for (int i = 0; i < fields.length; i++) {
            if (i > 0) {
                key.append(",");
            }
            key.append("$").append(fields[i]);
        }
        File srcFile = new File(filePath);
        File tmpFile = new File(filePath + ".TMP");
        StringBuilder cmds = new StringBuilder();
        cmds.append("awk '{if(record[").append(key).append("]==0){record[").append(key)
                .append("]++;flag=0;}else{flag=1;}print flag\"&\"$0}' ").append(srcFile).append(" > ").append(tmpFile);
        execWithChannel(cmds.toString());
        return tmpFile;
    }

    /**
     * Sorts a file by one column in ascending order.
     *
     * @param file
     *            path of the file
     * @param dist
     *            path to write the sorted output to; may be the source file
     * @param column
     *            the column to sort by
     */
    public static void sort(String file, String dist, int column) {
        sort(file, dist, new int[] { column }, false);
    }

    /**
     * Sorts a file by one column.
     *
     * @param file
     *            path of the file
     * @param dist
     *            path to write the sorted output to; may be the source file
     * @param column
     *            the column to sort by
     * @param desc
     *            whether to sort in descending order
     */
    public static void sort(String file, String dist, int column, boolean desc) {
        sort(file, dist, new int[] { column }, desc);
    }

    /**
     * Sorts a file by several columns, all in the same direction.
     *
     * @param file
     *            path of the file
     * @param dist
     *            path to write the sorted output to; may be the source file
     * @param columns
     *            the columns to sort by, in key order
     * @param desc
     *            whether to sort in descending order
     */
    public static void sort(String file, String dist, int[] columns, boolean desc) {
        StringBuilder cmds = new StringBuilder("sort");
        for (int column : columns) {
            appendSortKey(cmds, column, desc);
        }
        runSort(cmds, file, dist);
    }

    /**
     * Sorts a file by several columns, each with its own direction.
     *
     * @param file
     *            path of the file
     * @param dist
     *            path to write the sorted output to; may be the source file
     * @param columns
     *            column number mapped to {@code true} for descending order; a
     *            null value is treated as ascending. Keys are emitted in the
     *            map's iteration order, so pass an ordered map when the key
     *            priority matters.
     */
    public static void sort(String file, String dist, Map<Integer, Boolean> columns) {
        StringBuilder cmds = new StringBuilder("sort");
        for (Map.Entry<Integer, Boolean> entry : columns.entrySet()) {
            appendSortKey(cmds, entry.getKey(), Boolean.TRUE.equals(entry.getValue()));
        }
        runSort(cmds, file, dist);
    }

    /** Builds the three-argument MongoDB import command line. */
    private static String mongoImportCommand(String filePath, String collection, String field) {
        return MONGO_IMPORT_PREFIX + filePath + " " + collection + " " + field;
    }

    /** Appends one {@code -kN[r],N} key definition to a sort command. */
    private static void appendSortKey(StringBuilder cmds, Integer column, boolean desc) {
        cmds.append(" -k").append(column);
        if (desc) {
            cmds.append("r");
        }
        cmds.append(",").append(column);
    }

    /** Appends the input file and output target to a sort command and runs it. */
    private static void runSort(StringBuilder cmds, String file, String dist) {
        cmds.append(" ").append(file);
        if (dist != null && dist.length() > 0) {
            // Redirecting onto the input file would truncate it before sort
            // reads it, so sorting in place goes through sort's -o option.
            if (dist.equals(file)) {
                cmds.append(" -o ");
            } else {
                cmds.append(" > ");
            }
            cmds.append(dist);
        }
        logger.debug(">>>sort command>>>{}", cmds);
        execWithChannel(cmds.toString());
    }

    /**
     * Copies the standard output of a process into {@code sink}, one trimmed
     * line at a time, each followed by a newline. The stream is decoded with
     * the platform default charset, as before.
     */
    private static void appendStdout(Process process, StringBuilder sink) throws IOException {
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(process.getInputStream()))) {
            String line;
            while ((line = reader.readLine()) != null) {
                sink.append(line.trim()).append("\n");
            }
        }
    }

    /**
     * Starts a process whose standard error is inherited from the JVM and
     * whose standard input is closed immediately.
     */
    private static Process start(String... command) throws IOException {
        // An unread stderr pipe can fill up and block the child forever, so
        // stderr is inherited instead of piped.
        Process process = new ProcessBuilder(command).redirectError(ProcessBuilder.Redirect.INHERIT).start();
        // Nothing is ever written to the child; closing stdin makes a command
        // that falls back to reading stdin see end-of-file instead of hanging.
        process.getOutputStream().close();
        return process;
    }

    /**
     * Executes a command line through {@code /bin/sh -c}, so pipes and
     * redirections are supported.
     *
     * @param cmds
     *            the shell command line
     * @return the captured standard output; when an I/O error or an interrupt
     *         occurs, the failure marker 999999999999 is appended to whatever
     *         was captured so far
     */
    private static String execWithChannel(String cmds) {
        StringBuilder result = new StringBuilder();
        try {
            Process process = start("/bin/sh", "-c", cmds);
            appendStdout(process, result);
            int exitValue = process.waitFor();
            logger.info(">>>shell执行命令返回结果:exitValue = " + exitValue);
        } catch (IOException e) {
            logger.error(">>>shell执行命令异常>>>" + e.getMessage(), e);
            result.append(FAILURE_MARKER);
        } catch (InterruptedException e) {
            // Restore the interrupt flag so callers can still observe it.
            Thread.currentThread().interrupt();
            logger.error(">>>shell执行命令异常>>>" + e.getMessage(), e);
            result.append(FAILURE_MARKER);
        }
        logger.info(">>>shell执行命令返回结果>>>" + result.toString());
        return result.toString();
    }

    /**
     * Executes a command given as an argument vector, without a shell.
     * Not referenced by the other methods of this class.
     *
     * @param cmds
     *            the program and its arguments
     * @return the captured standard output; on an I/O error, whatever was
     *         captured before the failure
     */
    private static String exec(String[] cmds) {
        StringBuilder result = new StringBuilder();
        try {
            appendStdout(start(cmds), result);
        } catch (IOException e) {
            logger.error(">>>shell执行命令异常>>>" + e.getMessage(), e);
        }
        return result.toString();
    }
}
原创粉丝点击