合并hive仓库中小文件

来源:互联网 发布:天猫抢优惠券软件 编辑:程序博客网 时间:2024/06/15 14:08

使用lijie库下的test表做实验

1.查看hdfs下该表的文件存储:

hadoop dfs -ls /user/hive/warehouse/lijie.db/test

结果如下:
这里写图片描述

2.模拟小文件

insert into table test select * from test;

多次执行上面的语句(每次执行都会向表目录追加新的文件,从而产生大量小文件)。

这里写图片描述

3.合并

insert overwrite table test select * from test;

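没错,就是这一条语句搞定!insert overwrite 会把全表数据重新读出并覆盖写回表目录,写出的文件数通常远少于原来的小文件数(具体合并效果取决于 hive.merge.mapfiles、hive.merge.smallfiles.avgsize 等参数的设置)。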
如图:

这里写图片描述

4.如果用java程序合并小文件,可以参考下面的程序:

package com.lijie.hebingxiaowenjian;import java.net.URI;import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.fs.FSDataInputStream;import org.apache.hadoop.fs.FSDataOutputStream;import org.apache.hadoop.fs.FileStatus;import org.apache.hadoop.fs.FileSystem;import org.apache.hadoop.fs.FileUtil;import org.apache.hadoop.fs.LocalFileSystem;import org.apache.hadoop.fs.Path;import org.apache.hadoop.fs.PathFilter;import org.apache.hadoop.io.IOUtils;public class HeBing {    public static void main(String[] args) throws IllegalArgumentException, Exception {        uploadCom(new Path("F://hebing/*"), new Path("/hb"));    }    public static void uploadCom(Path src,Path dest) throws Exception{        Configuration conf = new Configuration();        URI uri = new URI("hdfs://lijie:9000");        FileSystem fs = FileSystem.get(uri, conf);        LocalFileSystem lfs = FileSystem.getLocal(conf);        FileStatus[] gs1 = lfs.globStatus(src, new NoFilter("^.*svn$"));        Path[] ps1 = FileUtil.stat2Paths(gs1);        FSDataInputStream in = null;        FSDataOutputStream out = null;        for (Path path : ps1) {            String name = path.getName().replaceAll("-", "");            FileStatus[] gs2 = lfs.globStatus(new Path(path+"/*"), new YesFilter("^.*txt$"));            Path[] ps2 = FileUtil.stat2Paths(gs2);            Path destNow = new Path(dest+"/"+name+".txt");            out = fs.create(destNow);            for (Path path2 : ps2) {                in = lfs.open(path2);                IOUtils.copyBytes(in, out, 4096, false);                in.close();            }            out.close();        }    }}class YesFilter implements PathFilter{    private String reg;    public YesFilter(String reg) {        super();        this.reg = reg;    }    @Override    public boolean accept(Path arg0) {        // TODO Auto-generated method stub        return arg0.toString().matches(reg);    }}class NoFilter implements PathFilter{    private String reg;    public 
NoFilter(String reg) {        super();        this.reg = reg;    }    @Override    public boolean accept(Path arg0) {        return !arg0.toString().matches(reg);    }}
0 0
原创粉丝点击