MapReduce从HBase多路径导出数据到Hive (MapReduce: exporting data from HBase to Hive via multiple output paths)

来源:互联网 发布:淘宝联盟怎么身份认证 编辑:程序博客网 时间:2024/04/27 21:20
package com.zz.hbase.ccrc;

import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

import com.zz.hive.Constants;
import com.zz.util.DateUtil;

/**
 * MapReduce job that exports rows from the HBase table {@code "Fetch"} into
 * per-event-type text files on HDFS, ready for loading into Hive.
 *
 * <p>Each HBase row carries an {@code event} column naming the logical target
 * table; the mapper renders the remaining columns of that row as one
 * delimiter-separated line keyed by the table name, and the reducer writes
 * each table's lines into its own output directory via {@link MultipleOutputs}.
 */
public class HBaseToHiveJob {

    /** Column family that holds all exported columns. */
    private static final byte[] FAMILY = Bytes.toBytes("aaaa");
    /** Qualifier whose cell value names the logical target table. */
    private static final String EVENT_QUALIFIER = "event";
    /** Placeholder emitted for columns absent from a row. */
    private static final String MISSING_VALUE = "-";

    public static class HBaseToHiveMapper extends TableMapper<Text, Text> {

        // Reused across map() calls to avoid per-record allocations.
        private final Text outKey = new Text();
        private final Text outValue = new Text();

        /**
         * Turns one HBase row into a single delimiter-separated line keyed by
         * the row's {@code event} value (the logical table name). Rows whose
         * event type has no known column layout are dropped.
         */
        @Override
        protected void map(ImmutableBytesWritable key, Result value, Context context)
                throws IOException, InterruptedException {
            String tableName = "";
            Map<String, String> columns = new HashMap<String, String>();
            for (Entry<byte[], byte[]> entry : value.getFamilyMap(FAMILY).entrySet()) {
                // The null check must be on the raw cell bytes:
                // new String(...) can never return null, so the original
                // post-construction check was dead code.
                if (entry.getValue() == null) {
                    continue;
                }
                // Decode with an explicit charset instead of the platform default.
                String qualifier = new String(entry.getKey(), StandardCharsets.UTF_8);
                String cellValue = new String(entry.getValue(), StandardCharsets.UTF_8);
                if (EVENT_QUALIFIER.equals(qualifier)) {
                    tableName = cellValue;
                } else {
                    columns.put(qualifier, cellValue);
                }
            }
            String line = buildLine(tableName, columns);
            // Unrecognized event types would previously be written as empty
            // lines, polluting the per-table output files — skip them instead.
            if (line.isEmpty()) {
                return;
            }
            outKey.set(tableName);
            outValue.set(line);
            context.write(outKey, outValue);
        }

        /**
         * Selects the column layout for the given event/table name and renders
         * the row accordingly.
         *
         * @param tableName logical table name from the {@code event} column
         * @param columns   qualifier → value pairs of the remaining columns
         * @return the rendered line, or {@code ""} for unknown table names
         */
        private String buildLine(String tableName, Map<String, String> columns) {
            if ("Summary".equals(tableName)) {
                // Pre-defined column order for the Summary table; additional
                // event types would each plug in their own column list here.
                return join(com.zz.option.Constants.SUMMARY_COLUMN, columns);
            }
            return "";
        }

        /**
         * Joins the row's values in the given column order, separated by
         * {@code Constants.BLANK_CHAR}, substituting "-" for missing columns.
         *
         * @param columnNames ordered column qualifiers expected in the output
         * @param columns     qualifier → value pairs read from the row
         * @return the joined line, or {@code ""} if there are no columns
         */
        private String join(List<String> columnNames, Map<String, String> columns) {
            // Guard: the original deleteCharAt(length - 1) threw
            // StringIndexOutOfBoundsException on an empty column list.
            if (columnNames == null || columnNames.isEmpty()) {
                return "";
            }
            StringBuilder sb = new StringBuilder();
            for (String name : columnNames) {
                String v = columns.get(name);
                sb.append(v != null ? v : MISSING_VALUE);
                sb.append(Constants.BLANK_CHAR);
            }
            // Drop the trailing separator.
            sb.deleteCharAt(sb.length() - 1);
            return sb.toString();
        }
    }

    public static class HBaseToHiveReducer extends Reducer<Text, Text, NullWritable, Text> {

        private MultipleOutputs<NullWritable, Text> out;

        @Override
        protected void setup(Context context) {
            out = new MultipleOutputs<NullWritable, Text>(context);
        }

        /**
         * Writes every line under base path {@code "<tableName>/<tableName>"}
         * so each logical table lands in its own output subdirectory.
         */
        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            String tableName = key.toString();
            String basePath = tableName + "/" + tableName;
            for (Text val : values) {
                // write() serializes immediately, so no defensive copy of
                // the reused Text instance is needed.
                out.write(NullWritable.get(), val, basePath);
            }
        }

        @Override
        protected void cleanup(Context context) throws IOException, InterruptedException {
            // MultipleOutputs buffers records; failing to close it loses data.
            out.close();
        }
    }

    /**
     * Configures and runs the export job.
     *
     * @param args args[0] is the HDFS output directory (deleted if it exists)
     */
    public static void main(String[] args)
            throws IOException, ClassNotFoundException, InterruptedException {
        if (args.length < 1) {
            System.err.println("Usage: HBaseToHiveJob <output path>");
            System.exit(2);
        }
        Configuration conf = HBaseConfiguration.create();
        Job job = Job.getInstance(conf, "Fetch HBase2Hive" + DateUtil.getNowTime());
        job.setJarByClass(HBaseToHiveJob.class);

        Scan scan = new Scan();
        scan.setCaching(500);       // larger scanner batches for MR throughput
        scan.setCacheBlocks(false); // a full scan must not evict the block cache
        scan.addFamily(FAMILY);

        Path output = new Path(args[0]);
        FileSystem fileSystem = output.getFileSystem(conf);
        // Remove a stale output directory so the job can be re-run.
        fileSystem.delete(output, true);

        TableMapReduceUtil.initTableMapperJob(
                "Fetch", scan, HBaseToHiveMapper.class, Text.class, Text.class, job, false);
        job.setReducerClass(HBaseToHiveReducer.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        TextOutputFormat.setOutputPath(job, output);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(Text.class);
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
1 0
原创粉丝点击