使用 MapReduce 构建 HBase 二级索引

来源:互联网 发布:windows无法格式化 编辑:程序博客网 时间:2024/05/22 07:51
import java.io.IOException;import java.util.HashMap;import java.util.Map;import java.util.Set;import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.hbase.HBaseConfiguration;import org.apache.hadoop.hbase.client.Put;import org.apache.hadoop.hbase.client.Result;import org.apache.hadoop.hbase.client.Scan;import org.apache.hadoop.hbase.io.ImmutableBytesWritable;import org.apache.hadoop.hbase.mapreduce.MultiTableOutputFormat;import org.apache.hadoop.hbase.mapreduce.TableInputFormat;import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;import org.apache.hadoop.hbase.mapreduce.TableMapper;import org.apache.hadoop.hbase.util.Bytes;import org.apache.hadoop.mapreduce.Job;import org.apache.hadoop.util.GenericOptionsParser;public class IndexBuilder {    private class MyMapper extends TableMapper<ImmutableBytesWritable, Put> {        private Map<byte[], ImmutableBytesWritable> indexes = new HashMap<byte[], ImmutableBytesWritable>();        private String columnFamily;        @Override        protected void map(ImmutableBytesWritable key, Result value,                Context context) throws IOException, InterruptedException {            Set<byte[]> keys = indexes.keySet();            for (byte[] k : keys) {                ImmutableBytesWritable indexTableName = indexes.get(k);                byte[] val = value.getValue(Bytes.toBytes(columnFamily), k);                Put put = new Put(val);// 索引表的rowkey为原始表的值                put.add(Bytes.toBytes("f1"), Bytes.toBytes("id"), key.get());// 索引表的内容为原始表的rowkey                context.write(indexTableName, put);            }        }        @Override        protected void setup(Context context) throws IOException,                InterruptedException {            Configuration conf = context.getConfiguration();            String tableName = conf.get("tableName");            columnFamily = conf.get("columnFamily");            String[] qualifiers = conf.getStrings("qualifiers");            // indexes的key为列名,value为索引表名   
         for (String q : qualifiers) {                indexes.put(                        Bytes.toBytes(q),                        new ImmutableBytesWritable(Bytes.toBytes(tableName                                + "-" + q)));            }        }    }    public static void main(String[] args) throws IOException,            ClassNotFoundException, InterruptedException {        Configuration conf = HBaseConfiguration.create();        String[] otherargs = new GenericOptionsParser(conf, args)                .getRemainingArgs();// 去除掉没有用的命令行参数        // 输入参数:表名,列族名,列名        if (otherargs.length < 3) {            System.exit(-1);        }        String tableName = otherargs[0];        String columnFamily = otherargs[1];        conf.set("tableName", tableName);        conf.set("columnFamily", columnFamily);        String[] qualifiers = new String[otherargs.length - 2];        for (int i = 0; i < qualifiers.length; i++) {            qualifiers[i] = otherargs[i + 2];        }        conf.setStrings("qualifiers", qualifiers);        Job job = new Job(conf, tableName);        job.setJarByClass(IndexBuilder.class);        job.setMapperClass(MyMapper.class);        job.setNumReduceTasks(0);        job.setInputFormatClass(TableInputFormat.class);        // 可以输出多张表        job.setOutputFormatClass(MultiTableOutputFormat.class);        Scan scan = new Scan();        scan.setCaching(1000);        TableMapReduceUtil.initTableMapperJob(tableName, scan, MyMapper.class,                ImmutableBytesWritable.class, Put.class, job);        job.waitForCompletion(true);    }}


本文出自 “点滴积累” 博客,请务必保留此出处http://tianxingzhe.blog.51cto.com/3390077/1699774

0 0