Building an HBase Secondary Index with MapReduce
The job is map-only: it scans the source table once and, for every indexed qualifier, emits a Put to a separate index table through MultiTableOutputFormat. Each index table's rowkey is the cell value from the source table, and its single column f1:id holds the source table's rowkey, so looking a value up in the index yields the original rowkey.

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.MultiTableOutputFormat;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.util.GenericOptionsParser;

public class IndexBuilder {

    // The mapper must be static so the framework can instantiate it by
    // reflection (the original declared it as a non-static inner class,
    // which a MapReduce task cannot construct).
    public static class MyMapper extends TableMapper<ImmutableBytesWritable, Put> {

        // key: indexed qualifier (column name); value: name of its index table
        private Map<byte[], ImmutableBytesWritable> indexes =
                new HashMap<byte[], ImmutableBytesWritable>();
        private String columnFamily;

        @Override
        protected void setup(Context context) throws IOException, InterruptedException {
            Configuration conf = context.getConfiguration();
            String tableName = conf.get("tableName");
            columnFamily = conf.get("columnFamily");
            String[] qualifiers = conf.getStrings("qualifiers");
            // one index table per qualifier, named <tableName>-<qualifier>
            for (String q : qualifiers) {
                indexes.put(Bytes.toBytes(q),
                        new ImmutableBytesWritable(Bytes.toBytes(tableName + "-" + q)));
            }
        }

        @Override
        protected void map(ImmutableBytesWritable key, Result value, Context context)
                throws IOException, InterruptedException {
            for (Map.Entry<byte[], ImmutableBytesWritable> e : indexes.entrySet()) {
                byte[] val = value.getValue(Bytes.toBytes(columnFamily), e.getKey());
                if (val == null) {
                    continue; // this row has no value for the indexed column
                }
                // the index table's rowkey is the cell value from the source table
                Put put = new Put(val);
                // the index table's content is the rowkey of the source table
                put.add(Bytes.toBytes("f1"), Bytes.toBytes("id"), key.get());
                context.write(e.getValue(), put);
            }
        }
    }

    public static void main(String[] args)
            throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = HBaseConfiguration.create();
        // strip generic Hadoop options (-D, -files, ...) from the command line
        String[] otherargs = new GenericOptionsParser(conf, args).getRemainingArgs();
        // expected arguments: table name, column family, one or more qualifiers
        if (otherargs.length < 3) {
            System.err.println("Usage: IndexBuilder <tableName> <columnFamily> <qualifier> [<qualifier>...]");
            System.exit(-1);
        }
        String tableName = otherargs[0];
        String columnFamily = otherargs[1];
        conf.set("tableName", tableName);
        conf.set("columnFamily", columnFamily);
        String[] qualifiers = new String[otherargs.length - 2];
        for (int i = 0; i < qualifiers.length; i++) {
            qualifiers[i] = otherargs[i + 2];
        }
        conf.setStrings("qualifiers", qualifiers);

        Job job = new Job(conf, tableName);
        job.setJarByClass(IndexBuilder.class);
        job.setMapperClass(MyMapper.class);
        job.setNumReduceTasks(0); // map-only: the mapper writes the index Puts directly
        // MultiTableOutputFormat lets a single job write to several index tables
        job.setOutputFormatClass(MultiTableOutputFormat.class);

        Scan scan = new Scan();
        scan.setCaching(1000); // fetch 1000 rows per RPC to speed up the full scan
        TableMapReduceUtil.initTableMapperJob(tableName, scan, MyMapper.class,
                ImmutableBytesWritable.class, Put.class, job);
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
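MultiTableOutputFormat writes into existing tables; it does not create them. So each index table <tableName>-<qualifier>, with the f1 family the mapper writes to, has to exist before the job is submitted. A minimal sketch using the same-era HBaseAdmin API (the helper class name and its argument convention are assumptions, not part of the original post):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.client.HBaseAdmin;

public class CreateIndexTables {
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        String tableName = args[0]; // source table, same first argument as IndexBuilder
        HBaseAdmin admin = new HBaseAdmin(conf);
        // one index table per indexed qualifier, named <tableName>-<qualifier>
        for (int i = 1; i < args.length; i++) {
            String indexTable = tableName + "-" + args[i];
            if (!admin.tableExists(indexTable)) {
                HTableDescriptor desc = new HTableDescriptor(indexTable);
                // "f1" is the family IndexBuilder's mapper writes the "id" column into
                desc.addFamily(new HColumnDescriptor("f1"));
                admin.createTable(desc);
            }
        }
        admin.close();
    }
}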
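Once the job has run, a point query goes through the index first: read the index row keyed by the column value, recover the source rowkey from f1:id, then fetch the full row from the source table. A sketch assuming a source table named student indexed on the qualifier name (both names are hypothetical, chosen only for illustration):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.util.Bytes;

public class IndexLookup {
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        HTable index = new HTable(conf, "student-name"); // hypothetical index table
        HTable data = new HTable(conf, "student");       // hypothetical source table

        // 1. index lookup: the index table's rowkey is the indexed column value
        Result idxRow = index.get(new Get(Bytes.toBytes("Alice")));
        byte[] sourceRowkey = idxRow.getValue(Bytes.toBytes("f1"), Bytes.toBytes("id"));

        // 2. fetch the real row from the source table by the recovered rowkey
        if (sourceRowkey != null) {
            System.out.println(data.get(new Get(sourceRowkey)));
        }
        index.close();
        data.close();
    }
}

Note one design limitation: because the index rowkey is the raw column value, rows that share a value overwrite each other in the index (a default Get returns only the latest Put). Schemes that need duplicates typically append the source rowkey to the index key and scan by prefix instead.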
This article is from the "点滴积累" blog; please retain this attribution: http://tianxingzhe.blog.51cto.com/3390077/1699774