HBase Notes: HBase as an Output Sink


Overview: using HBase as an output sink means reading data from some other storage medium, processing it with MapReduce, and writing the results into HBase.


The code is shown directly below.


  • Main program (the driver)
package apache.org.myhbase.asoutput;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableOutputFormat;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

/**
 * Driver for HBaseASOutPutMR: configures and submits the job.
 * @author 13277
 */
public class HBaseMR {
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        // Tell TableOutputFormat which HBase table to write to.
        conf.set(TableOutputFormat.OUTPUT_TABLE, "access-log");
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

        Job job = Job.getInstance(conf, "Hbase_Mr");
        job.setNumReduceTasks(0);            // map-only job: the Mapper writes Puts directly
        job.setJarByClass(HBaseMR.class);
        job.setMapperClass(HBaseASOutPutMR.class);
        job.setOutputKeyClass(ImmutableBytesWritable.class);
        job.setOutputValueClass(Put.class);
        job.setOutputFormatClass(TableOutputFormat.class);
        FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
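Note that the target table must already exist before the job runs; TableOutputFormat writes into it but does not create it. A one-time setup in the HBase shell, matching the column family the Mapper below uses:

    create 'access-log', 'info'

The driver can then be submitted as usual; the jar name and input path here are placeholders, not values from the original article:

    hadoop jar myhbase.jar apache.org.myhbase.asoutput.HBaseMR /logs/access-log.txt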

  • The Mapper configured in the main program
package apache.org.myhbase.asoutput;

import java.io.IOException;

import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

/**
 * Reads lines from HDFS and parses each one into a record to insert into HBase.
 * @author 13277
 */
public class HBaseASOutPutMR extends Mapper<LongWritable, Text, ImmutableBytesWritable, Put> {
    @Override
    public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String[] strs = value.toString().split(" ");
        // Row key: client address plus the timestamp with its leading '[' stripped.
        String rowkey = strs[0] + "-" + strs[3].substring(1);
        byte[] row = Bytes.toBytes(rowkey);
        byte[] family = Bytes.toBytes("info");
        byte[] qualifier = Bytes.toBytes("url");
        byte[] values = Bytes.toBytes(strs[6]);
        Put put = new Put(row);
        // Deprecated since HBase 1.0; on newer releases use put.addColumn(family, qualifier, values).
        put.add(family, qualifier, values);
        context.write(new ImmutableBytesWritable(row), put);
    }
}
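The split logic assumes space-separated access-log lines in which field 0 is the client address, field 3 the bracketed timestamp, and field 6 the requested URL. The article shows no sample input, so the standalone sketch below uses a hypothetical Apache-common-log-style line to illustrate what the Mapper extracts:

    public class ParseDemo {
        public static void main(String[] args) {
            // Hypothetical input line; the real log format is an assumption.
            String line = "127.0.0.1 - - [20/May/2024:13:37:00 +0800] \"GET /index.html HTTP/1.1\" 200 1024";
            String[] strs = line.split(" ");
            String rowkey = strs[0] + "-" + strs[3].substring(1);
            // Prints: 127.0.0.1-20/May/2024:13:37:00 -> info:url=/index.html
            System.out.println(rowkey + " -> info:url=" + strs[6]);
        }
    }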

PS:
In the job configuration, FileInputFormat handles reading the input from HDFS; the output side needs four settings: the output format (TableOutputFormat), the output table name, and the classes of the output key (ImmutableBytesWritable) and value (Put).
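Setting those four items by hand is exactly what the driver above does. HBase also ships a helper, TableMapReduceUtil, that performs the same output-side wiring in one call and additionally ships the HBase jars with the job. A minimal sketch of the equivalent driver fragment:

    import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;

    // Equivalent to the four manual settings above: configures TableOutputFormat,
    // the target table "access-log", and the output key/value classes.
    TableMapReduceUtil.initTableReducerJob("access-log", null, job); // null reducer: map-only job
    job.setNumReduceTasks(0);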
