Writing a MapReduce Job to Migrate Data Between HBase Tables


Requirement: migrate the data in the HBase table user, column family info, columns name and age, into another table named basic.
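Before the job can run, the target table basic must already exist with an info column family, because the Puts written by the job keep the cells' original family. A minimal sketch of creating it with the HBase 1.x Admin API (the class name CreateBasicTable is only for illustration):

package com.bpf.hbase;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;

public class CreateBasicTable {
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        // open a connection and create "basic" with an "info" family if it is missing
        try (Connection conn = ConnectionFactory.createConnection(conf);
             Admin admin = conn.getAdmin()) {
            TableName name = TableName.valueOf("basic");
            if (!admin.tableExists(name)) {
                HTableDescriptor desc = new HTableDescriptor(name);
                desc.addFamily(new HColumnDescriptor("info"));
                admin.createTable(desc);
            }
        }
    }
}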

The complete code is as follows:

package com.bpf.hbase;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class User2Basic extends Configured implements Tool {

    // Mapper class: reads rows from the source table "user"
    public static class ReadUserMapper extends TableMapper<Text, Put> {

        private Text mapOutputKey = new Text();

        @Override
        public void map(ImmutableBytesWritable key, Result value, Context context)
                throws IOException, InterruptedException {
            // get rowkey
            String rowkey = Bytes.toString(key.get());
            mapOutputKey.set(rowkey);

            Put put = new Put(key.get());
            // iterate over the cells of this row
            for (Cell cell : value.rawCells()) {
                // family: info
                if ("info".equals(Bytes.toString(CellUtil.cloneFamily(cell)))) {
                    // column: name
                    if ("name".equals(Bytes.toString(CellUtil.cloneQualifier(cell)))) {
                        put.add(cell);
                    }
                    // column: age
                    if ("age".equals(Bytes.toString(CellUtil.cloneQualifier(cell)))) {
                        put.add(cell);
                    }
                }
            }

            context.write(mapOutputKey, put);
        }
    }

    // Reducer class: writes the Puts into the target table "basic"
    public static class WriteBasicReducer extends TableReducer<Text, Put, ImmutableBytesWritable> {

        @Override
        protected void reduce(Text key, Iterable<Put> values, Context context)
                throws IOException, InterruptedException {
            for (Put put : values) {
                context.write(null, put);
            }
        }
    }

    // driver
    @Override
    public int run(String[] args) throws Exception {
        // create job
        Job job = Job.getInstance(this.getConf(), this.getClass().getSimpleName());

        // set run job class
        job.setJarByClass(this.getClass());

        // configure the scan over the source table
        Scan scan = new Scan();
        scan.setCaching(500);        // 1 is the default in Scan, which will be bad for MapReduce jobs
        scan.setCacheBlocks(false);  // don't set to true for MR jobs
        // set other scan attrs

        // set input table and mapper
        TableMapReduceUtil.initTableMapperJob(
                "user",                // input table
                scan,                  // Scan instance to control CF and attribute selection
                ReadUserMapper.class,  // mapper class
                Text.class,            // mapper output key
                Put.class,             // mapper output value
                job);

        // set output table and reducer
        TableMapReduceUtil.initTableReducerJob(
                "basic",                  // output table
                WriteBasicReducer.class,  // reducer class
                job);

        job.setNumReduceTasks(1);  // at least one, adjust as required

        boolean isSuccess = job.waitForCompletion(true);
        return isSuccess ? 0 : 1;
    }

    public static void main(String[] args) throws Exception {
        // get configuration
        Configuration conf = HBaseConfiguration.create();

        // submit job
        int status = ToolRunner.run(conf, new User2Basic(), args);

        // exit program
        System.exit(status);
    }
}
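One design note: WriteBasicReducer does nothing but forward each Put, so the shuffle and reduce phase add no value here. Following the read/write example in the HBase reference guide, the same migration can run as a map-only job. A sketch (it reuses the imports of User2Basic above; the guard against empty Puts is an addition, since writing a Put with no cells fails):

    // Map-only variant: the mapper emits (ImmutableBytesWritable, Put) directly
    public static class ReadUserMapper2 extends TableMapper<ImmutableBytesWritable, Put> {

        @Override
        public void map(ImmutableBytesWritable key, Result value, Context context)
                throws IOException, InterruptedException {
            Put put = new Put(key.get());
            for (Cell cell : value.rawCells()) {
                if ("info".equals(Bytes.toString(CellUtil.cloneFamily(cell)))) {
                    String qualifier = Bytes.toString(CellUtil.cloneQualifier(cell));
                    if ("name".equals(qualifier) || "age".equals(qualifier)) {
                        put.add(cell);
                    }
                }
            }
            // skip rows that have neither name nor age, to avoid writing an empty Put
            if (!put.isEmpty()) {
                context.write(key, put);
            }
        }
    }

    // in run(), replace the mapper/reducer setup with:
    // TableMapReduceUtil.initTableMapperJob("user", scan, ReadUserMapper2.class,
    //         ImmutableBytesWritable.class, Put.class, job);
    // TableMapReduceUtil.initTableReducerJob("basic", null, job);  // no reducer class
    // job.setNumReduceTasks(0);                                    // map-only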
After packaging the project into a jar (there is no need to bundle the dependency jars into it), upload it to the Hadoop environment and run it.
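For example, the job could be launched with something like the following (a sketch: user2basic.jar is a placeholder jar name; the hbase classpath command prints the HBase client classpath, which supplies at runtime the jars that were not bundled):

HADOOP_CLASSPATH=$(hbase classpath) hadoop jar user2basic.jar com.bpf.hbase.User2Basic

Once the job finishes, the migrated rows can be checked in the HBase shell with scan 'basic'.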