Generating HFiles for HBase with MapReduce

The driver below reads tab-separated text from HDFS (one "rowkey&lt;TAB&gt;value" pair per line) and uses HFileOutputFormat2 to write HFiles that are partitioned and sorted to match the regions of the target table:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat2;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class BulkLoadDriver extends Configured implements Tool {

    public static void main(String[] args) {
        try {
            int response = ToolRunner.run(HBaseConfiguration.create(), new BulkLoadDriver(), args);
            if (response == 0) {
                System.out.println("Job is successfully completed...");
            } else {
                System.out.println("Job failed...");
            }
        } catch (Exception exception) {
            exception.printStackTrace();
        }
    }

    // The mapper must be a static nested class so MapReduce can instantiate it by reflection.
    public static class BulkLoadMapper extends Mapper<LongWritable, Text, ImmutableBytesWritable, KeyValue> {
        @Override
        public void map(LongWritable key, Text value, Context context) {
            try {
                // Each input line is "rowkey<TAB>value"
                String[] values = value.toString().split("\t");
                byte[] row = Bytes.toBytes(values[0]);
                byte[] hbaseVal = Bytes.toBytes(values[1]);
                ImmutableBytesWritable rowKey = new ImmutableBytesWritable(row);
                // Column family "C1", qualifier "c"
                KeyValue kv = new KeyValue(row, Bytes.toBytes("C1"), Bytes.toBytes("c"), hbaseVal);
                context.write(rowKey, kv);
            } catch (Exception exception) {
                exception.printStackTrace();
            }
        }
    }

    public int run(String[] args) throws Exception {
        String inputPath = args[0];
        String outputPath = args[1];
        String tablename = args[2];

        /**
         * Set up the job parameters
         */
        Configuration configuration = HBaseConfiguration.create();
        configuration.set("hbase.zookeeper.quorum", "");              // fill in your ZooKeeper quorum
        configuration.set("hbase.zookeeper.property.clientPort", "2181");

        Job job = Job.getInstance(configuration, "HFile_for_HBase_Table:" + tablename);
        job.setJarByClass(BulkLoadDriver.class);
        job.setInputFormatClass(TextInputFormat.class);
        job.setMapOutputKeyClass(ImmutableBytesWritable.class);       // map output key class
        job.setMapOutputValueClass(KeyValue.class);                   // map output value class
        job.setMapperClass(BulkLoadMapper.class);                     // map function

        FileInputFormat.addInputPaths(job, inputPath);                // input path
        FileSystem fs = FileSystem.get(configuration);
        Path output = new Path(outputPath);
        if (fs.exists(output)) {
            fs.delete(output, true);                                  // delete the output path if it already exists
        }
        FileOutputFormat.setOutputPath(job, output);                  // output path

        HTable table = new HTable(configuration, tablename);
        // Configures the partitioner and total-order sort so the HFiles line up with the table's regions
        HFileOutputFormat2.configureIncrementalLoad(job, table);

        boolean res = job.waitForCompletion(true);
        return res ? 0 : 1;
    }
}
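The job above only writes HFiles into the output directory; they still have to be handed over to HBase in a second step. Below is a minimal sketch of that step, assuming the same HBase 1.x-era client API that the driver uses (HTable and LoadIncrementalHFiles); the class name and argument order here are illustrative and not taken from the original post.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles;

// Hypothetical helper class (not part of the original post): loads the HFiles
// produced by BulkLoadDriver into the target table.
public class CompleteBulkLoad {
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        // args[0]: HDFS directory that BulkLoadDriver wrote HFiles to
        // args[1]: target table name
        LoadIncrementalHFiles loader = new LoadIncrementalHFiles(conf);
        HTable table = new HTable(conf, args[1]);
        try {
            // Moves the HFiles into the table's region directories (bulk load)
            loader.doBulkLoad(new Path(args[0]), table);
        } finally {
            table.close();
        }
    }
}

Equivalently, the completebulkload tool shipped with HBase (org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles) can be run from the command line with the HFile directory and the table name as arguments.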