Writing Data Directly from HDFS into HBase in the Hadoop Mapper Phase


The source data has the following format:

20130512   1   -1   -1   13802   1   2013-05-12 07:26:22
20130512   1   -1   -1   13802   1   2013-05-12 11:18:24


The goal is a map-only job: data is read directly from HDFS and written into HBase, with no reduce phase.

The code is as follows:

package WebsiteAnalysis;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableOutputFormat;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

public class Map2Hdfs {
    public static final String NAME = "ImportFromFile";

    public enum Counters {
        LINES
    }

    static class ImportMapper extends Mapper<LongWritable, Text, ImmutableBytesWritable, Writable> {
        private byte[] family = null;
        private byte[] qualifier = null;

        @Override
        protected void setup(Context context) throws IOException, InterruptedException {
            // Parse the target column from the job configuration, e.g. "cf" or "cf:qual".
            String column = context.getConfiguration().get("conf.column");
            byte[][] colkey = KeyValue.parseColumn(Bytes.toBytes(column));
            family = colkey[0];
            if (colkey.length > 1) {
                // Parsed but not used below: map() always writes to the "time" qualifier.
                qualifier = colkey[1];
            }
        }

        @Override
        public void map(LongWritable offset, Text line, Context context) throws IOException {
            try {
                String[] lineArr = line.toString().split("\t");
                // Use the byte offset of the line within the input file as the row key.
                Put put = new Put(Bytes.toBytes(offset + ""));
                // Store the last field of the record (the timestamp) under family:time.
                put.add(family, Bytes.toBytes("time"), Bytes.toBytes(lineArr[lineArr.length - 1]));
                context.write(new ImmutableBytesWritable(Bytes.toBytes(offset + "")), put);
                context.getCounter(Counters.LINES).increment(1);
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        conf.set("conf.column", "cf");
        String inputPath = "/dsap/middata/lj/ooxx/pv";
        Job job = new Job(conf, "TestMap2Hdfs");

        job.setJarByClass(Map2Hdfs.class);
        job.setMapperClass(ImportMapper.class);
        // Send mapper output straight into the HBase table "TestMap2Hdfs".
        job.setOutputFormatClass(TableOutputFormat.class);
        job.getConfiguration().set(TableOutputFormat.OUTPUT_TABLE, "TestMap2Hdfs");
        job.setOutputKeyClass(ImmutableBytesWritable.class);
        job.setOutputValueClass(Writable.class);
        // Map-only job: no reduce phase.
        job.setNumReduceTasks(0);
        FileInputFormat.addInputPath(job, new Path(inputPath + "/" + otherArgs[0]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
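Two practical notes on the code above. TableOutputFormat does not create the target table, so TestMap2Hdfs with column family cf must already exist before the job is submitted (for example, create 'TestMap2Hdfs', 'cf' in the HBase shell). Also, because the row key is the byte offset of each line within its input file, lines at the same offset in different input files map to the same row key and overwrite each other; for multi-file loads, a key derived from the record contents is safer.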
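As an aside, the StackOverflow thread in the references below wires up the HBase output side with TableMapReduceUtil instead of configuring TableOutputFormat by hand. Below is a minimal sketch of that variant, reusing the ImportMapper above; the class name Map2HdfsAlt and taking the input path from the command line are assumptions for illustration, not part of the original post.

package WebsiteAnalysis;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

public class Map2HdfsAlt {
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        conf.set("conf.column", "cf"); // consumed by ImportMapper.setup()
        Job job = new Job(conf, "TestMap2Hdfs");
        job.setJarByClass(Map2HdfsAlt.class);
        job.setMapperClass(Map2Hdfs.ImportMapper.class);
        // Configures TableOutputFormat, the output table, and the output
        // key/value classes in one call; a null reducer class means the
        // mapper output goes straight to the table.
        TableMapReduceUtil.initTableReducerJob("TestMap2Hdfs", null, job);
        job.setNumReduceTasks(0); // map-only, no reduce phase
        FileInputFormat.addInputPath(job, new Path(args[0])); // input path from the command line (assumption)
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}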

REF:


http://stackoverflow.com/questions/11061854/hadoop-writing-to-hbase-directly-from-the-mapper

http://blog.sina.com.cn/s/blog_62a9902f0101904h.html  (writing into a table created in code)

http://blog.pureisle.net/archives/1938.html  (a summary of the various HBase/HDFS MapReduce read/write combinations)

http://blog.csdn.net/kirayuan/article/details/7001278  (sample code for copying an HBase table)

