MapReduce生成HFile入库到HBase
来源:互联网 发布:win10cpu优化 编辑:程序博客网 时间:2024/05/01 09:28
原文参考:
http://shitouer.cn/2013/02/hbase-hfile-bulk-load/
可能需要依赖一些jar包,在这里下载:http://download.csdn.net/detail/q79969786/6933683
主要做了如下修改:
package com.upa.hbase;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat;
import org.apache.hadoop.hbase.mapreduce.KeyValueSortReducer;
import org.apache.hadoop.hbase.mapreduce.SimpleTotalOrderPartitioner;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

/**
 * MapReduce job that turns comma-separated text lines into HFiles suitable
 * for HBase bulk loading.
 *
 * <p>Each input line is expected to have at least four comma-separated
 * fields: rowkey, column family, qualifier, value. The job's output
 * directory is later fed to {@code LoadIncrementalHFiles} (see HFileLoader).
 */
public class HFileGenerator {

    /**
     * Maps one text line to a (rowkey, KeyValue) pair.
     * Lines with fewer than four fields are skipped rather than crashing
     * the task with an ArrayIndexOutOfBoundsException.
     */
    public static class HFileMapper extends
            Mapper<LongWritable, Text, ImmutableBytesWritable, KeyValue> {

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String line = value.toString();
            // -1 keeps trailing empty fields instead of discarding them.
            String[] items = line.split(",", -1);
            if (items.length < 4) {
                // Malformed record: not enough fields to build a KeyValue.
                return;
            }
            ImmutableBytesWritable rowkey = new ImmutableBytesWritable(
                    Bytes.toBytes(items[0]));
            // items: [0]=rowkey, [1]=column family, [2]=qualifier, [3]=value.
            KeyValue kv = new KeyValue(Bytes.toBytes(items[0]),
                    Bytes.toBytes(items[1]), Bytes.toBytes(items[2]),
                    System.currentTimeMillis(), Bytes.toBytes(items[3]));
            // Note: the original code guarded on (null != kv), but a freshly
            // constructed object can never be null, so the check was removed.
            context.write(rowkey, kv);
        }
    }

    /**
     * Entry point.
     *
     * @param args after generic-option parsing: args[0] = HDFS input path,
     *             args[1] = HDFS output path for the generated HFiles
     */
    public static void main(String[] args) throws IOException,
            InterruptedException, ClassNotFoundException {
        Configuration conf = new Configuration();
        String[] dfsArgs = new GenericOptionsParser(conf, args)
                .getRemainingArgs();

        Job job = new Job(conf, "HFile bulk load test");
        job.setJarByClass(HFileGenerator.class);

        job.setMapperClass(HFileMapper.class);
        // KeyValueSortReducer guarantees KeyValues are sorted within a row,
        // which HFileOutputFormat requires.
        job.setReducerClass(KeyValueSortReducer.class);

        job.setMapOutputKeyClass(ImmutableBytesWritable.class);
        job.setMapOutputValueClass(KeyValue.class);

        // Total-order partitioning so each reducer writes a disjoint,
        // sorted key range (one HFile region per reducer).
        job.setPartitionerClass(SimpleTotalOrderPartitioner.class);

        FileInputFormat.addInputPath(job, new Path(dfsArgs[0]));
        FileOutputFormat.setOutputPath(job, new Path(dfsArgs[1]));

        // Configures compression, block size and partitioning to match the
        // target table's current region boundaries.
        HFileOutputFormat.configureIncrementalLoad(job,
                ConnectionUtil.getTable());

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
增加了ConnectionUtil:
package com.upa.hbase;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HTable;

/**
 * Small helper that centralizes HBase connection setup for the
 * bulk-load job (see HFileGenerator / HFileLoader).
 */
public class ConnectionUtil {

    /** Hard-coded target table name used throughout the example. */
    private static final String TABLE_NAME = "test_hfile";

    // Utility class — not meant to be instantiated.
    private ConnectionUtil() {
    }

    /**
     * @return a fresh HBase configuration built from the classpath
     *         (hbase-site.xml etc.)
     */
    public static Configuration getConfiguration() {
        return HBaseConfiguration.create();
    }

    /**
     * Opens a handle to the target table.
     *
     * <p>The original version caught {@link IOException}, printed the stack
     * trace and returned {@code null}, which only deferred the failure to a
     * confusing NullPointerException in the caller. We now fail fast with
     * the original exception preserved as the cause.
     *
     * @return an open {@link HTable} for the bulk-load target table
     * @throws IllegalStateException if the table cannot be opened
     */
    public static HTable getTable() {
        try {
            return new HTable(getConfiguration(), TABLE_NAME);
        } catch (IOException e) {
            throw new IllegalStateException(
                    "Cannot open HBase table '" + TABLE_NAME + "'", e);
        }
    }
}
loader类没有修改:
package com.unionpay.upa.hbase;import org.apache.hadoop.fs.Path;import org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles;import org.apache.hadoop.util.GenericOptionsParser; public class HFileLoader { public static void main(String[] args) throws Exception { String[] dfsArgs = new GenericOptionsParser( ConnectionUtil.getConfiguration(), args).getRemainingArgs(); LoadIncrementalHFiles loader = new LoadIncrementalHFiles( ConnectionUtil.getConfiguration()); loader.doBulkLoad(new Path(dfsArgs[0]), ConnectionUtil.getTable()); } }
接下来需要创建一个hbase数据库:
hbase(main):025:0> create 'test_hfile', 't'
上传测试数据到HDFS:
$ hadoop fs -put test_hfile.txt test_hbase/test_hfile
格式大致是这样:
1,t,45,wer,fg,we5r,t,sdfsd,fsd,fwesr,t,wrwer,sdfsdf,sdf,sd,tfg,sd,werr,t,wer,wesdfdsf,sdfd,retrwer,t,werd546,fgh,g,df,ga,ds
接下来执行load:
$ hadoop jar etl.jar com.upa.hbase.HFileGenerator /user/hadoop/test_hbase/test_hfile /user/hadoop/test_hbase/hfile$ hadoop jar etl.jar com.upa.hbase.HFileLoader /user/hadoop/test_hbase/hfile
0 0
- MapReduce生成HFile入库到HBase
- MapReduce生成HFile入库到HBase
- MapReduce生成HFile入库到HBase
- MapReduce生成HFile入库到HBase
- MapReduce生成HFile入库到HBase
- MapReduce生成HFile入库到HBase
- MapReduce生成HFile入库到HBase及源码分析
- 生成HFile以及入库到HBase
- 生成HFile以及入库到HBase
- 生成HFile以及入库到HBase
- 生成HFile以及入库到HBase
- 生成HFile以及入库到HBase
- 生成HFile以及入库到HBase
- HBase-mapreduce生成hfile
- Hadoop生成HFile直接入库HBase心得
- spark生成HFile导入到hbase
- 非mapreduce生成Hfile,然后导入hbase当中
- 非mapreduce生成Hfile,然后导入hbase当中
- 妙手仁心—骨科专家郭永飞
- PCB快速打样规范
- 通过java书写日志文件,换行的实现
- phpize的安装
- 关于SimpleDateFormat安全的时间格式化线程安全问题
- MapReduce生成HFile入库到HBase
- Android 颜色渲染(七) RadialGradient 环形渲染实现水波纹效果
- HART1
- Linux环境下段错误的产生原因及调试方法小结
- 【设计模式原则】设计模式六大原则
- 【零碎JAVA】使用jar命令创建可执行的jar文件
- apache wampserver不能启动 显示服务器离线
- JAVA'Hello world"
- H.264协议