Loading data from HDFS into HBase with MapReduce
package hbase;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.Mutation;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.mapreduce.TableOutputFormat;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.Date;

/**
 * Created by Administrator on 2017/3/7.
 */
public class LoadData extends Configured {

    public static class LoadDataMapper extends Mapper<LongWritable, Text, LongWritable, Text> {
        private Text out = new Text();
        SimpleDateFormat simpleDateFormat = new SimpleDateFormat("yyyyMMddHHmmss");

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // Sample input line:
            // 1363157985066 13726230503 00-FD-07-A4-72-B8:CMCC 120.196.100.82 i02.c.aliimg.com 24 27 2481 24681 200
            String line = value.toString();
            String[] splited = line.split("\t");
            // Build the row key as msisdn:formattedTimestamp
            String formatedDate = simpleDateFormat.format(new Date(Long.parseLong(splited[0].trim())));
            String rowKeyString = splited[1] + ":" + formatedDate;
            out.set(rowKeyString + "\t" + line);
            // Emitted value, e.g.:
            // 13726230503:201706291728 1363157985066 13726230503 00-FD-07-A4-72-B8:CMCC 120.196.100.82 i02.c.aliimg.com 24 27 2481 24681 200
            context.write(key, out);
        }
    }

    public static class LoadDataReducer extends TableReducer<LongWritable, Text, NullWritable> {
        public static final String COLUMN_FAMILY = "cf";

        @Override
        protected void reduce(LongWritable key, Iterable<Text> values,
                Reducer<LongWritable, Text, NullWritable, Mutation>.Context context)
                throws IOException, InterruptedException {
            for (Text tx : values) {
                String[] splited = tx.toString().split("\t");
                String rowkey = splited[0];
                Put put = new Put(rowkey.getBytes());
                // put.add(COLUMN_FAMILY.getBytes(), "raw".getBytes(), tx.toString().getBytes());
                put.add(COLUMN_FAMILY.getBytes(), "reportTime".getBytes(), splited[1].getBytes());
                put.add(COLUMN_FAMILY.getBytes(), "msisdn".getBytes(), splited[2].getBytes());
                put.add(COLUMN_FAMILY.getBytes(), "apmac".getBytes(), splited[3].getBytes());
                put.add(COLUMN_FAMILY.getBytes(), "acmac".getBytes(), splited[4].getBytes());
                put.add(COLUMN_FAMILY.getBytes(), "host".getBytes(), splited[5].getBytes());
                put.add(COLUMN_FAMILY.getBytes(), "siteType".getBytes(), splited[6].getBytes());
                put.add(COLUMN_FAMILY.getBytes(), "upPackNum".getBytes(), splited[7].getBytes());
                put.add(COLUMN_FAMILY.getBytes(), "downPackNum".getBytes(), splited[8].getBytes());
                put.add(COLUMN_FAMILY.getBytes(), "upPayLoad".getBytes(), splited[9].getBytes());
                put.add(COLUMN_FAMILY.getBytes(), "downPayLoad".getBytes(), splited[10].getBytes());
                put.add(COLUMN_FAMILY.getBytes(), "httpStatus".getBytes(), splited[11].getBytes());
                context.write(NullWritable.get(), put);
            }
        }
    }

    public static void createHBaseTable(String tableName) throws IOException {
        HTableDescriptor htd = new HTableDescriptor(TableName.valueOf(tableName));
        HColumnDescriptor col = new HColumnDescriptor("cf");
        htd.addFamily(col);
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "hive01");
        HBaseAdmin admin = new HBaseAdmin(conf);
        if (admin.tableExists(tableName)) {
            System.out.println("table exists, trying to recreate table......");
            admin.disableTable(tableName);
            admin.deleteTable(tableName);
        }
        System.out.println("create new table:" + tableName);
        admin.createTable(htd);
    }

    public static void main(String[] args) throws Exception {
        args = new String[] { "hdfs://hive01:8020/input/hbase" };
        Configuration conf = HBaseConfiguration.create();
        // conf.set("hbase.rootdir", "hdfs://bigdata:8020/hbase");
        conf.set("hbase.zookeeper.quorum", "hive01");
        conf.set(TableOutputFormat.OUTPUT_TABLE, "phone_log");
        createHBaseTable("phone_log");

        Job job = Job.getInstance(conf, "LoadData");
        job.setJarByClass(LoadData.class);
        job.setNumReduceTasks(1);
        // 3.2 map class
        job.setMapperClass(LoadDataMapper.class);
        job.setMapOutputKeyClass(LongWritable.class);
        job.setMapOutputValueClass(Text.class);
        // 3.3 reduce class
        job.setReducerClass(LoadDataReducer.class);
        // job.setOutputKeyClass(NullWritable.class);   // not needed, TableOutputFormat supplies this
        // job.setOutputValueClass(Mutation.class);     // not needed

        Path inPath = new Path(args[0]);
        FileInputFormat.addInputPath(job, inPath);
        job.setOutputFormatClass(TableOutputFormat.class);
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
Run result: check the imported data in HBase.
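One way to verify the result programmatically is to scan a few rows of phone_log with the same old-style HBase client API used above. This is a sketch only; the class name ScanPhoneLog is hypothetical, while the table name, column family, qualifiers, and ZooKeeper quorum come from the example.

package hbase;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.util.Bytes;

public class ScanPhoneLog {
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "hive01");

        HTable table = new HTable(conf, "phone_log");
        Scan scan = new Scan();
        scan.setCaching(10);
        ResultScanner scanner = table.getScanner(scan);
        int printed = 0;
        for (Result r : scanner) {
            // Print the row key plus two of the imported columns.
            String rowkey = Bytes.toString(r.getRow());
            String host = Bytes.toString(r.getValue(Bytes.toBytes("cf"), Bytes.toBytes("host")));
            String upPayLoad = Bytes.toString(r.getValue(Bytes.toBytes("cf"), Bytes.toBytes("upPayLoad")));
            System.out.println(rowkey + "\t" + host + "\t" + upPayLoad);
            if (++printed >= 10) {
                break;  // only sample the first few rows
            }
        }
        scanner.close();
        table.close();
    }
}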