MapReduce从HBase多路径导出数据到Hive
来源:互联网 发布:淘宝联盟怎么身份认证 编辑:程序博客网 时间:2024/04/27 21:20
package com.zz.hbase.ccrc;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

import com.zz.hive.Constants;
import com.zz.util.DateUtil;

/**
 * MapReduce job that exports rows from the HBase table {@code Fetch} to HDFS,
 * writing one output directory per event type so the files can be loaded into
 * the matching Hive tables.
 *
 * <p>Each HBase row carries its columns in family {@code aaaa}; the qualifier
 * {@code event} names the logical table the row belongs to. The mapper
 * re-assembles the row as a delimited line in that table's column order and the
 * reducer routes it to {@code <tableName>/<tableName>-r-NNNNN} under the job
 * output directory via {@link MultipleOutputs}.
 */
public class HBaseToHiveJob {

    /** Column family that holds every exported column. */
    private static final byte[] FAMILY = Bytes.toBytes("aaaa");
    /** Qualifier whose cell value names the logical table (event type). */
    private static final String EVENT_QUALIFIER = "event";
    /** Placeholder emitted when a column has no value for this row. */
    private static final String NULL_PLACEHOLDER = "-";

    /**
     * Reads one HBase {@link Result} and emits (tableName, delimitedRow).
     */
    public static class HBaseToHiveMapper extends TableMapper<Text, Text> {

        @Override
        protected void map(ImmutableBytesWritable key, Result value, Context context)
                throws IOException, InterruptedException {
            Map<String, String> columns = new HashMap<String, String>();
            String tableName = "";
            for (Entry<byte[], byte[]> entry : value.getFamilyMap(FAMILY).entrySet()) {
                // Bytes.toString decodes UTF-8 and returns null for a null cell.
                // The original used new String(byte[]), which silently depends on
                // the platform default charset — not portable across cluster nodes.
                String cellValue = Bytes.toString(entry.getValue());
                if (cellValue != null) {
                    String qualifier = Bytes.toString(entry.getKey());
                    if (EVENT_QUALIFIER.equals(qualifier)) {
                        tableName = cellValue;
                    } else {
                        columns.put(qualifier, cellValue);
                    }
                }
            }
            StringBuilder row = buildRow(tableName, columns);
            context.write(new Text(tableName), new Text(row.toString()));
        }

        /**
         * Chooses the Hive column order for the given event type and builds the
         * delimited output line.
         *
         * @param tableName event type extracted from the {@code event} qualifier
         * @param columns   qualifier → value map for the remaining cells
         * @return the assembled row; empty for event types with no known schema
         *         (matching the original behavior of emitting an empty line)
         */
        private StringBuilder buildRow(String tableName, Map<String, String> columns) {
            if ("Summary".equals(tableName)) {
                // Pre-defined Hive column order for the Summary table; other
                // event types would be dispatched the same way.
                return joinColumns(com.zz.option.Constants.SUMMARY_COLUMN, columns);
            }
            return new StringBuilder();
        }

        /**
         * Joins column values with the Hive field separator, substituting
         * {@value #NULL_PLACEHOLDER} for missing values.
         *
         * @param columnOrder Hive column names in output order
         * @param columns     qualifier → value map for this row
         * @return values separated by {@code Constants.BLANK_CHAR}, no trailing
         *         separator
         */
        private StringBuilder joinColumns(List<String> columnOrder, Map<String, String> columns) {
            StringBuilder sb = new StringBuilder();
            for (String column : columnOrder) {
                String v = columns.get(column);
                sb.append(v != null ? v : NULL_PLACEHOLDER);
                sb.append(Constants.BLANK_CHAR);
            }
            // Guard the trailing-separator trim: the original called
            // deleteCharAt(length - 1) unconditionally and threw
            // StringIndexOutOfBoundsException for an empty column list.
            if (sb.length() > 0) {
                sb.deleteCharAt(sb.length() - 1);
            }
            return sb;
        }
    }

    /**
     * Routes each row to a per-table output file via {@link MultipleOutputs}.
     */
    public static class HBaseToHiveReducer extends Reducer<Text, Text, NullWritable, Text> {

        private MultipleOutputs<NullWritable, Text> out;

        @Override
        protected void setup(Context context) {
            out = new MultipleOutputs<NullWritable, Text>(context);
        }

        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            // Base path "<table>/<table>" yields <table>/<table>-r-NNNNN files
            // under the job output directory — one subdirectory per Hive table.
            String tableName = key.toString();
            String path = tableName + "/" + tableName;
            for (Text val : values) {
                out.write(NullWritable.get(), new Text(val), path);
            }
        }

        @Override
        protected void cleanup(Context context) throws IOException, InterruptedException {
            // Must close MultipleOutputs or buffered records are lost.
            out.close();
        }
    }

    /**
     * Configures and submits the job.
     *
     * @param args args[0] = HDFS output directory (deleted first if it exists)
     */
    public static void main(String[] args)
            throws IOException, ClassNotFoundException, InterruptedException {
        if (args.length < 1) {
            // Original dereferenced args[0] unchecked and died with an
            // ArrayIndexOutOfBoundsException.
            System.err.println("Usage: HBaseToHiveJob <output-path>");
            System.exit(2);
        }
        Configuration conf = HBaseConfiguration.create();
        Job job = Job.getInstance(conf, "Fetch HBase2Hive" + DateUtil.getNowTime());
        job.setJarByClass(HBaseToHiveJob.class);

        Scan scan = new Scan();
        scan.setCaching(500);        // fetch 500 rows per RPC
        scan.setCacheBlocks(false);  // full scans should not pollute the block cache
        scan.addFamily(FAMILY);

        // Remove stale output so the job can be re-run.
        Path output = new Path(args[0]);
        FileSystem fileSystem = output.getFileSystem(conf);
        fileSystem.delete(output, true);

        TableMapReduceUtil.initTableMapperJob(
                "Fetch", scan, HBaseToHiveMapper.class, Text.class, Text.class, job, false);
        job.setReducerClass(HBaseToHiveReducer.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        TextOutputFormat.setOutputPath(job, output);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(Text.class);
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
1 0
- MapReduce从HBase多路径导出数据到Hive
- Mapreduce 导出HBase table 数据到HDFS路径
- 从hbase(hive)将数据导出到mysql
- hive导出hbase数据
- 从 MapReduce 到 Hive
- 通过MapReduce把Hive表数据导入到HBase
- sqoop从hbase导出数据到mysql
- sql server 导出数据到 Azure Hbase / Hive 详细步骤
- 1007-使用MapReduce把数据从HDFS导入到HBase
- 从关系库导入数据到hive-hbase表中
- 从关系库导入数据到hive-hbase表中
- 使用Sqoop从MySQL导入数据到Hive和HBase
- hive命令将hbase数据导出到hive然后到本地
- 从hdfs批量导出数据到hbase表中
- [bigdata-028]apache nifi 从mysql导出数据到hbase
- sqoop从关系库导出数据到hive
- sqoop从关系库导出数据到hive
- 使用 sqoop从MySQL增量导出数据到hive
- OkHttp全解析
- ASCII Unicode UTF-8互转
- [bzoj2342][SHOI2011]双倍回文
- NSMutableParagraphStyle 实现文本缩进
- 各种滤波算法的比较
- MapReduce从HBase多路径导出数据到Hive
- 【Android】在任何View上添加红点★★★★★★★
- Android Fragment 真正的完全解析(上)
- execel导出和下载
- HDU 3523 最小费用流或KM
- BOS项目练习(流程定义/实例管理,bos用户角色同步activiti用户表组表)
- dumpbin丢失mspdb 问题
- 第十六周--阅读程序
- 给TextView设置图片的两种实现方法