Hbase基于Mapreduce的编程
来源:互联网 发布:c与java的区别 编辑:程序博客网 时间:2024/05/17 07:21
小试牛刀，将MapReduce的输出结果保存到大型分布式数据库HBase中。下面通过一个例子，统计各URL的访问PV数据。由于用到RCFile格式，需要导入hive-exec包，还需要加载hbase包；如果这两个包都已经被集群管理员放到各节点的hadoop/lib下，那就可以省去这一步。废话不说，干货，看代码：
package test.hbase;

import java.io.IOException;
import java.util.HashSet;
import java.util.Set;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.mapreduce.TableOutputFormat;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

import com.test.dm.common.RCFileInputFormat;

/**
 * MapReduce job that counts page views (PV) per URL from RCFile input and
 * writes the per-URL totals into the HBase table {@code urlcount}
 * (column family {@code type}, qualifier {@code count}).
 *
 * <p>Usage: {@code URLCountHbase <rcfile input path>}
 */
public class URLCountHbase {

    /**
     * Emits {@code (url, 1)} for every input record. The URL is taken from
     * column index 4 of the RCFile row — TODO confirm against the input schema.
     */
    public static class HBaseMap
            extends Mapper<LongWritable, BytesRefArrayWritable, Text, IntWritable> {

        // Reused constant instead of a per-mapper mutable field.
        private static final IntWritable ONE = new IntWritable(1);

        @Override
        protected void map(LongWritable key, BytesRefArrayWritable value, Context context)
                throws IOException, InterruptedException {
            byte[] url = value.get(4).getBytesCopy();
            context.write(new Text(url), ONE);
        }
    }

    /** Sums the per-URL counts and writes one {@link Put} per URL into HBase. */
    public static class HBaseReduce extends TableReducer<Text, IntWritable, NullWritable> {

        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable i : values) {
                sum += i.get();
            }
            // Row key = URL; the count is stored as its decimal string form.
            Put put = new Put(Bytes.toBytes(key.toString()));
            put.add(Bytes.toBytes("type"), Bytes.toBytes("count"),
                    Bytes.toBytes(String.valueOf(sum)));
            context.write(NullWritable.get(), put);
        }
    }

    /**
     * (Re)creates the output table with a single column family {@code type}.
     * An existing table of the same name is disabled and deleted first.
     *
     * @param tablename name of the HBase table to create
     * @throws IOException if any HBase admin operation fails
     */
    public static void createHbaseTable(String tablename) throws IOException {
        HTableDescriptor htd = new HTableDescriptor(tablename);
        htd.addFamily(new HColumnDescriptor("type"));
        // HBaseConfiguration.create() replaces the deprecated constructor.
        Configuration config = HBaseConfiguration.create();
        HBaseAdmin admin = new HBaseAdmin(config);
        try {
            if (admin.tableExists(tablename)) {
                System.out.println("table exists, trying recreate table");
                admin.disableTable(tablename);
                admin.deleteTable(tablename);
            }
            System.out.println("create new table:" + tablename);
            admin.createTable(htd);
        } finally {
            // Release the admin connection even when table setup fails.
            admin.close();
        }
    }

    public static void main(String[] args) throws Exception {
        // Fail fast with a usage message instead of an ArrayIndexOutOfBoundsException.
        if (args.length < 1) {
            System.err.println("Usage: URLCountHbase <input path>");
            System.exit(2);
        }
        String tablename = "urlcount";
        Configuration conf = new Configuration();

        // Ship the hive-exec and hbase jars with the job via "tmpjars"
        // (distributed cache) so task JVMs can load RCFile/HBase classes.
        // Unnecessary if the cluster admin already put them in hadoop/lib.
        final FileSystem fs = FileSystem.getLocal(conf);
        final Set<String> localFiles = new HashSet<String>();
        localFiles.add("/opt/hadoop/hive-0.8.1/lib/hive-exec-0.8.1.jar");
        localFiles.add("/opt/hadoop/hbase/hbase-0.92.1.jar");
        final Set<String> files = new HashSet<String>();
        for (String s : localFiles) {
            files.add(convertPath(s, fs));
        }
        cacheJars(conf, files);

        conf.set(TableOutputFormat.OUTPUT_TABLE, tablename);
        createHbaseTable(tablename);

        Job job = new Job(conf, "WordCount table with " + args[0]);
        job.setJarByClass(URLCountHbase.class);
        job.setNumReduceTasks(3);
        job.setReducerClass(HBaseReduce.class);
        job.setMapperClass(HBaseMap.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setOutputFormatClass(TableOutputFormat.class);
        job.setInputFormatClass(RCFileInputFormat.class);
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }

    /** Returns the fully-qualified URI string of {@code path} on {@code fs}. */
    private static String convertPath(String path, FileSystem fs) {
        final Path p = new Path(path);
        return p.makeQualified(fs).toString();
    }

    /**
     * Appends the given jar URIs to the job's {@code tmpjars} property so they
     * are distributed to task nodes. Existing entries are preserved.
     */
    private static void cacheJars(Configuration job, Set<String> localUrls) throws IOException {
        if (localUrls.isEmpty()) {
            return;
        }
        final String tmpjars = job.get("tmpjars");
        final StringBuilder sb = new StringBuilder();
        if (null != tmpjars) {
            sb.append(tmpjars).append(",");
        }
        sb.append(org.apache.hadoop.util.StringUtils.arrayToString(
                localUrls.toArray(new String[0])));
        job.set("tmpjars", sb.toString());
    }
}
- Hbase基于Mapreduce的编程
- 基于MapReduce的HBase开发
- 基于MapReduce的HBase开发
- 基于MapReduce的HBase开发
- 基于MapReduce的HBase开发(续)
- 基于MapReduce的HBase开发(续)
- (转)基于MapReduce的HBase开发
- Hbase Mapreduce编程
- 基于hadoop0.20.2的mapreduce结果存入Hbase
- (转)基于MapReduce的HBase开发(续)
- Hbase编程入门之MapReduce
- Hbase编程入门之MapReduce
- Hbase MapReduce的Maven依赖
- 基于MapReduce编程模型的数据挖掘算法
- 基于HBase Hadoop 分布式集群环境下的MapReduce程序开发
- 在window系统中的MyEclipse开发基于HBase的MapReduce错误集锦
- 简明的hadoop 2.5 HA 基于centos6.5 安装部署文档(hdfs,mapreduce,hbase)
- hadoop学习笔记之mapreduce 基于hbase日志数据的最频繁访问ip统计
- 3DMAX导出插件开发过程中需要注意的一些地方
- Git 常用命令详解(二)
- swing开发中问题总结2
- 算法导论 动态规划 矩阵链乘法
- 14寸分辨率1366*768的笔记本玩QQ飞车设置宽屏方法
- Hbase基于Mapreduce的编程
- HDU 1054 Strategic Game
- QT中加载未编译的自带库
- ASP.NET安全设置防御ASPXSpy
- Tomcat的bin目录下的startup.bat和Tomcat7.exe的区别
- 网站安全认证系统的设计变迁-5中网占安全认证方案的chm下载
- 3D MAX插件大全介绍
- COPY命令:为PostgreSQL批量导入数据
- 夜半笙歌