在MapReduce中连接Hbase数据

来源:互联网 发布:两钻淘宝店值多少钱 编辑:程序博客网 时间:2024/06/03 21:42

1.在Hbase中创建EMPLOYEE表:create 'EMPLOYEE','cf1',并创造一批销售订单数据,包括但不限于产品id、销售员id、销售时间、销售额;

2.Hbase中创建TotalSale表:create 'TotalSale','cf1',并创造一批销售数据,包括但不限于用户id、销售总额;

3.编写mapper、reducer、driver源代码;

4.编写testDriver程序输出员工id、销售单数和销售总额。


testMapper

package com.hbasepackage;

import java.io.IOException;

import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;

/**
 * Mapper over the EMPLOYEE HBase table.
 *
 * <p>Row keys are expected to look like {@code "<employeeId>#<suffix>"}; the
 * mapper emits the employee id (the part before the first '#') together with
 * the integer value stored in column {@code cf1:sales}.
 * NOTE(review): the "#"-delimited key format is assumed from the split below —
 * confirm against the data-loading script.
 */
public class testMapper extends TableMapper<Text, IntWritable> {

    private static final byte[] FAMILY = Bytes.toBytes("cf1");
    private static final byte[] SALES_QUALIFIER = Bytes.toBytes("sales");

    /**
     * @param rowKey  the HBase row key for the current row
     * @param columns the row's cells, restricted by the driver's Scan to cf1
     * @param context MapReduce context used to emit (employeeId, sales) pairs
     */
    @Override
    public void map(ImmutableBytesWritable rowKey, Result columns, Context context)
            throws IOException, InterruptedException {
        // Bytes.toString decodes UTF-8 explicitly; new String(byte[]) would
        // depend on the platform default charset.
        String inKey = Bytes.toString(rowKey.get());
        String oKey = inKey.split("#")[0];

        byte[] bSales = columns.getValue(FAMILY, SALES_QUALIFIER);
        if (bSales == null) {
            // Row has no cf1:sales cell; skip it rather than throw an NPE.
            return;
        }
        try {
            // Values were inserted as strings from the hbase shell, so parse
            // the decoded text rather than interpreting the raw bytes.
            int sales = Integer.parseInt(Bytes.toString(bSales).trim());
            context.write(new Text(oKey), new IntWritable(sales));
        } catch (NumberFormatException e) {
            // Malformed sales value: log and skip this row only, so one bad
            // record does not fail the whole task.
            e.printStackTrace();
        }
    }
}



testReducer

package com.hbasepackage;

import java.io.IOException;

import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;

/**
 * Reducer that sums per-employee sales and writes one row per employee into
 * the TotalSale HBase table (column {@code cf1:Total sales:}, int value).
 */
public class testReducer extends TableReducer<Text, IntWritable, ImmutableBytesWritable> {

    private static final byte[] FAMILY = Bytes.toBytes("cf1");
    private static final byte[] TOTAL_QUALIFIER = Bytes.toBytes("Total sales:");

    /**
     * @param key     employee id emitted by the mapper; becomes the output row key
     * @param values  individual sale amounts for this employee
     * @param context MapReduce context; writing a Put routes it to TotalSale
     * @throws IOException          if the Put cannot be written — propagated so
     *                              the task fails instead of silently losing data
     * @throws InterruptedException if the framework interrupts the write
     */
    @Override
    public void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        int sum = 0;
        for (IntWritable sales : values) {
            // Read the primitive directly; the original round-tripped through
            // toString()/new Integer(String), which is slow and deprecated.
            sum += sales.get();
        }

        String keyString = key.toString();
        System.out.println("" + keyString + "\t" + sum);

        // Text.getBytes() returns the backing buffer, which may be longer than
        // getLength(); copy only the valid bytes to avoid a corrupted row key.
        Put insHBase = new Put(Bytes.toBytes(keyString));
        insHBase.add(FAMILY, TOTAL_QUALIFIER, Bytes.toBytes(sum));

        // A null key is the conventional TableOutputFormat idiom: the Put
        // itself carries the row key.
        context.write(null, insHBase);
    }
}



testDriver

package com.hbasepackage;import java.io.IOException;import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.hbase.client.Scan;import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;import org.apache.hadoop.io.IntWritable;import org.apache.hadoop.io.Text;import org.apache.hadoop.mapreduce.Job;import org.apache.hadoop.hbase.util.Bytes;import com.google.common.primitives.*;@SuppressWarnings("unused")public class testDriver {public static void main(String[] args) throws Exception {Configuration conf = new Configuration();// define scan and define column families to scanScan scan = new Scan();scan.addFamily(Bytes.toBytes("cf1"));Job job = new Job(conf);job.setMapperClass(testMapper.class);job.setReducerClass(testReducer.class);job.setJarByClass(testDriver.class);// define input hbase tableTableMapReduceUtil.initTableMapperJob("EMPLOYEE",scan,testMapper.class,Text.class,IntWritable.class,job);// define output tableTableMapReduceUtil.initTableReducerJob("TotalSale",testReducer.class,job);job.waitForCompletion(true);}}


0 0
原创粉丝点击