在MapReduce中连接Hbase数据

来源:互联网 发布:两钻淘宝店值多少钱 编辑:程序博客网 时间:2024/06/03 21:42

1.在Hbase中创建EMPLOYEE表:create 'EMPLOYEE','cf1',并创造一批销售订单数据,包括但不限于产品id、销售员id、销售时间、销售额;

2.Hbase中创建TotalSale表:create 'TotalSale','cf1',并创造一批销售数据,包括但不限于用户id、销售总额;

3.编写mapper、reducer、driver源代码;

4.编写testDriver程序输出员工id、销售单数和销售总额。


testMapper

package com.hbasepackage;

import java.io.IOException;

import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;

/**
 * Mapper over the EMPLOYEE HBase table.
 *
 * <p>Row keys are expected to look like {@code "<employeeId>#<suffix>"}; the
 * mapper emits the employee id (the part before the first '#') together with
 * the integer value stored in column {@code cf1:sales}.
 * NOTE(review): the "#"-delimited key format is assumed from the split below —
 * confirm against the data-loading script.
 */
public class testMapper extends TableMapper<Text, IntWritable> {

    private static final byte[] FAMILY = Bytes.toBytes("cf1");
    private static final byte[] SALES_QUALIFIER = Bytes.toBytes("sales");

    /**
     * @param rowKey  the HBase row key for the current row
     * @param columns the row's cells, restricted by the driver's Scan to cf1
     * @param context MapReduce context used to emit (employeeId, sales) pairs
     */
    @Override
    public void map(ImmutableBytesWritable rowKey, Result columns, Context context)
            throws IOException, InterruptedException {
        // Bytes.toString decodes UTF-8 explicitly; new String(byte[]) would
        // depend on the platform default charset.
        String inKey = Bytes.toString(rowKey.get());
        String oKey = inKey.split("#")[0];

        byte[] bSales = columns.getValue(FAMILY, SALES_QUALIFIER);
        if (bSales == null) {
            // Row has no cf1:sales cell; skip it rather than throw an NPE.
            return;
        }
        try {
            // Values were inserted as strings from the hbase shell, so parse
            // the decoded text rather than interpreting the raw bytes.
            int sales = Integer.parseInt(Bytes.toString(bSales).trim());
            context.write(new Text(oKey), new IntWritable(sales));
        } catch (NumberFormatException e) {
            // Malformed sales value: log and skip this row only, so one bad
            // record does not fail the whole task.
            e.printStackTrace();
        }
    }
}



testReducer

package com.hbasepackage;

import java.io.IOException;

import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;

/**
 * Reducer that sums per-employee sales and writes one row per employee into
 * the TotalSale HBase table (column {@code cf1:Total sales:}, int value).
 */
public class testReducer extends TableReducer<Text, IntWritable, ImmutableBytesWritable> {

    private static final byte[] FAMILY = Bytes.toBytes("cf1");
    private static final byte[] TOTAL_QUALIFIER = Bytes.toBytes("Total sales:");

    /**
     * @param key     employee id emitted by the mapper; becomes the output row key
     * @param values  individual sale amounts for this employee
     * @param context MapReduce context; writing a Put routes it to TotalSale
     * @throws IOException          if the Put cannot be written — propagated so
     *                              the task fails instead of silently losing data
     * @throws InterruptedException if the framework interrupts the write
     */
    @Override
    public void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        int sum = 0;
        for (IntWritable sales : values) {
            // Read the primitive directly; the original round-tripped through
            // toString()/new Integer(String), which is slow and deprecated.
            sum += sales.get();
        }

        String keyString = key.toString();
        System.out.println("" + keyString + "\t" + sum);

        // Text.getBytes() returns the backing buffer, which may be longer than
        // getLength(); copy only the valid bytes to avoid a corrupted row key.
        Put insHBase = new Put(Bytes.toBytes(keyString));
        insHBase.add(FAMILY, TOTAL_QUALIFIER, Bytes.toBytes(sum));

        // A null key is the conventional TableOutputFormat idiom: the Put
        // itself carries the row key.
        context.write(null, insHBase);
    }
}



testDriver

package com.hbasepackage;import java.io.IOException;import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.hbase.client.Scan;import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;import org.apache.hadoop.io.IntWritable;import org.apache.hadoop.io.Text;import org.apache.hadoop.mapreduce.Job;import org.apache.hadoop.hbase.util.Bytes;import com.google.common.primitives.*;@SuppressWarnings("unused")public class testDriver {public static void main(String[] args) throws Exception {Configuration conf = new Configuration();// define scan and define column families to scanScan scan = new Scan();scan.addFamily(Bytes.toBytes("cf1"));Job job = new Job(conf);job.setMapperClass(testMapper.class);job.setReducerClass(testReducer.class);job.setJarByClass(testDriver.class);// define input hbase tableTableMapReduceUtil.initTableMapperJob("EMPLOYEE",scan,testMapper.class,Text.class,IntWritable.class,job);// define output tableTableMapReduceUtil.initTableReducerJob("TotalSale",testReducer.class,job);job.waitForCompletion(true);}}


0 0
原创粉丝点击