Reading HBase table data with MapReduce: print the results directly, or write them back to an HBase table

package com.syyz.zjs;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;

public class ExampleTotalMapReduce {

    public static void main(String[] args) {
        try {
            Configuration config = HBaseConfiguration.create();
            config.set("hbase.zookeeper.quorum", "node7,node8,node9");

            Job job = new Job(config, "ExampleSummary");
            job.setJarByClass(ExampleTotalMapReduce.class);  // class that contains mapper and reducer

            Scan scan = new Scan();
            scan.setCaching(500);        // 1 is the default in Scan, which will be bad for MapReduce jobs
            scan.setCacheBlocks(false);  // don't set to true for MR jobs
            // set other scan attrs
            // scan.addColumn(family, qualifier);

            TableMapReduceUtil.initTableMapperJob(
                    "t_cdr3",           // input table
                    scan,               // Scan instance to control CF and attribute selection
                    MyMapper.class,     // mapper class
                    Text.class,         // mapper output key
                    IntWritable.class,  // mapper output value
                    job);
            TableMapReduceUtil.initTableReducerJob(
                    "t_cdr",                // output table
                    MyTableReducer.class,   // reducer class
                    job);
            job.setNumReduceTasks(1);   // at least one, adjust as required

            boolean b = job.waitForCompletion(true);
            if (!b) {
                throw new IOException("error with job!");
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    public static class MyMapper extends TableMapper<Text, IntWritable> {

        private final IntWritable ONE = new IntWritable(1);
        private Text text = new Text();

        public void map(ImmutableBytesWritable row, Result value, Context context)
                throws IOException, InterruptedException {
            // read the "datetime" column from column family "cf1"
            String datetime = new String(value.getValue(Bytes.toBytes("cf1"), Bytes.toBytes("datetime")));
            text.set(datetime);
            context.write(text, ONE);
        }
    }

    // This reducer simply prints the keys it receives.
    public static class MyTableReducer extends TableReducer<Text, IntWritable, ImmutableBytesWritable> {

        public void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            System.out.println(key + "*************");
            // To sum the counts instead:
            // for (IntWritable val : values) {
            //     sum += val.get();
            // }
            // To write the result back to HBase:
            // Put put = new Put(key.getBytes());
            // put.add(Bytes.toBytes("info"), Bytes.toBytes("count"), Bytes.toBytes(String.valueOf(sum)));
            // context.write(null, put);
        }
    }
}
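The reducer above only prints its keys, so the "write back to an HBase table" case from the title stays commented out. Below is a minimal sketch of that variant, based directly on the commented-out lines; it assumes the output table t_cdr has a column family "info" with a qualifier "count" (adjust to your schema), and it uses the same pre-1.0 HBase API (Put.add) as the imports above.

    // Sketch: sum the counts per key and write one row per key to the output table
    // that was passed to initTableReducerJob ("t_cdr").
    public static class MyWriteBackReducer
            extends TableReducer<Text, IntWritable, ImmutableBytesWritable> {

        public void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable val : values) {
                sum += val.get();
            }
            // Row key = the mapper output key (the datetime string here)
            Put put = new Put(key.getBytes());
            // Assumed column family "info" and qualifier "count"; on HBase 1.0+ use addColumn(...)
            put.add(Bytes.toBytes("info"), Bytes.toBytes("count"), Bytes.toBytes(String.valueOf(sum)));
            // With TableReducer/TableOutputFormat only the Put matters, so a null key is fine
            context.write(null, put);
        }
    }

To use it, register it instead of MyTableReducer: TableMapReduceUtil.initTableReducerJob("t_cdr", MyWriteBackReducer.class, job);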
Note: when debugging this project locally on Windows 7, you need to modify NativeIO.java in the Hadoop source code.
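One common form of that workaround (an assumption on my part, not spelled out in the original; verify against your Hadoop version) is to copy org.apache.hadoop.io.nativeio.NativeIO into your own project under the same package, so it shadows the class in hadoop-common, and make the Windows access check always succeed:

    // Inside the copied NativeIO.java, in the Windows inner class:
    public static boolean access(String path, AccessRight desiredAccess) throws IOException {
        // Skip the native access0() check that fails locally when hadoop.dll/winutils is missing
        return true;
    }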