MR任务之从多个HBase表中读取数据进行处理

来源:互联网 发布:java 数据类型 double 编辑:程序博客网 时间:2024/06/05 19:57

从两个不同的HBase表中读取数据:

在reduce中根据数据结构判断数据来源于哪个HBase表即可

// 设置查询条件List<Scan> scans = new ArrayList<Scan>();Scan scan1 = new Scan();scan1.setCaching(100);scan1.setCacheBlocks(false);scan1.setAttribute(Scan.SCAN_ATTRIBUTES_TABLE_NAME, inTable.getBytes());scans.add(scan1);Scan scan2 = new Scan();scan2.setCaching(100);scan2.setCacheBlocks(false);scan2.setAttribute(Scan.SCAN_ATTRIBUTES_TABLE_NAME, inPhoneImsiTable.getBytes());scans.add(scan2);try {Job job=new Job(conf);job.setJobName("HistoryDataPhonetoImsiTasker");job.setInputFormatClass(MultiTableInputFormat.class);TableMapReduceUtil.initTableMapperJob(scans, ReadHbaseMapper.class, Text.class,Result.class, job);TableMapReduceUtil.initTableReducerJob(outTable, WritetoHbaseReducer.class, job);TableMapReduceUtil.addDependencyJars(job.getConfiguration(), SortKey.class);job.setJarByClass(HistoryDataPhonetoImsiCategoryTasker.class);job.setMapOutputKeyClass(SortKey.class);job.setMapOutputValueClass(CustomOutWritable.class);job.setSortComparatorClass(SecondSortCompartor.class);job.setGroupingComparatorClass(SecondSortGroupCompartor.class);job.setPartitionerClass(SecondSortPartitioner.class);job.waitForCompletion(true);} catch (IOException e) {// TODO Auto-generated catch blocke.printStackTrace();}

0 0
原创粉丝点击