HFileInputFormat实现

来源：互联网 | 发布：数据库分布式 | 编辑：程序博客网 | 时间：2024/06/05 00:41
HBase 的底层存储采用 HFile 文件格式。HFile 文件可以直接作为 MapReduce 作业的输入，从而绕过 RegionServer，直接对 HFile 运行 MapReduce。代码如下：
import java.io.IOException;import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.fs.FileSystem;import org.apache.hadoop.fs.Path;import org.apache.hadoop.hbase.KeyValue;import org.apache.hadoop.hbase.io.ImmutableBytesWritable;import org.apache.hadoop.hbase.io.hfile.CacheConfig;import org.apache.hadoop.hbase.io.hfile.HFile;import org.apache.hadoop.hbase.io.hfile.HFileScanner;import org.apache.hadoop.hbase.regionserver.metrics.SchemaMetrics;import org.apache.hadoop.mapreduce.InputSplit;import org.apache.hadoop.mapreduce.JobContext;import org.apache.hadoop.mapreduce.RecordReader;import org.apache.hadoop.mapreduce.TaskAttemptContext;import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;import org.apache.hadoop.mapreduce.lib.input.FileSplit;/** * This is direct port (hopefully) of the Scala version of this class available * on https://gist.github.com/1120311 *  * @author yuankang */public class HFileInputFormat extends FileInputFormat<ImmutableBytesWritable, KeyValue> {private class HFileRecordReader extendsRecordReader<ImmutableBytesWritable, KeyValue> {private HFile.Reader reader;private final HFileScanner scanner;private int entryNumber = 0;public HFileRecordReader(FileSplit split, Configuration conf)throws IOException {SchemaMetrics.configureGlobally(conf);final Path path = split.getPath();reader = HFile.createReader(FileSystem.get(conf), path, new CacheConfig(conf));scanner = reader.getScanner(false, false);reader.loadFileInfo(); // This is required or else seekTo throws a// NPEscanner.seekTo(); // This is required or else scanner.next throws an// error}@Overridepublic void close() throws IOException {if (reader != null) {reader.close();}}/* * @Override public boolean next(ImmutableBytesWritable key, KeyValue * value) throws IOException { entryNumber++; return scanner.next(); } */@Overridepublic ImmutableBytesWritable getCurrentKey() throws IOException,InterruptedException {// TODO Auto-generated method stubreturn new 
ImmutableBytesWritable(scanner.getKeyValue().getRow());}@Overridepublic KeyValue getCurrentValue() throws IOException,InterruptedException {// TODO Auto-generated method stubreturn scanner.getKeyValue();}@Overridepublic boolean nextKeyValue() throws IOException, InterruptedException {// TODO Auto-generated method stubentryNumber++;return scanner.next();}@Overridepublic float getProgress() throws IOException, InterruptedException {// TODO Auto-generated method stubif (reader != null) {        return (entryNumber / reader.getEntries());      }      return 1;}@Overridepublic void initialize(InputSplit arg0, TaskAttemptContext arg1)throws IOException, InterruptedException {}}@Overrideprotected boolean isSplitable(JobContext context, Path filename) {    return false;}@Overridepublic RecordReader<ImmutableBytesWritable, KeyValue> createRecordReader(InputSplit split,TaskAttemptContext context) throws IOException,InterruptedException {return new HFileRecordReader((FileSplit) split,context.getConfiguration());}}