MapReduce 读取 SequenceFile 文件中的数据

来源:互联网 发布:网络维护专员岗位职责 编辑:程序博客网 时间:2024/05/22 07:47
SequenceFile 中的数据是以 key/value 对的形式存储的。
通过 MapReduce 作业,可以读取 SequenceFile 中的数据。
public class MapReduceReadFile {private static SequenceFile.Reader reader = null;private static Configuration conf = new Configuration();public static class ReadFileMapper extendsMapper<LongWritable, Text, LongWritable, Text> {/* (non-Javadoc) * @see org.apache.hadoop.mapreduce.Mapper#map(KEYIN, VALUEIN, org.apache.hadoop.mapreduce.Mapper.Context) */@Overridepublic void map(LongWritable key, Text value, Context context) {key = (LongWritable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);value = (Text) ReflectionUtils.newInstance(reader.getValueClass(), conf);try {while (reader.next(key, value)) {System.out.printf("%s\t%s\n", key, value);context.write(key, value);}} catch (IOException e1) {e1.printStackTrace();} catch (InterruptedException e) {e.printStackTrace();}}}/** * @param args * @throws IOException * @throws InterruptedException * @throws ClassNotFoundException */public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {Job job = new Job(conf,"read seq file");job.setJarByClass(MapReduceReadFile.class);job.setMapperClass(ReadFileMapper.class);job.setMapOutputValueClass(Text.class);Path path = new Path("logfile2");FileSystem fs = FileSystem.get(conf);reader = new SequenceFile.Reader(fs, path, conf);FileInputFormat.addInputPath(job, path);FileOutputFormat.setOutputPath(job, new Path("result"));System.exit(job.waitForCompletion(true)?0:1);}}

原创粉丝点击