Processing SequenceFiles with MapReduce


To avoid the problems that arise when a large number of small image files are stored directly on HDFS (the small-files problem), the small images are first packed into a SequenceFile, and a MapReduce job then operates on that SequenceFile.
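As a minimal sketch of the packing step (not shown in the original post), the images can be written into a SequenceFile with SequenceFile.Writer, using the file name as the key and the raw bytes as the value. The local image directory and the HDFS output path used here are hypothetical:

package com.wang;

import java.io.File;
import java.nio.file.Files;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;

public class ImagesToSequenceFile {

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Hypothetical paths: a local directory of small images and the target SequenceFile on HDFS.
        File imageDir = new File("/tmp/images");
        Path seqPath = new Path("hdfs://master:9000/wang/result1.seq");

        SequenceFile.Writer writer = null;
        try {
            writer = SequenceFile.createWriter(conf,
                    SequenceFile.Writer.file(seqPath),
                    SequenceFile.Writer.keyClass(Text.class),
                    SequenceFile.Writer.valueClass(BytesWritable.class));
            for (File image : imageDir.listFiles()) {
                byte[] bytes = Files.readAllBytes(image.toPath());
                // Key = image file name, value = raw image bytes.
                writer.append(new Text(image.getName()), new BytesWritable(bytes));
            }
        } finally {
            IOUtils.closeStream(writer);
        }
    }
}

Note that this sketch stores the image bytes as BytesWritable, while the job below declares Text values; in a real pipeline one of the two types would have to be adjusted so that they match.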

By setting the job's input format class to SequenceFileInputFormat, the map function receives the key/value pairs stored in the SequenceFile. The code is as follows:

package com.wang;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;

public class Parral_Pyramid {

    static int i = 0;

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        // Set the main class of the job jar.
        job.setJarByClass(Parral_Pyramid.class);
        // Read the input as a SequenceFile.
        job.setInputFormatClass(SequenceFileInputFormat.class);
        // Mapper settings.
        job.setMapperClass(Image_Mapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        // For a map-only job the final output types must also be set explicitly.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        // Write the output as a SequenceFile; no reducers, so the map output goes straight to the output format.
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        job.setNumReduceTasks(0);
        FileInputFormat.setInputPaths(job, new Path("hdfs://master:9000/wang/result1.seq/part-r-00000"));
        FileOutputFormat.setOutputPath(job, new Path("hdfs://master:9000/wang/result2.seq"));
        job.waitForCompletion(true);
    }

    static class Image_Mapper extends Mapper<Text, Text, Text, Text> {

        @Override
        protected void map(Text key, Text value, Context context)
                throws IOException, InterruptedException {
            // The key/value pairs delivered here are exactly the key/value pairs stored in the SequenceFile.
            i++;
            System.out.println("now_key:" + key.toString() + " value=" + value);
            // context.write(key, value);
        }
    }
}
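To inspect the result outside of MapReduce, the output SequenceFile can be read back directly with SequenceFile.Reader. A minimal sketch, assuming the commented-out context.write above is enabled so the job really emits Text/Text pairs, and assuming the map-only job names its part file part-m-00000:

package com.wang;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;

public class ReadResultSeq {

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Hypothetical path: the part file produced by the map-only job above.
        Path path = new Path("hdfs://master:9000/wang/result2.seq/part-m-00000");

        SequenceFile.Reader reader = null;
        try {
            reader = new SequenceFile.Reader(conf, SequenceFile.Reader.file(path));
            Text key = new Text();
            Text value = new Text();
            // Iterate over every record stored in the SequenceFile.
            while (reader.next(key, value)) {
                System.out.println("key=" + key + " value=" + value);
            }
        } finally {
            if (reader != null) {
                reader.close();
            }
        }
    }
}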