我的 Hadoop 初学程序——简单数据排序(Sort)

来源:互联网 发布:网络平台建设费用 编辑:程序博客网 时间:2024/05/18 02:45
package bin;import java.io.IOException;import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.fs.Path;import org.apache.hadoop.io.IntWritable;import org.apache.hadoop.io.Text;import org.apache.hadoop.mapreduce.Job;import org.apache.hadoop.mapreduce.Mapper;import org.apache.hadoop.mapreduce.Reducer;import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;import org.apache.hadoop.util.GenericOptionsParser;public class Sort {//对输入文件中数据进行排序。输入文件中的每行内容均为一个数字,即一个数据。//要求在输出中每行有两个间隔的数字,其中,第一个代表原始数据在原始数据集中的位次,第二个代表原始数据。//private static int a;public static class SortMap extends Mapper<Object, Text, IntWritable, IntWritable> {//这个参数第二个为甚么要用Text而不是IntWritable呢??private static IntWritable data = new IntWritable();public void map(Object key,Text value,Context context) {String line = value.toString();data.set(Integer.parseInt(line));//sting类型转换为int类型,要求string文本中前缀不能有空格,而且也因此要求txt文件中不能有空行try {context.write(data, new IntWritable(1));} catch (IOException | InterruptedException e) {// TODO Auto-generated catch blocke.printStackTrace();}}}public static class SortReduce extends Reducer<IntWritable, IntWritable, IntWritable, IntWritable>{private static IntWritable lineNumber=new IntWritable(1);@SuppressWarnings("unused")public void reduce(IntWritable key,Iterable<IntWritable> values,Context context) {IntWritable previous =new IntWritable(key.get()-1);for (IntWritable value : values) {if (key.get()!=previous.get()) {try {context.write(lineNumber, key);} catch (IOException | InterruptedException e) {// TODO Auto-generated catch blocke.printStackTrace();}}lineNumber =new IntWritable(lineNumber.get()+1);previous=key;}}}/** * @param args * @throws IOException  * @throws InterruptedException  * @throws ClassNotFoundException  */public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {// TODO Auto-generated method stubConfiguration configuration=new 
Configuration();String[] otherArgs=new GenericOptionsParser(configuration, args).getRemainingArgs();if (otherArgs.length!=2) {System.out.println("Usage: Sort <in> <out>");System.exit(2);}Job job=new Job(configuration, "Tacert Sort");job.setJarByClass(Sort.class);job.setMapperClass(SortMap.class);//job.setCombinerClass(SortReduce.class);   这里加上combiner之后结果是错的job.setReducerClass(SortReduce.class);job.setOutputKeyClass(IntWritable.class);job.setOutputValueClass(IntWritable.class);FileInputFormat.addInputPath(job, new Path(otherArgs[0]));FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));System.exit(job.waitForCompletion(true)? 0 : 1);}}


原创粉丝点击