Hadoop Learning 3


1. Deduplicating a file line by line

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class Dedup {

    // Map: emit each input line as the key with an empty value, so identical lines become identical keys.
    public static class Map extends Mapper<Object, Text, Text, Text> {
        @Override
        protected void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {
            context.write(value, new Text(""));
        }
    }

    // Reduce: the shuffle groups identical keys, so writing each key exactly once removes the duplicates.
    public static class Reduce extends Reducer<Text, Text, Text, Text> {
        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            context.write(key, new Text(""));
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "afan");
        job.setJarByClass(Dedup.class);   // needed so the cluster can locate the job's classes in the jar
        job.setMapperClass(Map.class);
        job.setReducerClass(Reduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        FileInputFormat.setInputPaths(job, new Path("hdfs://node1:9000/afan/input"));
        FileOutputFormat.setOutputPath(job, new Path("hdfs://node1:9000/afan/output"));
        boolean ret = job.waitForCompletion(true);
        System.exit(ret ? 0 : 1);
    }
}
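Because every line is emitted as a key, the shuffle phase groups identical lines together and the reducer writes each distinct line exactly once; the output is also sorted, since Text keys are ordered during the shuffle. A minimal illustration (the jar name dedup.jar and the sample lines are assumptions, not from the original post):

    hadoop jar dedup.jar Dedup

    Input under /afan/input:          Output in /afan/output/part-r-00000:
        hello world                       hello hadoop
        hello hadoop                      hello world
        hello world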