MapReduce 杂记

来源:互联网 发布:网络黑白txt下载 编辑:程序博客网 时间:2024/05/19 09:13

集群版本:Hadoop 2.7.2
JAVA版本:1.8.0_91
编辑器:Eclipse 4.4.2

本文默认读者已了解 MapReduce 的运行机制和基本编程方法。

import java.io.DataInput;import java.io.DataOutput;import java.io.IOException;import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.fs.Path;import org.apache.hadoop.io.LongWritable;import org.apache.hadoop.io.RawComparator;import org.apache.hadoop.io.Text;import org.apache.hadoop.io.WritableComparable;import org.apache.hadoop.io.WritableComparator;import org.apache.hadoop.mapreduce.Job;import org.apache.hadoop.mapreduce.Mapper;import org.apache.hadoop.mapreduce.Partitioner;import org.apache.hadoop.mapreduce.Reducer;import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;public class MapReduceEx {    // Map    public static class Map extends Mapper<LongWritable, Text, Text, Text> {        public void map(LongWritable ikey, Text ivalue, Context context)                throws IOException, InterruptedException {            context.write(ivalue, new Text());        }    }    // Reduce    public static class Reduce extends Reducer<Text, Text, Text, Text> {        public void reduce(Text key, Iterable<Text> value, Context context)                throws IOException, InterruptedException {            for (Text text : value) {                context.write(text, new Text());            }        }    }    // 重构Writable类    public static class myWritableComparable implements            WritableComparable<myWritableComparable> {        private Text first;        private Text second;        public void set(Text first, Text second) {            this.first = first;            this.second = second;        }        public Text getFirst() {            return this.first;        }        public Text getSecond() {            return this.second;        }        @Override        public void readFields(DataInput in) throws IOException {            // TODO Auto-generated method stub            this.first.readFields(in);            this.second.readFields(in);            // INT = in.readInt()            // 
STRING = in.readLine()        }        @Override        public void write(DataOutput out) throws IOException {            // TODO Auto-generated method stub            this.first.write(out);            this.second.write(out);            // out.writeInt(INT);            // out.writeChars(STRING);        }        @Override        public int compareTo(myWritableComparable o) {            // TODO Auto-generated method stub            int result = this.first.compareTo(o.first);            if (result == 0) {                result = this.second.compareTo(o.second);            }            return result;        }    }    // 自定义分组    public static class myRawComparator implements            RawComparator<myWritableComparable> {        @Override        public int compare(myWritableComparable o1, myWritableComparable o2) {            // TODO Auto-generated method stub            Text a = o1.getFirst();            Text b = o2.getFirst();            return a.compareTo(b);        }        @Override        public int compare(byte[] arg0, int arg1, int arg2, byte[] arg3,                int arg4, int arg5) {            // TODO Auto-generated method stub            /**             * arg0 表示第1个参与比较的字节数组 arg1 表示第1个字节数组中开始比较的位置 arg2             * 表示第1个字节数组参与比较的字节长度 arg3 表示第2个参与比较的字节数组 arg4 表示第2个字节数组中开始比较的位置             * arg5 表示第2个字节数组参与比较的字节长度             */            return WritableComparator                    .compareBytes(arg0, arg1, 8, arg3, arg4, 8);        }    }    // 自定义分区    public class myPartitioner extends Partitioner<Text, Text> {        @Override        public int getPartition(Text key, Text value, int numReduceTasks) {            // TODO Auto-generated method stub            return (key.toString().hashCode() & Integer.MAX_VALUE)                    % numReduceTasks;        }    }    // 参数设定    public static void main(String[] args) throws IllegalArgumentException,            IOException, ClassNotFoundException, InterruptedException {        Configuration conf = new 
Configuration();        @SuppressWarnings("deprecation")        Job job = new Job(conf, "First_Cleaning");        // 指定main函数所子类        job.setJarByClass(MapReduceEx.class);        // 指定Map类        job.setMapperClass(Map.class);        // 指定map输出key的类        job.setMapOutputKeyClass(Text.class);        // 指定map输出value的类        job.setMapOutputValueClass(Text.class);        // 指定Reduce类        job.setReducerClass(Reduce.class);        // 指定reduce输出key的类        job.setOutputKeyClass(Text.class);        // 指定reduce输出value的类        job.setOutputValueClass(Text.class);        // 指定分组类        job.setGroupingComparatorClass(myRawComparator.class);        // 指定分区类        job.setPartitionerClass(myPartitioner.class);        // 指定Combiner类,对map输出进行归约        job.setCombinerClass(null);        // 指定CombinerKey分组类        job.setCombinerKeyGroupingComparatorClass(null);        // 指定排序时候所使用的比较器        job.setSortComparatorClass(null);        // 指定输入路径        FileInputFormat.addInputPath(job, new Path(""));        // 指定输出路径        FileOutputFormat.setOutputPath(job, new Path(""));        // 提交任务        System.exit(job.waitForCompletion(true) ? 0 : 1);    }}
1 0
原创粉丝点击