MapReduce custom grouping
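This example sums the upstream and downstream traffic per phone number, and uses a custom Partitioner to route each phone-number prefix to its own reduce task, so that every area's totals land in a separate output file.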
//-------------------FlowSumArea.java-------------
package pack4;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import pack2.FlowBean;

public class FlowSumArea {

    public static class FlowSumAreaMapper extends Mapper<LongWritable, Text, Text, FlowBean> {
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // Each input line holds whitespace-separated fields: field 1 is the
            // phone number, fields 7 and 8 are the upstream and downstream flow.
            String line = value.toString();
            String[] fields = line.split("\\s+");
            String phoneNB = fields[1];
            long u_flow = Long.parseLong(fields[7]);
            long d_flow = Long.parseLong(fields[8]);
            context.write(new Text(phoneNB), new FlowBean(phoneNB, u_flow, d_flow));
        }
    }

    public static class FlowSumAreaReducer extends Reducer<Text, FlowBean, Text, FlowBean> {
        @Override
        protected void reduce(Text key, Iterable<FlowBean> values, Context context)
                throws IOException, InterruptedException {
            // Sum the upstream and downstream flow over all records of one phone number.
            long u_flow_count = 0;
            long d_flow_count = 0;
            for (FlowBean bean : values) {
                u_flow_count += bean.getU_flow();
                d_flow_count += bean.getD_flow();
            }
            context.write(key, new FlowBean(key.toString(), u_flow_count, d_flow_count));
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);

        // Which jar contains the classes this job uses.
        job.setJarByClass(FlowSumArea.class);

        // The mapper and reducer classes for this job.
        job.setMapperClass(FlowSumAreaMapper.class);
        job.setReducerClass(FlowSumAreaReducer.class);

        // Route map output through the custom partitioner, with one reduce task
        // per partition (prefixes 135-139 plus one catch-all).
        job.setPartitionerClass(AreaPartitioner.class);
        job.setNumReduceTasks(6);

        // The mapper's output key/value types (must match FlowSumAreaMapper's generics).
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(FlowBean.class);

        // The reducer's output key/value types.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(FlowBean.class);

        // Path of the raw input data.
        FileInputFormat.setInputPaths(job, new Path("/flow/src"));
        // Path for the results.
        FileOutputFormat.setOutputPath(job, new Path("/flow/out15"));

        // Submit the job and exit with its status.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
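The job imports pack2.FlowBean, which this post never shows. Below is a minimal sketch of what it presumably looks like: a custom Writable carrying the phone number plus upstream and downstream flow. The field layout, the UTF serialization, and the toString() output format are assumptions inferred from the calls above, not the original class.

//-------------------FlowBean.java (sketch; not shown in the original post)-------------
package pack2;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.Writable;

public class FlowBean implements Writable {

    private String phoneNB;
    private long u_flow;
    private long d_flow;

    // Hadoop requires a no-arg constructor for deserialization.
    public FlowBean() {}

    public FlowBean(String phoneNB, long u_flow, long d_flow) {
        this.phoneNB = phoneNB;
        this.u_flow = u_flow;
        this.d_flow = d_flow;
    }

    public long getU_flow() { return u_flow; }
    public long getD_flow() { return d_flow; }

    @Override
    public void write(DataOutput out) throws IOException {
        out.writeUTF(phoneNB);
        out.writeLong(u_flow);
        out.writeLong(d_flow);
    }

    @Override
    public void readFields(DataInput in) throws IOException {
        phoneNB = in.readUTF();
        u_flow = in.readLong();
        d_flow = in.readLong();
    }

    // Assumed output format: upstream, downstream, and total flow.
    @Override
    public String toString() {
        return u_flow + "\t" + d_flow + "\t" + (u_flow + d_flow);
    }
}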
//-------------------AreaPartitioner.java-------------
package pack4;

import java.util.HashMap;

import org.apache.hadoop.mapreduce.Partitioner;

public class AreaPartitioner<KEY, VALUE> extends Partitioner<KEY, VALUE> {

    // Maps a phone-number prefix to a partition (reduce task) number.
    private static HashMap<String, Integer> areaMap = new HashMap<>();

    static {
        areaMap.put("135", 0);
        areaMap.put("136", 1);
        areaMap.put("137", 2);
        areaMap.put("138", 3);
        areaMap.put("139", 4);
    }

    @Override
    public int getPartition(KEY key, VALUE value, int numPartitions) {
        // Look up the first three digits of the phone number; any unknown
        // prefix falls into the catch-all partition 5.
        Integer areaCode = areaMap.get(key.toString().substring(0, 3));
        return areaCode == null ? 5 : areaCode;
    }
}
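With setNumReduceTasks(6) in the driver, prefixes 135 through 139 map to partitions 0 through 4 and every other prefix falls into partition 5, so each area's summed flow is written to its own part-r-0000N file. The snippet below is a hypothetical sanity check of that mapping; the class name and sample phone numbers are made up for illustration, not from the original post.

//-------------------PartitionCheck.java (hypothetical check, not part of the job)-------------
package pack4;

import org.apache.hadoop.io.Text;

import pack2.FlowBean;

public class PartitionCheck {
    public static void main(String[] args) {
        AreaPartitioner<Text, FlowBean> partitioner = new AreaPartitioner<>();
        // Known prefix 135 -> partition 0.
        System.out.println(partitioner.getPartition(new Text("13502887823"), null, 6));
        // Unknown prefix 150 -> catch-all partition 5.
        System.out.println(partitioner.getPartition(new Text("15013685858"), null, 6));
    }
}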