A Word Count Case Study


The problem to solve: for every word that appears in the last position of a line, count how many times that word occurs anywhere in the input, for data in the following format:

a,b,c,d
e,d,s,d
a,s,g,w
...

Solution process and reflections (careful not to fall into the pits):

/**
 * @Title: Demos.java
 * @Author: youxiangyang
 * @Date: 6:50:07 PM
 */
package mr;

import java.io.IOException;
import java.util.HashMap;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;

/**
 * @read here
 * Today I ran into this problem: count the occurrences of each line's last word,
 * for input like:
 * a,b,c,d
 * e,d,s,d
 * s,d,f,g
 * a,s,g,w
 * ...
 * Below is my own solution. It is rather convoluted, so I don't recommend it,
 * but I'm writing it out as a pit so that nobody else falls into it.
 * The idea: this is really a variant of wordcount. In the map phase I append a
 * marker ("&&") to the last word of each line, so the reduce phase can pick
 * those keys out with an if-check: marked words go into one HashMap, unmarked
 * ones into another. Then cleanup() does the final pass: walk both maps, and
 * whenever a map1 key equals a map2 key with the marker stripped, add their
 * counts and emit the total. The idea is not complicated, but it is definitely
 * not a good method; I hope someone can offer a better one.
 *
 * At the end I also tried a method a classmate suggested... which turned out
 * to be wrong as well, though the idea behind it was interesting.
 */
public class Demos {

    public static void main(String[] args) {
        // main() is omitted to save space: it is the standard, boilerplate job
        // setup. Only the map and reduce classes are shown here.
    }

    public static class wordMap extends Mapper<LongWritable, Text, Text, IntWritable> {
        @Override
        protected void map(LongWritable key, Text value,
                Mapper<LongWritable, Text, Text, IntWritable>.Context context)
                throws IOException, InterruptedException {
            String[] lines = value.toString().split(",");
            for (int i = 0; i < lines.length; i++) {
                if (i == lines.length - 1) {
                    // Tag the last word of the line with a marker.
                    context.write(new Text(lines[i] + "&&"), new IntWritable(1));
                } else {
                    context.write(new Text(lines[i]), new IntWritable(1));
                }
            }
        }
    }

    public static class wordReduce extends Reducer<Text, IntWritable, Text, IntWritable> {
        // map2 collects counts of tagged (last-position) words,
        // map1 counts of all other occurrences. Note that the tagged and
        // untagged forms of a word must reach the same reducer, so this
        // join only works reliably with a single reducer.
        HashMap<String, Integer> map1 = new HashMap<String, Integer>();
        HashMap<String, Integer> map2 = new HashMap<String, Integer>();

        @Override
        protected void reduce(Text k1, Iterable<IntWritable> v1,
                Reducer<Text, IntWritable, Text, IntWritable>.Context context)
                throws IOException, InterruptedException {
            int nums = 0;
            for (IntWritable num : v1) {
                nums += num.get();
            }
            if (k1.toString().contains("&&")) {
                map2.put(k1.toString(), nums);
            } else {
                map1.put(k1.toString(), nums);
            }
        }

        @Override
        protected void cleanup(
                Reducer<Text, IntWritable, Text, IntWritable>.Context context)
                throws IOException, InterruptedException {
            // For every tagged word, add the count of the same word in
            // non-final positions and emit the total.
            for (String key : map2.keySet()) {
                // Strip the "&&" marker to recover the word itself. (The
                // original called substring(-1), hoping a negative index would
                // cut from the end as in some other languages; in Java that
                // throws StringIndexOutOfBoundsException.)
                String word = key.substring(0, key.length() - 2);
                int nums = map2.get(key);
                if (map1.containsKey(word)) {
                    nums += map1.get(word);
                }
                context.write(new Text(word), new IntWritable(nums));
            }
        }
    }
}
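Since the post above asks for a better method, here is a minimal sketch of one possible cleaner single-job approach (my own suggestion, not from the original post; class names are illustrative). Instead of tagging keys with "&&", the mapper emits a sentinel value of -1 under the same key for last-position words, so the reducer can sum and filter in one pass, with no HashMaps and no cleanup():

// A sketch of a simpler alternative: emit every word with a count of 1, plus a
// sentinel value of -1 for the word in the last position. The reducer sums the
// positive counts and only outputs words for which it saw the sentinel.
package mr;

import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;

public class LastWordCount {

    public static class LastWordMapper
            extends Mapper<LongWritable, Text, Text, IntWritable> {
        private static final IntWritable ONE = new IntWritable(1);
        private static final IntWritable LAST_MARKER = new IntWritable(-1);

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String[] words = value.toString().split(",");
            for (int i = 0; i < words.length; i++) {
                String word = words[i].trim();
                context.write(new Text(word), ONE);             // count every occurrence
                if (i == words.length - 1) {
                    context.write(new Text(word), LAST_MARKER); // flag last-position words
                }
            }
        }
    }

    public static class LastWordReducer
            extends Reducer<Text, IntWritable, Text, IntWritable> {
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            boolean seenAsLast = false;
            for (IntWritable v : values) {
                if (v.get() < 0) {
                    seenAsLast = true;  // sentinel: this word ended at least one line
                } else {
                    sum += v.get();
                }
            }
            if (seenAsLast) {
                context.write(key, new IntWritable(sum)); // total occurrences anywhere
            }
        }
    }
}

One caveat: unlike a plain wordcount, this reducer cannot be reused as a combiner, because a combiner would drop any word that did not happen to end a line in its local map output, losing its counts.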

Below is the suggestion my classmate gave. Watch out: the approach is WRONG!! Don't fall into this pit!!

/**
 * @Title: Demo02.java
 * @Author: youxiangyang
 * @Date: 7:37:45 PM
 */
package mr;

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;

/**
 * @author AURA
 */
public class Demo02 {

    public static void main(String[] args) {
    }

    public static class wmap extends Mapper<LongWritable, Text, Text, IntWritable> {
        @Override
        protected void map(LongWritable key, Text value,
                Mapper<LongWritable, Text, Text, IntWritable>.Context context)
                throws IOException, InterruptedException {
            String[] lines = value.toString().split(",");
            String endword = lines[lines.length - 1].trim();
            // Count how often the last word also occurs earlier in the same
            // line. (The original loop ran over the whole line including the
            // last word itself, so writing 1 + nums double-counted it.)
            int nums = 0;
            for (int i = 0; i < lines.length - 1; i++) {
                if (lines[i].trim().equals(endword)) {
                    nums++;
                }
            }
            context.write(new Text(endword), new IntWritable(1 + nums));
            // At first glance this looks fine, but it misses a lot!
            // Say one line is:  a,v,s,d
            // and the next is:  q,w,s,v
            // Counted this way, the "v" from the first line is lost, because
            // each map() call only ever emits counts for its own line's last
            // word, never for last words that appear in other lines.
        }
    }

    public static class wreduce extends Reducer<Text, IntWritable, Text, IntWritable> {
        @Override
        protected void reduce(Text key, Iterable<IntWritable> vIterable,
                Reducer<Text, IntWritable, Text, IntWritable>.Context context)
                throws IOException, InterruptedException {
            int nums = 0;
            for (IntWritable num : vIterable) {
                nums += num.get(); // the original had nums += nums, which always stays 0
            }
            context.write(key, new IntWritable(nums));
        }
    }
}
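Both classes above omit main() as standard boilerplate. For completeness, a minimal sketch of the usual driver, assuming the Demos classes from this post and input/output paths taken from the command line:

// A minimal driver sketch (the posts omit it as boilerplate).
package mr;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class Driver {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "last word count");
        job.setJarByClass(Driver.class);
        job.setMapperClass(Demos.wordMap.class);     // or Demo02.wmap
        job.setReducerClass(Demos.wordReduce.class); // or Demo02.wreduce
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}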

Reflection: stay flexible... don't just recite the textbook... and learn as much as you can from others.
