数据算法-hadoop7 购物车分析

来源:互联网 发布:程序怎样烧进单片机 编辑:程序博客网 时间:2024/05/22 00:53

核心思路:通过递归,从[a,b,c,d]中生成所有包含n个元素的组合
如果n=2
为:[a,b],[a,c],[a,d],[b,c],[b,d],[c,d]
如果n=3
为:[a,b,c],[a,b,d],[b,c,d],[a,c,d]
然后统计每个组合在所有交易记录中出现的次数

public class MBAMapper extends Mapper<LongWritable, Text, Text, IntWritable> {    public static final int DEFAULT_NUMBER_OF_PAIRS = 2;    private static final Text reducerKey = new Text();    private static final IntWritable NUMBER_ONE = new IntWritable(1);    int numberOfPairs;    @Override    protected void setup(Context context) throws IOException,InterruptedException {        this.numberOfPairs = context.getConfiguration().getInt(                "number.of.pairs", DEFAULT_NUMBER_OF_PAIRS);    }    public void map(LongWritable key, Text value, Context context)            throws IOException, InterruptedException {        String line = value.toString();        List<String> items = convertItemsToList(line);        if ((items == null) || (items.isEmpty())) {            // no mapper output will be generated            return;        }        generateMapperOutput(numberOfPairs, items, context);    }    private static List<String> convertItemsToList(String line) {        if ((line == null) || (line.length() == 0)) {            // no mapper output will be generated            return null;        }        String[] tokens = StringUtils.split(line, ",");        if ((tokens == null) || (tokens.length == 0)) {            return null;        }        List<String> items = new ArrayList<String>();        for (String token : tokens) {            if (token != null) {                items.add(token.trim());            }        }        return items;    }    private void generateMapperOutput(int numberOfPairs, List<String> items,            Context context) throws IOException, InterruptedException {        List<List<String>> sortedCombinations = Combination                .findSortedCombinations(items, numberOfPairs);        for (List<String> itemList : sortedCombinations) {            System.out.println("itemlist=" + itemList.toString());            reducerKey.set(itemList.toString());            context.write(reducerKey, NUMBER_ONE);        }    }}
public class MBAReducer  extends Reducer<Text, IntWritable, Text, IntWritable>{    public void reduce(Text key, Iterable<IntWritable> values, Context context)            throws IOException, InterruptedException {        int sum = 0; // total items paired        for (IntWritable value : values) {            sum += value.get();        }        context.write(key, new IntWritable(sum));    }}
public static void main(String[] args) throws Exception {        Configuration conf1 = new Configuration();        System.setProperty("hadoop.home.dir", "E:\\hadoop-2.6.0");        conf1.setBoolean("dfs.permissions", false);        Job job = Job.getInstance(conf1, "MBA");        job.setMapperClass(MBAMapper.class);        job.setReducerClass(MBAReducer.class);        job.setMapOutputKeyClass(Text.class);        job.setMapOutputValueClass(IntWritable.class);        // output format        job.setOutputFormatClass(TextOutputFormat.class);        job.setOutputKeyClass(Text.class);        job.setOutputValueClass(IntWritable.class);        job.setNumReduceTasks(1);        FileInputFormat.setInputPaths(job, new Path(                "E:\\java\\test\\07\\07.txt"));        FileOutputFormat                .setOutputPath(job, new Path("E:\\java\\test\\07\\out"));        if (job.waitForCompletion(true)) {            // log.info("MR run successfully");        } else {            // log.error("MR run failed");        }    }

输入

crackers,bread,banana
crackers,coke,butter,coffee
crackers,bread
crackers,bread
crackers,bread,coffee
butter,coke
butter,coke,bread,crackers

输出

[banana, bread]	1
[banana, crackers]	1
[bread, butter]	1
[bread, coffee]	1
[bread, coke]	1
[bread, crackers]	5
[butter, coffee]	1
[butter, coke]	3
[butter, crackers]	2
[coffee, coke]	1
[coffee, crackers]	2
[coke, crackers]	2
原创粉丝点击