数据算法-hadoop7 购物车分析
来源:互联网 发布:程序怎样烧进单片机 编辑:程序博客网 时间:2024/05/22 00:53
核心思路:通过递归,从一条交易记录的商品列表(例如[a,b,c,d])中枚举出所有大小为n的组合
如果n=2
为:[a,b],[a,c],[a,d],[b,c],[b,d],[c,d]
如果n=3
为:[a,b,c],[a,b,d],[b,c,d],[a,c,d]
然后统计每个组合在所有交易记录中出现的次数
public class MBAMapper extends Mapper<LongWritable, Text, Text, IntWritable> { public static final int DEFAULT_NUMBER_OF_PAIRS = 2; private static final Text reducerKey = new Text(); private static final IntWritable NUMBER_ONE = new IntWritable(1); int numberOfPairs; @Override protected void setup(Context context) throws IOException,InterruptedException { this.numberOfPairs = context.getConfiguration().getInt( "number.of.pairs", DEFAULT_NUMBER_OF_PAIRS); } public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { String line = value.toString(); List<String> items = convertItemsToList(line); if ((items == null) || (items.isEmpty())) { // no mapper output will be generated return; } generateMapperOutput(numberOfPairs, items, context); } private static List<String> convertItemsToList(String line) { if ((line == null) || (line.length() == 0)) { // no mapper output will be generated return null; } String[] tokens = StringUtils.split(line, ","); if ((tokens == null) || (tokens.length == 0)) { return null; } List<String> items = new ArrayList<String>(); for (String token : tokens) { if (token != null) { items.add(token.trim()); } } return items; } private void generateMapperOutput(int numberOfPairs, List<String> items, Context context) throws IOException, InterruptedException { List<List<String>> sortedCombinations = Combination .findSortedCombinations(items, numberOfPairs); for (List<String> itemList : sortedCombinations) { System.out.println("itemlist=" + itemList.toString()); reducerKey.set(itemList.toString()); context.write(reducerKey, NUMBER_ONE); } }}
public class MBAReducer extends Reducer<Text, IntWritable, Text, IntWritable>{ public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException { int sum = 0; // total items paired for (IntWritable value : values) { sum += value.get(); } context.write(key, new IntWritable(sum)); }}
public static void main(String[] args) throws Exception { Configuration conf1 = new Configuration(); System.setProperty("hadoop.home.dir", "E:\\hadoop-2.6.0"); conf1.setBoolean("dfs.permissions", false); Job job = Job.getInstance(conf1, "MBA"); job.setMapperClass(MBAMapper.class); job.setReducerClass(MBAReducer.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(IntWritable.class); // output format job.setOutputFormatClass(TextOutputFormat.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); job.setNumReduceTasks(1); FileInputFormat.setInputPaths(job, new Path( "E:\\java\\test\\07\\07.txt")); FileOutputFormat .setOutputPath(job, new Path("E:\\java\\test\\07\\out")); if (job.waitForCompletion(true)) { // log.info("MR run successfully"); } else { // log.error("MR run failed"); } }
输入
crackers,bread,banana
crackers,coke,butter,coffee
crackers,bread
crackers,bread
crackers,bread,coffee
butter,coke
butter,coke,bread,crackers
输出
[banana, bread] 1
[banana, crackers] 1
[bread, butter] 1
[bread, coffee] 1
[bread, coke] 1
[bread, crackers] 5
[butter, coffee] 1
[butter, coke] 3
[butter, crackers] 2
[coffee, coke] 1
[coffee, crackers] 2
[coke, crackers] 2
阅读全文
0 0
- 数据算法-hadoop7 购物车分析
- hadoop7
- 数据挖掘--购物车 推荐算法
- 大数据hadoop7天视频教程全套
- 算法(4)购物篮分析
- 购物车代码分析
- 购物车思路分析
- 购物车分析
- 购物车分析
- PHP_分析购物车
- 数据挖掘算法之-关联规则挖掘(Association Rule)(购物篮分析)
- 数据挖掘算法之-关联规则挖掘(Association Rule)(购物篮分析)
- 购物车没有数据
- 购物车json数据
- 购物车死数据
- 删除购物车数据
- python 购物数据意淫分析(1)
- python购物数据意淫分析(2)
- tcp/ip学习笔记--第22章 TCP persist timer
- 置换加密法
- 链路层常见报文格式及长度
- JVM虚拟机有内存泄露怎么解决
- 网络编程是什么
- 数据算法-hadoop7 购物车分析
- 我的java自学经历
- Apriori
- C和指针之动态内存分配(读取范围在1和标准输入读取的size之前每个数据出现的次数)
- 编译坑_Ubuntu16.04编译过程踩坑记录(二)
- scope中的图怎么保存
- git rebase和git merge
- SSM集成错误
- Angular CLI ng g module 参数