spring mongo 下 mapreduce 和 group的用法

来源:互联网 发布:ubuntu启动anaconda 编辑:程序博客网 时间:2024/05/30 07:13

1.  KeywordsStaticsModel   数据模型

// Fields of the KeywordsStaticsModel document (one declaration per line so
// that trailing line comments no longer swallow the declarations after them).
private String id;
private String keyword;      // the search keyword
private String keywordUuid;  // MD5 of keyword + datestr

// Search-related counters
private int sessionCount = 0;
private int adsCount = 0;

// Click-related counters
private int clickCount = 0;
private int yahooClick = 0;

private float score;           // ranking score
private long createTime;       // creation time
private String createTimeStr;  // records the current access time

2.  mapreduce 统计 每个关键字的sessionCount总数

//Query query = new Query();query.addCriteria(Criteria.where("createTime").gte(timeStart).lt(timeEnd));String mapFunction = "function(){" +"emit(this.keyword,{" +"sessionCount:this.sessionCount," +"adsCount:this.adsCount," +"amazonClick:this.amazonClick," +"becomeClick:this.becomeClick," +"clickCount:this.clickCount," +"rakutenClick:this.rakutenClick})" +"}";String reduceFunction = "function(key, values){" +"var x = {sessionCount:0,adsCount:0,clickCount:0,yahooClick:0,yahooActionClick:0};" +"values.forEach(function(val){ " +                              "x.sessionCount+=val.sessionCount; "+                            "x.adsCount += val.adsCount; "+                            "x.clickCount += val.clickCount; "+                            "});" +                            "return x;}";long startTime = System.currentTimeMillis();MapReduceResults<KeywordsStaticsModel> asd = getMongoTemplate().mapReduce(query, "keywordsStaticsModel", mapFunction, reduceFunction, KeywordsStaticsModel.class);long endTime = System.currentTimeMillis();System.out.println("spend time ==>"+(endTime-startTime));

3.  使用 group 统计每个关键字的 sessionCount 总数。注意:group 的返回结果最多只能包含 20000 个不同的分组(即不同的 keyword),超过该数量时应改用 mapreduce。

// Select documents whose createTime is in [timeStart, timeEnd).
Criteria timeWindow = Criteria.where("createTime").gte(timeStart).lt(timeEnd);

// Accumulator each group starts from: both running sums begin at zero.
DBObject initialSums = new BasicDBObject();
initialSums.put("sessionCountSum", 0);
initialSums.put("adsCountSum", 0);

// JavaScript reducer: fold each document's counters into the group's running sums.
String sumCounters =
        "function(obj,prev){"
            + "prev.sessionCountSum += obj.sessionCount;"
            + "prev.adsCountSum += obj.adsCount;"
        + "}";

// Group by the "keyword" field using the reducer and initial accumulator above.
GroupBy byKeyword = new GroupBy("keyword")
        .initialDocument(initialSums)
        .reduceFunction(sumCounters);

GroupByResults<KeywordsStaticsModel> grouped = getMongoTemplate().group(
        timeWindow, "keywordsStaticsModel", byKeyword, KeywordsStaticsModel.class);

// Print the "count" and "keys" entries of the raw server response.
DBObject raw = grouped.getRawResults();
System.out.println("count==>" + raw.get("count"));
System.out.println("keys==>" + raw.get("keys"));


原创粉丝点击