java操作mongodb mapreduce

来源:互联网 发布:appstore日本福利软件 编辑:程序博客网 时间:2024/05/21 08:53

最近有一个需求:需要通过mongodb某一个字段来分组统计其它几个字段的数量,因为这是要更新线上千万以上的数据量,所以需要同时更新到mysql表中,而尽量不让延迟:代码如下:

package com.lifeix;


import java.io.IOException;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.text.ParseException;
import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;


import com.mongodb.BasicDBList;
import com.mongodb.BasicDBObject;
import com.mongodb.DB;
import com.mongodb.DBCollection;
import com.mongodb.DBObject;
import com.mongodb.MapReduceCommand;
import com.mongodb.MapReduceOutput;
import com.mongodb.Mongo;
import com.mongodb.MongoException;
import com.mongodb.MongoOptions;
import com.mongodb.ServerAddress;


public class UpdateDataDb {

/**@description:
* @return:void
* @param args
* @throws MongoException 
* @throws SQLException 
* @throws ParseException 
* @throws IOException 
*/
private Connection mysqlConn;
public static void main(String[] args) throws MongoException, SQLException, ParseException, IOException {
UpdateDataDb t = new UpdateDataDb();
        t.insertData();
}

public Connection connectionMYSQL() {
Connection conn = null;
try {
  Class.forName("com.mysql.jdbc.Driver");
  conn = java.sql.DriverManager.getConnection(
    url, username, pass);
 } catch (ClassNotFoundException e) {
  // TODO Auto-generated catch block
  e.printStackTrace();
 } catch (SQLException e) {
// TODO Auto-generated catch block
e.printStackTrace();
 }
 return conn;
}

private void insertData() throws SQLException, MongoException, ParseException, IOException {
MongoOptions options = new MongoOptions();
options.autoConnectRetry = true;  
        options.connectionsPerHost = 300;  
        options.maxWaitTime= 3000;
Mongo mg = new Mongo(new ServerAddress("192.168.1.48",27017), options);
DB db = mg.getDB("l_comment");
selectData(db);
}

public void selectData(DB db) throws SQLException {
long start = System.currentTimeMillis();
int ids = 1;
String sql = "select localId from townfile_local_count where localId >=" + ids;
String mysql = "select clCount,cl from (select count(a.cl) clCount,cl from ( " +
"select * from ( " +
    "select count(checkLocal) countLocal,checkLocal cl  from townfile_local_check " + 
        "group by checkLocal,checkaccount) b ) a " +
            "group by a.cl) c ";
String updateSql = "update townfile_local_count_copy set reblogNO=?,likeNO=?,localPhotoNO=?,userNO=? where localId=?";
String usql = "update townfile_local_copy set commentNO=? where localId=?";
PreparedStatement ps = null;
PreparedStatement updatePs = null;
ResultSet rs = null;
ResultSet updateRs = null;
try {
mysqlConn = connectionMYSQL();
ps = mysqlConn.prepareStatement(sql);
rs = ps.executeQuery();
} catch (SQLException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
Map<Integer, Map<String, Integer>> datas = new HashMap<Integer, Map<String, Integer>>();
while(rs.next()) {
int localId = rs.getInt("localId");
DBCollection coll = db.getCollection("comment");
String map = "function() {{emit(this.rootId,[this.status,this.conType,this.content]);}}";  //通过rootId分组
String reduce = "function(key, values){var reblogCount = 0;var likeNo=0; " +
"var typeCount=0;var contentCount=0; var temp='';" +
"for(var i=0;i < values.length;i++) " +      //for循环的操作跟js没有什么区别
 "{" +
 "var val = values[i]; " +
 "var status=parseInt(val[0]);" +
 "var type=parseInt(val[1]);" +
 "var content =  val[2];" +
 "if(status==11) {reblogCount = reblogCount + 1;} " +
 "else if(status==12) {likeNo = likeNo + 1;} " +
 " else if(status < 10 && status != 2) {" +
 " var text = '';if(content ==null)continue;" +
 "if(content.lastIndexOf('\"},\"type\"')) {" +
  "text = content.substring(content.indexOf(':') + 1,content.indexOf(',\"type\"'))" +
 "}if(content.lastIndexOf(',\"image\"')>0) {" +
  "text = content.substring(content.indexOf(':') + 1,content.lastIndexOf(',\"image\"'));" +
 "} else if(content.indexOf(',\"meta\"') > 0) {" +
  "text = content.substring(content.indexOf(':') + 1,content.lastIndexOf(',\"meta\"'))" +
 "} else if(content.indexOf(',\"pigeonId\"')>0) {" +
  "text = content.substring(content.indexOf(':') + 1, content.lastIndexOf(',\"pigeonId\"'));" +
 "} else if(content == '{}') {text='';}" +
 "if(type > 0) {typeCount = typeCount + 1;}" +
  "if(text && text != '\"\"') {" +
  "contentCount = contentCount + 1;temp += text + '#  ';" +
  "} "+
 "}" +
 "}" +
 " return {k:key,reb:reblogCount,like:likeNo,localPhoto:typeCount,contentNo:contentCount,cc:temp}; }";  //返回的数据就是统计完其它几个字段后的,通过rootId分组后的这一行数据,不会重复

//注意,统计多个字段不同条件的数量时,设计下面的查询条件是很重要的,不然是无法一次统计出不同条件的数据,具体情况,要根据你的业务做调整。你要知道,mongodb可是处理千万级以上数据的,能少遍历就少遍历
DBObject query = new BasicDBObject();
query.put("rootId", localId);
query.put("type", 140);
BasicDBList list = new BasicDBList();
list.add(new BasicDBObject("status",new BasicDBObject("$gte",0)));
list.add(new BasicDBObject("conType", new BasicDBObject("$gt",0)));
query.put("$or",list);
MapReduceCommand cmd = new MapReduceCommand(coll,map, reduce,"reblogNo",MapReduceCommand.OutputType.INLINE,query);
MapReduceOutput out = coll.mapReduce(cmd);
for (DBObject result : out.results()) {
System.out.println(result);
Map<String,Integer> rest = null;
Integer id = Integer.parseInt(result.get("_id").toString());
if(datas.containsKey(id)) {
rest = datas.get(id);
} else {
rest = new HashMap<String, Integer>();
}
Object obj = result.get("value");

                     //mapreduce会返回两种数据类型,如果查不出数据,则会是一个list,否则是一个object
if(obj instanceof BasicDBList) {
BasicDBList objt = (BasicDBList) obj;
String stype = objt.get(1).toString();
stype = stype.substring(0,stype.lastIndexOf("."));
Integer type = Integer.parseInt(stype);

String sst = objt.get(0).toString();
sst = sst.substring(0, sst.lastIndexOf("."));
Integer status = Integer.parseInt(sst);
if(status.intValue() == 12) {
int likeNo = rest.get("likeNo") == null?0:rest.get("likeNo").intValue();
rest.put("likeNo", 1+likeNo);
} else if(status.intValue() == 11) {
int reblogNo = rest.get("reblogNo") == null?0:rest.get("reblogNo");
rest.put("reblogNo", 1 + reblogNo);
} else if(status.intValue() < 10 && status.intValue() != 2) {
if(type.intValue() > 0) {
int localPhotoNo = rest.get("localPhototNo") == null?0:rest.get("localPhotoNo").intValue();
rest.put("localPhotoNo", 1 + localPhotoNo);
}
String content = objt.get(2)==null?null:objt.get(2).toString();
if(null == content || content.indexOf("{\"localId\":") >= 0 || !content.startsWith("{\"text\":") || 
content.startsWith("{\"text\":\"\",\"image\":\"\",") || content.startsWith("{\"text\":\"\"}") ||
content.startsWith("{\"text\":\"\",\"meta\":\"\",") || content.startsWith("{}")) {
continue;
}
int contentNo = rest.get("contentNo") == null?0: rest.get("contentNo").intValue();
rest.put("contentNo", 1 + contentNo);
}

} else {
BasicDBObject objt = (BasicDBObject) result.get("value");
int reblogNo = rest.get("reblogNo")==null?0:rest.get("reblogNo").intValue();
int likeNo = rest.get("likeNo") == null?0:rest.get("likeNo").intValue();
int localPhotoNo = rest.get("localPhototNo") == null?0:rest.get("localPhotoNo").intValue();
int contentNo = rest.get("contentNo") == null?0: rest.get("contentNo").intValue();
rest.put("reblogNo", objt.getInt("reb") + reblogNo);
rest.put("likeNo", objt.getInt("like") + likeNo);
rest.put("localPhotoNo", objt.getInt("localPhoto") + localPhotoNo);
rest.put("contentNo", objt.getInt("contentNo") + contentNo);
}
datas.put(id, rest);

}  
}
long end1 = System.currentTimeMillis();
System.out.println("分组查询时间:" + (end1-start)/1000 + " s");

updatePs = mysqlConn.prepareStatement(mysql);
updateRs = updatePs.executeQuery();
while(updateRs.next()) {
int count = updateRs.getInt("clCount");
int localId = updateRs.getInt("cl");
Map<String, Integer> map = datas.get(localId);
if(map != null) {
map.put("userNo", count);
datas.put(localId, map);
}
}
updateRs.close();
updatePs.close();

if(datas.size() > 0) {

for(Entry<Integer, Map<String, Integer>> entry : datas.entrySet()) {
Map<String, Integer> map = entry.getValue();
PreparedStatement pst = mysqlConn.prepareStatement(updateSql);
int reblogNo = map.get("reblogNo")==null?0:map.get("reblogNo").intValue();
int likeNo = map.get("likeNo") == null?0:map.get("likeNo").intValue();
int localPhotoNo = map.get("localPhotoNo") == null?0:map.get("localPhotoNo").intValue();
int contentNo = map.get("contentNo") == null?0: map.get("contentNo").intValue();
int userNo = map.get("userNo") == null?0:map.get("userNo").intValue();

pst.setInt(1, reblogNo);
pst.setInt(2, likeNo);
pst.setInt(3, localPhotoNo);
pst.setInt(4, userNo);
pst.setInt(5, entry.getKey());
pst.execute();
pst.close();
PreparedStatement pst2 = mysqlConn.prepareStatement(usql);
pst2.setInt(1, contentNo);
pst2.setInt(2, entry.getKey());
pst2.execute();
pst2.close();

System.out.println(entry.getKey() + " updated  " + entry.getValue());
}
}
long end2 = System.currentTimeMillis();
System.out.println("更新时间:" + (end2-end1)/1000 + " s");
}
}

原创粉丝点击