费尽周折写的一个mysql多字段滤重sql

来源:互联网 发布:淘宝联盟订单不结算 编辑:程序博客网 时间:2024/06/02 02:09
需求是两个表的合并 需要从表user_reply和reply_mention合并到表user_reply_mention,但是合并之后rid和uid有重复数据,需要滤重,不想写代码,写了个sql脚本,通过中间表过滤了。
CREATE TABLE `user_reply_mention` (  `id` int(11) unsigned NOT NULL AUTO_INCREMENT COMMENT '自增ID',  `rid` int(11) DEFAULT NULL COMMENT '帖子回复ID',  `uid` int(11) DEFAULT NULL COMMENT '用户ID',  `type` int(2) DEFAULT NULL COMMENT '数据类型(0 新加数据 1 用户评论 2 评论提到我',  `create_time` datetime DEFAULT NULL COMMENT '创建时间',  PRIMARY KEY (`id`),  UNIQUE KEY `unique_rid_uid` (`rid`,`uid`)) ENGINE=InnoDB AUTO_INCREMENT=3000000 DEFAULT CHARSET=utf8 ROW_FORMAT=COMPACT COMMENT='用户回复提到表'; CREATE TABLE `user_reply_mention2` (  `id` int(11) unsigned NULL AUTO_INCREMENT COMMENT '自增ID',  `rid` int(11) DEFAULT NULL COMMENT '帖子回复ID',  `uid` int(11) DEFAULT NULL COMMENT '用户ID',  `type` int(2) DEFAULT NULL COMMENT '数据类型(0 新加数据 1 用户评论 2 评论提到我',  `create_time` datetime DEFAULT NULL COMMENT '创建时间',  `rid_uid` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci DEFAULT NULL,  PRIMARY KEY (`id`),  KEY `index_rid_uid` (`rid_uid`(191))) ENGINE=InnoDB AUTO_INCREMENT=0 DEFAULT CHARSET=utf8 ROW_FORMAT=COMPACT COMMENT='用户回复提到表2';CREATE TABLE `user_reply_mention3` (  `id` int(11) DEFAULT  NULL COMMENT '自增ID',  `rid` int(11) DEFAULT NULL COMMENT '帖子回复ID',  `uid` int(11) DEFAULT NULL COMMENT '用户ID',  `type` int(2) DEFAULT NULL COMMENT '数据类型(0 新加数据 1 用户评论 2 评论提到我',  `create_time` datetime DEFAULT NULL COMMENT '创建时间',  `rid_uid` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci DEFAULT NULL,  KEY `rid_uid` (`rid_uid`(191))) ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=COMPACT COMMENT='用户回复提到表3';CREATE TABLE `user_reply_mention4` (  `id` int(11) unsigned NOT NULL AUTO_INCREMENT COMMENT '自增ID',  `rid` int(11) DEFAULT NULL COMMENT '帖子回复ID',  `uid` int(11) DEFAULT NULL COMMENT '用户ID',  `type` int(2) DEFAULT NULL COMMENT '数据类型(0 新加数据 1 用户评论 2 评论提到我',  `create_time` datetime DEFAULT NULL COMMENT '创建时间',  `rid_uid` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci DEFAULT NULL,  PRIMARY KEY (`id`),  KEY `rid_uid` (`rid_uid`(191))) ENGINE=InnoDB AUTO_INCREMENT=0 DEFAULT CHARSET=utf8 ROW_FORMAT=COMPACT COMMENT='用户回复提到表3';#导入数据alter table user_reply_mention2 auto_increment=0; insert into user_reply_mention2(id,uid,rid,create_time,type) select id,uid,rid,createtime,1 as type from user_reply; alter table user_reply_mention2 auto_increment=2000000;insert into user_reply_mention2(uid,rid,create_time,type) select uid,rid,create_time,2 as type from reply_mention;alter table user_reply_mention2 auto_increment=3000000;update user_reply_mention2 set rid_uid = concat(rid,'-',uid);#开始滤重insert into user_reply_mention3(rid_uid) select A.rid_uid from  ((select count(id) co, rid_uid  from user_reply_mention2 group by rid_uid having co > 1) A); //133439
update user_reply_mention3 B,user_reply_mention2 A set B.id = A.id where B.rid_uid = A.rid_uid;#总重复数select sum(A.co) from ((select count(1) as  co, rid_uid  from user_reply_mention2 group by rid_uid having co > 1)A);//266893select count(B.id) from user_reply_mention3 B,user_reply_mention2 A  where B.rid_uid = A.rid_uid and A.id != B.id; // 133454(删除数据)#user_reply_mention2 备份到4insert into user_reply_mention4(id,uid,rid,create_time,type,rid_uid) select id,uid,rid,create_time,type,rid_uid from user_reply_mention2; #删除2中重复数据select count(*) from user_reply_mention4   where id in (select A.id from user_reply_mention2 A,user_reply_mention3 B where A.rid_uid = B.rid_uid and A.id != B.id); //133454(删除数据)
delete from user_reply_mention4   where id in (select A.id from user_reply_mention2 A,user_reply_mention3 B where A.rid_uid = B.rid_uid and A.id != B.id); //133454(删除数据)
#检查是否有重复数据select count(id) co, rid_uid  from user_reply_mention4 group by rid_uid having co > 1; //0条#从4插入从2插入 4插入insert into user_reply_mention(id,uid,rid,create_time,type) select id,uid,rid,create_time,type from user_reply_mention2; //1661812#删除delete from user_reply_mention where id < 3000000;#验证从4插入insert into user_reply_mention(id,uid,rid,create_time,type) select id,uid,rid,create_time,type from user_reply_mention4; //1528358#插入#上线之后数据导入select id,uid,rid,createtime,1 as type from user_reply where id > 1776557;select max(id) from user_reply_mention2;1776557 最大id(type为1)2147792(type为2)insert into user_reply_mention(id,uid,rid,create_time,type) select id,uid,rid,createtime,1 as type from user_reply where id > 1776557; 17908961924350(总数)