SQL查询重复处理记录

来源：互联网发布：血手幽灵数据共享平台编辑：程序博客网时间：2024/05/22 21:01
--1、用于查询重复处理记录(如果列没有大小关系时2000用生成自增列和临时表处理,SQL2005用row_number函数处理) 
--> --> (Roy)生成測試數據 
if not object_id('Tempdb..#T') is null 
    drop table #T 
Go 
Create table #T([ID] int,[Name] nvarchar(1),[Memo] nvarchar(2)) 
Insert #T 
select 1,N'A',N'A1' union all 
select 2,N'A',N'A2' union all 
select 3,N'A',N'A3' union all 
select 4,N'B',N'B1' union all 
select 5,N'B',N'B2' 
Go 
--I、Name相同ID最小的记录(推荐用1,2,3),方法3在SQl05时，效率高于1、2 
方法1: 
Select * from #T a where not exists(select 1 from #T where Name=a.Name and ID <a.ID) 
方法2: 
select a.* from #T a join (select min(ID)ID,Name from #T group by Name) b on a.Name=b.Name and a.ID=b.ID 
方法3: 
select * from #T a where ID=(select min(ID) from #T where Name=a.Name) 
方法4: 
select a.* from #T a join #T b on a.Name=b.Name and a.ID>=b.ID group by a.ID,a.Name,a.Memo having count(1)=1 
方法5: 
select * from #T a group by ID,Name,Memo having ID=(select min(ID)from #T where Name=a.Name) 
方法6: 
select * from #T a where (select count(1) from #T where Name=a.Name and ID <a.ID)=0 
方法7: 
select * from #T a where ID=(select top 1 ID from #T where Name=a.name order by ID) 
方法8: 
select * from #T a where ID!>all(select ID from #T where Name=a.Name) 
方法9(注:ID为唯一时可用): 
select * from #T a where ID in(select min(ID) from #T group by Name) 
--SQL2005: 
方法10: 
select ID,Name,Memo from (select *,min(ID)over(partition by Name) as MinID from #T a)T where ID=MinID 
方法11: 
select ID,Name,Memo from (select *,row_number()over(partition by Name order by ID) as MinID from #T a)T where MinID=1 
生成结果: 
/* 
ID          Name Memo 
----------- ---- ---- 
1          A    A1 
4          B    B1 
(2 行受影响) 
*/ 
--II、Name相同ID最大的记录,与min相反: 
方法1: 
Select * from #T a where not exists(select 1 from #T where Name=a.Name and ID>a.ID) 
方法2: 
select a.* from #T a join (select max(ID)ID,Name from #T group by Name) b on a.Name=b.Name and a.ID=b.ID order by ID 
方法3: 
select * from #T a where ID=(select max(ID) from #T where Name=a.Name) order by ID 
方法4: 
select a.* from #T a join #T b on a.Name=b.Name and a.ID <=b.ID group by a.ID,a.Name,a.Memo having count(1)=1 
方法5: 
select * from #T a group by ID,Name,Memo having ID=(select max(ID)from #T where Name=a.Name) 
方法6: 
select * from #T a where (select count(1) from #T where Name=a.Name and ID>a.ID)=0 
方法7: 
select * from #T a where ID=(select top 1 ID from #T where Name=a.name order by ID desc) 
方法8: 
select * from #T a where ID! <all(select ID from #T where Name=a.Name) 
方法9(注:ID为唯一时可用): 
select * from #T a where ID in(select max(ID) from #T group by Name) 
--SQL2005: 
方法10: 
select ID,Name,Memo from (select *,max(ID)over(partition by Name) as MinID from #T a)T where ID=MinID 
方法11: 
select ID,Name,Memo from (select *,row_number()over(partition by Name order by ID desc) as MinID from #T a)T where MinID=1 
生成结果2: 
/* 
ID          Name Memo 
----------- ---- ---- 
3          A    A3 
5          B    B2 
(2 行受影响) 
*/ 
--2、删除重复记录有大小关系时,保留大或小其中一个记录 
--> --> (Roy)生成測試數據 
if not object_id('Tempdb..#T') is null 
    drop table #T 
Go 
Create table #T([ID] int,[Name] nvarchar(1),[Memo] nvarchar(2)) 
Insert #T 
select 1,N'A',N'A1' union all 
select 2,N'A',N'A2' union all 
select 3,N'A',N'A3' union all 
select 4,N'B',N'B1' union all 
select 5,N'B',N'B2' 
Go 
--I、Name相同ID最小的记录(推荐用1,2,3),保留最小一条 
方法1: 
delete a from #T a where  exists(select 1 from #T where Name=a.Name and ID <a.ID) 
方法2: 
delete a  from #T a left join (select min(ID)ID,Name from #T group by Name) b on a.Name=b.Name and a.ID=b.ID where b.Id is null 
方法3: 
delete a from #T a where ID not in (select min(ID) from #T where Name=a.Name) 
方法4(注:ID为唯一时可用): 
delete a from #T a where ID not in(select min(ID)from #T group by Name) 
方法5: 
delete a from #T a where (select count(1) from #T where Name=a.Name and ID <a.ID)>0 
方法6: 
delete a from #T a where ID <>(select top 1 ID from #T where Name=a.name order by ID) 
方法7: 
delete a from #T a where ID>any(select ID from #T where Name=a.Name) 
select * from #T 
生成结果: 
/* 
ID          Name Memo 
----------- ---- ---- 
1          A    A1 
4          B    B1 
(2 行受影响) 
*/ 
--II、Name相同ID保留最大的一条记录: 
方法1: 
delete a from #T a where  exists(select 1 from #T where Name=a.Name and ID>a.ID) 
方法2: 
delete a  from #T a left join (select max(ID)ID,Name from #T group by Name) b on a.Name=b.Name and a.ID=b.ID where b.Id is null 
方法3: 
delete a from #T a where ID not in (select max(ID) from #T where Name=a.Name) 
方法4(注:ID为唯一时可用): 
delete a from #T a where ID not in(select max(ID)from #T group by Name) 
方法5: 
delete a from #T a where (select count(1) from #T where Name=a.Name and ID>a.ID)>0 
方法6: 
delete a from #T a where ID <>(select top 1 ID from #T where Name=a.name order by ID desc) 
方法7: 
delete a from #T a where ID <any(select ID from #T where Name=a.Name) 
select * from #T 
/* 
ID          Name Memo 
----------- ---- ---- 
3          A    A3 
5          B    B2 
(2 行受影响) 
*/ 
--3、删除重复记录没有大小关系时，处理重复值 
--> --> (Roy)生成測試數據 
if not object_id('Tempdb..#T') is null 
    drop table #T 
Go 
Create table #T([Num] int,[Name] nvarchar(1)) 
Insert #T 
select 1,N'A' union all 
select 1,N'A' union all 
select 1,N'A' union all 
select 2,N'B' union all 
select 2,N'B' 
Go 
方法1: 
if object_id('Tempdb..#') is not null 
    drop table # 
Select distinct * into # from #T--排除重复记录结果集生成临时表# 
truncate table #T--清空表 
insert #T select * from #    --把临时表#插入到表#T中 
--查看结果 
select * from #T 
/* 
Num        Name 
----------- ---- 
1          A 
2          B 
(2 行受影响) 
*/ 
--重新执行测试数据后用方法2 
方法2: 
alter table #T add ID int identity--新增标识列 
go 
delete a from  #T a where  exists(select 1 from #T where Num=a.Num and Name=a.Name and ID>a.ID)--只保留一条记录 
go 
alter table #T drop column ID--删除标识列 
--查看结果 
select * from #T 
/* 
Num        Name 
----------- ---- 
1          A 
2          B 
(2 行受影响) 
*/ 
--重新执行测试数据后用方法3 
方法3: 
declare Roy_Cursor cursor local for 
select count(1)-1,Num,Name from #T group by Num,Name having count(1)>1 
declare @con int,@Num int,@Name nvarchar(1) 
open Roy_Cursor 
fetch next from Roy_Cursor into @con,@Num,@Name 
while @@Fetch_status=0 
begin 
    set rowcount @con; 
    delete #T where Num=@Num and Name=@Name 
    set rowcount 0; 
    fetch next from Roy_Cursor into @con,@Num,@Name 
end 
close Roy_Cursor 
deallocate Roy_Cursor 
--查看结果 
select * from #T 
/* 
Num        Name 
----------- ---- 
1          A 
2          B 
(2 行受影响) 
--------------------------
--按某一字段分组取最大(小)值所在行的数据
(爱新觉罗.毓华 2007-10-23于浙江杭州)
/*
数据如下：
name val memo
a    2   a2(a的第二个值)
a    1   a1--a的第一个值
a    3   a3:a的第三个值
b    1   b1--b的第一个值
b    3   b3:b的第三个值
b    2   b2b2b2b2
b    4   b4b4
b    5   b5b5b5b5b5
*/
--创建表并插入数据：
create table tb(name varchar(10),val int,memo varchar(20))
insert into tb values('a',    2,   'a2(a的第二个值)')
insert into tb values('a',    1,   'a1--a的第一个值')
insert into tb values('a',    3,   'a3:a的第三个值')
insert into tb values('b',    1,   'b1--b的第一个值')
insert into tb values('b',    3,   'b3:b的第三个值')
insert into tb values('b',    2,   'b2b2b2b2')
insert into tb values('b',    4,   'b4b4')
insert into tb values('b',    5,   'b5b5b5b5b5')
go
--一、按name分组取val最大的值所在行的数据。
--方法1：
select a.* from tb a where val = (select max(val) from tb where name = a.name) order by a.name
--方法2：
select a.* from tb a where not exists(select 1 from tb where name = a.name and val > a.val)
--方法3：
select a.* from tb a,(select name,max(val) val from tb group by name) b where a.name = b.name and a.val = b.val order by a.name
--方法4：
select a.* from tb a inner join (select name , max(val) val from tb group by name) b on a.name = b.name and a.val = b.val order by a.name
--方法5
select a.* from tb a where 1 > (select count(*) from tb where name = a.name and val > a.val ) order by a.name
/*
name       val         memo                 
---------- ----------- -------------------- 
a          3           a3:a的第三个值
b          5           b5b5b5b5b5
*/
--二、按name分组取val最小的值所在行的数据。
--方法1：
select a.* from tb a where val = (select min(val) from tb where name = a.name) order by a.name
--方法2：
select a.* from tb a where not exists(select 1 from tb where name = a.name and val < a.val)
--方法3：
select a.* from tb a,(select name,min(val) val from tb group by name) b where a.name = b.name and a.val = b.val order by a.name
--方法4：
select a.* from tb a inner join (select name , min(val) val from tb group by name) b on a.name = b.name and a.val = b.val order by a.name
--方法5
select a.* from tb a where 1 > (select count(*) from tb where name = a.name and val < a.val) order by a.name
/*
name       val         memo                 
---------- ----------- -------------------- 
a          1           a1--a的第一个值
b          1           b1--b的第一个值
*/
--三、按name分组取第一次出现的行所在的数据。
select a.* from tb a where val = (select top 1 val from tb where name = a.name) order by a.name
/*
name       val         memo                 
---------- ----------- -------------------- 
a          2           a2(a的第二个值)
b          1           b1--b的第一个值
*/
--四、按name分组随机取一条数据。
select a.* from tb a where val = (select top 1 val from tb where name = a.name order by newid()) order by a.name
/*
name       val         memo                 
---------- ----------- -------------------- 
a          1           a1--a的第一个值
b          5           b5b5b5b5b5
*/
--五、按name分组取最小的两个(N个)val
select a.* from tb a where 2 > (select count(*) from tb where name = a.name and val < a.val ) order by a.name,a.val
select a.* from tb a where val in (select top 2 val from tb where name=a.name order by val) order by a.name,a.val
select a.* from tb a where exists (select count(*) from tb where name = a.name and val < a.val having Count(*) < 2) order by a.name
/*
name       val         memo                 
---------- ----------- -------------------- 
a          1           a1--a的第一个值
a          2           a2(a的第二个值)
b          1           b1--b的第一个值
b          2           b2b2b2b2
*/
--六、按name分组取最大的两个(N个)val
select a.* from tb a where 2 > (select count(*) from tb where name = a.name and val > a.val ) order by a.name,a.val
select a.* from tb a where val in (select top 2 val from tb where name=a.name order by val desc) order by a.name,a.val
select a.* from tb a where exists (select count(*) from tb where name = a.name and val > a.val having Count(*) < 2) order by a.name
/*
name       val         memo                 
---------- ----------- -------------------- 
a          2           a2(a的第二个值)
a          3           a3:a的第三个值
b          4           b4b4
b          5           b5b5b5b5b5
*/
--七，如果整行数据有重复，所有的列都相同。
/*
数据如下：
name val memo
a    2   a2(a的第二个值)
a    1   a1--a的第一个值
a    1   a1--a的第一个值
a    3   a3:a的第三个值
a    3   a3:a的第三个值
b    1   b1--b的第一个值
b    3   b3:b的第三个值
b    2   b2b2b2b2
b    4   b4b4
b    5   b5b5b5b5b5
*/
--在sql server 2000中只能用一个临时表来解决，生成一个自增列，先对val取最大或最小，然后再通过自增列来取数据。
--创建表并插入数据：
create table tb(name varchar(10),val int,memo varchar(20))
insert into tb values('a',    2,   'a2(a的第二个值)')
insert into tb values('a',    1,   'a1--a的第一个值')
insert into tb values('a',    1,   'a1--a的第一个值')
insert into tb values('a',    3,   'a3:a的第三个值')
insert into tb values('a',    3,   'a3:a的第三个值')
insert into tb values('b',    1,   'b1--b的第一个值')
insert into tb values('b',    3,   'b3:b的第三个值')
insert into tb values('b',    2,   'b2b2b2b2')
insert into tb values('b',    4,   'b4b4')
insert into tb values('b',    5,   'b5b5b5b5b5')
go
select * , px = identity(int,1,1) into tmp from tb
select m.name,m.val,m.memo from
(
  select t.* from tmp t where val = (select min(val) from tmp where name = t.name)
) m where px = (select min(px) from
(
  select t.* from tmp t where val = (select min(val) from tmp where name = t.name)
) n where n.name = m.name)
drop table tb,tmp
/*
name       val         memo
---------- ----------- --------------------
a          1           a1--a的第一个值
b          1           b1--b的第一个值
(2 行受影响)
*/
--在sql server 2005中可以使用row_number函数，不需要使用临时表。
--创建表并插入数据：
create table tb(name varchar(10),val int,memo varchar(20))
insert into tb values('a',    2,   'a2(a的第二个值)')
insert into tb values('a',    1,   'a1--a的第一个值')
insert into tb values('a',    1,   'a1--a的第一个值')
insert into tb values('a',    3,   'a3:a的第三个值')
insert into tb values('a',    3,   'a3:a的第三个值')
insert into tb values('b',    1,   'b1--b的第一个值')
insert into tb values('b',    3,   'b3:b的第三个值')
insert into tb values('b',    2,   'b2b2b2b2')
insert into tb values('b',    4,   'b4b4')
insert into tb values('b',    5,   'b5b5b5b5b5')
go
select m.name,m.val,m.memo from
(
  select * , px = row_number() over(order by name , val) from tb
) m where px = (select min(px) from
(
  select * , px = row_number() over(order by name , val) from tb
) n where n.name = m.name)
drop table tb
/*
name       val         memo
---------- ----------- --------------------
a          1           a1--a的第一个值
b          1           b1--b的第一个值
(2 行受影响)
*/
select * from @T t where userid=(select min(userid) from @T where left(loginname,3)=left(t.loginname,3))
----------------------------------------------------
--------------删除所有重复记录 
delete a where col1 in (select col1 from a group by col001 having count(*)>1) 
------------------全部重复 保留一条数据， 
select * into #b from a ---备份 
truncate table #b 
insert into a 
select disitnct  * from #b 
---------------某个字段重复，只保留一条数据库（用游标） 
-------------------------用游标过滤重复记录 
declare @aa varchar(60) 
declare www cursor for 
select h_name from join_product_ty_bf 
open www 
fetch next from www into @aa 
while(@@fetch_status=0) 
begin 
fetch next from www into @aa 
if exists 
(select h_name from join_product_ty_bf where h_name like @aa group by h_name  having count(*)>1) 
delete join_product_ty_bf where current of www 
end