Some time we need to remove duplicate records from tables on different scenarios like
Remove duplicate column when all column are duplicate .
Remove duplicate column when table have a UniqueID with identity and other fields are duplicate.
Remove duplicate column when all column are duplicate .
Remove duplicate column when table have a UniqueID with identity and other fields are duplicate.
I am sharing a article, which show you how to remove duplicate records in sql server.
Remove duplicate column when all column are duplicate
--------------------------------------------------------------------------------------------------------------------
--Create a table Customer
Create table Customer (id int, name varchar (100))
--Insert data into customer table with duplicate id and name INSERT INTO Customer VALUES (1, 'John')
INSERT INTO Customer VALUES (1, 'John')
INSERT INTO Customer VALUES (2, 'Tom')
INSERT INTO Customer VALUES (3, 'Mann')
INSERT INTO Customer VALUES (4, 'Honey')
INSERT INTO Customer VALUES (5, 'Tang')
INSERT INTO Customer VALUES (5, 'Tang')
-- Query 1 - Run first CTE query with row number
WITH duplicateRecord as
(
SELECT id, name, ROW_NUMBER() OVER (ORDER BY id, name) AS rownum
FROM Customer
)
DELETE duplicateRecord WHERE rownum NOT IN
(SELECT MIN(rownum) FROM duplicateRecord GROUP BY id, name)
--Result (2 row(s) affected)
Select * from Customer
--Result
--1 John
--2 Tom
--3 Mann
--4 Honey
--5 Tang
-- Query 2 -Run second CTE query with row number
WITH duplicateRecord (id, name,rownum) as
(
SELECT id, name, ROW_NUMBER() OVER (PARTITION BY id, name ORDER BY id, name) AS rownum
FROM Customer
)
DELETE FROM duplicateRecord
WHERE rownum > 1
--Result (2 row(s) affected)
Select * from Customer
--Result
--1 John
--2 Tom
--3 Mann
--4 Honey
--5 Tang
Remove duplicate column when table have a UniqueID with identity and other fields are duplicate
--------------------------------------------------------------------------------------------------------------------
-- Create a table Student
Create table Student (id int identity(1,1), name varchar (100),Address varchar(200))
-- Insert data into Student table with duplicate name only
INSERT INTO Student VALUES ('John','101 New York')
INSERT INTO Student VALUES ('John','110 New York')
INSERT INTO Student VALUES ('Tom','105 New York')
INSERT INTO Student VALUES ('Mann','104 New York')
INSERT INTO Student VALUES ('Honey','105 New York')
INSERT INTO Student VALUES ('Tang','9 New York')
INSERT INTO Student VALUES ('Tang','90 New York')
Select * from Student
-- Result
--1 John 101 New York
--2 John 110 New York -Duplicate
--3 Tom 105 New York
--4 Mann 104 New York
--5 Honey 105 New York
--6 Tang 9 New York
--7 Tang 90 New York -Duplicate
-- Query 1 - Run first CTE query with row number and delete latest duplicate record
WITH rowNumber as
(
SELECT name, ROW_NUMBER() OVER (ORDER BY name) AS rowNumber
FROM Student
)
DELETE rowNumber WHERE rowNumber NOT IN
(SELECT MIN(rowNumber) FROM rowNumber GROUP BY name)
-- Result
--1 John 101 New York
--3 Tom 105 New York
--4 Mann 104 New York
--5 Honey 105 New York
--6 Tang 9 New York
-- Use MAX(rowNumber) if you want delete old duplicate record
-- Query 2 -Run second CTE query with row number and delete latest duplicate record
WITH duplicateRecord (name,duplicateRecord) as
(
SELECT name, ROW_NUMBER() OVER (PARTITION BY name ORDER BY name) AS duplicate
FROM Student
)
DELETE FROM duplicateRecord WHERE duplicateRecord > 1
--Result
--1 John 101 New York
--3 Tom 105 New York
--4 Mann 104 New York
--5 Honey 105 New York
--6 Tang 9 New York
-- Query 3 -This query will delete old duplicate record and keep the latest one.
-- Delete old record John and Tang.
-- 1 John 101 New York
-- 6 Tang 9 New York
DELETE FROM Student WHERE id NOT IN ( SELECT MAX(id) FROM Student GROUP BY Name )
-- Query 4 - This query will delete latest one and keep the old records.
-- Delete latest record of John and Tang.
--2 John 110 New York
--7 Tang 90 New York
DELETE FROM Student WHERE id NOT IN ( SELECT MIN(id) FROM Student GROUP BY Name )
Comments
Post a Comment