T-SQL Deduplicator


/*
T-SQL DE-DUPLICATER
NOTE THAT THE CURSOR SELECT STATEMENT SELECTS *ALL* THE DUPLICATES
THE CURSOR THEN GIVES YOU ALL THE HIGHEST IDs, i.e. ALL THE LATEST - AND LET's YOU DO SOME ACTION ON THESE
TO SELECT ALL THE OLDEST SIMPLE SWITCH THE IF STATEMENT AROUND
ALSO NOTE THAT YOUR ACTION SHOULD OF-COURSE AFFECT THE CURSOR SELECT STATEMENT
*/
DECLARE @ID INT
DECLARE @ID2 INT

DECLARE DEDUPE_CURSOR CURSOR READ_ONLY
FOR SELECT
T1.[Id], T2.Id AS Id2
FROM [PersonSkill] T1
INNER JOIN [PersonSkill] T2
ON
ISNULL(T1.Deleted,1) = ISNULL(T2.Deleted,1)
AND T1.Skill = T2.Skill
AND T1.Person = T2.Person
AND T1.Id T2.Id
ORDER BY 1

/*
FOR SELECT TABLE_NAME, TABLE_SCHEMA
FROM INFORMATION_SCHEMA.TABLES
*/

OPEN DEDUPE_CURSOR

FETCH NEXT FROM DEDUPE_CURSOR INTO @ID, @ID2
WHILE (@@fetch_status -1)
BEGIN
IF (@@fetch_status -2)
BEGIN

IF @ID2 > @ID BEGIN
PRINT CONVERT(NVARCHAR, @ID2)
--UPDATE [PersonSkill] SET Deleted = GetDate(), DeletedBy = 1 WHERE Id = @ID2
--PRINT 'SOFT DELETED ' + CONVERT(NVARCHAR, @ID2)
END

END
FETCH NEXT FROM DEDUPE_CURSOR INTO @ID, @ID2
END

CLOSE DEDUPE_CURSOR

DEALLOCATE DEDUPE_CURSOR

Advertisements

Leave a Reply

Please log in using one of these methods to post your comment:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s

%d bloggers like this: