➠ Users cannot remove exact duplicates using row_number function(or any other function) in Teradata.
➠ Exact duplicates from 1 table can only be removed using other temporary table.
➠ There can be 2 types of duplicates present in Teradata tables.
Example: Complete row duplicate
id name subject marks
---- ------- -------- -----
123 Harry english 90
123 Harry english 90
Example: Key Column duplicate
id name subject marks
---- ------- -------- -----
123 Harry english 90
123 Harry maths 95
CREATE MULTISET TABLE student
(
id INTEGER,
name VARCHAR(100),
subject VARCHAR(100),
marks INTEGER
)primary index(id,name);
CREATE TABLE student_new AS (SELECT DISTINCT * FROM student) WITH DATA AND STATS;
DROP TABLE student;
RENAME TABLE student_new TO student;
CREATE SET TABLE student_set
(
id INTEGER,
name VARCHAR(100),
subject VARCHAR(100),
marks INTEGER
)primary index(id,name);
INSERT INTO student_set SELECT * FROM student;
DELETE FROM student;
INSERT INTO student SELECT * FROM student_set;
DROP TABLE student_set;
CREATE MULTISET TABLE student_temp
(
id INTEGER,
name VARCHAR(100),
subject VARCHAR(100),
marks INTEGER
)primary index(id,name);
INSERT INTO student_temp SELECT * FROM student QUALIFY row_number() over (partition by id,name,subject,marks order by id)=1;
DELETE FROM student;
INSERT INTO student SELECT * FROM student_temp;
DROP TABLE student_temp;
CREATE MULTISET TABLE student_temp
(
id INTEGER,
name VARCHAR(100),
subject VARCHAR(100),
marks INTEGER
)primary index(id,name);
INSERT INTO student_temp SELECT * FROM student QUALIFY row_number() over (partition by id,name order by marks desc)=1;
DELETE FROM student;
INSERT INTO student SELECT * FROM student_temp;
DROP TABLE student_temp;
SELECT * FROM student QUALIFY count() over (partition by id,name )>1;