I am trying to import a CSV file into PostgreSQL using Python. I want Python to import only the rows of the CSV file that are not duplicates, i.e. only the first record for each unique builder. I have attached the code I am using to find the duplicates in the CSV file. I am new to programming, so please pardon my inexperience.
import csv
import psycopg2
# Open the connection and the cursor that the rest of the script reuses.
database = psycopg2.connect(database="***", user="***")
cursor = database.cursor()

# Drop any table left over from a previous run so the import starts empty.
delete = """Drop table if exists "Real".Noduplicates"""
print(delete)
cursor.execute(delete)

# Recreate the target table. The column order matches the INSERT statement
# used later in the script, which passes each CSV row positionally.
cursor.execute("""Create Table "Real".Noduplicates
(UserName varchar(55),
LastUpdate timestamp,
Week date,
Builder varchar(55),
Traffic integer
);""")
print("Table created successfully")
# Import only the first row seen for each builder (CSV column index 3).
# Checking and inserting happen in the same pass: a csv.reader can be
# iterated only once, so a separate "insert" loop placed after a "scan" loop
# would find the reader already exhausted and insert nothing.
insert_sql = (
    """INSERT INTO "Real".Noduplicates (UserName, LastUpdate, Week, Builder, Traffic)"""
    """VALUES (%s,%s,%s,%s,%s)"""
)
already_seen = set()
with open('SampleData2.csv') as csv_file:
    csv_data = csv.reader(csv_file)
    # Skip the header row; the default keeps an empty file from raising
    # StopIteration.
    next(csv_data, None)
    for row in csv_data:
        if not row:
            # Blank lines come back as empty lists; there is nothing to import.
            continue
        builder = row[3]
        if builder in already_seen:
            print('{} is a duplicate builder'.format(builder))
            continue
        print('{} is a new builder'.format(builder))
        already_seen.add(builder)
        # Values are passed as parameters so the driver handles quoting.
        cursor.execute(insert_sql, row)
# Commit before closing: psycopg2 connections do not autocommit by default,
# so closing the connection without a commit would discard the work done above.
database.commit()
cursor.close()
database.close()
print("CSV Imported")