I have a sql db with columns [Record_ID](Primary Key) ,[Source] ,[Entity] (Can be null) ,[Last_Name] (Can be null) ,[Given_Name] (Can be null) ,[Aliases] (Can be null) ,[Date_Of_Birth] (Can be null). These values come from 3 different XML files. I parse these files in python and add in DB. These 3 XMLs are coming from website. If these files are updated I need to update my db with new values without duplicates. How can I do that ?
def readXML(sql_server, database, sql_user_name, sql_password, sql_driver):
print("Start")
url = 'somewebsite.com'
resp = requests.get(url)
soup = BeautifulSoup(resp.content, "xml")
recordData = soup.findAll('record')
now = datetime.datetime.now()
sql_insert_in_table = """
INSERT INTO myTable (Source, Given_Name, Last_Name, Date_Of_Birth, Aliases,Entity,Date_Added,Added_by) values (?,?,?,?,?,?,?,?)
"""
params = []
for child in recordData:
firstName = child.find('GivenName').text if child.find('GivenName')!=None else "N/A"
lastName = child.find('LastName').text if child.find('LastName')!=None else "N/A"
DoB = child.find('DateOfBirth').text if child.find('DateOfBirth')!=None else "N/A"
entity= child.find('Entity').text if child.find('Entity')!=None else "N/A"
aliases = child.find('Aliases').text if child.find('Aliases')!=None else "N/A"
params.append((source, firstName, lastName,
DoB, aliases, entity, now.date(), "Admin"))
exec_sql_query (sql_insert_in_table, params, sql_server, database, sql_user_name, sql_password, sql_driver)
def exec_sql_query(query, params, sql_server, database, sql_user_name, sql_password, sql_driver):
try:
with pyodbc.connect('DRIVER='+sql_driver+';SERVER=tcp:'+sql_server+';PORT=1433;DATABASE='+database+';UID='+sql_user_name+';PWD=' + sql_password) as conn:
with conn.cursor() as cursor:
conn.autocommit = True
cursor.executemany(query, params)
except pyodbc.Error as e:
logging.error(f"SQL query failed: {e}")
raise