Assuming that the key values in the list "lst" (e.g. "person_id" etc.) are always present, you just need to modify the complex list into normalized lists for this 3 pieces of tables:
lst = [
{
"person_id": "HZT998", "name": "john", "skills": ["python", "sql", "r"], "extras": {"likes_swimming": False, "likes_cooking": True}
},
{
"person_id": "HTY954", "name": "peter", "skills": ["python", "r", "c#"], "extras": {"likes_swimming": True, "likes_cooking": False}
}
]
data_for_first_table = [] # saves each user dictonary without 'skills'
data_dict_first_table = {} # user-dictonary-instance without 'skills'
data_for_second_table = [] # save each 'skill' separately
data_for_third_table = [] # save each skill-user dictonary matches
data_dict_third_table = {} # skill-user-dictonary-instance
######################################
# modify complex List for SQL-Qeruies
######################################
for entry in lst:
for key in entry:
if key == "skills":
for skills in entry[key]:
#print(skills)
data_for_second_table.append(skills)
# Third Table - "Match Table"
data_dict_third_table[skills] = entry["person_id"]
elif key == "extras":
for extra in entry[key]:
#print(extra + ": " + str(entry[key][extra]))
data_dict_first_table[extra] = entry[key][extra]
else:
#print(key + ": " + entry[key])
data_dict_first_table[key] = entry[key]
# store user-dictonary-instance without 'skills'
dict_copy = data_dict_first_table.copy()
data_for_first_table.append(dict_copy)
# store skill-user-dictonary-instance
dict_copy = data_dict_third_table.copy()
data_for_third_table.append(dict_copy)
### remove duplicates from 2nd list ###
data_for_second_table = list(dict.fromkeys(data_for_second_table))
print("####### TEST1 #########")
print(data_for_first_table)
print("####### TEST2 #########")
print(data_for_second_table)
print("####### TEST3 #########")
print(data_for_third_table)
... and to avoid duplicate data-entries (e.g. if you get the same file twice) set People.person_id and Skills.skill to primary-key.
Then you just need to make your SQL-queries:
######################
# Create SQL-Queries
######################
print("/* #### SQL-TEST #### */")
# INSERT(S)/UPDATE(S) for "Peaople"-Table
for entry in data_for_first_table:
raw_sql1 = "INSERT INTO People ( " + str([k for k in entry]).replace("[","").replace("]","").replace("'","") + " ) VALUES ( " + str([str(entry[k]) for k in entry]).replace("[","").replace("]","") + " ) ON DUPLICATE KEY UPDATE " + str([ k + " = " + str(entry[k]) for k in entry if k != 'person_id'] ).replace("[","").replace("]","").replace("'","").replace("= ","= '").replace(",","',") + "';"
print(raw_sql1)
# ONLY INSERT(S) for "Skills"-Table
for entry in data_for_second_table:
raw_sql2 = "INSERT IGNORE INTO Skills ( skill ) VALUES ( '" + entry + "' );"
print(raw_sql2)
# ONLY INSERT(S) for "Skills_People"-Table
for entry in data_for_third_table:
for multiple_entry in entry:
raw_sql3= "INSERT IGNORE INTO Skills_People ( person_id, skill_id ) VALUES ( '" + entry[multiple_entry] + "', '" + multiple_entry + "' );"
print(raw_sql3)
Note: This only works if the table-column-names exactly match the list-key-names.
In addition, the keys and values (Type = 'CHAR') were modified with simple Python-string-operations ".replace()" so that a valid SQL query is generated.
/* #### SQL-TEST #### */
INSERT INTO People ( person_id, name, likes_swimming, likes_cooking ) VALUES ( 'HZT998', 'john', 'False', 'True' ) ON DUPLICATE KEY UPDATE name = 'john', likes_swimming = 'False', likes_cooking = 'True';
INSERT INTO People ( person_id, name, likes_swimming, likes_cooking ) VALUES ( 'HTY954', 'peter', 'True', 'False' ) ON DUPLICATE KEY UPDATE name = 'peter', likes_swimming = 'True', likes_cooking = 'False';
INSERT IGNORE INTO Skills ( skill ) VALUES ( 'python' );
INSERT IGNORE INTO Skills ( skill ) VALUES ( 'sql' );
INSERT IGNORE INTO Skills ( skill ) VALUES ( 'r' );
INSERT IGNORE INTO Skills ( skill ) VALUES ( 'c#' );
INSERT IGNORE INTO Skills_People ( person_id, skill_id ) VALUES ( 'HZT998', 'python' );
INSERT IGNORE INTO Skills_People ( person_id, skill_id ) VALUES ( 'HZT998', 'sql' );
INSERT IGNORE INTO Skills_People ( person_id, skill_id ) VALUES ( 'HZT998', 'r' );
INSERT IGNORE INTO Skills_People ( person_id, skill_id ) VALUES ( 'HTY954', 'python' );
INSERT IGNORE INTO Skills_People ( person_id, skill_id ) VALUES ( 'HZT998', 'sql' );
INSERT IGNORE INTO Skills_People ( person_id, skill_id ) VALUES ( 'HTY954', 'r' );
INSERT IGNORE INTO Skills_People ( person_id, skill_id ) VALUES ( 'HTY954', 'c#' );