I have to Read and import data into MYSQL from given JSON file (50 MB size). I have tried using Python and failed (getting error). I can't figure out how to import JSON file to MYSQL in most efficient way. PLEASE help me out.
What I have done so far is :
import os
import json
import MySQLdb
# read JSON file which is in the next parent folder
file = 'vehicle.json'
json_data = open(file).read()
json_obj = json.loads(json_data)
#print(json_obj)
# do validation and checks before insert
def validate_string(val):
if val is not None:
if type(val) is int:
# for x in val:
# print(x)
return str(val).encode('utf-8')
else:
return val
# connect to MySQL
con = MySQLdb.connect(host="127.0.0.1", user="root", passwd="PASSWORD", db="vehicle", port=3306)
cursor = con.cursor()
cursor.execute("DROP TABLE IF EXISTS vehicle")
sql1 = """
CREATE TABLE vehicle (
CarYear int(4) ,
make varchar(15),
model varchar(30),
cylinders varchar(2),
VClass varchar(30),
drive varchar(30),
trany varchar(30),
displ varchar(20),
eng_dscr varchar(20),
trans_dscr varchar(30),
mpgData varchar(5),
evMotor varchar(10),
youSaveSpend varchar(20),
fuleType varchar(20),
fuleType1 varchar(30),
barrelsA08 varchar(2),
charge120 varchar(2),
charge240 varchar(2),
city08 varchar(4),
city08U varchar(2),
cityA08 varchar(2),
cityA08U varchar(2),
cityCD varchar(2),
cityE varchar(2),
cityUF varchar(2),
co2 varchar(2),
coA2 varchar(2),
co2TailpipeAGpm varchar(2),
co2TailpipeGpm varchar(20),
comb08 varchar(3),
comb08U varchar(2),
combA08 varchar(2),
combA08U varchar(2),
combE varchar(2),
combinedCD varchar(2),
combinedUF varchar(2),
engld varchar(8),
feScore varchar(3),
fuelCost08 varchar(5),
fuelCostA08 varchar(2),
ghgScore varchar(2),
ghgScoreA varchar(2),
highway08 varchar(4),
highway08U varchar(2),
highwayA08 varchar(2),
highwayA08U varchar(2),
highwayCD varchar(2),
highwayE varchar(2),
highwayUF varchar(2),
hlv varchar(2),
hpv varchar(2),
id varchar(8),
lv2 varchar(2),
lv4 varchar(2),
phevBlended varchar(10),
pv2 varchar(5),
pv4 varchar(5),
CarRange varchar(5),
rangeCity varchar(5),
rangeCityA varchar(5),
rangeHwy varchar(5),
rangeHwyA varchar(5),
UCity varchar(5),
UCityA varchar(5),
UHighway varchar(5),
UHighwayA varchar(5),
guzzler varchar(10),
tCharger varchar(10),
sCharger varchar(10),
atvType varchar(10),
fuelType2 varchar(10),
rangeA varchar(10),
mfrCode varchar(10),
c240Dscr varchar(10),
charge240b varchar(5),
c240bDscr varchar(10),
createdOn varchar(30),
modifiedOn varchar(30),
startStop varchar(10),
phevCity varchar(5),
phevHwy varchar(5),
phevComb varchar(5)
)"""
cursor.execute(sql1)
# parse json data to SQL insert
for i, item in enumerate(json_obj):
CarYear = validate_string(item.get("year", None))
make = validate_string(item.get("make", None))
model = validate_string(item.get("model", None))
cylinders = validate_string(item.get("cylinders", None))
VClass = validate_string(item.get("VClass", None))
drive = validate_string(item.get("drive", None))
trany = validate_string(item.get("trany", None))
displ = validate_string(item.get("displ", None))
eng_dscr = validate_string(item.get("eng_dscr", None))
trans_dscr = validate_string(item.get("trans_dscr", None))
mpgData = validate_string(item.get("mpgData", None))
evMotor = validate_string(item.get("evMotor", None))
youSaveSpend = validate_string(item.get("youSaveSpend", None))
fuleType = validate_string(item.get("fuleType", None))
fuleType1 = validate_string(item.get("fuleType1", None))
barrelsA08 = validate_string(item.get("barrelsA08", None))
charge120 = validate_string(item.get("charge120", None))
charge240 = validate_string(item.get("charge240", None))
city08 = validate_string(item.get("city08", None))
city08U = validate_string(item.get("city08U", None))
cityA08 = validate_string(item.get("cityA08", None))
cityA08U = validate_string(item.get("cityA08U", None))
cityCD = validate_string(item.get("cityCD", None))
cityE = validate_string(item.get("cityE", None))
cityUF = validate_string(item.get("cityUF", None))
co2 = validate_string(item.get("co2", None))
coA2 = validate_string(item.get("coA2", None))
co2TailpipeAGpm = validate_string(item.get("co2TailpipeAGpm", None))
co2TailpipeGpm = validate_string(item.get("co2TailpipeGpm", None))
comb08 = validate_string(item.get("comb08", None))
comb08U = validate_string(item.get("comb08U", None))
combA08 = validate_string(item.get("combA08", None))
combA08U = validate_string(item.get("combA08U", None))
combE = validate_string(item.get("combE", None))
combinedCD = validate_string(item.get("combinedCD", None))
combinedUF = validate_string(item.get("combinedUF", None))
engld = validate_string(item.get("engld", None))
feScore = validate_string(item.get("feScore", None))
fuelCost08 = validate_string(item.get("fuelCost08", None))
fuelCostA08 = validate_string(item.get("fuelCostA08", None))
ghgScore = validate_string(item.get("ghgScore", None))
ghgScoreA = validate_string(item.get("ghgScoreA", None))
highway08 = validate_string(item.get("highway08", None))
highway08U = validate_string(item.get("highway08U", None))
highwayA08 = validate_string(item.get("highwayA08", None))
highwayA08U = validate_string(item.get("highwayA08U", None))
highwayCD = validate_string(item.get("highwayCD", None))
highwayE = validate_string(item.get("highwayE", None))
highwayUF = validate_string(item.get("highwayUF", None))
hlv = validate_string(item.get("hlv", None))
hpv = validate_string(item.get("hpv", None))
lv2 = validate_string(item.get("lv2", None))
lv4 = validate_string(item.get("lv4", None))
phevBlended = validate_string(item.get("phevBlended", None))
pv2 = validate_string(item.get("pv2", None))
pv4 = validate_string(item.get("pv4", None))
CarRange = validate_string(item.get("CarRange", None))
rangeCity = validate_string(item.get("rangeCity", None))
rangeCityA = validate_string(item.get("rangeCityA", None))
rangeHwy = validate_string(item.get("rangeHwy", None))
rangeHwyA = validate_string(item.get("rangeHwyA", None))
UCity = validate_string(item.get("UCity", None))
UCityA = validate_string(item.get("UCityA", None))
UHighway = validate_string(item.get("UHighway", None))
UHighwayA = validate_string(item.get("UHighwayA", None))
guzzler = validate_string(item.get("guzzler", None))
tCharger = validate_string(item.get("tCharger", None))
sCharger = validate_string(item.get("sCharger", None))
atvType = validate_string(item.get("atvType", None))
fuelType2 = validate_string(item.get("fuelType2", None))
rangeA = validate_string(item.get("rangeA", None))
mfrCode = validate_string(item.get("mfrCode", None))
c240Dscr = validate_string(item.get("c240Dscr", None))
charge240b = validate_string(item.get("charge240b", None))
c240bDscr = validate_string(item.get("c240bDscr", None))
createdOn = validate_string(item.get("createdOn", None))
modifiedOn = validate_string(item.get("modifiedOn", None))
startStop = validate_string(item.get("startStop", None))
phevCity = validate_string(item.get("phevCity", None))
phevHwy = validate_string(item.get("phevHwy", None))
phevComb = validate_string(item.get("phevComb", None))
sql2 = """INSERT INTO vehicle (CarYear,make,model,cylinders,VClass,drive,trany,displ,eng_dscr,trans_dscr,mpgData,evMotor,
youSaveSpend,fuleType,fuleType1,barrelsA08,charge120,charge240,city08,city08U,cityA08,cityA08U,cityCD,cityE,cityUF,co2,coA2,co2TailpipeAGpm,
co2TailpipeGpm,comb08,comb08U,combA08,combA08U,combE,combinedCD,combinedUF,engld,feScore,fuelCost08,fuelCostA08,ghgScore,ghgScoreA,highway08,
highway08U,highwayA08,highwayA08U,highwayCD,highwayE,highwayUF,hlv,hpv,id,lv2,lv4,phevBlended,pv2,pv4,CarRange,rangeCity,rangeCityA,rangeHwy,
rangeHwyA,UCity,UCityA,UHighway,UHighwayA,guzzler,tCharger,sCharger,atvType,fuelType2,rangeA,mfrCode,c240Dscr,charge240b,c240bDscr,createdOn,
modifiedOn,startStop,phevCity,phevHwy,phevComb)
VALUES (%d,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s), (CarYear,make,model,cylinders,VClass,drive,trany,displ,eng_dscr,trans_dscr,mpgData,evMotor,
youSaveSpend,fuleType,fuleType1,barrelsA08,charge120,charge240,city08,city08U,cityA08,cityA08U,cityCD,cityE,cityUF,co2,coA2,co2TailpipeAGpm,
co2TailpipeGpm,comb08,comb08U,combA08,combA08U,combE,combinedCD,combinedUF,engld,feScore,fuelCost08,fuelCostA08,ghgScore,ghgScoreA,highway08,
highway08U,highwayA08,highwayA08U,highwayCD,highwayE,highwayUF,hlv,hpv,id,lv2,lv4,phevBlended,pv2,pv4,CarRange,rangeCity,rangeCityA,rangeHwy,
rangeHwyA,UCity,UCityA,UHighway,UHighwayA,guzzler,tCharger,sCharger,atvType,fuelType2,rangeA,mfrCode,c240Dscr,charge240b,c240bDscr,createdOn,
modifiedOn,startStop,phevCity,phevHwy,phevComb)"""
cursor.execute(sql2)
con.commit()
# print(CarYear)
con.close()
And I'm getting this error :
Traceback (most recent call last): File "C:/Users/risha/PycharmProjects/Test/JSON_TEST.py", line 209, in cursor.execute(sql2) File "C:\Users\risha\AppData\Local\Programs\Python\Python36\lib\site-packages\MySQLdb\cursors.py", line 250, in execute self.errorhandler(self, exc, value) File "C:\Users\risha\AppData\Local\Programs\Python\Python36\lib\site-packages\MySQLdb\connections.py", line 50, in defaulterrorhandler raise errorvalue File "C:\Users\risha\AppData\Local\Programs\Python\Python36\lib\site-packages\MySQLdb\cursors.py", line 247, in execute res = self._query(query) File "C:\Users\risha\AppData\Local\Programs\Python\Python36\lib\site-packages\MySQLdb\cursors.py", line 411, in _query rowcount = self._do_query(q) File "C:\Users\risha\AppData\Local\Programs\Python\Python36\lib\site-packages\MySQLdb\cursors.py", line 374, in _do_query db.query(q) File "C:\Users\risha\AppData\Local\Programs\Python\Python36\lib\site-packages\MySQLdb\connections.py", line 277, in query _mysql.connection.query(self, query) _mysql_exceptions.ProgrammingError: (1064, "You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near '%d,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s' at line 7")
Process finished with exit code 1
You can suggest any other method also. I have also created Database for same and by hard coding it is working fine but here it is showing this above error.
Any help would be appreciable.