86

I am querying a SQL database and I want to use pandas to process the data. However, I am not sure how to move the data. Below is my input and output.

import pyodbc
import pandas
from pandas import DataFrame

cnxn = pyodbc.connect(r'DRIVER={Microsoft Access Driver (*.mdb, *.accdb)};DBQ=C:\users\bartogre\desktop\CorpRentalPivot1.accdb;UID="";PWD="";')
crsr = cnxn.cursor()
for table_name in crsr.tables(tableType='TABLE'):
    print(table_name)
cursor = cnxn.cursor()
sql = "Select sum(CYTM), sum(PYTM), BRAND From data Group By BRAND"
cursor.execute(sql)
for data in cursor.fetchall():
    print (data)

('C:\\users\\bartogre\\desktop\\CorpRentalPivot1.accdb', None, 'Data', 'TABLE', None)
('C:\\users\\bartogre\\desktop\\CorpRentalPivot1.accdb', None, 'SFDB', 'TABLE', None)
(Decimal('78071898.71'), Decimal('82192672.29'), 'A')
(Decimal('12120663.79'), Decimal('13278814.52'), 'B')
Vincent
  • 22,366
  • 18
  • 58
  • 61
polonius11
  • 1,703
  • 5
  • 15
  • 23

3 Answers3

194

A shorter and more concise answer

import pyodbc
import pandas as pd

cnxn = pyodbc.connect(r'DRIVER={Microsoft Access Driver (*.mdb, *.accdb)};'
                      r'DBQ=C:\users\bartogre\desktop\data.mdb;')
sql = "Select sum(CYTM), sum(PYTM), BRAND From data Group By BRAND"
data = pd.read_sql(sql,cnxn)  # without parameters [non-prepared statement]

# with a prepared statement, use list/tuple/dictionary of parameters depending on DB
#data = pd.read_sql(sql=sql, con=cnxn, params=query_params) 
Trenton McKinney
  • 56,955
  • 33
  • 144
  • 158
Andres
  • 2,413
  • 1
  • 13
  • 18
  • This approach needs to be modified: https://stackoverflow.com/questions/71082494/getting-a-warning-when-using-a-pyodbc-connection-object-with-pandas – Peej1226 Nov 21 '22 at 14:26
15

I was way over thinking this one!

cnxn = pyodbc.connect(r'DRIVER={Microsoft Access Driver (*.mdb, *.accdb)};DBQ=C:\users\bartogre\desktop\CorpRentalPivot1.accdb;UID="";PWD="";')
crsr = cnxn.cursor()
for table_name in crsr.tables(tableType='TABLE'):
    print(table_name)
cursor = cnxn.cursor()
sql = "Select sum(CYTM), sum(PYTM), BRAND From data Group By BRAND"
cursor.execute(sql)
data = cursor.fetchall()
print(data)
Data = pandas.DataFrame(data)
print(Data)
polonius11
  • 1,703
  • 5
  • 15
  • 23
  • 19
    Doesn't this just spit everything into one column?! – AER Nov 08 '19 at 05:41
  • 6
    @AER Yes, but this seems to fix it: `Data = pandas.DataFrame.from_records(cursor.fetchall(), columns=[col[0] for col in cursor.description])` – Bungler May 27 '22 at 17:43
12

Another, faster method. Please see data = pd.read_sql(sql, cnxn)

import pyodbc
import pandas as pd
from pandas import DataFrame
from pandas.tools import plotting
from scipy import stats
import matplotlib.pyplot as plt
import seaborn as sns

cnxn = pyodbc.connect(r'DRIVER={Microsoft Access Driver (*.mdb, *.accdb)}; DBQ=C:\users\bartogre\desktop\data.mdb;UID="";PWD="";')
crsr = cnxn.cursor()
for table_name in crsr.tables(tableType='TABLE'):
    print(table_name)
cursor = cnxn.cursor()
sql = "Select *"
sql = sql + " From data"
print(sql)
cursor.execute(sql)
data = pd.read_sql(sql, cnxn)
polonius11
  • 1,703
  • 5
  • 15
  • 23