I'm trying to create a PostgreSQL table of 30-minute data for the S&P 500 ETF (spy30new, for testing freshly inserted data) from a table of several stocks with 15-minute data (all15). all15 has an index on 'dt' (timestamp) and 'instr' (stock symbol). I would like spy30new to have an index on 'dt'.
import numpy as np
import pandas as pd
from datetime import datetime, date, time, timedelta
from dateutil import parser
from sqlalchemy import create_engine
# Query all15
engine = create_engine('postgresql://user:passwd@localhost:5432/stocks')
new15Df = (pd.read_sql_query("SELECT dt, o, h, l, c, v FROM all15 WHERE (instr = 'SPY') AND (date(dt) BETWEEN '2016-06-27' AND '2016-07-15');", engine)).sort_values('dt')
# Correct for Time Zone.
new15Df['dt'] = (new15Df['dt'].copy()).apply(lambda d: d + timedelta(hours=-4))
# spy0030Df contains the 15-minute data at 00 & 30 minute time points
# spy1545Df contains the 15-minute data at 15 & 45 minute time points
spy0030Df = (new15Df[new15Df['dt'].apply(lambda d: d.minute % 30) == 0]).reset_index(drop=True)
spy1545Df = (new15Df[new15Df['dt'].apply(lambda d: d.minute % 30) == 15]).reset_index(drop=True)
high = pd.concat([spy1545Df['h'], spy0030Df['h']], axis=1).max(axis=1)
low = pd.concat([spy1545Df['l'], spy0030Df['l']], axis=1).min(axis=1)
volume = spy1545Df['v'] + spy0030Df['v']
# spy30Df assembled and pushed to PostgreSQL as table spy30new
spy30Df = pd.concat([spy0030Df['dt'], spy1545Df['o'], high, low, spy0030Df['c'], volume], ignore_index = True, axis=1)
spy30Df.columns = ['d', 'o', 'h', 'l', 'c', 'v']
spy30Df.set_index(['dt'], inplace=True)
spy30Df.to_sql('spy30new', engine, if_exists='append', index_label='dt')
This gives the error "ValueError: Cannot cast DatetimeIndex to dtype datetime64[us]"
What I've tried so far (I have successfully pushed CSV files to PG using pandas. But here the source is a PG database):
Not placing an index on
'dt'
spy30Df.set_index(['dt'], inplace=True) # Remove this line spy30Df.to_sql('spy30new', engine, if_exists='append') # Delete the index_label option
Converting 'dt' from type pandas.tslib.Timestamp to datetime.datetime using
to_pydatetime()
(in case psycopg2 can work with python dt, but not pandas Timestamp)u = (spy0030Df['dt']).tolist() timesAsPyDt = np.asarray(map((lambda d: d.to_pydatetime()), u)) spy30Df = pd.concat([spy1545Df['o'], high, low, spy0030Df['c'], volume], ignore_index = True, axis=1) newArray = np.c_[timesAsPyDt, spy30Df.values] colNames = ['dt', 'o', 'h', 'l', 'c', 'v'] newDf = pd.DataFrame(newArray, columns=colNames) newDf.set_index(['dt'], inplace=True) newDf.to_sql('spy30new', engine, if_exists='append', index_label='dt')
Using
datetime.utcfromtimestamp()
timesAsDt = (spy0030Df['dt']).apply(lambda d: datetime.utcfromtimestamp(d.tolist()/1e9))
Using
pd.to_datetime()
timesAsDt = pd.to_datetime(spy0030Df['dt'])