Having very poor performance when calculating a slope based on a date. I am trying to calculate it by having a rolling function on dataframe and invoking my function. The dataframe's index is set to a date.
I already tried using Dask for hopeful performance boost, but with no luck.
Basic dataframe (bare in mind, I would go back till 2010, this is just example):
{
"schema": {
"fields": [
{
"name": "index",
"type": "integer"
},
{
"name": "id",
"type": "integer"
},
{
"name": "stampDate",
"type": "string"
},
{
"name": "some_value",
"type": "number"
},
{
"name": "slope",
"type": "number"
}
],
"primaryKey": [
"index"
],
"pandas_version": "0.20.0"
},
"data": [
{
"index": 0,
"id": 131125,
"stampDate": "2019-04-05",
"some_value": 4.3894552243,
"slope": null
},
{
"index": 1,
"id": 131125,
"stampDate": "2019-04-08",
"some_value": 4.3905985407,
"slope": null
},
{
"index": 2,
"id": 131125,
"stampDate": "2019-04-09",
"some_value": 4.3916080856,
"slope": null
},
{
"index": 3,
"id": 131125,
"stampDate": "2019-04-10",
"some_value": 4.3924879736,
"slope": null
},
{
"index": 4,
"id": 131125,
"stampDate": "2019-04-11",
"some_value": 4.3931915741,
"slope": null
},
{
"index": 5,
"id": 131125,
"stampDate": "2019-04-12",
"some_value": 4.3937353197,
"slope": null
},
{
"index": 6,
"id": 131125,
"stampDate": "2019-04-15",
"some_value": 4.3941392301,
"slope": null
},
{
"index": 7,
"id": 131125,
"stampDate": "2019-04-16",
"some_value": 4.3944615003,
"slope": null
},
{
"index": 8,
"id": 131125,
"stampDate": "2019-04-17",
"some_value": 4.3947207227,
"slope": null
},
{
"index": 9,
"id": 131125,
"stampDate": "2019-04-18",
"some_value": 4.3949305203,
"slope": null
},
{
"index": 10,
"id": 131125,
"stampDate": "2019-04-19",
"some_value": 4.3951279366,
"slope": null
},
{
"index": 11,
"id": 131125,
"stampDate": "2019-04-22",
"some_value": 4.3953253139,
"slope": null
},
{
"index": 12,
"id": 131125,
"stampDate": "2019-04-23",
"some_value": 4.3955362179,
"slope": null
},
{
"index": 13,
"id": 131125,
"stampDate": "2019-04-24",
"some_value": 4.3957865316,
"slope": null
},
{
"index": 14,
"id": 131125,
"stampDate": "2019-04-25",
"some_value": 4.3961698956,
"slope": null
},
{
"index": 15,
"id": 131125,
"stampDate": "2019-04-26",
"some_value": 4.3967600627,
"slope": null
},
{
"index": 16,
"id": 131125,
"stampDate": "2019-04-29",
"some_value": 4.3975283597,
"slope": null
},
{
"index": 17,
"id": 131125,
"stampDate": "2019-04-30",
"some_value": 4.398422731,
"slope": null
},
{
"index": 18,
"id": 131125,
"stampDate": "2019-05-01",
"some_value": 4.3994686347,
"slope": null
},
{
"index": 19,
"id": 131125,
"stampDate": "2019-05-02",
"some_value": 4.4007661971,
"slope": null
},
{
"index": 20,
"id": 131125,
"stampDate": "2019-05-03",
"some_value": 4.4023364863,
"slope": null
},
{
"index": 21,
"id": 131125,
"stampDate": "2019-05-06",
"some_value": 4.4042014461,
"slope": null
},
{
"index": 22,
"id": 131125,
"stampDate": "2019-05-07",
"some_value": 4.4063777255,
"slope": null
},
{
"index": 23,
"id": 131125,
"stampDate": "2019-05-08",
"some_value": 4.4088000078,
"slope": null
},
{
"index": 24,
"id": 131125,
"stampDate": "2019-05-09",
"some_value": 4.4114385815,
"slope": null
},
{
"index": 25,
"id": 131125,
"stampDate": "2019-05-10",
"some_value": 4.4141755263,
"slope": null
},
{
"index": 26,
"id": 131125,
"stampDate": "2019-05-13",
"some_value": 4.4170305369,
"slope": null
},
{
"index": 27,
"id": 131125,
"stampDate": "2019-05-14",
"some_value": 4.4199724971,
"slope": null
},
{
"index": 28,
"id": 131125,
"stampDate": "2019-05-15",
"some_value": 4.4229622227,
"slope": null
},
{
"index": 29,
"id": 131125,
"stampDate": "2019-05-16",
"some_value": 4.425834168,
"slope": null
},
{
"index": 30,
"id": 131125,
"stampDate": "2019-05-17",
"some_value": 4.4285928992,
"slope": null
},
{
"index": 31,
"id": 131125,
"stampDate": "2019-05-20",
"some_value": 4.4312250488,
"slope": null
},
{
"index": 32,
"id": 131125,
"stampDate": "2019-05-21",
"some_value": 4.4337007315,
"slope": null
},
{
"index": 33,
"id": 131125,
"stampDate": "2019-05-22",
"some_value": 4.4360814874,
"slope": null
},
{
"index": 34,
"id": 131125,
"stampDate": "2019-05-23",
"some_value": 4.4383431665,
"slope": null
},
{
"index": 35,
"id": 131125,
"stampDate": "2019-05-24",
"some_value": 4.440531364,
"slope": null
},
{
"index": 36,
"id": 131125,
"stampDate": "2019-05-27",
"some_value": 4.4427089019,
"slope": null
},
{
"index": 37,
"id": 131125,
"stampDate": "2019-05-28",
"some_value": 4.4449274874,
"slope": null
},
{
"index": 38,
"id": 131125,
"stampDate": "2019-05-29",
"some_value": 4.4471844952,
"slope": null
},
{
"index": 39,
"id": 131125,
"stampDate": "2019-05-30",
"some_value": 4.449504192,
"slope": null
},
{
"index": 40,
"id": 131125,
"stampDate": "2019-05-31",
"some_value": 4.4517660608,
"slope": null
},
{
"index": 41,
"id": 131125,
"stampDate": "2019-06-03",
"some_value": 4.4539344203,
"slope": null
},
{
"index": 42,
"id": 131125,
"stampDate": "2019-06-04",
"some_value": 4.4559970974,
"slope": null
},
{
"index": 43,
"id": 131125,
"stampDate": "2019-06-05",
"some_value": 4.4579651625,
"slope": null
},
{
"index": 44,
"id": 131125,
"stampDate": "2019-06-06",
"some_value": 4.459839165,
"slope": null
},
{
"index": 45,
"id": 131125,
"stampDate": "2019-06-07",
"some_value": 4.4616219346,
"slope": null
},
{
"index": 46,
"id": 131125,
"stampDate": "2019-06-10",
"some_value": 4.4632574824,
"slope": null
},
{
"index": 47,
"id": 131125,
"stampDate": "2019-06-11",
"some_value": 4.4647361678,
"slope": null
},
{
"index": 48,
"id": 131125,
"stampDate": "2019-06-12",
"some_value": 4.466082816,
"slope": null
},
{
"index": 49,
"id": 131125,
"stampDate": "2019-06-13",
"some_value": 4.4673404257,
"slope": null
},
{
"index": 50,
"id": 131125,
"stampDate": "2019-06-14",
"some_value": 4.4685941632,
"slope": null
},
{
"index": 51,
"id": 131125,
"stampDate": "2019-06-17",
"some_value": 4.4698818222,
"slope": null
},
{
"index": 52,
"id": 131125,
"stampDate": "2019-06-18",
"some_value": 4.471241,
"slope": null
},
{
"index": 53,
"id": 131125,
"stampDate": "2019-06-19",
"some_value": 4.4726679779,
"slope": null
},
{
"index": 54,
"id": 131125,
"stampDate": "2019-06-20",
"some_value": 4.4742000819,
"slope": null
},
{
"index": 55,
"id": 131125,
"stampDate": "2019-06-21",
"some_value": 4.4758914523,
"slope": null
},
{
"index": 56,
"id": 131125,
"stampDate": "2019-06-24",
"some_value": 4.477742414,
"slope": null
},
{
"index": 57,
"id": 131125,
"stampDate": "2019-06-25",
"some_value": 4.4797452754,
"slope": null
},
{
"index": 58,
"id": 131125,
"stampDate": "2019-06-26",
"some_value": 4.4818719696,
"slope": null
},
{
"index": 59,
"id": 131125,
"stampDate": "2019-06-27",
"some_value": 4.4840968683,
"slope": 0.0012813253
},
{
"index": 60,
"id": 131125,
"stampDate": "2019-06-28",
"some_value": 4.4864136767,
"slope": 0.0012824029
},
{
"index": 61,
"id": 131125,
"stampDate": "2019-07-01",
"some_value": 4.4888228692,
"slope": 0.0012846152
},
{
"index": 62,
"id": 131125,
"stampDate": "2019-07-02",
"some_value": 4.4913125608,
"slope": 0.0012878838
},
{
"index": 63,
"id": 131125,
"stampDate": "2019-07-03",
"some_value": 4.493866489,
"slope": 0.0012921251
},
{
"index": 64,
"id": 131125,
"stampDate": "2019-07-04",
"some_value": 4.4964651949,
"slope": 0.0012972484
},
{
"index": 65,
"id": 131125,
"stampDate": "2019-07-05",
"some_value": 4.4990115742,
"slope": 0.0013007585
},
{
"index": 66,
"id": 131125,
"stampDate": "2019-07-08",
"some_value": 4.5015115546,
"slope": 0.001305065
},
{
"index": 67,
"id": 131125,
"stampDate": "2019-07-09",
"some_value": 4.503975426,
"slope": 0.0013100242
},
{
"index": 68,
"id": 131125,
"stampDate": "2019-07-10",
"some_value": 4.5063935048,
"slope": 0.0013155059
},
{
"index": 69,
"id": 131125,
"stampDate": "2019-07-11",
"some_value": 4.5088068518,
"slope": 0.0013214345
},
{
"index": 70,
"id": 131125,
"stampDate": "2019-07-12",
"some_value": 4.5112001078,
"slope": 0.0013257165
},
{
"index": 71,
"id": 131125,
"stampDate": "2019-07-15",
"some_value": 4.513556964,
"slope": 0.0013304582
},
{
"index": 72,
"id": 131125,
"stampDate": "2019-07-16",
"some_value": 4.5158853175,
"slope": 0.0013355747
},
{
"index": 73,
"id": 131125,
"stampDate": "2019-07-17",
"some_value": 4.5182409874,
"slope": 0.0013410361
},
{
"index": 74,
"id": 131125,
"stampDate": "2019-07-18",
"some_value": 4.5206487973,
"slope": 0.001346835
},
{
"index": 75,
"id": 131125,
"stampDate": "2019-07-19",
"some_value": 4.5231767435,
"slope": 0.0013512278
},
{
"index": 76,
"id": 131125,
"stampDate": "2019-07-22",
"some_value": 4.5258054984,
"slope": 0.0013561668
},
{
"index": 77,
"id": 131125,
"stampDate": "2019-07-23",
"some_value": 4.5285925513,
"slope": 0.001361694
},
{
"index": 78,
"id": 131125,
"stampDate": "2019-07-24",
"some_value": 4.5317109265,
"slope": 0.0013679611
},
{
"index": 79,
"id": 131125,
"stampDate": "2019-07-25",
"some_value": 4.5351725242,
"slope": 0.0013751053
},
{
"index": 80,
"id": 131125,
"stampDate": "2019-07-26",
"some_value": 4.539034657,
"slope": 0.0013817525
},
{
"index": 81,
"id": 131125,
"stampDate": "2019-07-29",
"some_value": 4.5433277604,
"slope": 0.0013897168
},
{
"index": 82,
"id": 131125,
"stampDate": "2019-07-30",
"some_value": 4.5480155368,
"slope": 0.0013990907
},
{
"index": 83,
"id": 131125,
"stampDate": "2019-07-31",
"some_value": 4.5529754328,
"slope": 0.0014098796
},
{
"index": 84,
"id": 131125,
"stampDate": "2019-08-01",
"some_value": 4.5581529992,
"slope": 0.0014220548
},
{
"index": 85,
"id": 131125,
"stampDate": "2019-08-02",
"some_value": 4.5634863619,
"slope": 0.0014343286
},
{
"index": 86,
"id": 131125,
"stampDate": "2019-08-05",
"some_value": 4.5689449021,
"slope": 0.0014479487
},
{
"index": 87,
"id": 131125,
"stampDate": "2019-08-06",
"some_value": 4.5743500886,
"slope": 0.0014627368
},
{
"index": 88,
"id": 131125,
"stampDate": "2019-08-07",
"some_value": 4.5794717928,
"slope": 0.0014784134
},
{
"index": 89,
"id": 131125,
"stampDate": "2019-08-08",
"some_value": 4.5842600857,
"slope": 0.0014947071
}
]
}
import numpy as np
df['slope'] = df['some_value'].rolling(window=587,min_periods=60).apply(
slope_with_dates, raw=True)
def slope_with_dates(dependent_var):
initial_array = np.arange(1, 3000)
new_array = initial_array[(initial_array % 7 != 0) & (initial_array % 7 != 6)]
dependent_var = dependent_var[~np.isnan(dependent_var)]
last_array = new_array[:len(dependent_var)]
A = np.vstack([last_array, np.ones(len(last_array))]).T
m, c = np.linalg.lstsq(A, dependent_var, rcond=-1)[0]
return m
I am hoping to reduce the time required to perform this function.