1

Having very poor performance when calculating a slope based on a date. I am trying to calculate it by having a rolling function on dataframe and invoking my function. The dataframe's index is set to a date.

I already tried using Dask for hopeful performance boost, but with no luck.

Basic dataframe (bare in mind, I would go back till 2010, this is just example):

{
  "schema": {
    "fields": [
      {
        "name": "index",
        "type": "integer"
      },
      {
        "name": "id",
        "type": "integer"
      },
      {
        "name": "stampDate",
        "type": "string"
      },
      {
        "name": "some_value",
        "type": "number"
      },
      {
        "name": "slope",
        "type": "number"
      }
    ],
    "primaryKey": [
      "index"
    ],
    "pandas_version": "0.20.0"
  },
  "data": [
    {
      "index": 0,
      "id": 131125,
      "stampDate": "2019-04-05",
      "some_value": 4.3894552243,
      "slope": null
    },
    {
      "index": 1,
      "id": 131125,
      "stampDate": "2019-04-08",
      "some_value": 4.3905985407,
      "slope": null
    },
    {
      "index": 2,
      "id": 131125,
      "stampDate": "2019-04-09",
      "some_value": 4.3916080856,
      "slope": null
    },
    {
      "index": 3,
      "id": 131125,
      "stampDate": "2019-04-10",
      "some_value": 4.3924879736,
      "slope": null
    },
    {
      "index": 4,
      "id": 131125,
      "stampDate": "2019-04-11",
      "some_value": 4.3931915741,
      "slope": null
    },
    {
      "index": 5,
      "id": 131125,
      "stampDate": "2019-04-12",
      "some_value": 4.3937353197,
      "slope": null
    },
    {
      "index": 6,
      "id": 131125,
      "stampDate": "2019-04-15",
      "some_value": 4.3941392301,
      "slope": null
    },
    {
      "index": 7,
      "id": 131125,
      "stampDate": "2019-04-16",
      "some_value": 4.3944615003,
      "slope": null
    },
    {
      "index": 8,
      "id": 131125,
      "stampDate": "2019-04-17",
      "some_value": 4.3947207227,
      "slope": null
    },
    {
      "index": 9,
      "id": 131125,
      "stampDate": "2019-04-18",
      "some_value": 4.3949305203,
      "slope": null
    },
    {
      "index": 10,
      "id": 131125,
      "stampDate": "2019-04-19",
      "some_value": 4.3951279366,
      "slope": null
    },
    {
      "index": 11,
      "id": 131125,
      "stampDate": "2019-04-22",
      "some_value": 4.3953253139,
      "slope": null
    },
    {
      "index": 12,
      "id": 131125,
      "stampDate": "2019-04-23",
      "some_value": 4.3955362179,
      "slope": null
    },
    {
      "index": 13,
      "id": 131125,
      "stampDate": "2019-04-24",
      "some_value": 4.3957865316,
      "slope": null
    },
    {
      "index": 14,
      "id": 131125,
      "stampDate": "2019-04-25",
      "some_value": 4.3961698956,
      "slope": null
    },
    {
      "index": 15,
      "id": 131125,
      "stampDate": "2019-04-26",
      "some_value": 4.3967600627,
      "slope": null
    },
    {
      "index": 16,
      "id": 131125,
      "stampDate": "2019-04-29",
      "some_value": 4.3975283597,
      "slope": null
    },
    {
      "index": 17,
      "id": 131125,
      "stampDate": "2019-04-30",
      "some_value": 4.398422731,
      "slope": null
    },
    {
      "index": 18,
      "id": 131125,
      "stampDate": "2019-05-01",
      "some_value": 4.3994686347,
      "slope": null
    },
    {
      "index": 19,
      "id": 131125,
      "stampDate": "2019-05-02",
      "some_value": 4.4007661971,
      "slope": null
    },
    {
      "index": 20,
      "id": 131125,
      "stampDate": "2019-05-03",
      "some_value": 4.4023364863,
      "slope": null
    },
    {
      "index": 21,
      "id": 131125,
      "stampDate": "2019-05-06",
      "some_value": 4.4042014461,
      "slope": null
    },
    {
      "index": 22,
      "id": 131125,
      "stampDate": "2019-05-07",
      "some_value": 4.4063777255,
      "slope": null
    },
    {
      "index": 23,
      "id": 131125,
      "stampDate": "2019-05-08",
      "some_value": 4.4088000078,
      "slope": null
    },
    {
      "index": 24,
      "id": 131125,
      "stampDate": "2019-05-09",
      "some_value": 4.4114385815,
      "slope": null
    },
    {
      "index": 25,
      "id": 131125,
      "stampDate": "2019-05-10",
      "some_value": 4.4141755263,
      "slope": null
    },
    {
      "index": 26,
      "id": 131125,
      "stampDate": "2019-05-13",
      "some_value": 4.4170305369,
      "slope": null
    },
    {
      "index": 27,
      "id": 131125,
      "stampDate": "2019-05-14",
      "some_value": 4.4199724971,
      "slope": null
    },
    {
      "index": 28,
      "id": 131125,
      "stampDate": "2019-05-15",
      "some_value": 4.4229622227,
      "slope": null
    },
    {
      "index": 29,
      "id": 131125,
      "stampDate": "2019-05-16",
      "some_value": 4.425834168,
      "slope": null
    },
    {
      "index": 30,
      "id": 131125,
      "stampDate": "2019-05-17",
      "some_value": 4.4285928992,
      "slope": null
    },
    {
      "index": 31,
      "id": 131125,
      "stampDate": "2019-05-20",
      "some_value": 4.4312250488,
      "slope": null
    },
    {
      "index": 32,
      "id": 131125,
      "stampDate": "2019-05-21",
      "some_value": 4.4337007315,
      "slope": null
    },
    {
      "index": 33,
      "id": 131125,
      "stampDate": "2019-05-22",
      "some_value": 4.4360814874,
      "slope": null
    },
    {
      "index": 34,
      "id": 131125,
      "stampDate": "2019-05-23",
      "some_value": 4.4383431665,
      "slope": null
    },
    {
      "index": 35,
      "id": 131125,
      "stampDate": "2019-05-24",
      "some_value": 4.440531364,
      "slope": null
    },
    {
      "index": 36,
      "id": 131125,
      "stampDate": "2019-05-27",
      "some_value": 4.4427089019,
      "slope": null
    },
    {
      "index": 37,
      "id": 131125,
      "stampDate": "2019-05-28",
      "some_value": 4.4449274874,
      "slope": null
    },
    {
      "index": 38,
      "id": 131125,
      "stampDate": "2019-05-29",
      "some_value": 4.4471844952,
      "slope": null
    },
    {
      "index": 39,
      "id": 131125,
      "stampDate": "2019-05-30",
      "some_value": 4.449504192,
      "slope": null
    },
    {
      "index": 40,
      "id": 131125,
      "stampDate": "2019-05-31",
      "some_value": 4.4517660608,
      "slope": null
    },
    {
      "index": 41,
      "id": 131125,
      "stampDate": "2019-06-03",
      "some_value": 4.4539344203,
      "slope": null
    },
    {
      "index": 42,
      "id": 131125,
      "stampDate": "2019-06-04",
      "some_value": 4.4559970974,
      "slope": null
    },
    {
      "index": 43,
      "id": 131125,
      "stampDate": "2019-06-05",
      "some_value": 4.4579651625,
      "slope": null
    },
    {
      "index": 44,
      "id": 131125,
      "stampDate": "2019-06-06",
      "some_value": 4.459839165,
      "slope": null
    },
    {
      "index": 45,
      "id": 131125,
      "stampDate": "2019-06-07",
      "some_value": 4.4616219346,
      "slope": null
    },
    {
      "index": 46,
      "id": 131125,
      "stampDate": "2019-06-10",
      "some_value": 4.4632574824,
      "slope": null
    },
    {
      "index": 47,
      "id": 131125,
      "stampDate": "2019-06-11",
      "some_value": 4.4647361678,
      "slope": null
    },
    {
      "index": 48,
      "id": 131125,
      "stampDate": "2019-06-12",
      "some_value": 4.466082816,
      "slope": null
    },
    {
      "index": 49,
      "id": 131125,
      "stampDate": "2019-06-13",
      "some_value": 4.4673404257,
      "slope": null
    },
    {
      "index": 50,
      "id": 131125,
      "stampDate": "2019-06-14",
      "some_value": 4.4685941632,
      "slope": null
    },
    {
      "index": 51,
      "id": 131125,
      "stampDate": "2019-06-17",
      "some_value": 4.4698818222,
      "slope": null
    },
    {
      "index": 52,
      "id": 131125,
      "stampDate": "2019-06-18",
      "some_value": 4.471241,
      "slope": null
    },
    {
      "index": 53,
      "id": 131125,
      "stampDate": "2019-06-19",
      "some_value": 4.4726679779,
      "slope": null
    },
    {
      "index": 54,
      "id": 131125,
      "stampDate": "2019-06-20",
      "some_value": 4.4742000819,
      "slope": null
    },
    {
      "index": 55,
      "id": 131125,
      "stampDate": "2019-06-21",
      "some_value": 4.4758914523,
      "slope": null
    },
    {
      "index": 56,
      "id": 131125,
      "stampDate": "2019-06-24",
      "some_value": 4.477742414,
      "slope": null
    },
    {
      "index": 57,
      "id": 131125,
      "stampDate": "2019-06-25",
      "some_value": 4.4797452754,
      "slope": null
    },
    {
      "index": 58,
      "id": 131125,
      "stampDate": "2019-06-26",
      "some_value": 4.4818719696,
      "slope": null
    },
    {
      "index": 59,
      "id": 131125,
      "stampDate": "2019-06-27",
      "some_value": 4.4840968683,
      "slope": 0.0012813253
    },
    {
      "index": 60,
      "id": 131125,
      "stampDate": "2019-06-28",
      "some_value": 4.4864136767,
      "slope": 0.0012824029
    },
    {
      "index": 61,
      "id": 131125,
      "stampDate": "2019-07-01",
      "some_value": 4.4888228692,
      "slope": 0.0012846152
    },
    {
      "index": 62,
      "id": 131125,
      "stampDate": "2019-07-02",
      "some_value": 4.4913125608,
      "slope": 0.0012878838
    },
    {
      "index": 63,
      "id": 131125,
      "stampDate": "2019-07-03",
      "some_value": 4.493866489,
      "slope": 0.0012921251
    },
    {
      "index": 64,
      "id": 131125,
      "stampDate": "2019-07-04",
      "some_value": 4.4964651949,
      "slope": 0.0012972484
    },
    {
      "index": 65,
      "id": 131125,
      "stampDate": "2019-07-05",
      "some_value": 4.4990115742,
      "slope": 0.0013007585
    },
    {
      "index": 66,
      "id": 131125,
      "stampDate": "2019-07-08",
      "some_value": 4.5015115546,
      "slope": 0.001305065
    },
    {
      "index": 67,
      "id": 131125,
      "stampDate": "2019-07-09",
      "some_value": 4.503975426,
      "slope": 0.0013100242
    },
    {
      "index": 68,
      "id": 131125,
      "stampDate": "2019-07-10",
      "some_value": 4.5063935048,
      "slope": 0.0013155059
    },
    {
      "index": 69,
      "id": 131125,
      "stampDate": "2019-07-11",
      "some_value": 4.5088068518,
      "slope": 0.0013214345
    },
    {
      "index": 70,
      "id": 131125,
      "stampDate": "2019-07-12",
      "some_value": 4.5112001078,
      "slope": 0.0013257165
    },
    {
      "index": 71,
      "id": 131125,
      "stampDate": "2019-07-15",
      "some_value": 4.513556964,
      "slope": 0.0013304582
    },
    {
      "index": 72,
      "id": 131125,
      "stampDate": "2019-07-16",
      "some_value": 4.5158853175,
      "slope": 0.0013355747
    },
    {
      "index": 73,
      "id": 131125,
      "stampDate": "2019-07-17",
      "some_value": 4.5182409874,
      "slope": 0.0013410361
    },
    {
      "index": 74,
      "id": 131125,
      "stampDate": "2019-07-18",
      "some_value": 4.5206487973,
      "slope": 0.001346835
    },
    {
      "index": 75,
      "id": 131125,
      "stampDate": "2019-07-19",
      "some_value": 4.5231767435,
      "slope": 0.0013512278
    },
    {
      "index": 76,
      "id": 131125,
      "stampDate": "2019-07-22",
      "some_value": 4.5258054984,
      "slope": 0.0013561668
    },
    {
      "index": 77,
      "id": 131125,
      "stampDate": "2019-07-23",
      "some_value": 4.5285925513,
      "slope": 0.001361694
    },
    {
      "index": 78,
      "id": 131125,
      "stampDate": "2019-07-24",
      "some_value": 4.5317109265,
      "slope": 0.0013679611
    },
    {
      "index": 79,
      "id": 131125,
      "stampDate": "2019-07-25",
      "some_value": 4.5351725242,
      "slope": 0.0013751053
    },
    {
      "index": 80,
      "id": 131125,
      "stampDate": "2019-07-26",
      "some_value": 4.539034657,
      "slope": 0.0013817525
    },
    {
      "index": 81,
      "id": 131125,
      "stampDate": "2019-07-29",
      "some_value": 4.5433277604,
      "slope": 0.0013897168
    },
    {
      "index": 82,
      "id": 131125,
      "stampDate": "2019-07-30",
      "some_value": 4.5480155368,
      "slope": 0.0013990907
    },
    {
      "index": 83,
      "id": 131125,
      "stampDate": "2019-07-31",
      "some_value": 4.5529754328,
      "slope": 0.0014098796
    },
    {
      "index": 84,
      "id": 131125,
      "stampDate": "2019-08-01",
      "some_value": 4.5581529992,
      "slope": 0.0014220548
    },
    {
      "index": 85,
      "id": 131125,
      "stampDate": "2019-08-02",
      "some_value": 4.5634863619,
      "slope": 0.0014343286
    },
    {
      "index": 86,
      "id": 131125,
      "stampDate": "2019-08-05",
      "some_value": 4.5689449021,
      "slope": 0.0014479487
    },
    {
      "index": 87,
      "id": 131125,
      "stampDate": "2019-08-06",
      "some_value": 4.5743500886,
      "slope": 0.0014627368
    },
    {
      "index": 88,
      "id": 131125,
      "stampDate": "2019-08-07",
      "some_value": 4.5794717928,
      "slope": 0.0014784134
    },
    {
      "index": 89,
      "id": 131125,
      "stampDate": "2019-08-08",
      "some_value": 4.5842600857,
      "slope": 0.0014947071
    }
  ]
}
import numpy as np

df['slope'] = df['some_value'].rolling(window=587,min_periods=60).apply(
        slope_with_dates, raw=True)

def slope_with_dates(dependent_var):
   initial_array = np.arange(1, 3000)
   new_array = initial_array[(initial_array % 7 != 0) & (initial_array % 7 != 6)]
   dependent_var = dependent_var[~np.isnan(dependent_var)]
   last_array = new_array[:len(dependent_var)]
   A = np.vstack([last_array, np.ones(len(last_array))]).T
   m, c = np.linalg.lstsq(A, dependent_var, rcond=-1)[0]
   return m

I am hoping to reduce the time required to perform this function.

Spinxas
  • 71
  • 1
  • 2
  • 15
  • Have a look at [How to make good reproducible pandas examples](https://stackoverflow.com/questions/20109391/how-to-make-good-reproducible-pandas-examples) and edit your question providing a sample of data and the expected output. – Alexandre B. Aug 14 '19 at 16:49
  • @AlexandreB. I have edited the question with a sample dataframe, this should be sufficient to execute the sample code provided. – Spinxas Aug 14 '19 at 17:14

0 Answers0