How to improve for-loop on DataFrame with row-wise calculation depending on previous row?

Question

I have a working code with a for-loop that I would like to optimise in terms of speed (the project has been rejected because it's slow).

This is my situation: I have an Excel file from which I import a table with thousands of rows and a few columns into a Pandas DataFrame. The first column is a monotonic increasing series of timestamps, with 15 minutes frequency.

I have to use this table to calculate few extra columns, append them to the original table and save the resulting DataFrame into a new Excel file.

What is slowing the code is the core calculation of the extra columns. Here is a snippet of the working code:

import pandas as pd
from datetime import timedelta as td

cons_prod = pd.read_csv("sample.csv", index_col=0, parse_dates=True)

soc_dct = {}  # State of charge (kW)
charge_dct = {}  # Charge (kW)
discharge_dct = {}  # Discharge (kW)
acc_dct = {}  # Auto-consumption NEW (kW)
lst_dct = {}  # Lost injection due to battery efficiency (kW)
inj_dct = {}  # Injection NEW (kW)
gridcons_dct = {}  # Grid Consumption NEW (kW)
agg_dct = {}  # Additional Auto-consumption through battery (kW)

battery_parameters = {
    "power": 50,
    "energy": 130,
    "efficiency": 0.9,
    "minsoc": 0.1,
    "soct0": 65.0,
}

bp_energy = battery_parameters["energy"]
bp_power = battery_parameters["power"]
soct0 = 0.5 * bp_energy

for t in cons_prod.index:
    L = cons_prod.loc[t, "Injection (kW)"]
    m = cons_prod.loc[t, "Grid Consumption (kW)"]
    k = cons_prod.loc[t, "Auto-consumption (kW)"]
    f = cons_prod.loc[t, "Consumption (kW)"]
    if t == cons_prod.index[0]:
        # State of charge (kW)
        soc_dct.setdefault(t, soct0)

        # Charge (kW)
        charge_dct.setdefault(
            t,
            min(
                L,
                (bp_energy - soc_dct[t]) * 4,
                bp_power,
            )
            * battery_parameters["efficiency"]
            if L >= 0
            else 0,
        )

        # Discharge (kW)
        discharge_dct.setdefault(
            t,
            -min(
                m,
                4 * soc_dct[t] - 4 * (battery_parameters["minsoc"] * bp_energy),
                bp_power,
            )
            if m >= 0
            else 0,
        )

        # Auto-consumption NEW (kW)
        acc_dct.setdefault(t, k - discharge_dct[t])

        # Lost injection due to battery efficiency (kW)
        lst_dct.setdefault(
            t,
            (charge_dct[t] / battery_parameters["efficiency"]) - charge_dct[t],
        )

        # Injection NEW (kW)
        inj_dct.setdefault(t, L - charge_dct[t] - lst_dct[t])

        # Grid Consumption NEW (kW)
        gridcons_dct.setdefault(t, f - acc_dct[t])

        # Additional Auto-consumption through battery (kW)
        agg_dct.setdefault(t, acc_dct[t] - k)
    else:
        # State of charge (kW)
        soc_dct.setdefault(
            t,
            soc_dct[t - td(minutes=15)]
            + (charge_dct[t - td(minutes=15)] + discharge_dct[t - td(minutes=15)]) / 4,
        )

        # Charge (kW)
        charge_dct.setdefault(
            t,
            min(
                L,
                (bp_energy - soc_dct[t]) * 4,
                bp_power,
            )
            * battery_parameters["efficiency"]
            if L >= 0
            else 0,
        )

        # Discharge (kW)
        discharge_dct.setdefault(
            t,
            -min(
                m,
                4 * soc_dct[t] - 4 * (battery_parameters["minsoc"] * bp_energy),
                bp_power,
            )
            if m >= 0
            else 0,
        )

        # Auto-consumption NEW (kW)
        acc_dct.setdefault(t, k - discharge_dct[t])

        # Lost injection due to battery efficiency (kW)
        lst_dct.setdefault(
            t, charge_dct[t] / battery_parameters["efficiency"] - charge_dct[t]
        )

        # Injection NEW (kW)
        inj_dct.setdefault(t, L - charge_dct[t] - lst_dct[t])

        # Grid Consumption NEW (kW)
        gridcons_dct.setdefault(t, f - acc_dct[t])

        # Additional Auto-consumption through battery (kW)
        agg_dct.setdefault(t, acc_dct[t] - k)
# Creating a DataFrame with all the values
output_df = pd.DataFrame(
    data=[
        soc_dct,
        charge_dct,
        discharge_dct,
        acc_dct,
        lst_dct,
        inj_dct,
        gridcons_dct,
        agg_dct,
    ]
).T
output_df.columns = [
    "State of charge (kW)",
    "Charge (kW)",
    "Discharge (kW)",
    "Auto-consumption NEW (kW)",
    "Lost injection due to battery efficiency (kW)",
    "Injection NEW (kW)",
    "Grid Consumption NEW (kW)",
    "Additional Auto-consumption through battery (kW)",
]
charge_dct = {}  # Charge (kW)
discharge_dct = {}  # Discharge (kW)
acc_dct = {}  # Auto-consumption NEW (kW)
lst_dct = {}  # Lost injection due to battery efficiency (kW)
inj_dct = {}  # Injection NEW (kW)
gridcons_dct = {}  # Grid Consumption NEW (kW)
agg_dct = {}  # Additional Auto-consumption through battery (kW)

for t in cons_prod.index:
    L = cons_prod.loc[t, "Injection (kW)"]
    m = cons_prod.loc[t, "Grid Consumption (kW)"]
    k = cons_prod.loc[t, "Auto-consumption (kW)"]
    f = cons_prod.loc[t, "Consumption (kW)"]
    if t == cons_prod.index[0]:
        # State of charge (kW)
        soc_dct.setdefault(t, soct0)

        # Charge (kW)
        charge_dct.setdefault(
            t,
            min(
                L,
                (bp_energy - soc_dct[t]) * 4,
                bp_power,
            )
            * battery_parameters["efficiency"]
            if L >= 0
            else 0,
        )

        # Discharge (kW)
        discharge_dct.setdefault(
            t,
            -min(
                m,
                4 * soc_dct[t] - 4 * (battery_parameters["minsoc"] * bp_energy),
                bp_power,
            )
            if m >= 0
            else 0,
        )

        # Auto-consumption NEW (kW)
        acc_dct.setdefault(t, k - discharge_dct[t])

        # Lost injection due to battery efficiency (kW)
        lst_dct.setdefault(
            t,
            (charge_dct[t] / battery_parameters["efficiency"]) - charge_dct[t],
        )

        # Injection NEW (kW)
        inj_dct.setdefault(t, L - charge_dct[t] - lst_dct[t])

        # Grid Consumption NEW (kW)
        gridcons_dct.setdefault(t, f - acc_dct[t])

        # Additional Auto-consumption through battery (kW)
        agg_dct.setdefault(t, acc_dct[t] - k)
    else:
        # State of charge (kW)
        soc_dct.setdefault(
            t,
            soc_dct[t - td(minutes=15)]
            + (charge_dct[t - td(minutes=15)] + discharge_dct[t - td(minutes=15)]) / 4,
        )

        # Charge (kW)
        charge_dct.setdefault(
            t,
            min(
                L,
                (bp_energy - soc_dct[t]) * 4,
                bp_power,
            )
            * battery_parameters["efficiency"]
            if L >= 0
            else 0,
        )

        # Discharge (kW)
        discharge_dct.setdefault(
            t,
            -min(
                m,
                4 * soc_dct[t] - 4 * (battery_parameters["minsoc"] * bp_energy),
                bp_power,
            )
            if m >= 0
            else 0,
        )

        # Auto-consumption NEW (kW)
        acc_dct.setdefault(t, k - discharge_dct[t])

        # Lost injection due to battery efficiency (kW)
        lst_dct.setdefault(
            t, charge_dct[t] / battery_parameters["efficiency"] - charge_dct[t]
        )

        # Injection NEW (kW)
        inj_dct.setdefault(t, L - charge_dct[t] - lst_dct[t])

        # Grid Consumption NEW (kW)
        gridcons_dct.setdefault(t, f - acc_dct[t])

        # Additional Auto-consumption through battery (kW)
        agg_dct.setdefault(t, acc_dct[t] - k)
# Creating a DataFrame with all the values
output_df = pd.DataFrame(
    data=[
        soc_dct,
        charge_dct,
        discharge_dct,
        acc_dct,
        lst_dct,
        inj_dct,
        gridcons_dct,
        agg_dct,
    ]
).T
output_df.columns = [
    "State of charge (kW)",
    "Charge (kW)",
    "Discharge (kW)",
    "Auto-consumption NEW (kW)",
    "Lost injection due to battery efficiency (kW)",
    "Injection NEW (kW)",
    "Grid Consumption NEW (kW)",
    "Additional Auto-consumption through battery (kW)",
]

cons_prod is the table imported into a DataFrame.

As you can see, we have two situations: when t == cons_prod.index[0] (i.e., the first item of the timestamps), the calculations use the values at the same t. However, from the second timestamp on, some of the calculations refer to the previous value (in here referred to the 15 minutes before using the index t - td(minutes=15)).

These are the reasons why I am struggling to move away from the for-loop.

A few explanations to possible questions

Q: Why are you using dictionaries? A: Because I figured that they fill faster than other data types, and I can use them to create a DataFrame later on.
Q: Are the timestamps coherent, e.g. do they have missing values? A: No missing values, as there is a previous function I wrote to make sure that the timestamps are filled completely.
Q: The calculations do not currently refer to a previous row in a DataFrame, but in a dictionary! Why the misleading title? A: This is the best solution I could come up with so far, but I am wondering whether my not-so-complete knowledge of Pandas is hiding a simpler, faster solution.

Hope the framework is clear.

Thank you in advance!

EDIT: as per request, added a 100-lines sample of cons_prod and modified the previous code to meet the requirements of a MRE.

EDIT 2: I have tried moving from dictionaries to a Pandas lookup, trying to optimise as much as possible. This is the code I came up with:

from time import time as tt

cp = cons_prod.copy(deep=True)

# Initialise the columns filling them with zeroes
cp["State of charge (kW)"] = 0
cp["Charge (kW)"] = 0
cp["Discharge (kW)"] = 0

# Storing the position of the columns in variables
cp_soc = cp.columns.get_loc("State of charge (kW)")
cp_charge = cp.columns.get_loc("Charge (kW)")
cp_discharge = cp.columns.get_loc("Discharge (kW)")
cp_inj = cp.columns.get_loc("Injection (kW)")
cp_gridcons = cp.columns.get_loc("Grid Consumption (kW)")

# Storing the values of the battery dictionary lookups in variables
bp_energy = dct_bp["energy"]
bp_power = dct_bp["power"]
bp_efficiency = dct_bp["efficiency"]
bp_soct0 = dct_bp["soct0"]
bp_minsoc = dct_bp["minsoc"]

start1 = tt()  # Measuring time
for row in cp.itertuples(name=None):  # Using itertuples to gain some speed
    L = cp.loc[row[0], "Injection (kW)"]
    m = cp.loc[row[0], "Grid Consumption (kW)"]
    k = cp.loc[row[0], "Auto-consumption (kW)"]
    f = cp.loc[row[0], "Consumption (kW)"]
    if row[0] == cp.index[0]:
        cp.iloc[0, cp_soc] = bp_soct0
        cp.iloc[0, cp_charge] = float(
            min(L, (bp_energy - bp_soct0) * 4, bp_power) * bp_efficiency
            if L >= 0
            else 0,
        )
        cp.iloc[0, cp_discharge] = float(
            -min(
                m,
                4 * bp_soct0 - 4 * (bp_minsoc * bp_energy),
                bp_power,
            )
            if m >= 0
            else 0
        )
    else:
        t = pd.Index(cp.index).get_loc(row[0])
        cp.iloc[t, cp_soc] = float(
            cp.iloc[t - 1, cp_soc]
            + (cp.iloc[t - 1, cp_charge] + cp.iloc[t - 1, cp_discharge]) / 4
        )
        cp.iloc[t, cp_charge] = float(
            min(L, (bp_energy - cp.iloc[t, cp_soc]) * 4, bp_power) * bp_efficiency
            if L >= 0
            else 0,
        )
        cp.iloc[t, cp_discharge] = float(
            -min(
                m,
                4 * cp.iloc[t, cp_soc] - 4 * (dct_bp["minsoc"] * bp_energy),
                bp_power,
            )
            if m >= 0
            else 0
        )
end1 = tt() - start1
print(f"Pandas lookup took {end1:.2f} seconds")

With this code, I got an average of 42 seconds per complete task, while I used to have <20 seconds with dictionaries.

Ciao Filippo, do you mind to add a [mcve]? In particular it will be great to have a sample of data in `cons_prod` — rpanai, Feb 08 '22 at 13:09
Hello @rpanai, thanks for the answer! I made the changes to the code to be just copy-pasted and used. As for `sample.csv`, use the data in the Pastebin I have attached. — Filippo Antonio Capizzi, Feb 08 '22 at 13:22
Consider creating empty columns (eg: for `soc_dct`) in your dataframe and then filling them incrementally row-by-row. Using dicts may actually be slower in this case. (1) I don't know what it means to say that dicts fill up faster. Dict usage requires hashing and then (possibly) linear searching, all of which is likely slower than accessing arrays by index in pandas, (2) Converting dict to df takes time, (3) `t - td(minutes=15)` always refers to the previous row of the df since data is in 15 min increments, which means dict lookup by the full timestamp of `t - td(minutes=15)` is wasteful. — Ankur, Feb 08 '22 at 20:01
Thank you @Ankur for your comment. I'd like to point out that I don't access the dictionaries, but I just fill them with a new key-value pair at every loop. Then the dicts are used to add the columns in the original dataframe, and I chose them instead of lists (or similar) because of their O(1) time complexity. Nonetheless, I'll try your way of creating empty columns and fill them, I'll let you know. — Filippo Antonio Capizzi, Feb 09 '22 at 08:55
@Ankur, added my attempt in using directly Pandas lookup. It's slower than using dictionaries (unless I can still optimise it?). — Filippo Antonio Capizzi, Feb 09 '22 at 10:13

Emma · Answer 1 · 2022-02-10T16:47:42.900

The accessing dict is fast in Python perspective, however, not quite true for Pandas perspective. One of the best feature of Pandas is the vectorization which makes the Pandas performs quite efficient on the large dataset.

What is vectorization, why is fast? => What is "vectorization"?

Also, I want to leave this reference. When you are working on the Pandas, try looking for the operations in the order that is mentioned in the link to optimize the processes.

https://stackoverflow.com/a/55557758/2956135

Back to your case, your calculations of most of parameters are only depending onto the data from the same row. This is the perfect case that you can do vectorization easily.

The 3 parameters ("State of charge", "Charge", and "Discharge", are depending onto each other and depending onto the calculation result from the previous row. This part I am not finding a way to vectorize. Hopefully, someone can optimize this further.

First I take out constant dictionary and keep it as basic constant. This is probably not making much difference but having simple reference is better than having an additional step to access constant in dictionary. If you had to pass multiple constants to a function, putting into dictionary and passing it as dictionary makes sense but here it is not the case.

power = 50
efficiency = 0.9
minsoc = 0.1
soct0 = 65.0
bp_energy = 130
bp_power = 50
soct0 = 0.5 * bp_energy

Then, define the function to calculate the 3 params, take out any calculations of other params.

def _calc_soc_discharge(prev, L, m):
    if prev is None:
        soc = 65.0

        # Charge (kW)
        charge = min(
            L,
            (bp_energy - soc) * 4,
            bp_power,
        ) * efficiency if L >= 0 else 0

        # Discharge (kW)
        discharge =  -min(
            m,
            4 * soc - 4 * (minsoc * bp_energy),
            bp_power,
        ) if m >= 0 else 0
        return [soc, charge, discharge]
    else:
        soc = prev[0] + (prev[1] + prev[2]) / 4

        # Charge (kW)
        charge = min(
            L,
            (bp_energy - soc) * 4,
            bp_power,
        ) * efficiency if L >= 0 else 0

        # Discharge (kW)
        discharge = -min(
            m,
            4 * soc - 4 * (minsoc * bp_energy),
            bp_power,
        ) if m >= 0 else 0
            
        return [soc, charge, discharge]

Call this function iteratively and append the result back to the dataframe.

scd = []
for i, row in cons_prod[['Injection (kW)', 'Grid Consumption (kW)']].iterrows():
    scd.append(_calc_soc_discharge(None if len(scd) == 0 else scd[-1], row['Injection (kW)'], row['Grid Consumption (kW)']))

cons_prod = pd.concat([cons_prod, pd.DataFrame(scd, columns=["State of charge (kW)", "Charge (kW)", "Discharge (kW)"], index=cons_prod.index)], axis=1)

Now, this dataframe has all data required to calculate other parameters. Here we use vectorization. This part should be really optimized. The mathematical operations calling with the Pandas Series is part of the vectorization.

cons_prod['Auto-consumption NEW (kW)'] = cons_prod['Auto-consumption (kW)'] - cons_prod['Discharge (kW)']
cons_prod['Lost injection due to battery efficiency (kW)'] = cons_prod['Charge (kW)'] / efficiency - cons_prod['Charge (kW)']
cons_prod['Injection NEW (kW)'] = cons_prod['Injection (kW)'] - cons_prod['Charge (kW)'] - cons_prod['Lost injection due to battery efficiency (kW)']
cons_prod['Grid Consumption NEW (kW)'] = cons_prod['Consumption (kW)'] - cons_prod['Auto-consumption NEW (kW)']
cons_prod['Additional Auto-consumption through battery (kW)'] = cons_prod['Auto-consumption NEW (kW)'] - cons_prod['Auto-consumption (kW)']

Benchmark

With the sample data on my laptop.

Original solution: 48.2 ms ± 6.17 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
With vectorization: 13.8 ms ± 2.13 ms per loop (mean ± std. dev. of 7 runs, 100 loops each)

this is exactly the kind of optimisation I was looking for! I kept on asking myself "how could I introduce vectorization here?", but I was missing the obvious. Thank you! — Filippo Antonio Capizzi, Feb 11 '22 at 13:57

score 0 · Answer 2 · answered Feb 09 '22 at 13:35

Since I don't have a data set to test with and I'm not sure just how fast you need this to run I figured I'd start an answer with some optimisations and suggestions and add to it until it covers everything off.

You are using textual labels to identify columns inside a loop that runs over all of your data:

L = cp.loc[row[0], "Injection (kW)"]
m = cp.loc[row[0], "Grid Consumption (kW)"]
k = cp.loc[row[0], "Auto-consumption (kW)"]
f = cp.loc[row[0], "Consumption (kW)"]

These labels are likely cached somewhere in a dictionary but you should be able to completely avoid this lookup using iloc instead of loc. Just identify the correct columns prior to starting the loop then use the integer indexes.

You have a dictionary of constants which you pull from inside your loop:

battery_parameters = {
    "power": 50,
    "energy": 130,
    "efficiency": 0.9,
    "minsoc": 0.1,
    "soct0": 65.0,
}

Every time you use one of these values in your loop you do an extra dictionary lookup for no reason. why not bp_power=50 bp_energy=130?

You have an if statement in your loop to check for the first row that gets evaluated every loop. Just pull the first row's code out of the loop and do it before the loop and you can skip this evaluation for all of the other rows.

I don't really see why you're using dictionaries and setdefault for the additional rows you're calculating. Unless I'm missing something you should be using a list as all of your keys are just integers anyway and setdefault checks for previous values that won't exist in the dictionary which wastes time. If you do something like:

soc_dct = [0] * num_rows

You'll have a preallocated list of 0s. The preallocation will save time but not much the big win is much faster lookups and insertions and no need to append to the lists as all the indexes will already exist. So instead of soc_dct.setdefault(index,newValue) you just use soc_dct[index] = newValue

I have a feeling this will get you to a level of performance you're happy with. It seems like you're just looping over data and doing simple math. Simple math is very slow in python but probably not a problem here. If you still want more speed you could use Cython to compile this module into a C module just using the cythonize command and if that isn't enough you'd likely have to go in and start doing type annotations.

Thanks for the reply, David. First , if you want to try with a dataset, you'll find it in the OP, the Pastebin link. Then, I see you're using the second example of code instead of the first: note that I did it just to test @Ankur's way. Nonetheless, I'll test removing the row check and the additional dictionaries lookup. — Filippo Antonio Capizzi, Feb 09 '22 at 13:56
I've never actually used pandas and I do expect there is probably a better way to add columns to the dataset as you go I am very confident unnecessary dictionary lookups are what are slowing you down. Again as I'm not familiar with pandas I can't say for sure how it works but I expect your main loop would be better using enumerate and upacking the rows in the for statement particularly if you know in advance the integer indexes of the various columns. The key is don't do unnecessary work especially in a big loop. — David Oldford, Feb 09 '22 at 15:12
Oh and looking at Ankur's comment now it seems to me he's effectively recommending the same thing as my list recommendation but the pandas way. It is definitely better to do this from a code perspective and likely faster (though maybe not faster depending on what pandas does in the background). This portion is the most meaningful optimisation as it eliminates a lot of unnecessary insertions into a dictionary. — David Oldford, Feb 09 '22 at 15:23

score 0 · Answer 3 · answered Feb 13 '22 at 15:22

You can use Numba to make this code much much faster. Numba use a just-in-time compiler to generate a very fast native code. Numba does not supports Pandas but the columns can be converted to Numpy view and Numba is able to works on them very quickly.

Assuming the datetime indexes are properly sorted in the input dataframe (which appears to be the case regarding the Pandas code), you can write the following Numba function:

import numba as nb

# Note: if you know that there is no NaN & Inf values then you can 
# use the additional flag fastmath=True so to generate a faster code.
# Check the results with this flag to be safe.
@nb.njit('void(float64[:], float64[:], float64[:], int64[:], float64[:], float64[:], float64[:])')
def compute(L, m, k, f, soc, charge, discharge):
    n = L.shape[0]
    assert m.size == n and k.size == n and f.size == n
    assert soc.size == n and charge.size == n and discharge.size == n

    for i in range(n):
        if i == 0:
            soc[i] = bp_soct0
            charge[i] = min(L[i], (bp_energy - bp_soct0) * 4.0, bp_power) * bp_efficiency if L[i] >= 0.0 else 0.0
            discharge[i] = -min(m[i], 4.0 * bp_soct0 - 4.0 * (bp_minsoc * bp_energy), bp_power) if m[i] >= 0.0 else 0.0
        else:
            soc[i] = soc[i-1] + (charge[i-1] + discharge[i-1]) * 0.25
            charge[i] = min(L[i], (bp_energy - soc[i]) * 4.0, bp_power) * bp_efficiency if L[i] >= 0.0 else 0.0
            discharge[i] = -min(m[i], 4.0 * soc[i] - 4.0 * (bp_minsoc * bp_energy), bp_power) if m[i] >= 0.0 else 0.0

@nb.njit is a decorator that compiles the Python function into a fast native code. Its string parameter is the signature of the function specifying the parameter types and the return type of the function. The types may be different on you machine. Note that it is better to work with well-defined types by manually casting Pandas columns if necessary. It can be not only fast, but also take less memory space and remove possible unexpected issues (eg. floating-point rounding).

The Numba code works on view of each columns of the copied Pandas dataframe called cp. The wrapping code is the following:

from time import time as tt

cp = cons_prod.copy(deep=True)

# Initialise the columns filling them with zeroes
cp["State of charge (kW)"] = 0.0
cp["Charge (kW)"] = 0.0
cp["Discharge (kW)"] = 0.0

dct_bp = {
    "power": 50,
    "energy": 130,
    "efficiency": 0.9,
    "minsoc": 0.1,
    "soct0": 65.0,
}

# Storing the values of the battery dictionary lookups in variables
bp_energy = dct_bp["energy"]
bp_power = dct_bp["power"]
bp_efficiency = dct_bp["efficiency"]
bp_soct0 = dct_bp["soct0"]
bp_minsoc = dct_bp["minsoc"]

start1 = tt()  # Measuring time

# Extract Numpy views from the Pandas dataframe (20 us)
L = cp["Injection (kW)"].to_numpy()
m = cp["Grid Consumption (kW)"].to_numpy()
k = cp["Auto-consumption (kW)"].to_numpy()
f = cp["Consumption (kW)"].to_numpy()
soc = cp["State of charge (kW)"].to_numpy()
charge = cp["Charge (kW)"].to_numpy()
discharge = cp["Discharge (kW)"].to_numpy()

# Mutate/Fill the columns (2 us)
compute(L, m, k, f, soc, charge, discharge)

end1 = tt() - start1
print(f"Numba took {end1:.2f} seconds")

Benchmark

On my machine, the initial Pandas code (in EDIT 2) takes 70 ms. The Numba compute function takes only 0.002 ms and the wrapping code calling to_numpy takes only 0.020 ms! This means the current Numba code is roughly 3200 times faster on the provided small example Pandas dataframe! Moreover, the wrapping code should take a constant time independent of the input size. This means that only the time of the compute function should take a significant time in the measured section on a bigger dataframe.

In fact, I tried to create a 100 times bigger input dataframe with values of the small one repeated for 3 months and here are performance results on my machine:

Initial code in ("EDIT 2"):      6830   ms
This Numba code:                    0.1 ms

Thus, on a bigger dataset, Numba is 68300 time faster!

Thank you Jérôme for your solution! Unfortunately I cannot use numba because this has to be frozen and packed with pyinstaller, and the latter has still issues with numba (it's a known bug). Let me say I would have used your solution in a flash! — Filippo Antonio Capizzi, Feb 14 '22 at 09:00
Ok, sad. Note that Numba as an [AOT compiler](https://numba.readthedocs.io/en/stable/user/pycc.html). Alternatively, you can use *Cython*. The code should be very similar and certainly very fast too (at least >1000x). [Cython seems to be supported by PyInstaller](https://sodocumentation.net/cython/topic/6386/cython-bundling). — Jérôme Richard, Feb 14 '22 at 20:18

score 0 · Answer 4 · answered Feb 13 '22 at 23:20

Cython can solve slow looping issue and it is perfect with any type of calculation you need. Cython allows you to code a c-style program in Python: https://cython.readthedocs.io/en/latest/src/quickstart/overview.html In order to execute a cython function, you need to create a file, write cython functions and compile the file. The documentation helps you to better understand the syntax. Another solution is to try the code in a Jupyter notebook, following this steps:

Execute in a console/terminal pip install Cython
In Jupyter notebook, write and execute %load_ext Cython
Create your function inside a cell where the first row is a magic-keyword %%cython

I also noticed some replicated code; I tried to simplify as much as I understood

That is my solution, run in a Jupyter notebook: I suppose the type for each column is float32/int32, instead of float64/int64; It should be a reasonable choice

cons_prod['Consumption (kW)'] = cons_prod['Consumption (kW)'].astype(np.int32)
cons_prod['Renewable Production (kW)'] = cons_prod['Renewable Production (kW)'].astype(np.float32)
cons_prod['Auto-consumption (kW)']= cons_prod['Auto-consumption (kW)'].astype(np.float32)
cons_prod['Injection (kW)']= cons_prod['Injection (kW)'].astype(np.float32)
cons_prod['Grid Consumption (kW)']= cons_prod['Grid Consumption (kW)'].astype(np.float32)

cons_prod.dtypes

To increase the performance of cython, an explicit typing is crucial

%%cython
import cython
cimport cython
cimport numpy as np
import numpy as np

ctypedef np.float32_t float32_t
ctypedef np.int32_t int32_t


def func(
    float32_t[:] injection,
    float32_t[:] grid_consumption,
    float32_t[:] auto_consumption,
    int32_t[:] consumption,
    battery_parameters
    ):
    
    cdef int n_rows = len(injection)
    cdef int bp_energy = battery_parameters["energy"]
    cdef int bp_power = battery_parameters["power"]
    cdef float soct0 = 0.5 * bp_energy
    
    soc_dct = np.zeros(n_rows, dtype=np.float32)
    charge_dct = np.zeros(n_rows, dtype=np.float32)
    discharge_dct = np.zeros(n_rows, dtype=np.float32)
    acc_dct = np.zeros(n_rows, dtype=np.float32)
    lst_dct = np.zeros(n_rows, dtype=np.float32)
    inj_dct = np.zeros(n_rows, dtype=np.float32)
    gridcons_dct = np.zeros(n_rows, dtype=np.float32)
    agg_dct = np.zeros(n_rows, dtype=np.float32)
    
    cdef float32_t[:] soc_view = soc_dct
    cdef float32_t[:] charge_view = charge_dct
    cdef float32_t[:] discharge_view = discharge_dct
    cdef float32_t[:] acc_view = acc_dct
    cdef float32_t[:] lst_view = lst_dct
    cdef float32_t[:] inj_view = inj_dct
    cdef float32_t[:] gridcons_view = gridcons_dct
    cdef float32_t[:] agg_view = agg_dct
    
    cdef int i
    
    # L = injection[i]
    # m = grid_consumption[i]
    # k = auto_consumption[i]
    # f = consumption[i]

    for i in range(0, n_rows):
        if i == 0:
            # State of charge (kW)
            soc_view[i] = soct0
        else:
            # State of charge (kW)
            soc_view[i] = soc_view[i-1] + (charge_view[i-1] + discharge_view[i-1]) / 4
        
        # Charge (kW)
        charge_view[i] = min(
            injection[i], (bp_energy - soc_view[i]) * 4,
            bp_power
        ) * battery_parameters['efficiency'] if injection[i]>=0 else 0
        
        # Discharge (kW)
        discharge_view[i] = -min(
            grid_consumption[i],
            4*soc_view[i] - 4*(battery_parameters['minsoc']*bp_energy),
            bp_power
        ) if grid_consumption[i] >= 0 else 0
    
        # Auto-consumption NEW (kW)
        auto_consumption[i] - discharge_view[i]
        
        # Lost injection due to battery efficiency (kW)
        lst_view[i] = (charge_view[i] / battery_parameters["efficiency"]) - charge_view[i]
        
        # Injection NEW (kW)
        inj_view[i] = injection[i] - charge_view[i] - lst_view[i]
        
        # Grid Consumption NEW (kW)
        gridcons_view[i] = consumption[i] - acc_view[i]
        
        # Additional Auto-consumption through battery (kW)
        agg_view[i] = acc_view[i] - auto_consumption[i]
        
    return soc_dct, charge_dct, discharge_dct, acc_dct, lst_dct, inj_dct, gridcons_dct,agg_dct

Benchmarks

Initial code: 15.6ms

Cython code: 349 µs

How to improve for-loop on DataFrame with row-wise calculation depending on previous row?

4 Answers4

Benchmark