I'm currently working within a dataframe and although the code works, the current routine takes around 1 hour to complete. Based upon claims_data having 130K rows and contract_data_shaped having 600 separate rows. Rough outline of routine is as follows:
- Gather the data into df
- Create a list of 600 custom objects taking the object attributes from an underlying dataframe.
- Apply a UDF to each of the 600 objects, element wise, across all 130K rows of claims_data and create columns for each output, again element-wise using .apply()
I know this is probably awful code but could somebody please indicate how I could perhaps speed this up? Is there some other way to write this function so that it vectorises the operations somehow please? Class and UDF as follows held within RI.py:
class XOL:
"""This is a generic which returns non-proportional recoveries"""
def __init__(self, **kwargs):
self.name = kwargs.get("name")
self.attachment = float(kwargs.get("attachment"))
self.limit = float(kwargs.get("limit"))
self.classes_covered = kwargs.get("classes_covered")
self.uwy_covered = float(kwargs.get("uwy_covered"))
self.placed = float(kwargs.get("placed"))
self.currency_covered = kwargs.get("currency_covered")
def calculate_recovery(self, **kwargs):
# global recovery
if (float(kwargs["gross_loss"]) >= self.attachment) & \
(kwargs["actuarial_class"] in self.classes_covered) & \
(kwargs["uwy"] == self.uwy_covered) & \
(kwargs["currency"] == self.currency_covered
):
return (min(kwargs["gross_loss"], self.limit) - self.attachment) * self.placed
else:
return 0
Main.py code as follows:
axis=1)
from RI import XOL
import pandas as pd
import openpyxl
import xlwings as xw
claims_data_path = "S:/Actuarial/Reserving/2022/2205/Financial Lines/Five Segments/Data/Reserving Policy Detail - Financial Lines 2205 v1.xlsx"
contract_details_path = "P:/WIP/Net Down/FinLinesContractDetails.xlsx"
fx_rates = pd.DataFrame(xw.Book(claims_data_path, update_links=False).sheets["Lookups"].range("H4:I180").value)
fx_rates.columns = fx_rates.iloc[0]
fx_rates = fx_rates[1:]
fx_rates = {row[0]: row[1] for index, row in fx_rates.iterrows()}
# Map the existing Actuarial classes to the RI mapping of three buckets
required_class_dict = {"Financial Lines - Commercial D&O": "D&O",
"Financial Lines - TRI": "W&I/TRI",
"Financial Lines - Cyber": "Cyber",
"Financial Lines - Commericial PI": "D&O",
"Financial Lines - Financial Institutions": "D&O",
}
claims_data = pd.DataFrame(xw.Book(claims_data_path, update_links=False).sheets["Reserving Policy Detail"].range("A3:BA128292").value)
claims_data.columns = claims_data.iloc[0]
claims_data = claims_data[1:]
claims_data["Adjusted Class"] = claims_data["Actuarial Class"].map(required_class_dict)
# multiply all relevant amount column by -1
col_list = ["Gross_Paid_Losses",
"Gross_Outstanding_Losses",
"Gross_Incurred_Losses",
"Net_Paid_Losses",
"Net_Outstanding_Losses",
"Net_Incurred_Losses"]
# swap sign convention of claims numbers
for col in col_list:
claims_data[col] *= -1
contract_data = pd.read_excel(contract_details_path, header=[0, 1], index_col=[0, 1, 2])
# Remove zero rows from the contract data mapping
contract_data = contract_data.loc[(contract_data.sum(axis=1) != 0), :]
contract_data_shaped = contract_data.stack(level=1)\
.reset_index()\
.rename(columns={"level_0": "Alias",
"level_1": "Treaty Year",
"level_2": "Currency",
"Currency": "Class"}
)
contract_data_shaped.to_csv("P:/WIP/Net Down/contract_data_shaped.csv", index=True, header=True)
# Chop the contract data down to save processing time - those contract terms = 0 placed
contract_data_shaped = contract_data_shaped[contract_data_shaped["Placed"] != 0]
# create a list of XOL contract objects, each defined by their attributes as loaded by the input files
contract_obj_list = [XOL(name=index,
attachment=row["Attachment"],
limit=row["Limit"],
placed=row["Placed"],
classes_covered=row["Class"],
uwy_covered=row["Treaty Year"],
currency_covered=row["Currency"])
for index, row in contract_data_shaped.iterrows()
]
# apply each contract in the contract list of instantiated XOL objects to each and every claim
recovering_contract_list = []
for contract in contract_obj_list:
claims_data[f"Contract_ID_{contract.name}"] = claims_data.apply(lambda x: contract.calculate_recovery(
gross_loss=float(x["Gross_Incurred_Losses"]),
actuarial_class=x["Adjusted Class"],
uwy=x["Underwriting_Year"],
currency=x["Currency"]),
axis=1)