I have the following code :
from __future__ import division
import pyodbc
import csv
import pandas as pd
import numpy as np
count = 1
dsn = "DRIVER={SQL Server};server=XXXX;database=ABCD"
conn = pyodbc.connect(dsn)
cursor = conn.cursor()
#policy = cursor.execute("select distinct client_num, py_type, py_num, max(ex_date_n) as ex_date_n, max(ef_date_n) as ef_date_n from dbo.policy group by client_num, py_type, py_num")
policy = cursor.execute("select distinct client_num, py_type, py_num, max(ex_date_n) as ex_date_n,max(ef_date_n) as ef_date_n from dbo.policy where client_num = 62961 and py_type = 'A' and py_num = '01' group by client_num, py_type, py_num")
results1 = cursor.fetchall()
for row in results1:
pol_client_num = row.client_num.strip()
pol_py_type = row.py_type.strip()
pol_py_num = row.py_num.strip()
pol_number = pol_client_num+pol_py_type+pol_py_num
pol_exp_date = row.ex_date_n
pol_eff_date = row.ef_date_n
#related = cursor.execute("select distinct a.client_num,a.py_type,a.py_num,a.rclient_num,a.py_rtype,a.rpy_num from policy_replace a where a.client_num = "+pol_client_num+" and a.py_type = '"+pol_py_type+"' and a.py_num = '"+pol_py_num+"'")
related = cursor.execute("select distinct a.client_num,a.py_type,a.py_num,a.rclient_num,a.py_rtype,a.rpy_num from policy_replace a where a.client_num = 62961 and a.py_type = 'A' and a.py_num = 01")
results2 = cursor.fetchall()
for row in results2:
rel_client_num = row.rclient_num.strip()
rel_py_type = row.py_rtype.strip()
rel_py_num = row.rpy_num.strip()
rel_pol_number = rel_client_num+rel_py_type+rel_py_num
related_dates = cursor.execute("select max(b.ex_date_n) as ex_date_n, b.ef_date_n from policy b where b.ex_date_n >= 20200225 and b.client_num = "+rel_client_num+" and b.py_type = '"+rel_py_type+"' and b.py_num = '"+rel_py_num+"' group by b.ef_date_n")
#related_dates = cursor.execute("select max(b.ex_date_n) as ex_date_n, b.ef_date_n from policy b where b.client_num = 37916 and b.py_type = 'F' and b.py_num = 05 group by b.ef_date_n")
results3 = cursor.fetchall()
for row in results3:
rel_exp_date = row.ex_date_n
rel_eff_date = row.ef_date_n
final_result1 = (pol_number,pol_exp_date,pol_eff_date,rel_pol_number,rel_exp_date,rel_eff_date)
df = pd.DataFrame(final_result1)
df = df.transpose()
df.columns = ['pol_number','pol_exp_date','pol_eff_date','rel_pol_number','rel_exp_date','rel_eff_date']
df_grouped = df.groupby('pol_number')['rel_exp_date'].min()
print(df_grouped)
print('done')
On execution, the following is the data output: For results1,
'62961 ', 'A', '01', 20210429, 20200429
For results2,
('62961 ', 'A', '01', '62961', 'A', '02'),
('62961 ', 'A', '01', '62961', 'A', '03'),
('62961 ', 'A', '01', '63832', 'A', '01')
For results3,
[(20201107, 20191107)]
[(20210407, 20200407)]
[(20200719, 20190719)]
The expected output is as follows:
'69621','A','01',20210429,20200429,'69621','A','02',20201107,20191107,'62961','A','03',20210407,20200407,'63832','A','01',20200719,20190719,'63832','A','01',20200719,20190719
The format of the output required is : for every row in results1 --- every related row in results2 --- every related date in results3 --- minimum of the exp_date across all rows and the corresponding pol_num/rel_pol_num This is the reason I am trying to use the df.min() function to get the min across all the exp_date's. But, it doesn't seem to do the job since I am possibly missing something. I also tried axis = 0 as mentioned in the comments but it didn't work. Any direction is appreciated.