I have a df as shown below
df = pd.DataFrame({'Session': ['s1', 's1', 's1', 's1', 's1', 's1', 's1',
's1', 's1', 's1', 's1', 's1', 's1', 's1', 's1'],
'slot_num': [1, 2, 3, 3, 4, 4, 5, 5, 6, 7, 7, 8, 8, 9, 9]})
df:
Session slot_num
s1 1
s1 2
s1 3
s1 3
s1 4
s1 4
s1 5
s1 5
s1 6
s1 7
s1 7
s1 8
s1 8
s1 9
s1 9
From the above I would like to create a column called service_time randomly with mean exactly 20, maximum is 25 and minimum 2 as quickly as possible.
I tried below code but it is not giving the mean exactly 20.
Note: Service time should contain whole numbers only
# generate service time with mean = 20, min = 2 and max = 25
def gen_avg(n, expected_avg=20, a=2, b=25):
l = np.random.randint(a, b, size=n)
while True:
if np.mean(l) == expected_avg:
break
while np.mean(l) > expected_avg:
c = np.random.choice(np.where(l>expected_avg)[0])
l[c] = np.random.randint(a, expected_avg+1)
while np.mean(l) < expected_avg:
c = np.random.choice(np.where(l<expected_avg)[0])
l[c] = np.random.randint(expected_avg, b)
return l
df['service_time'] = df.groupby('Session')['Session'].transform(lambda x: gen_avg(len(x)))
I tried below one as well but it is taking very long time
#https://stackoverflow.com/a/39435600/2901002
def gen_avg(n, expected_avg=20, a=5, b=25):
while True:
l = np.random.randint(a, b, size=n)
avg = np.mean(l)
if avg == expected_avg:
return l
df['service_time'] = df.groupby('Session')['Session'].transform(lambda x: gen_avg(len(x)))