the code below is outputting the total months a unique user id is in, and counting the total. What I need is to calculate active users based on usage level >= 100 for the last 12 months using the log-in month and year.
import pandas as pd
import numpy as ny
import sqldf as sqldf
#raw_data="data_python.xlsx"
df_raw = pd.read_excel(r"data_python.xlsx")
print (df_raw)
result = df_raw.groupby(["Usage","Month","Year"])["User ID"].nunique()
Active_Users = df_raw.groupby(["Usage","Month","Year"])["User ID"].apply(lambda x: frozenset(x.values))
Active_Users = Active_Users.reset_index()
Active_Users = pd.concat([Active_Users] + [Active_Users["Usage"].shift(i) for i in range(1, 12)], axis ="columns")
Active_Users.columns = ["User ID","Month","Year"] + [ f"shift_{i}" for i in range(12) ]
def count_unique(row):
total_set = frozenset()
columns = [ f"shift_{i}" for i in range(12) ]
for col in columns:
if row.get(col) and type(row.get(col)) == frozenset:
total_set = total_set | row.get(col)
return len(total_set)
Active_Users["T_Months_Usage"] = Active_Users.apply(count_unique, axis=1)
Active_Users
I have tried changing the usage and month variables but it is still counting months rather than individuals usage how do I supplement these 2 fields?