# Sample data: ten transactions, each with a grouping id and an integer
# timestamp (treated as epoch seconds by the conversion further down).
df = pl.DataFrame(
    {
        "txn_id": [
            "0x5...60", "0x1...6d", "0x9...84", "0xc...25", "0x5...50",
            "0xe...14", "0x2...f3", "0xe...75", "0x3...95", "0x4...4e",
        ],
        "txn_grouping": [
            "0x4...dd", "0x4...dd", "0xf...e2", "0x4...17", "0xe...8b",
            "0x6...4e", "0xe...da", "0xf...f2", "0x1...21", "0xc...cf",
        ],
        "ts": [
            1438918233, 1438918613, 1438918630, 1438918983, 1438919175,
            1438918630, 1438919451, 1438919461, 1438919491, 1438919571,
        ],
    }
)
# Convert the integer `ts` column to a millisecond Datetime, then count the
# distinct transaction ids that fall into each 5-minute window.
# NOTE: this rebinds `df` to the aggregated frame (columns `ts` and
# `num_txs_per_5m`); the per-row columns are no longer available afterwards.
df = (
    df.select([
        pl.exclude("ts"),
        # Seconds -> milliseconds, reinterpreted as Datetime('ms').
        (pl.col("ts") * 1000).cast(pl.Datetime("ms")),
    ])
    # groupby_dynamic requires the index column in ascending order, and the
    # sample timestamps are not sorted (1438918630 reappears after 1438919175).
    .sort("ts")
    .groupby_dynamic("ts", every="5m")
    .agg([
        pl.n_unique("txn_id").alias("num_txs_per_5m"),
    ])
)
Ideally, I'd like to retain the original dataframe and add a new column called 'num_txs_per_5m'. I can do this kind of window operation with non-timestamp-based fields, for example:
# Window expression: count `txn_id` within each `txn_grouping` partition,
# selected alongside the grouping column itself.
df.select(
    [
        pl.col("txn_id").count().over("txn_grouping"),
        pl.col("txn_grouping"),
    ]
)
Is it possible in Polars to do this kind of time-based window operation via
`.over()`?