I have some class labels stored as integers, where each class occurs in continuous blocks of rows. I would like to number each contiguous block of identical class values sequentially as the index increases.
Here's my code, including dummy data and what I've tried with `ngroup`:
import pandas as pd
# Dummy data: six consecutive runs of class labels, plus the numbering we want
kagg_classes = [1] * 8 + [2] * 7 + [1] * 4 + [0] * 5 + [2] * 7 + [0] * 5
wanted_numbering = [1] * 8 + [2] * 7 + [3] * 4 + [4] * 5 + [5] * 7 + [6] * 5
data = pd.DataFrame({
    'Index': range(36),
    'KAGG': kagg_classes,
    'Desired result': wanted_numbering,
})
# Attempt: ngroup() assigns one number per distinct KAGG value, so separate
# runs of the same class share a number instead of each run getting its own.
class_groups = data.groupby("KAGG")
data["SEQ"] = class_groups.ngroup() + 1
# NOTE(review): `plt` is not imported in this snippet — presumably
# `import matplotlib.pyplot as plt`; confirm.
# One panel per column, each drawn against Index on shared axes.
fig, panels = plt.subplots(1, 3, sharex=True, sharey=True, figsize=(10, 8))
ax1, ax2, ax3 = panels
for panel, column in zip(panels, ('KAGG', 'SEQ', 'Desired result')):
    panel.plot(data[column], data['Index'])
The current result and the desired result are pictured below. The plots are, from left to right: the dummy data, my attempt, and the desired result.
So the following answer worked:
code:
# Dummy data: class labels in consecutive runs, and the run numbering we expect
dummy_columns = {
    'Index': range(36),
    'KAGG': [1] * 8 + [2] * 7 + [1] * 4 + [0] * 5 + [2] * 7 + [0] * 5,
    'Desired result': [1] * 8 + [2] * 7 + [3] * 4 + [4] * 5 + [5] * 7 + [6] * 5,
}
data = pd.DataFrame(dummy_columns)
# Flag every row whose class differs from the row above. The first row's diff
# is NaN, which compares unequal to 0, so it is flagged as the start of run 1.
starts_new_run = data['KAGG'].diff() != 0
data['diff_or'] = starts_new_run.astype(int)
# A running total of the flags gives each run its sequential label.
data['labels'] = data['diff_or'].cumsum()
# NOTE(review): `plt` is not imported in this snippet — presumably
# `import matplotlib.pyplot as plt`; confirm.
# Left panel: raw class labels; right panel: computed run labels (both vs. Index).
fig, panels = plt.subplots(1, 2, sharex=True, sharey=True, figsize=(10, 8))
ax1, ax2 = panels
for panel, column in zip(panels, ('KAGG', 'labels')):
    panel.plot(data[column], data['Index'])