First thing, split fields by '/'
df.SUBJECT = df.SUBJECT.str.split('/')
df.STUDENT = df.STUDENT.str.split('/')
Then I use a function to explode rows. However, I had to segregate those rows that only had one student or subject.
def explode(df, columns):
idx = np.repeat(df.index, df[columns[0]].str.len())
a = df.T.reindex_axis(columns).values
concat = np.concatenate([np.concatenate(a[i]) for i in range(a.shape[0])])
p = pd.DataFrame(concat.reshape(a.shape[0], -1).T, idx, columns)
return pd.concat([df.drop(columns, axis=1), p], axis=1).reset_index(drop=True)
cond = df.STUDENT.str.len() == df.SUBJECT.str.len()
df_paired = df[cond]
df_unpard = df[~cond]
if not df_paired.empty:
df_paired = explode(df_paired, ['STUDENT','SUBJECT'])
if not df_unpard.empty:
df_unpard = explode(explode(df_unpard, ['STUDENT']), ['SUBJECT'])
Finally
pd.concat([df_paired, df_unpard], ignore_index=True)[df.columns]

Timing
piRSquared
%%timeit
df = df_.copy()
df.SUBJECT = df.SUBJECT.str.split('/')
df.STUDENT = df.STUDENT.str.split('/')
def explode(df, columns):
idx = np.repeat(df.index, df[columns[0]].str.len())
a = df.T.reindex_axis(columns).values
concat = np.concatenate([np.concatenate(a[i]) for i in range(a.shape[0])])
p = pd.DataFrame(concat.reshape(a.shape[0], -1).T, idx, columns)
return pd.concat([df.drop(columns, axis=1), p], axis=1).reset_index(drop=True)
cond = df.STUDENT.str.len() == df.SUBJECT.str.len()
df_paired = df[cond]
df_unpard = df[~cond]
if not df_paired.empty:
df_paired = explode(df_paired, ['STUDENT','SUBJECT'])
if not df_unpard.empty:
df_unpard = explode(explode(df_unpard, ['STUDENT']), ['SUBJECT'])
pd.concat([df_paired, df_unpard], ignore_index=True)[df.columns]
100 loops, best of 3: 7.76 ms per loop
jezrael
%%timeit
df = df_.copy()
s1 = df.SUBJECT.str.split('/', expand=True).stack()
s2 = df.STUDENT.str.split('/', expand=True).stack()
df1 = pd.concat([s1,s2], axis=1, keys=('SUBJECT','STUDENT')) \
.ffill() \
.reset_index(level=1, drop=True)
df.drop(['SUBJECT','STUDENT'], axis=1) \
.join(df1) \
.reset_index(drop=True)[['SUBJECT', 'STUDENT', 'CITY','STATE']]
100 loops, best of 3: 5.13 ms per loop