I've seen the question and answer relating to `UserWarning: Boolean Series key will be reindexed to match DataFrame index`.
I've tried breaking out the commands individually as best I could, but I still get the `UserWarning`. What else could I do to make the code below more explicit than it already is, and eliminate the warning on the last two statements (`dfa_ = dfa[qux]` and `dfb_ = dfb[qux]`)?
import random
import pandas as pd
# Column names shared by every generated record.
fields = [
    'foo',
    'bar',
    'baz',
]
def randstring(x: int = 3) -> str:
    """Return a random string of ``x`` uppercase ASCII letters (A-Z).

    Each character is drawn independently with ``random.randrange(65, 91)``,
    i.e. code points 65 ('A') through 90 ('Z') inclusive. A non-positive
    ``x`` yields the empty string.
    """
    return ''.join(chr(random.randrange(65, 91)) for _ in range(x))
def randstrlist(x: int = 3, count: int = 5) -> list:
    """Return a list of ``count`` random strings, each ``x`` characters long.

    Every element comes from a separate ``randstring(x)`` call, so the list
    may contain duplicates. ``count`` defaults to 5, the size that was
    previously hard-coded.
    """
    return [randstring(x) for _ in range(count)]
# One independently generated pool of candidate strings per field name.
values = {name: randstrlist() for name in fields}
def getitem() -> dict:
    """Return one random record.

    The result maps each name in the module-level ``fields`` list to a value
    picked with ``random.choice`` from that field's pool in ``values``.
    """
    return {field: random.choice(values[field]) for field in fields}
def getdata(setsize: int = 100) -> list:
    """Return a list of ``setsize`` random records.

    Each record is the dict produced by a separate ``getitem()`` call. The
    result is a plain list of dicts, not a DataFrame.
    """
    return [getitem() for _ in range(setsize)]
# getdata() returns a plain list of dicts; wrap it in a DataFrame so that
# .columns and .groupby below are available.
dfa = pd.DataFrame(getdata())
dfb = pd.DataFrame(getdata())

# Number repeated value combinations within each frame (0, 1, 2, ...) so that
# the n-th duplicate in dfa pairs with the n-th duplicate in dfb in the merge,
# rather than every duplicate matching every other one.
cols = dfa.columns.tolist()
dfa['qux'] = dfa.groupby(cols).cumcount()
dfb['qux'] = dfb.groupby(cols).cumcount()
cols = cols + ['qux']

# Name the dropped column by keyword: pandas 2.x no longer accepts the axis
# as a second positional argument to drop().
dfo = pd.merge(dfa, dfb, on=cols, how='outer').drop(columns='qux')
qux = dfo.isnull().any(axis=1)

# qux is indexed like dfo (the merge result), not like dfa or dfb. Indexing
# dfa[qux] directly makes pandas realign the mask behind the scenes and emit
# "UserWarning: Boolean Series key will be reindexed to match DataFrame
# index". Doing that alignment explicitly selects the same rows without the
# warning; labels missing from qux are treated as False.
# NOTE(review): the merge result gets a fresh integer index, so label i of
# qux does not generally describe row i of dfa/dfb. Also, since every column
# is a join key here, the outer merge should introduce no NaN at all, leaving
# the mask all False. If the goal is "rows present in only one frame",
# merging with indicator=True and filtering on the `_merge` column is
# probably what is wanted -- confirm the intent.
dfa_ = dfa[qux.reindex(dfa.index, fill_value=False)]
dfb_ = dfb[qux.reindex(dfb.index, fill_value=False)]