Let's say you have two data frames, each with a column that holds a list in every row.
What would the Python code look like to pair up the rows whose lists share at least one element and combine their lists?
# Sample inputs: each frame has an 'Id' column and a column holding one list per row.
df1 = pd.DataFrame(
    {
        'Id': [1, 2, 3],
        'a': [[1, 2, 3], [4, 5], [6]],
    },
)
df2 = pd.DataFrame(
    {
        'Id': [1, 2, 3],
        'b': [[3], [6, 7], [8]],
    },
)

# Compare every row of df2 (outer loop, which fixes the output row order) with
# every row of df1 and keep the pairs whose lists share at least one element.
# The Ids are carried along with their lists, so they never have to be looked
# up again by list value; that lookup breaks when a column holds two equal lists.
_overlapping = [
    ([id_a, id_b], list(set(a) | set(b)))
    for id_b, b in zip(df2['Id'].tolist(), df2['b'])
    for id_a, a in zip(df1['Id'].tolist(), df1['a'])
    if set(a) & set(b)
]

# 'Id' holds [Id from df1, Id from df2]; 'a&b' holds the union of the two lists.
df3 = pd.DataFrame(
    {
        'Id': [ids for ids, _ in _overlapping],
        'a&b': [merged for _, merged in _overlapping],
    }
)
I know my solution is very messy, but when I try to solve it another way (for example, without converting the lists to tuples, which would be much simpler), I get various errors such as `ValueError: Lengths must match to compare`,
which is a common error that we all know about.
- `set.union()` to get the superset of a and b
- `list` of the combined id columns
- `dropna()` to remove the rows that did not match, and `loc[]` to keep just the columns you want
# Sample inputs: an id column plus a column holding one list per row.
df = pd.DataFrame({"id": [1, 2, 3], "a": [[1, 2, 3], [4, 5], [6]]})
df1 = pd.DataFrame({"id": [1, 2, 3], "b": [[3], [6, 7], [8]]})

# Cross join: a constant key pairs every row of df with every row of df1.
_pairs = df.assign(foo=1).merge(df1.assign(foo=1), on="foo")

# Row-wise: do the two lists share an element, and what is their union?
_shares_element = _pairs.apply(
    lambda row: any(item in row.a for item in row.b), axis=1
)
_merged_lists = _pairs.apply(
    lambda row: list(set(row.a).union(row.b)), axis=1
)

# Non-overlapping pairs get NaN so that dropna() removes them.
_matched = _pairs.assign(
    **{"a&b": np.where(_shares_element, _merged_lists, np.nan)}
).dropna()

# Combine the two id columns into one list per row and keep the result columns.
_pair_ids = _matched.loc[:, ["id_x", "id_y"]].apply(list, axis=1)
df2 = _matched.assign(id=_pair_ids).loc[:, ["id", "a&b"]]
|   | id | a&b |
|---|---|---|
| 0 | [1, 1] | [1, 2, 3] |
| 7 | [3, 2] | [6, 7] |
Alternatively, keep a copy of each original `list`
# Keep a copy of each original list (a_arr / b_arr) before it's exploded so it
# can be easily reconstructed after the element-wise merge.
(
    df.assign(a_arr=df.a)
    .explode("a")
    # Inner merge on the exploded elements keeps only rows that share a value.
    .merge(df1.assign(b_arr=df1.b).explode("b"), left_on="a", right_on="b")
    # Two lists sharing several elements match once per shared element;
    # keep a single row for each (id_x, id_y) pair.
    .drop_duplicates(subset=["id_x", "id_y"])
    .assign(
        **{
            # Pair of source ids: [id from df, id from df1].
            "id": lambda dfa: dfa.loc[:, ["id_x", "id_y"]].apply(list, axis=1),
            # Union rebuilt from the preserved full lists, not the exploded values.
            "a&b": lambda dfa: dfa.loc[:, ["a_arr", "b_arr"]].apply(
                lambda r: list(set(r.a_arr).union(r.b_arr)), axis=1
            ),
        }
    )
    .loc[:, ["id", "a&b"]]
)