To make the code more general, I have changed my previous answer to use a recursive solution. It also takes your latest comment into account:
import pandas as pd
# Sample data: one row per agent, giving its network and its parent.
cols = ['network_id', 'agent_id', 'parent_id']
df = pd.DataFrame(
    [
        # network 1
        (1, 10, 6),
        (1, 11, 7),
        (1, 12, 7),
        (1, 13, 8),
        (1, 6, 5),
        (1, 7, 5),
        (1, 8, 5),
        # network 2
        (2, 104, 101),
        (2, 105, 101),
        (2, 106, 101),
        (2, 107, 102),
        (2, 108, 103),
        (2, 101, 100),
        (2, 102, 100),
        (2, 103, 100),
    ],
    columns=cols,
)

# For each network, collect the set of every node id that appears in it,
# either as an agent or as a parent, so that nodes with children and nodes
# without children are both included.
all_nodes_in_networks = (
    df.groupby('network_id')
    .apply(lambda group: set(group[['agent_id', 'parent_id']].values.ravel()))
    .to_dict()
)
def find_children(df, node, network, explored_children=None):
    '''
    Find the direct children of a certain node within a network.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with ``network_id``, ``agent_id`` and ``parent_id`` columns.
    node
        Identifier of the parent whose children are looked up.
    network
        Identifier of the network the lookup is restricted to.
    explored_children : iterable, optional
        Nodes that were already visited. They are removed from the result,
        which takes care of the case where we come back to a visited node.

    Returns
    -------
    set
        The ``agent_id`` values whose ``parent_id`` equals ``node`` inside
        ``network``, minus everything listed in ``explored_children``.
    '''
    # ``None`` instead of ``[]`` as default: a mutable default is one shared
    # object for every call, which is a well-known source of hidden state.
    explored = set() if explored_children is None else set(explored_children)

    # Plain boolean masks select the same rows as the former ``query`` string
    # without having to parse an expression on every call.
    is_child = (df['parent_id'] == node) & (df['network_id'] == network)
    children = df.loc[is_child, 'agent_id'].tolist()

    return set(children) - explored
def recursive_find_children(df, node, network, explored_children=None):
    '''
    Find all children of a certain node within a network, at any nesting
    level (children, children of children, and so on).

    Parameters
    ----------
    df : pandas.DataFrame
        Table with ``network_id``, ``agent_id`` and ``parent_id`` columns.
    node
        Identifier of the node whose descendants are collected.
    network
        Identifier of the network the search is restricted to.
    explored_children : iterable, optional
        Nodes to treat as already visited. They are included in the result
        but are not expanded any further.

    Returns
    -------
    set
        Every node reachable from ``node`` by following parent -> child links
        inside ``network``, together with the given ``explored_children``.
        A node that is part of a cycle can appear in its own result.
    '''
    # ``None`` instead of ``[]`` as default: never share a mutable default.
    explored = set() if explored_children is None else set(explored_children)

    # Build the parent -> children map of this network in a single pass,
    # rather than filtering the whole DataFrame again for every visited node.
    in_network = df[df['network_id'] == network]
    children_of = {}
    for parent, child in zip(in_network['parent_id'].tolist(),
                             in_network['agent_id'].tolist()):
        children_of.setdefault(parent, set()).add(child)

    # Explicit stack instead of recursion: deep hierarchies cannot hit the
    # interpreter's recursion limit, and the single shared ``explored`` set
    # guarantees each node is expanded at most once (also in case of cycles).
    pending = [node]
    while pending:
        current = pending.pop()
        new_children = children_of.get(current, set()) - explored
        explored |= new_children
        pending.extend(new_children)

    return explored
Now let's apply the function above to all nodes:
# For every network, map each of its nodes to the set of all its children
# (at any nesting level).
all_children = {
    network: {
        node: recursive_find_children(df, node, network)
        for node in nodes
    }
    for network, nodes in all_nodes_in_networks.items()
}
all_children
Out[113]:
{1: {5: {6L, 7L, 8L, 10L, 11L, 12L, 13L},
6: {10L},
7: {11L, 12L},
8: {13L},
10: set(),
11: set(),
12: set(),
13: set()},
2: {100: {101L, 102L, 103L, 104L, 105L, 106L, 107L, 108L},
101: {104L, 105L, 106L},
102: {107L},
103: {108L},
104: set(),
105: set(),
106: set(),
107: set(),
108: set()}}
# Same nested layout as all_children, but holding the number of children
# found for each node instead of the children themselves.
all_children_number = {
    network: {
        node: len(children)
        for node, children in children_by_node.items()
    }
    for network, children_by_node in all_children.items()
}
all_children_number
Out[114]:
{1: {5: 7, 6: 1, 7: 2, 8: 1, 10: 0, 11: 0, 12: 0, 13: 0},
2: {100: 8, 101: 3, 102: 1, 103: 1, 104: 0, 105: 0, 106: 0, 107: 0, 108: 0}}
Hope this helps and that the code is clear enough.