After a long time and with better knowledge, I decided to compare several methods, and the winner is:
np.fromiter(chain(*g.get_edgelist()), np.dtype('i'), count=2*g.ecount()).reshape(-1, 2)
It is more than 4 times faster than np.array(g.get_edgelist())!
So the importance of structuring the output array can't be ignored here.
def edges_asarray1(g):
    """Return the edges of ``g`` as a 2-D array built with ``np.array``.

    Args:
        g: Graph object exposing ``get_edgelist()``, expected to return a
            list of 2-tuples (one tuple per edge).

    Returns:
        Array with one row per edge and two columns. A graph without edges
        yields an empty ``(0, 2)`` integer array.
    """
    edge_list = g.get_edgelist()
    if not edge_list:
        # np.array([]) would give a 1-D float array, losing the column
        # dimension that every other kernel in this file preserves.
        return np.empty((0, 2), dtype=np.intp)
    return np.array(edge_list)
def edges_asarray2(g):
    """Return the edges of ``g`` as a 2-D array by reading each edge's ``tuple``.

    Args:
        g: Graph object whose ``es`` attribute iterates over edge objects,
            each exposing a ``tuple`` attribute (expected to be a 2-tuple).

    Returns:
        Array with one row per edge and two columns. A graph without edges
        yields an empty ``(0, 2)`` integer array.
    """
    pairs = [edge.tuple for edge in g.es]
    if not pairs:
        # np.array([]) would give a 1-D float array, losing the column
        # dimension that every other kernel in this file preserves.
        return np.empty((0, 2), dtype=np.intp)
    return np.array(pairs)
def edges_fromiter1A(g):
    """Return the edges of ``g`` as a 2-D array via a structured ``np.fromiter``.

    Each tuple from ``g.get_edgelist()`` is read as one record made of two
    ``np.intp`` fields; the record array is then reinterpreted as plain
    ``np.intp`` values and reshaped into two columns.

    Args:
        g: Graph object exposing ``get_edgelist()``, expected to return an
            iterable of 2-tuples of integers.

    Returns:
        ``np.intp`` array with one row per edge and two columns.
    """
    # Two unnamed integer fields: one record holds both endpoints of an edge.
    pair_dtype = np.dtype([('', np.intp)] * 2)
    records = np.fromiter(g.get_edgelist(), pair_dtype)
    # view() reinterprets the same buffer without copying it.
    return records.view(np.intp).reshape(-1, 2)
def edges_fromiter1B(g):
    """Return the edges of ``g`` as a 2-D array via a flat ``np.fromiter``.

    The edge tuples are flattened into one stream of integers, read into a
    1-D array of known length, and reshaped into two columns.

    Args:
        g: Graph object exposing ``get_edgelist()`` (expected to return a
            list of 2-tuples of integers) and ``ecount()`` (the edge count).

    Returns:
        Array of dtype ``'i'`` with one row per edge and two columns.
    """
    # from_iterable walks the edge list lazily instead of star-unpacking
    # every edge tuple into a single call's argument list.
    flat = chain.from_iterable(g.get_edgelist())
    # Two integers per edge, so the exact output length is known up front.
    index = np.fromiter(flat, np.dtype('i'), count=2 * g.ecount())
    return index.reshape(-1, 2)
# Create a figure with figsize (10, 10); presumably the benchmark plot
# produced further down is drawn on it -- TODO confirm.
fig = plt.figure(figsize=(10, 10))
def edges_fromiter2A(g):
    """Return the edges of ``g`` as a 2-D array from per-edge ``tuple`` records.

    Each edge's ``tuple`` attribute is read as one record made of two
    ``np.intp`` fields; the record array is then reinterpreted as plain
    ``np.intp`` values and reshaped into two columns.

    Args:
        g: Graph object whose ``es`` attribute iterates over edge objects,
            each exposing a ``tuple`` attribute (expected to be a 2-tuple
            of integers).

    Returns:
        ``np.intp`` array with one row per edge and two columns.
    """
    # Two unnamed integer fields: one record holds both endpoints of an edge.
    pair_dtype = np.dtype([('', np.intp)] * 2)
    # A generator expression replaces map() with a lambda.
    records = np.fromiter((edge.tuple for edge in g.es), pair_dtype)
    # view() reinterprets the same buffer without copying it.
    return records.view(np.intp).reshape(-1, 2)
def edges_fromiter2B(g):
    """Return the edges of ``g`` as a 2-D array from flattened ``tuple`` values.

    The ``tuple`` attribute of every edge in ``g.es`` is flattened into one
    stream of integers, read into a 1-D array of known length, and reshaped
    into two columns.

    Args:
        g: Graph object whose ``es`` attribute iterates over edge objects
            exposing a ``tuple`` attribute (expected to be a 2-tuple of
            integers), and whose ``ecount()`` returns the edge count.

    Returns:
        Array of dtype ``'i'`` with one row per edge and two columns.
    """
    # from_iterable consumes the edges lazily instead of star-unpacking
    # every edge tuple into a single call's argument list.
    flat = chain.from_iterable(edge.tuple for edge in g.es)
    # Two integers per edge, so the exact output length is known up front.
    index = np.fromiter(flat, np.dtype('i'), count=2 * g.ecount())
    return index.reshape(-1, 2)
# Switch the grid on with which="both" before the benchmark is drawn.
plt.grid(True, which="both")

# Benchmark every edge-extraction kernel on graphs built by Erdos_Renyi with
# n vertices and 5*n edges, for n = 2**4 ... 2**20.
out = perfplot.bench(
    setup=lambda x: ig.Graph.Erdos_Renyi(n=x, m=5 * x),
    kernels=[
        edges_asarray1,
        edges_asarray2,
        edges_fromiter1A,
        edges_fromiter1B,
        edges_fromiter2A,
        edges_fromiter2B,
    ],
    n_range=[2 ** k for k in range(4, 21)],
    xlabel='n',
    title='testing graph with n nodes and 5*n edges',
    show_progress=True,
)

# Display the collected timings.
out.show()
