11

I'm trying to make a function which concatenate multiple list if one element is the same in 2 or more different list.

Example :

[[1,2],[3,4,5],[0,4]] would become [[1,2],[0,3,4,5]

[[1],[1,2],[0,2]] would become [[0,1,2]]

[[1, 2], [2, 3], [3, 4]] would become [[1,2,3,4]]

In fact we just regroup the list if they have a common element and we delete one of the two element. The finals lists must have unique elements.

I tried to make the following function. It works, but when using big list (around 100 or 200 lists of list), I got the following recursion error : RecursionError: maximum recursion depth exceeded while getting the repr of an object

def concat(L):
   break_cond = False
   print(L)
   for L1 in L:
       for L2 in L:
           if (bool(set(L1) & set(L2)) and L1 != L2):
               break_cond = True
   if (break_cond):
       i, j = 0, 0
       while i < len(L):
           while j < len(L):
               if (bool(set(L[i]) & set(L[j])) and i != j):
                   L[i] = sorted(L[i] + list(set(L[j]) - set(L[i])))
                   L.pop(j)
               j += 1
           i += 1
       return concat(L)

Moreover, I would like to do it using only basic python and not that much library. Any idea ? Thanks

Example of list where I get the error :

[[0, 64], [1, 120, 172], [2, 130], [3, 81, 102], [5, 126], [6, 176], [7, 21, 94], [8, 111, 167], [9, 53, 60, 138], [10, 102, 179], [11, 45, 72], [12, 53, 129], [14, 35, 40, 58, 188], [15, 86], [18, 70, 94], [19, 28], [20, 152], [21, 24], [22, 143, 154], [23, 110, 171], [24, 102, 144], [25, 73, 106, 187], [26, 189], [28, 114, 137], [29, 148], [30, 39], [31, 159], [33, 44, 132, 139], [34, 81, 100, 136, 185], [35, 53], [37, 61, 138], [38, 144, 147, 165], [41, 42, 174], [42, 74, 107, 162], [43, 99, 123], [44, 71, 122, 126], [45, 74, 144], [47, 94, 151], [48, 114, 133], [49, 130, 144], [50, 51], [51, 187], [52, 124, 142, 146, 167, 184], [54, 97], [55, 94], [56, 88, 128, 166], [57, 63, 80], [59, 89], [60, 106, 134, 142], [61, 128, 145], [62, 70], [63, 73, 76, 101, 106], [64, 80, 176], [65, 187, 198], [66, 111, 131, 150], [67, 97, 128, 159], [68, 85, 128], [69, 85, 169], [70, 182], [71, 123], [72, 85, 94], [73, 112, 161], [74, 93, 124, 151, 191], [75, 163], [76, 99, 106, 129, 138, 152, 179], [77, 89, 92], [78, 146, 156], [79, 182], [82, 87, 130, 179], [83, 148], [84, 110, 146], [85, 98, 137, 177], [86, 198], [87, 101], [88, 134, 149], [89, 99, 107, 130, 193], [93, 147], [95, 193], [96, 98, 109], [104, 105], [106, 115, 154, 167, 190], [107, 185, 193], [111, 144, 153], [112, 128, 188], [114, 136], [115, 146], [118, 195], [119, 152], [121, 182], [124, 129, 177], [125, 156], [126, 194], [127, 198], [128, 149], [129, 153], [130, 164, 196], [132, 140], [133, 181], [135, 165, 170, 171], [136, 145], [141, 162], [142, 170, 187], [147, 171], [148, 173], [150, 180], [153, 191], [154, 196], [156, 165], [157, 177], [158, 159], [159, 172], [161, 166], [162, 192], [164, 184, 197], [172, 199], [186, 197], [187, 192]]
Kamloops
  • 407
  • 4
  • 13

6 Answers6

7

As mentioned by @ScottBoston this is a graph problem, known as connected components, I suggest you used networkx as indicated by @ScottBoston, in case you cannot here is a version without networkx:

from itertools import combinations


def bfs(graph, start):
    visited, queue = set(), [start]
    while queue:
        vertex = queue.pop(0)
        if vertex not in visited:
            visited.add(vertex)
            queue.extend(graph[vertex] - visited)
    return visited


def connected_components(G):
    seen = set()
    for v in G:
        if v not in seen:
            c = set(bfs(G, v))
            yield c
            seen.update(c)


def graph(edge_list):
    result = {}
    for source, target in edge_list:
        result.setdefault(source, set()).add(target)
        result.setdefault(target, set()).add(source)
    return result


def concat(l):
    edges = []
    s = list(map(set, l))
    for i, j in combinations(range(len(s)), r=2):
        if s[i].intersection(s[j]):
            edges.append((i, j))
    G = graph(edges)

    result = []
    unassigned = list(range(len(s)))
    for component in connected_components(G):
        union = set().union(*(s[i] for i in component))
        result.append(sorted(union))
        unassigned = [i for i in unassigned if i not in component]

    result.extend(map(sorted, (s[i] for i in unassigned)))

    return result


print(concat([[1, 2], [3, 4, 5], [0, 4]]))
print(concat([[1], [1, 2], [0, 2]]))
print(concat([[1, 2], [2, 3], [3, 4]]))

Output

[[0, 3, 4, 5], [1, 2]]
[[0, 1, 2]]
[[1, 2, 3, 4]]
Dani Mesejo
  • 61,499
  • 6
  • 49
  • 76
  • 3
    Thanks a lot ! I already knew about networkx, actually the problem I want to solve is a graph problem and that's why I needed a concat function. But I needed to do it from scratch, thanks again ! – Kamloops Dec 27 '18 at 17:24
6

You can use networkx library because is a graph theory and connected components problem:

import networkx as nx

l = [[1,2],[3,4,5],[0,4]]
#l = [[1],[1,2],[0,2]]
#l = [[1, 2], [2, 3], [3, 4]]

G = nx.Graph()

#Add nodes to Graph    
G.add_nodes_from(sum(l, []))

#Create edges from list of nodes
q = [[(s[i],s[i+1]) for i in range(len(s)-1)] for s in l]

for i in q:

    #Add edges to Graph
    G.add_edges_from(i)

#Find all connnected components in graph and list nodes for each component
[list(i) for i in nx.connected_components(G)]

Output:

[[1, 2], [0, 3, 4, 5]]

Output if uncomment line 2 and comment line 1:

[[0, 1, 2]]

Likewise for line 3:

[[1, 2, 3, 4]]
Scott Boston
  • 147,308
  • 15
  • 139
  • 187
1

Here's an iterative approach, should be about as efficient as you can probably get in pure python. One thing is having to spend an extra pass removing duplicates at the end.

original_list = [[1,2],[3,4,5],[0,4]]

mapping = {}
rev_mapping = {}

for i, candidate in enumerate(original_list):
    sentinel = -1
    for item in candidate:
        if mapping.get(item, -1) != -1:
            merge_pos = mapping[item]
            #update previous list with all new candidates
            for item in candidate:
                mapping[item] = merge_pos
            rev_mapping[merge_pos].extend(candidate)
            break
    else:
        for item in candidate:
            mapping[item] = i
        rev_mapping.setdefault(i, []).extend(candidate)

result = [list(set(item)) for item in rev_mapping.values()]
print(result)

Output:

[[1, 2], [0, 3, 4, 5]]
Paritosh Singh
  • 6,034
  • 2
  • 14
  • 33
0

You can use a recursive version of the breadth-first search with no imports:

def group_vals(d, current, _groups, _seen, _master_seen):
  if not any(set(current)&set(i) for i in d if i not in _seen):
    yield list({i for b in _groups for i in b})
    for i in d:
       if i not in _master_seen:
         yield from group_vals(d, i, [i], [i], _master_seen+[i])
  else:
    for i in d:
       if i not in _seen and set(current)&set(i):
         yield from group_vals(d, i, _groups+[i], _seen+[i], _master_seen+[i])

def join_data(_data):
  _final_result = list(group_vals(_data, _data[0], [_data[0]], [_data[0]], []))
  return [a for i, a in enumerate(_final_result) if a not in _final_result[:i]]

c = [[[1,2],[3,4,5],[0,4]], [[1],[1,2],[0,2]], [[1, 2], [2, 3], [3, 4]]]
print(list(map(join_data, c)))

Output:

[
 [[1, 2], [0, 3, 4, 5]], 
  [[0, 1, 2]], 
 [[1, 2, 3, 4]]
]
Ajax1234
  • 69,937
  • 8
  • 61
  • 102
  • I can't get your code to work on the bigger test case that the OP has posted. Also for other readers who are running this code in Python 2.7, you have to convert the `yield from` statement as shown [here](https://stackoverflow.com/a/17581397/1586200). – Autonomous Dec 27 '18 at 17:24
  • @ParagS.Chandakkar the recursion introduces a slight delay for longer input lists. Your answer fails on the last list the OP posted BTW. – Ajax1234 Dec 27 '18 at 17:29
  • Yes, I know, even my answer fails for the longer test case. I was just wondering where the flaw is in my approach. I looked at your approach and it also faces the same issue. – Autonomous Dec 27 '18 at 17:31
0

if you want it in simple form here is solution :

def concate(l):
    len_l = len(l)
    i = 0
    while i < (len_l - 1):
        for j in range(i + 1, len_l):

            # i,j iterate over all pairs of l's elements including new 
            # elements from merged pairs. We use len_l because len(l)
            # may change as we iterate

            i_set = set(l[i])
            j_set = set(l[j])

            if len(i_set.intersection(j_set)) > 0:
                # Remove these two from list
                l.pop(j)
                l.pop(i)

                # Merge them and append to the orig. list
                ij_union = list(i_set.union(j_set))
                l.append(ij_union)

                # len(l) has changed
                len_l -= 1

                # adjust 'i' because elements shifted
                i -= 1

                # abort inner loop, continue with next l[i]
                break

        i += 1
    return l
sahasrara62
  • 10,069
  • 3
  • 29
  • 44
0

If you want to see how the algorithm works, you can use this script which uses the connectivity matrix:

import numpy

def Concatenate(L):
    result = []
    Ls_length = len(L)
    conn_mat = numpy.zeros( [Ls_length, Ls_length] )  # you can use a list of lists instead of a numpy array
    check_vector = numpy.zeros( Ls_length )           # you can use a list instead of a numpy array

    idx1 = 0
    while idx1 < Ls_length:
        idx2 = idx1 + 1
        conn_mat[idx1,idx1] = 1   # the diaginal is always 1 since every set intersects itself.
        while idx2 < Ls_length:
            if bool(set(L[idx1]) & set(L[idx2]) ): # 1 if the sets idx1 idx2 intersect, and 0 if they don't.
                conn_mat[idx1,idx2] = 1            # this is clearly a symetric matrix. 
                conn_mat[idx2,idx1] = 1
            idx2 += 1
        idx1 += 1
    print (conn_mat)

    idx = 0
    while idx < Ls_length:
        if check_vector[idx] == 1:     # check if we already concatenate the idx element of L.
            idx += 1
            continue

        connected = GetAllPositiveIntersections(idx, conn_mat, Ls_length)
        r = set()
        for idx_ in connected:
            r = r.union(set(L[idx_]))
            check_vector[idx_] = 1

        result.append(list(r))

    return result

def GetAllPositiveIntersections(idx, conn_mat, Ls_length):
    # the elements that intersect idx are coded with 1s in the ids' row (or column, since it's a symetric matrix) of conn_mat. 
    connected = [idx]
    i = 0
    idx_ = idx
    while i < len(connected):
        j = 0
        while j < Ls_length:
            if bool(conn_mat[idx_][j]):
                if j not in connected: connected.append(j)   
            j += 1
        i += 1
        if i < len(connected): idx_ = connected[i]

    return list(set(connected))

Then you just:

L = [[1,2],[3,4,5],[0,4]]
r = Concatenate(L)
print(r)
mm_
  • 1,566
  • 2
  • 18
  • 37