2

I have hierarchical trees of the form:

(((A,B),(C,D)),E)

Is there a simple way to re-arrange/plot this (e.g. Python)?

enter image description here

pnuts
  • 58,317
  • 11
  • 87
  • 139
hello_there_andy
  • 2,039
  • 2
  • 21
  • 51

4 Answers4

2

Run this series of replacements:

  • ( -> <ul><li>
  • ) -> </ul></li>
  • , -> </li><li>

Then open it in the browser


Or if it's a python object, use pprint:

>>> x = ((("A","B"),("C","D")),"E")
>>> from pprint import pprint
>>> pprint(x, width=1)
((('A',
   'B'),
  ('C',
   'D')),
 'E')

Or a custom python solution:

from itertools import izip

def first_then(first, then):
    yield first
    while True:
        yield then

def tree_lines(x):
    if type(x) is tuple:
        if len(x) == 1:
            # singular tuple
            for p, l in izip(first_then('--', '  '), tree_lines(x[0])):
                yield p + l
        else:
            first, rest, last = x[0], x[1:-1], x[-1]

            # first entry
            for p, l in izip(first_then('T-', '| '), tree_lines(first)):
                yield p + l

            # middle entries
            for y in rest:
                for p, l in izip(first_then('>-', '| '), tree_lines(y)):
                    yield p + l

            # last entries
            for p, l in izip(first_then('L-', '  '), tree_lines(last)):
                yield p + l
    else:
        yield str(x)

x = ((('A','B'),('C','D')),'E')

for l in tree_lines(x):
    print(l)
Eric
  • 95,302
  • 53
  • 242
  • 374
1

A while back I wrote something for making textual representations of trees. It might be suitable here.

class Node:
    def __init__(self, value):
        self.value = value
        self.children = []


pipe = chr(179)
t = chr(195)
l = chr(192)
backwards_r = chr(191)

def printable(node, seq_is_last_child = []):
    """returns a string representation of the given node"""
    ret = ""
    if seq_is_last_child:
        for b in seq_is_last_child[:-1]:
            if b:
                ret = ret + "  "
            else:
                ret = ret + pipe + " "
        if seq_is_last_child[-1]:
            ret = ret + l + " "
        else:
            ret = ret + t + " "
    ret = ret + node.value
    for idx, c in enumerate(node.children):
        ret = ret + "\n" + printable(c, seq_is_last_child + [idx == len(node.children)-1])
    return ret

def make_node(t):
    """creates a Node system from a nested tuple"""
    ret = Node(backwards_r)
    for child in t:
        if isinstance(child, str):
            ret.children.append(Node(child))
        else:
            ret.children.append(make_node(child))
    return ret

x = ((('A','B'),('C','D')),'E')
print printable(make_node(x))

Result:

┐
├ ┐
│ ├ ┐
│ │ ├ A
│ │ └ B
│ └ ┐
│   ├ C
│   └ D
└ E

Edit: Unicode version:

class Node:
    def __init__(self, value):
        self.value = value
        self.children = []

def printable(node, seq_is_last_child = []):
    """returns a string representation of the given node"""
    ret = ""
    if seq_is_last_child:
        for b in seq_is_last_child[:-1]:
            if b:
                ret = ret + "  "
            else:
                ret = ret + "│ "
        if seq_is_last_child[-1]:
            ret = ret + "└ "
        else:
            ret = ret + "├ "
    ret = ret + node.value
    for idx, c in enumerate(node.children):
        ret = ret + "\n" + printable(c, seq_is_last_child + [idx == len(node.children)-1])
    return ret

def make_node(t):
    """creates a Node system from a nested tuple"""
    ret = Node("┐")
    for child in t:
        if isinstance(child, str):
            ret.children.append(Node(child))
        else:
            ret.children.append(make_node(child))
    return ret

x = ((('A','B'),('C','D')),'E')
print printable(make_node(x))
Kevin
  • 74,910
  • 12
  • 133
  • 166
1

You can use an iterative function which finds the depth and height of each point:

def locate(xs, depth, cnt):
    from functools import partial
    if isinstance(xs, str):
        return dict(depth=depth, height=- next(cnt), inner=None, txt=xs)
    else:
        fn = partial(locate, depth=depth+1, cnt=cnt)
        loc = list(map(fn, xs))
        height = np.mean([x['height'] for x in loc])
        return dict(depth=depth, height=height, inner=loc, txt=None)

Above function returns a dictionary, and we need another function which walks through this dictionary and plots each node:

def walk(loc, ax):
    col, lw = 'DarkBlue', 2
    x, y, inner, txt = map(loc.get, ['depth', 'height', 'inner', 'txt'])
    if not inner:
        ax.text(x, y, ' ' + txt, ha='left', va='center', size='large')
        return y
    else:
        ys =[walk(t, ax) for t in inner]
        for y1 in ys:
            ax.plot([x, x+1], [y1, y1], color=col, linewidth=lw)
        ax.plot([x, x], [min(ys), max(ys)], color=col, linewidth=lw)
        return y

The location function is invoked at a top level by passing a count iterator, and returns a dictionary which includes all the necessary information to plot each level:

from itertools import count
xs = ((('A','B'),('C','D')),'E',)
loc = locate(xs, 0, count())

The dictionary along with an axis is passed to walk function:

fig = plt.figure(figsize=(2, 3))
ax = fig.add_axes([.05, .05, .9, .9])                
walk(loc, ax)

plt.axis('off')
xl, yl = ax.get_xlim(), ax.get_ylim()
ax.set_xlim(xl[0] - .05, xl[1] + .05)
ax.set_ylim(yl[0] - .05, yl[1] + .05)

The result would be:

tree1

As another example:

xs = ((('A','B','C','D'),('E'),('F1','F2'),'G'),(('H1','H2'),('I','J','K'),'L'))

tree2

behzad.nouri
  • 74,723
  • 18
  • 126
  • 124
1

Using scipy's cluster.hierarchy.dendrogram:

import re
import numpy as np
import matplotlib.pyplot as plt
import scipy.cluster.hierarchy as hier
import scipy.spatial.distance as dist
import itertools as IT

def parse_nested(text, left=r'[(]', right=r'[)]', sep=r','):
    """ http://stackoverflow.com/a/17141899/190597 (falsetru) """
    pat = r'({}|{}|{})'.format(left, right, sep)
    tokens = re.split(pat, text)    
    stack = [[]]
    for x in tokens:
        if not x: continue
        if re.match(sep, x): continue
        if re.match(left, x):
            stack[-1].append([])
            stack.append(stack[-1][-1])
        elif re.match(right, x):
            stack.pop()
            if not stack:
                raise ValueError('error: opening bracket is missing')
        else:
            stack[-1].append(x)
    if len(stack) > 1:
        print(stack)
        raise ValueError('error: closing bracket is missing')
    return stack.pop()

def place_points(datalist, x=IT.count(), y=1):
    retval = []
    for item in datalist:
        if isinstance(item, list):
            next(x)
            retval.extend(place_points(item, x=x, y=y*2.5))
        else:
            retval.append([item, (next(x), y)])
    return retval

# data = '(((A,B,G),(C,D,F)),(E,(H,I,J,(K,L,M,N))))'
data = '((A,B),(C,D),E)'
labels, points = zip(*place_points(parse_nested(data)))
d = dist.pdist(points)
linkage_matrix = hier.linkage(d)
P = hier.dendrogram(linkage_matrix, labels=labels)
plt.show()

enter image description here

unutbu
  • 842,883
  • 184
  • 1,785
  • 1,677