1

I have a catalogue containing three columns. I would like to read them into an array and exclude some of the data points from my catalogue by selecting them on two different plots. If I call the columns of my catalogue 'm', 'rh', and 'rg', I would like to exclude data points by drawing boxes in an 'm-rh' diagram and in an 'm-rg' plot. How should this be done? I came across this example, but it doesn't return any values, such as a numpy array.

Any help, including where I should start or how it should be done, will be appreciated.

Community
  • 1
  • 1
Dalek
  • 4,168
  • 11
  • 48
  • 100
  • I know you want to do it via Python, but try [TOPCAT](http://www.star.bris.ac.uk/~mbt/topcat/). Here you can load your file and then plot them and select your subset, which you can then save. – Srivatsan Aug 10 '15 at 12:39

1 Answers1

4

Basically, you're asking how to interactively select points in a rectangular region.

There's a matplotlib widget which will handle part of this (interactively drawing a rectangle) for you: matplotlib.widgets.RectangleSelector. You'll need to handle what you want to do with the rectangular region, though.

As a basic example, let's interactively highlight points inside a rectangle (this is an inefficient way to do that, but we'll need to build on this example to do what you want). After the figure window is closed, this will print out the points not selected (~ operates as logical_not on numpy arrays):

import numpy as np
import matplotlib.pyplot as plt
from matplotlib.widgets import RectangleSelector

def main():
    """Scatter 100 random points, let the user box-select some, print the rest.

    A ``Highlighter`` is attached to the axes so that dragging a rectangle
    marks the enclosed points. Once the figure window is closed, the
    coordinates of every point that was *not* selected are printed.
    """
    x, y = np.random.random((2, 100))
    fig, ax = plt.subplots()
    ax.scatter(x, y, color='black')
    highlighter = Highlighter(ax, x, y)
    plt.show()

    selected_regions = highlighter.mask
    # Print the points _not_ selected (``~`` negates the boolean mask).
    # ``print`` is called as a function so the script runs on Python 3.
    print(x[~selected_regions], y[~selected_regions])

class Highlighter(object):
    """Accumulate rectangle selections over a scatter plot and highlight them.

    The instance is handed to ``RectangleSelector`` as its callback; each
    completed drag adds the enclosed points to ``mask`` and redraws a yellow
    overlay on top of them. After the figure is closed, ``mask`` holds the
    union of everything that was selected.
    """

    def __init__(self, ax, x, y):
        """Attach the selector to ``ax`` for the points ``(x, y)``."""
        self.x, self.y = x, y
        self.ax = ax
        self.canvas = ax.figure.canvas
        # Nothing is selected to begin with.
        self.mask = np.zeros(x.shape, dtype=bool)

        # Initially-empty overlay; its offsets are replaced on each selection.
        self._highlight = ax.scatter([], [], s=200, color='yellow', zorder=10)

        # The instance itself acts as the selection callback (see __call__).
        self.selector = RectangleSelector(ax, self, useblit=True)

    def __call__(self, event1, event2):
        """Selector callback: merge the new rectangle into the selection."""
        newly_hit = self.inside(event1, event2)
        # In-place union, so the existing ``mask`` array object is updated.
        np.logical_or(self.mask, newly_hit, out=self.mask)

        chosen = self.mask
        overlay_points = np.column_stack([self.x[chosen], self.y[chosen]])
        self._highlight.set_offsets(overlay_points)
        self.canvas.draw()

    def inside(self, event1, event2):
        """Return a boolean mask of the points strictly inside the rectangle.

        The rectangle is spanned by the data coordinates (``xdata``,
        ``ydata``) of the two events, in either order. Points lying exactly
        on an edge are not included.
        """
        # A polygon containment test would work here as well.
        left, right = sorted([event1.xdata, event2.xdata])
        bottom, top = sorted([event1.ydata, event2.ydata])
        within_x = (self.x > left) & (self.x < right)
        within_y = (self.y > bottom) & (self.y < top)
        return within_x & within_y

# Run the demo only when executed as a script, so that importing this module
# does not open a figure window as a side effect.
if __name__ == "__main__":
    main()

However, you have an additional wrinkle, as you have two linked plots. You want a selection on the X-Y plot to also select things on the X-Z plot. Let's modify things to handle that:

import numpy as np
import matplotlib.pyplot as plt
from matplotlib.widgets import RectangleSelector

def main():
    """Show linked X-Y and X-Z scatter plots with shared box selection.

    Three random columns of 100 values are generated (``z`` scaled by 10).
    The two stacked panels share their x axis, and a ``Highlighter`` links
    them so that a rectangle drawn in either panel marks the same points in
    both. Once the figure window is closed, the values of every point that
    was *not* selected are printed.
    """
    x, y, z = np.random.random((3, 100))
    z *= 10
    fig, axes = plt.subplots(figsize=(6, 8), nrows=2, sharex=True)
    axes[0].scatter(x, y, color='black')
    axes[1].scatter(x, z, color='black')
    axes[0].set(ylabel='Y')
    # The lower panel plots z against x, so label its vertical axis 'Z'.
    axes[1].set(xlabel='X', ylabel='Z')

    highlighter = Highlighter(axes, x, y, z)
    plt.show()

    selected_regions = highlighter.mask
    # ``print`` is called as a function so the script runs on Python 3.
    print(x[~selected_regions], y[~selected_regions], z[~selected_regions])

class Highlighter(object):
    """Shared rectangle selection across an X-Y panel and an X-Z panel.

    Each of the two axes gets its own ``RectangleSelector``. A rectangle
    drawn in either panel adds the enclosed points to the single shared
    ``mask``, and both panels are then redrawn with a yellow overlay on
    every selected point.
    """

    def __init__(self, axes, x, y, z):
        """Attach selectors to ``axes[0]`` (x vs y) and ``axes[1]`` (x vs z)."""
        self.axes = axes
        self.canvas = axes[0].figure.canvas
        self.x, self.y, self.z = x, y, z
        # Nothing is selected to begin with.
        self.mask = np.zeros(x.shape, dtype=bool)

        # One initially-empty overlay scatter per panel, in panel order.
        overlays = []
        for panel in axes:
            overlays.append(
                panel.scatter([], [], s=200, color='yellow', zorder=10))
        self._highlights = overlays

        self._select1 = RectangleSelector(axes[0], self.select_xy, useblit=True)
        self._select2 = RectangleSelector(axes[1], self.select_xz, useblit=True)

    def select_xy(self, event1, event2):
        """Selector callback for the X-Y panel."""
        hits = self.inside(event1, event2, self.x, self.y)
        self.mask |= hits
        self.update()

    def select_xz(self, event1, event2):
        """Selector callback for the X-Z panel."""
        hits = self.inside(event1, event2, self.x, self.z)
        self.mask |= hits
        self.update()

    def update(self):
        """Move both overlays onto the currently selected points and redraw."""
        chosen = self.mask
        picked_x = self.x[chosen]
        # Overlay 0 sits on the x-y panel, overlay 1 on the x-z panel.
        for overlay, vertical in zip(self._highlights, (self.y, self.z)):
            overlay.set_offsets(np.column_stack([picked_x, vertical[chosen]]))

        self.canvas.draw()

    def inside(self, event1, event2, x, y):
        """Return a boolean mask of ``(x, y)`` strictly inside the rectangle.

        The rectangle is spanned by the data coordinates (``xdata``,
        ``ydata``) of the two events, in either order.
        """
        left, right = sorted([event1.xdata, event2.xdata])
        bottom, top = sorted([event1.ydata, event2.ydata])
        within_x = (x > left) & (x < right)
        within_y = (y > bottom) & (y < top)
        return within_x & within_y

# Run the demo only when executed as a script, so that importing this module
# does not open a figure window as a side effect.
if __name__ == "__main__":
    main()
Joe Kington
  • 275,208
  • 71
  • 604
  • 463