0

I'm studying bayesian blocks. And I don't understand how to make my function bayesian_blocks() work with binned data. I have data in .txt file where 0 column - bin start (in time), 1st - bin end, 2nd - counts in bin. Like this: -1.0000 -0.9960 6.0000 And I don`t understand how to change my code to work it properly. Please, please, please help me! My code:

import numpy as np
from scipy import stats
import pylab as pl


def test_dist():
    np.random.seed(0)
    x = np.concatenate([stats.cauchy(-5, 1.8).rvs(500),
                        stats.cauchy(-4, 0.8).rvs(2000),
                        stats.cauchy(-1, 0.3).rvs(500),
                        stats.cauchy(2, 0.8).rvs(1000),
                        stats.cauchy(4, 1.5).rvs(500)])

    x = x[(x > -15) & (x < 15)]
    return x


def create_data(name): # May be error in this function?
    x = []
    t = []
    with open(name, 'r') as f:
        for line in f:
            data_list = line.split()

            x.append(float(data_list[2]) - float(data_list[3]) + float(data_list[4]))
            t.append(float(data_list[0]))
    narr = np.array([x, bin_size])
    narr = narr[(narr > -10) & (narr < 25)]
    return narr


def plot_blocks(t):
    pl.hist(t, bins=200, histtype='stepfilled', alpha=0.2, density=True)
    pl.hist(t, bins=bayesian_blocks(t), color='black', histtype='step', density=True)

    return pl.show()


def bayesian_blocks(t):
    """Bayesian Blocks Implementation

    By Jake Vanderplas.  License: BSD
    Based on algorithm outlined in http://adsabs.harvard.edu/abs/2012arXiv1207.5578S

    Parameters
    ----------
    t : ndarray, length N
        data to be histogrammed

    Returns
    -------
    bins : ndarray
        array containing the (N+1) bin edges

    Notes
    -----
    This is an incomplete implementation: it may fail for some
    datasets.  Alternate fitness functions and prior forms can
    be found in the paper listed above.
    """
    # copy and sort the array
    t = np.sort(t)
    N = t.size

    # create length-(N + 1) array of cell edges
    edges = np.concatenate([t[:1],
                            0.5 * (t[1:] + t[:-1]),
                            t[-1:]])
    block_length = t[-1] - edges

    # arrays needed for the iteration
    nn_vec = np.ones(N)
    best = np.zeros(N, dtype=float)
    last = np.zeros(N, dtype=int)

    # -----------------------------------------------------------------
    # Start with first data cell; add one cell at each iteration
    # -----------------------------------------------------------------
    for K in range(N):
        # Compute the width and count of the final bin for all possible
        # locations of the K^th changepoint
        width = block_length[:K + 1] - block_length[K + 1] + 1
        count_vec = np.cumsum(nn_vec[:K + 1][::-1])[::-1]

        # evaluate fitness function for these possibilities
        fit_vec = count_vec * (np.log(count_vec/width) - 1)
        fit_vec -= 4  # 4 comes from the prior on the number of changepoints
        fit_vec[1:] += best[:K]

        # find the max of the fitness: this is the K^th changepoint
        i_max = np.argmax(fit_vec)
        last[K] = i_max
        best[K] = fit_vec[i_max]

    # -----------------------------------------------------------------
    # Recover changepoints by iteratively peeling off the last block
    # -----------------------------------------------------------------
    change_points = np.zeros(N, dtype=int)
    i_cp = N
    ind = N
    while True:
        i_cp -= 1
        change_points[i_cp] = ind
        if ind == 0:
            break
        ind = last[ind - 1]
    change_points = change_points[i_cp:]
    # print(change_points)

    return edges[change_points]


def create_and_show():
    #arr = test_dist()
    arr = create_data('data_n8.txt')
    plot_blocks(arr)


if __name__ == '__main__':
    create_and_show()

I need to get a standard histogram and bayesian blocks over it

Progman
  • 16,827
  • 6
  • 33
  • 48
Ruuzaki
  • 1
  • 1

0 Answers0