I'm studying bayesian blocks. And I don't understand how to make my function bayesian_blocks() work with binned data. I have data in .txt file where 0 column - bin start (in time), 1st - bin end, 2nd - counts in bin. Like this: -1.0000 -0.9960 6.0000 And I don`t understand how to change my code to work it properly. Please, please, please help me! My code:
import numpy as np
from scipy import stats
import pylab as pl
def test_dist():
np.random.seed(0)
x = np.concatenate([stats.cauchy(-5, 1.8).rvs(500),
stats.cauchy(-4, 0.8).rvs(2000),
stats.cauchy(-1, 0.3).rvs(500),
stats.cauchy(2, 0.8).rvs(1000),
stats.cauchy(4, 1.5).rvs(500)])
x = x[(x > -15) & (x < 15)]
return x
def create_data(name): # May be error in this function?
x = []
t = []
with open(name, 'r') as f:
for line in f:
data_list = line.split()
x.append(float(data_list[2]) - float(data_list[3]) + float(data_list[4]))
t.append(float(data_list[0]))
narr = np.array([x, bin_size])
narr = narr[(narr > -10) & (narr < 25)]
return narr
def plot_blocks(t):
pl.hist(t, bins=200, histtype='stepfilled', alpha=0.2, density=True)
pl.hist(t, bins=bayesian_blocks(t), color='black', histtype='step', density=True)
return pl.show()
def bayesian_blocks(t):
"""Bayesian Blocks Implementation
By Jake Vanderplas. License: BSD
Based on algorithm outlined in http://adsabs.harvard.edu/abs/2012arXiv1207.5578S
Parameters
----------
t : ndarray, length N
data to be histogrammed
Returns
-------
bins : ndarray
array containing the (N+1) bin edges
Notes
-----
This is an incomplete implementation: it may fail for some
datasets. Alternate fitness functions and prior forms can
be found in the paper listed above.
"""
# copy and sort the array
t = np.sort(t)
N = t.size
# create length-(N + 1) array of cell edges
edges = np.concatenate([t[:1],
0.5 * (t[1:] + t[:-1]),
t[-1:]])
block_length = t[-1] - edges
# arrays needed for the iteration
nn_vec = np.ones(N)
best = np.zeros(N, dtype=float)
last = np.zeros(N, dtype=int)
# -----------------------------------------------------------------
# Start with first data cell; add one cell at each iteration
# -----------------------------------------------------------------
for K in range(N):
# Compute the width and count of the final bin for all possible
# locations of the K^th changepoint
width = block_length[:K + 1] - block_length[K + 1] + 1
count_vec = np.cumsum(nn_vec[:K + 1][::-1])[::-1]
# evaluate fitness function for these possibilities
fit_vec = count_vec * (np.log(count_vec/width) - 1)
fit_vec -= 4 # 4 comes from the prior on the number of changepoints
fit_vec[1:] += best[:K]
# find the max of the fitness: this is the K^th changepoint
i_max = np.argmax(fit_vec)
last[K] = i_max
best[K] = fit_vec[i_max]
# -----------------------------------------------------------------
# Recover changepoints by iteratively peeling off the last block
# -----------------------------------------------------------------
change_points = np.zeros(N, dtype=int)
i_cp = N
ind = N
while True:
i_cp -= 1
change_points[i_cp] = ind
if ind == 0:
break
ind = last[ind - 1]
change_points = change_points[i_cp:]
# print(change_points)
return edges[change_points]
def create_and_show():
#arr = test_dist()
arr = create_data('data_n8.txt')
plot_blocks(arr)
if __name__ == '__main__':
create_and_show()
I need to get a standard histogram and bayesian blocks over it