-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathbinData.py~
More file actions
executable file
·111 lines (100 loc) · 6.73 KB
/
Copy pathbinData.py~
File metadata and controls
executable file
·111 lines (100 loc) · 6.73 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
import math
import numpy as np
from logList import logList
#Returns one array of x bin centers and one array containing 3 related arrays: [0] mean binned y values,
# [1] the added-in-quadrature-and-divided-by-n y errors (if given), and [2] the number of ys in a bin.
#Unless specified (by entering trim = 0), these outputs are trimmed to remove bins that contained
# no points.
#Possible bin schemes: 'bine_size' (all bins have equal size), 'n_binned' (all bins have equal number, to within one, binned points)
def binData(x_vals, y_vals, y_errs = None, bin_centers = None, bin_borders = None, n_bins = 20.0, trim = 1, bin_scheme = 'bin_size', bin_scale = 'linear', computed_value = 'mean', return_binned_xs = 0):
bin_scale = bin_scale.lower()
if y_errs == None:
y_errs = [0.0 for y_val in y_vals]
x_min = min(x_vals)
x_max = max(x_vals)
if bin_scheme == 'bin_size':
if bin_centers == None and bin_borders != None:
if len(bin_borders) <= 1:
bin_centers = bin_borders
else:
bin_centers = [(bin_border[i+1] + bin_border[i]) / 2.0 for i in range(len(bin_borders) - 1)]
#determine bin_centers from bin_borders
else:
if bin_centers == None and x_min != x_max:
if bin_scale == 'linear' or bin_scale == line:
bin_centers = np.arange(x_min, x_max , (x_max - x_min) / float(n_bins)) + (x_max - x_min) / (2.0 * float(n_bins) )
elif bin_scale == 'log':
bin_centers = logList(x_min, x_max, n_bins)
elif bin_centers == None:
bin_centers = np.arange(x_min - n_bins * 1.0 / 2.0, x_max + n_bins * 1.0 / 2.0, n_bins)
if len(bin_centers) <= 1:
bin_borders = [x_min, x_max]
else:
bin_borders = ( [bin_centers[0]- (bin_centers[1] - bin_centers[0]) / 2.0]
+ [ bin_centers[i] - (bin_centers[i] - bin_centers[i-1]) / 2.0 for i in range(1, len(bin_centers)) ]
+ [ bin_centers[len(bin_centers) - 1] + (bin_centers[len(bin_centers) - 1] - bin_centers[len(bin_centers) - 2] ) / 2.0 ] )
binned_xs = [[] for bin in bin_centers]
binned_ys = [[[] for bin in bin_centers],[[] for bin in bin_centers],[0 for bin in bin_centers]]
for i in range(len(y_vals)):
binned = 0
for j in range(len(bin_centers)):
if x_vals[i] <= bin_borders[j+1] and binned == 0:
binned_xs[j] = binned_xs[j] + [x_vals[i]]
binned_ys[0][j] = binned_ys[0][j] + [y_vals[i]]
binned_ys[1][j] = binned_ys[1][j] + [y_errs[i]]
binned_ys[2][j] = binned_ys[2][j] + 1
binned = 1
break
#if still unbinned, must exceed highest boundary, which means it belongs in last bucket
if binned == 0:
last_index = len(bin_centers) - 1
binned_xs[last_index] = binned_xs[last_index] + [x_vals[i]]
binned_ys[0][last_index] = binned_ys[0][last_index] + [y_vals[i]]
binned_ys[1][last_index] = binned_ys[1][last_index] + [y_errs[i]] # + y_errs[i] ** 2.0
binned_ys[2][last_index] = binned_ys[2][last_index] + 1
elif bin_scheme == 'n_binned':
#To divide m elements into n bins, each bin has int(m / n) elements (int round down).
# The remaining m - int(m / n) * n are distributed among the first m - int(m / n) * n (thus the + 1 below).
n_per_bin = [int(len(y_vals) / n_bins) + 1 if i < (len(y_vals) - int(len(y_vals) / n_bins) * n_bins) else int(len(y_vals) / n_bins) for i in range(n_bins)]
#range of INDECES covered by each bin (not an x or y range)
bin_ranges = [ [0 + sum(n_per_bin[0:i]), 0 + sum(n_per_bin[0:i]) + n_per_bin[i]] for i in range(n_bins) ]
sorted_arrays = zip(x_vals, y_vals, y_errs)
sorted_arrays.sort()
x_vals = [elem[0] for elem in sorted_arrays]
y_vals = [elem[1] for elem in sorted_arrays]
y_errs = [elem[2] for elem in sorted_arrays]
#technically bin x-means, rather than bin centers, but that strikes me as a decent stand in
bin_centers = [ np.mean(x_vals[bin_range[0]:bin_range[1]]) for bin_range in bin_ranges]
binned_xs = [x_vals[bin_range[0]:bin_range[1]] for bin_range in bin_ranges ]
binned_ys = [ [ y_vals[bin_range[0]:bin_range[1]] for bin_range in bin_ranges ],
[ [y_err for y_err in y_errs[bin_range[0]:bin_range[1] ] ] for bin_range in bin_ranges ],
n_per_bin ]
else:
print 'bin_scheme ' + bin_scheme + ' not recognized. Returning original data...'
return x_vals, y_vals, y_errs
if computed_value == 'mean' or (computed_value == 'wmean' and 0.0 in y_errs):
binned_ys[0] = [np.mean(ys) for ys in binned_ys[0]]
binned_ys[1] = [math.sqrt(sum([err ** 2.0 for err in binned_ys[1][i]]) / (len(binned_ys[1][i]) ** 2.0)) if binned_ys[2][i] > 0 else 0.0 for i in range(len(binned_ys[1])) ]
elif computed_value == 'wmean':
weights = [ [1.0 / err ** 2.0 for err in errs] for errs in binned_ys[1] ]
binned_ys[0] = [ np.sum(np.array(binned_ys[0][i]) * np.array(weights[i])) / sum(weights[i]) for i in range(len(binned_ys[0])) ]
binned_ys[1] = [ len(weights[i]) ** 0.5 / sum(weights[i]) for i in range(len(weights)) ]
elif computed_value == 'median':
binned_ys[0] = [np.median(ys) for ys in binned_ys[0]]
binned_ys[1] = [math.sqrt(sum([err ** 2.0 for err in binned_ys[1][i]]) / (len(binned_ys[1][i]) ** 2.0)) if binned_ys[2][i] > 0 else 0.0 for i in range(len(binned_ys[1])) ]
#otherwise, we don't compute anything and just return the y_values as they are
#else:
#binned_ys[0] = [binned_ys[0][i] / float(binned_ys[2][i]) if binned_ys[2][i] > 0 else 0.0 for i in range(len(binned_ys[0])) ]
#binned_ys[1] = [math.sqrt(binned_ys[1][i]) / float(binned_ys[2][i]) if binned_ys[2][i] > 0 else 0.0 for i in range(len(binned_ys[1])) ]
if trim:
trimmed_binned_xs = [binned_xs[i] for i in range(len(binned_xs)) if binned_ys[2][i] > 0]
trimmed_binned_ys = [[binned_data[i] for i in range(len(binned_data)) if binned_ys[2][i] > 0 ] for binned_data in binned_ys ]
#trimmed_binned_ys = [binned_y for binned_y in binned_ys if binned_ys[2] > 0]
trimmed_bin_centers = [bin_centers[i] for i in range(len(bin_centers)) if binned_ys[2][i] > 0 ]
binned_xs = trimmed_binned_xs
binned_ys = trimmed_binned_ys
bin_centers = trimmed_bin_centers
if return_binned_xs:
return bin_centers, binned_xs, binned_ys
else:
return bin_centers, binned_ys