I am trying to port Python median_grouped function from statistics built in module to C++.
Anyway, somewhere I'm missing something because my code doesn't work.
It should be: median_grouped([1, 3, 3, 5, 7], interval=1) = 3.25 and median_grouped([1, 2, 2, 3, 4, 4, 4, 4, 4, 5]) = 3.7.
My code result is always 0 no matter what the input is.
#include <iostream>
#include <vector>
#include <cmath>
#include <algorithm>
using namespace std;
int bisect_left(vector<int> &a, float x, int lo=0, int hi=0){
/*"""Return the index where to insert item x in list a, assuming a is sorted.
The return value i is such that all e in a[:i] have e < x, and all e in
a[i:] have e >= x. So if x already appears in the list, a.insert(x) will
insert just before the leftmost x already there.
Optional args lo (default 0) and hi (default len(a)) bound the
slice of a to be searched.
"""
*/
// First we sort the array
sort(a.begin(), a.end());
if (hi==0){
hi = a.size();
}
while (lo < hi){
int mid = (int)(lo+hi)/2;
if (a[mid] < x){
lo = mid+1;
}
else{
hi = mid;
}
}
return lo;
}
int bisect_right(vector<int> &a, float x, int lo=0, int hi=0){
/*"""Return the index where to insert item x in list a, assuming a is sorted.
The return value i is such that all e in a[:i] have e <= x, and all e in
a[i:] have e > x. So if x already appears in the list, a.insert(x) will
insert just after the rightmost x already there.
Optional args lo (default 0) and hi (default len(a)) bound the
slice of a to be searched.
"""
*/
// First we sort the array
sort(a.begin(), a.end());
if (hi==0){
hi = a.size();
}
while (lo < hi){
int mid = (int)(lo+hi)/2;
if (x < a[mid]){
hi = mid;
}
else{
lo = mid+1;
}
}
return lo;
}
int _find_lteq(vector<int> &a, int x){
//'Locate the leftmost value exactly equal to x'
int i = bisect_left(a, x);
if ((i != (int)a.size()) && (a[i] == x)){
return i;
}
return 0;
}
int _find_rteq(vector<int> &a, int x, int l){
//'Locate the rightmost value exactly equal to x'
int i = bisect_right(a, x, 1, 0);
if ((i != (int)a.size()+1) && (a[i-1] == x)){
return i-1;
}
return 0;
}
float median_grouped(vector<int> &vec, int interval=1){
float L=0.0;
// First we sort the array
sort(vec.begin(), vec.end());
if(vec.size()==0){
return 0;
}
if(vec.size()==1){
return vec[0];
}
/* Find the value at the midpoint. Remember this corresponds to the
center of the class interval. */
int x = vec[(int)vec.size()/2];
for(int i=x; i<interval; i++){
//The lower limit of the median interval.
L = float(x) - float(interval)/2;
}
// Uses bisection search to search for x in data with log(n) time complexity
// Find the position of leftmost occurrence of x in data
int l1 = _find_lteq(vec, x);
// Find the position of rightmost occurrence of x in data[l1...len(data)]
// Assuming always l1 <= l2
int l2 = _find_rteq(vec, x, l1);
int cf = l1;
int f = l2 - l1 + 1;
return L + interval*(vec.size()/2 - cf)/f;
}
vector<int> vec{3, 5, 2, 7, 1, 3};
vector<int> vec_a{1,3,5,7};
vector<int> vec_c{ 1, 3, 4, 2, 7, 5, 8, 6 };
vector<int> a{1, 2, 2, 3, 4, 4, 4, 4, 4, 5};
int main() {
cout<< median_grouped(a,1)<<endl;
cout<< median_grouped(a,2)<<endl;
return 0;
}
And this is the function from statistics built in Python module that I am trying to port to C++:
def median_grouped(data, interval=1):
"""Return the 50th percentile (median) of grouped continuous data.
>>> median_grouped([1, 2, 2, 3, 4, 4, 4, 4, 4, 5])
3.7
>>> median_grouped([52, 52, 53, 54])
52.5
This calculates the median as the 50th percentile, and should be
used when your data is continuous and grouped. In the above example,
the values 1, 2, 3, etc. actually represent the midpoint of classes
0.5-1.5, 1.5-2.5, 2.5-3.5, etc. The middle value falls somewhere in
class 3.5-4.5, and interpolation is used to estimate it.
Optional argument ``interval`` represents the class interval, and
defaults to 1. Changing the class interval naturally will change the
interpolated 50th percentile value:
>>> median_grouped([1, 3, 3, 5, 7], interval=1)
3.25
>>> median_grouped([1, 3, 3, 5, 7], interval=2)
3.5
This function does not check whether the data points are at least
``interval`` apart.
"""
data = sorted(data)
n = len(data)
if n == 0:
raise StatisticsError("no median for empty data")
elif n == 1:
return data[0]
# Find the value at the midpoint. Remember this corresponds to the
# centre of the class interval.
x = data[n//2]
for obj in (x, interval):
if isinstance(obj, (str, bytes)):
raise TypeError('expected number but got %r' % obj)
try:
L = x - interval/2 # The lower limit of the median interval.
except TypeError:
# Mixed type. For now we just coerce to float.
L = float(x) - float(interval)/2
# Uses bisection search to search for x in data with log(n) time complexity
# Find the position of leftmost occurrence of x in data
l1 = _find_lteq(data, x)
# Find the position of rightmost occurrence of x in data[l1...len(data)]
# Assuming always l1 <= l2
l2 = _find_rteq(data, l1, x)
cf = l1
f = l2 - l1 + 1
return L + interval*(n/2 - cf)/f
Could you tell me what's wrong with my C++ code, please?