You can use the standard library function std::nth_element()
for this.
It should be fairly easy to implement a comparison function (or overload the comparison operator) for your struct. Then you'd just parse the file into a vector of those and be done with it. The algorithm uses a partial sort, linear time on average.
Here's the example given on the documentation site I've linked below:
#include <iostream>
#include <vector>
#include <algorithm>
#include <functional>
int main()
{
std::vector<int> v{5, 6, 4, 3, 2, 6, 7, 9, 3};
std::nth_element(v.begin(), v.begin() + v.size()/2, v.end());
std::cout << "The median is " << v[v.size()/2] << '\n';
std::nth_element(v.begin(), v.begin()+1, v.end(), std::greater<int>());
std::cout << "The second largest element is " << v[1] << '\n';
}
For reference:
Out of curiosity, I have implemented some approaches:
#include <algorithm>
#include <functional>
#include <queue>
#include <set>
#include <vector>
std::vector<int> filter_nth_element(std::vector<int> v, int n) {
auto target = v.begin()+n;
std::nth_element(v.begin(), target, v.end(), std::greater<int>());
std::vector<int> result(v.begin(), target);
return result;
}
std::vector<int> filter_pqueue(std::vector<int> v, int n) {
std::vector<int> result;
std::priority_queue<int, std::vector<int>, std::greater<int>> q;
for (auto i: v) {
q.push(i);
if (q.size() > n) {
q.pop();
}
}
while (!q.empty()) {
result.push_back(q.top());
q.pop();
}
return result;
}
std::vector<int> filter_set(std::vector<int> v, int n) {
std::set<int> s;
for (auto i: v) {
s.insert(i);
if (s.size() > n) {
s.erase(s.begin());
}
}
return std::vector<int>(s.begin(), s.end());
}
std::vector<int> filter_deque(std::vector<int> v, int n) {
std::deque<int> q;
for (auto i: v) {
q.push_back(i);
if (q.size() > n) {
q.erase(std::min_element(q.begin(), q.end()));
}
}
return std::vector<int>(q.begin(), q.end());
}
std::vector<int> filter_vector(std::vector<int> v, int n) {
std::vector<int> q;
for (auto i: v) {
q.push_back(i);
if (q.size() > n) {
q.erase(std::min_element(q.begin(), q.end()));
}
}
return q;
}
And I have made up some tests:
#include <random>
#include <iostream>
#include <chrono>
std::vector<int> filter_nth_element(std::vector<int> v, int n);
std::vector<int> filter_pqueue(std::vector<int> v, int n);
std::vector<int> filter_set(std::vector<int> v, int n);
std::vector<int> filter_deque(std::vector<int> v, int n);
std::vector<int> filter_vector(std::vector<int> v, int n);
struct stopclock {
typedef std::chrono::high_resolution_clock high_resolution_clock;
std::chrono::time_point<high_resolution_clock> start, end;
stopclock() : start(high_resolution_clock::now()) {}
~stopclock() {
using namespace std::chrono;
auto elapsed = high_resolution_clock::now() - start;
auto elapsed_ms = duration_cast<milliseconds>(elapsed);
std::cout << elapsed_ms.count() << " ";
}
};
int main() {
// randomly initialize input array
std::random_device rd;
std::mt19937 gen(rd());
std::uniform_int_distribution<> dist;
std::vector<int> v(10000000);
for (auto &i: v)
i = dist(gen);
// run tests
for (std::vector<int>::size_type x = 5; x <= 100; x+=5) {
// keep this many values
std::cout << x << " ";
{
stopclock t;
auto result = filter_nth_element(v, x);
}
{
stopclock t;
auto result = filter_pqueue(v, x);
}
{
stopclock t;
auto result = filter_set(v, x);
}
{
stopclock t;
auto result = filter_deque(v, x);
}
{
stopclock t;
auto result = filter_vector(v, x);
}
std::cout << "\n";
}
}
And I found it quite interesting to see the relative performance of these approaches (compiled with -O3 - I think I have to think a bit about these results):
