Okay, here is one way to approach it.
First of all, we cannot really use the Rcpp::DataFrame
object type in Rcpp
as it is really just a loose list of vectors. So, I've simplified the problem by creating an Rcpp::NumericMatrix
that matches the sampled data. From here, we can use a std::map
to count unique rows. This is simplified since the Rcpp::NumericMatrix
has a .row()
member function that enables subsetting by row. So, each row is then converted to a std::vector<T>
, which is used as a key for the map. Then, we add each std::vector<T>
to the std::map
and increment its count value. Lastly, we export the std::map
to the desired matrix format.
#include <Rcpp.h>
// [[Rcpp::export]]
Rcpp::NumericMatrix unique_rows( Rcpp::NumericMatrix & v)
{
// Initialize a map
std::map<std::vector<double>, int> count_rows;
// Clear map
count_rows.clear();
// Count each element
for (int i = 0; i != v.nrow(); ++i) {
// Pop from R Matrix
Rcpp::NumericVector a = v.row(i);
// Convert R vector to STD vector
std::vector<double> b = Rcpp::as< std::vector<double> >(a);
// Add to map
count_rows[ b ] += 1;
}
// Make output matrix
Rcpp::NumericMatrix o(count_rows.size(), v.ncol()+1);
// Hold count iteration
unsigned int count = 0;
// Start at the 1st element and move to the last element in the map.
for( std::map<std::vector<double>,int>::iterator it = count_rows.begin();
it != count_rows.end(); ++it )
{
// Grab the key of the matrix
std::vector<double> temp_o = it->first;
// Tack on the vector, probably can be speed up.
temp_o.push_back(it->second);
// Convert from std::vector to Rcpp::NumericVector
Rcpp::NumericVector mm = Rcpp::wrap(temp_o);
// Store in a NumericMatrix
o.row(count) = mm;
count++;
}
return o;
}
Then we go with:
# Example input: five rows, of which only three are distinct.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned, whereas `TRUE` is a reserved constant.
a <- matrix(c(1, 1, 1, 1, 2,
              1, 1, 1, 1, 2,
              2, 2, 2, 2, 3,
              2, 2, 2, 2, 3,
              3, 3, 3, 3, 1), ncol = 5, byrow = TRUE)
# One row per distinct input row, with its occurrence count in the last column.
unique_rows(a)
Giving:
[,1] [,2] [,3] [,4] [,5] [,6]
[1,] 1 1 1 1 2 2
[2,] 2 2 2 2 3 2
[3,] 3 3 3 3 1 1