I suppose that one way to do it would be entirely in mapReduce:
First a mapper:
/**
 * Map function for mapReduce. Runs with `this` bound to the current
 * document and emits one pair per field: the key is { name: <field name> }
 * and the value is that field's value. The `_id` field is skipped.
 */
var mapper = function () {
  var doc = this;
  for ( var field in doc ) {
    // The document identifier is not one of the attributes being collected.
    if ( field == '_id' ) {
      continue;
    }
    emit( { name: field }, doc[field] );
  }
};
And then for the reducer:
/**
 * Reduce function for mapReduce: collapses everything emitted for one key
 * into a single document holding the distinct values, in first-seen order.
 *
 * MongoDB may invoke reduce several times for the same key and feed the
 * result of an earlier call back in as one of the `values`. For that reason
 * an incoming element that already has the `{ values: [...] }` shape is
 * treated as a partial result and its contents are merged, rather than the
 * wrapper object itself being stored as a "distinct value".
 *
 * Caveat: a field whose real value is a sub-document with an array property
 * named `values` cannot be told apart from a partial result and will be
 * flattened as well.
 *
 * @param {Object} key - The emitted key, e.g. { name: "color" } (unused).
 * @param {Array} values - Raw emitted values and/or earlier reduce results.
 * @returns {{values: Array}} Document whose `values` array has no duplicates.
 */
var reducer = function ( key, values ) {
  var unique = [];

  // Local helper instead of patching Array.prototype: the prototype change
  // would outlive this call and add enumerable properties to every array.
  // indexOf compares strictly, so 1 and "1" count as different values.
  function addToSet( value ) {
    if ( unique.indexOf( value ) === -1 ) {
      unique.push( value );
    }
  }

  values.forEach(function ( value ) {
    var isPartialResult =
      value !== null &&
      typeof value === 'object' &&
      Array.isArray( value.values );

    if ( isPartialResult ) {
      // Output of a previous reduce pass: merge its members.
      value.values.forEach(function ( inner ) {
        addToSet( inner );
      });
    } else {
      addToSet( value );
    }
  });

  return { values: unique };
};
And then run the operation for the output:
// Run the map-reduce with the two functions above; `out: { inline: 1 }`
// asks for the results in the command reply rather than in a collection.
db.collection.mapReduce(
  mapper,
  reducer,
  { out: { inline: 1 } }
);
Which gives "nice" mapReduce style output:
{
"results" : [
{
"_id" : {
"name" : "color"
},
"value" : {
"values" : [
"red",
"blue",
"green",
"black"
]
}
},
{
"_id" : {
"name" : "size"
},
"value" : {
"values" : [
"small",
"medium",
"large"
]
}
}
],
"timeMillis" : 2,
"counts" : {
"input" : 5,
"emit" : 10,
"reduce" : 2,
"output" : 2
},
"ok" : 1,
}
As long as you are fine with generating the keys, then you can build like this:
The way you are listing the results makes things more difficult, but what would be wrong with the following:
// One $group stage over the whole collection: the constant _id places every
// document in the same group, and each $addToSet accumulates the distinct
// values of its field.
db.collection.aggregate([
  {
    $group: {
      _id: false,
      size: { $addToSet: "$size" },
      color: { $addToSet: "$color" }
    }
  }
]);
Which gives the result:
{
"result" : [
{
"_id" : false,
"size" : [
"large",
"medium",
"small"
],
"color" : [
"black",
"green",
"blue",
"red"
]
}
],
"ok" : 1
}
So you do have the two distinct sets in one pass.
Doing it how you present it is possible, but just more work:
db.collection.aggregate([
  // Stage 1: keep both fields and attach a "name" array listing the field
  // names. The $cond has a constant truthy condition, so it always yields
  // the array; it is only a vehicle for injecting a literal value.
  {
    $project: {
      size: 1,
      color: 1,
      name: { $cond: [ 1, [ "size", "color" ], 0 ] }
    }
  },

  // Stage 2: one copy of each document per entry in "name".
  { $unwind: "$name" },

  // Stage 3: pick the field that corresponds to this copy's "name" and
  // expose it as "value".
  {
    $project: {
      name: 1,
      value: {
        $cond: [
          { $eq: [ "$name", "size" ] },
          "$size",
          "$color"
        ]
      }
    }
  },

  // Stage 4: gather the distinct values under each name.
  {
    $group: {
      _id: "$name",
      values: { $addToSet: "$value" }
    }
  },

  // Stage 5: reshape so the grouping key is reported as "name".
  {
    $project: {
      _id: 0,
      name: "$_id",
      values: 1
    }
  }
]);
And that gives you your projected results.
There is a "funny" use of $cond in there, where the "name" gets assigned, that should be replaceable with the $literal operator in future versions. After that assigned array is unwound there are now two of everything, but that doesn't matter because of the $addToSet operation later.
So then the "value" gets conditionally assigned based on what matched. Group the results on name, and you have two documents keyed by name with the respective values.
Enjoy.