The best way to do this is in MongoDB 3.4, using the $split
operator to split your string into an array of substrings, as mentioned here. Because we need to $unwind
the array further down the pipeline, we do this in a sub-pipeline using the $facet
operator for maximum efficiency.
// Count how often each space-separated word occurs in the "foo" field.
// All stages run inside one $facet sub-pipeline, so the tallies come back
// in the "results" array of a single output document.
db.collection.aggregate([
    {
        "$facet": {
            "results": [
                // Split "foo" on single spaces into an array of words.
                {
                    "$project": {
                        "values": { "$split": [ "$foo", " " ] }
                    }
                },
                // Emit one document per word.
                { "$unwind": "$values" },
                // Group identical words and add 1 for every occurrence.
                {
                    "$group": {
                        "_id": "$values",
                        "count": { "$sum": 1 }
                    }
                }
            ]
        }
    }
]);
which produces:
{
"results" : [
{
"_id" : "boo",
"count" : 2
},
{
"_id" : "baz",
"count" : 3
},
{
"_id" : "bar",
"count" : 2
}
]
}
In MongoDB 3.2 and earlier, the only way to do this is with mapReduce.
/**
 * Reduce step for mapReduce: tallies how often each word occurs.
 *
 * Each element of `value` may be either
 *   - an array of words (what the map step emits), or
 *   - an object of word -> count (what this function itself returns).
 * Accepting both shapes keeps the function safe to re-run on its own
 * output, which MongoDB is allowed to do for a key.
 *
 * @param {*} key - The emitted key (unused; every word shares one key).
 * @param {Array} value - The values collected for `key`.
 * @returns {Object} An object mapping each word to its total count.
 */
var reduceFunction = function(key, value) {
    var hasOwn = Object.prototype.hasOwnProperty;
    var results = {};

    // Only own properties count as "seen": a truthiness test would pick up
    // inherited members such as "constructor" or "toString" and corrupt
    // the tally for those words.
    var tally = function(word, amount) {
        results[word] = (hasOwn.call(results, word) ? results[word] : 0) + amount;
    };

    // Iterate `value` directly; the non-standard Array.concat() static
    // helper only made a shallow copy and is missing on standard engines.
    for (var items of value) {
        if (Array.isArray(items)) {
            // Raw word list from the map step: each entry counts once.
            for (var item of items) {
                tally(item, 1);
            }
        } else if (items !== null && typeof items === "object") {
            // Partial result from an earlier reduce pass: merge its counts.
            for (var word of Object.keys(items)) {
                tally(word, items[word]);
            }
        }
    }
    return results;
};
// Run the word count as an inline map-reduce job.
db.collection.mapReduce(
    // Map step: split "foo" on single spaces and emit the word list under
    // one shared key (null) so every document lands in the same bucket.
    function() {
        var words = this.foo.split(" ");
        emit(null, words);
    },
    reduceFunction,
    { "out": { "inline": 1 } }
);
which returns:
{
"results" : [
{
"_id" : null,
"value" : {
"bar" : 2,
"baz" : 3,
"boo" : 2
}
}
],
"timeMillis" : 30,
"counts" : {
"input" : 3,
"emit" : 3,
"reduce" : 1,
"output" : 1
},
"ok" : 1
}
Consider using the .forEach()
method in the reduce function if your MongoDB version doesn't support the for...of
statement.