This is an important question since mongodb cannot index null values (i.e. do not query for nulls or you will be waiting for a long time), so it is best to entirely avoid nulls and set default values using setOnInsert
.
Here is a recursive solution to removing nulls:
/**
* RETRIEVES A LIST OF ALL THE KEYS IN A DOCUMENT, WHERE THE VALUE IS 'NULL' OR 'UNDEFINED'
*
* @param doc
* @param keyName
* @param nullKeys
*/
function getNullKeysRecursively(doc, keyName, nullKeys)
{
for (var item_property in doc)
{
// SKIP BASE-CLASS STUFF
if (!doc.hasOwnProperty(item_property))
continue;
// SKIP ID FIELD
if (item_property === "_id")
continue;
// FULL KEY NAME (FOR SUB-DOCUMENTS)
var fullKeyName;
if (keyName)
fullKeyName = keyName + "." + item_property;
else
fullKeyName = item_property;
// DEBUGGING
// print("fullKeyName: " + fullKeyName);
// NULL FIELDS - MODIFY THIS BLOCK TO ADD CONSTRAINTS
if (doc[item_property] === null || doc[item_property] === undefined)
nullKeys[fullKeyName] = 1;
// RECURSE OBJECTS / ARRAYS
else if (doc[item_property] instanceof Object || doc[item_property] instanceof Array)
getNullKeysRecursively(doc[item_property], fullKeyName, nullKeys);
}
}
/**
* REMOVES ALL PROPERTIES WITH A VALUE OF 'NULL' OR 'UNDEFINED'.
* TUNE THE 'LIMIT' VARIABLE TO YOUR MEMORY AVAILABILITY.
* ONLY CLEANS DOCUMENTS THAT REQUIRE CLEANING, FOR EFFICIENCY.
* USES bulkWrite FOR EFFICIENCY.
*
* @param collectionName
*/
function removeNulls(collectionName)
{
var coll = db.getCollection(collectionName);
var lastId = ObjectId("000000000000000000000000");
var LIMIT = 10000;
while (true)
{
// GET THE NEXT PAGE OF DOCUMENTS
var page = coll.find({ _id: { $gt: lastId } }).limit(LIMIT);
if (! page.hasNext())
break;
// BUILD BULK OPERATION
var arrBulkOps = [];
page.forEach(function(item_doc)
{
lastId = item_doc._id;
var nullKeys = {};
getNullKeysRecursively(item_doc, null, nullKeys);
// ONLY UPDATE MODIFIED DOCUMENTS
if (Object.keys(nullKeys).length > 0)
// UNSET INDIVIDUAL FIELDS, RATHER THAN REWRITE THE ENTIRE DOC
arrBulkOps.push(
{ updateOne: {
"filter": { _id: item_doc._id },
"update": { $unset: nullKeys }
} }
);
});
coll.bulkWrite(arrBulkOps, { ordered: false } );
}
}
// GO GO GO
removeNulls('my_collection');
document before:
{
"_id": ObjectId("5a53ed8f6f7c4d95579cb87c"),
"first_name": null,
"last_name": "smith",
"features": {
"first": {
"a": 1,
"b": 2,
"c": null
},
"second": null,
"third" : {},
"fourth" : []
},
"other": [
null,
123,
{
"a": 1,
"b": "hey",
"c": null
}
]
}
document after:
{
"_id" : ObjectId("5a53ed8f6f7c4d95579cb87c"),
"last_name" : "smith",
"features" : {
"first" : {
"a" : 1,
"b" : 2
}
},
"other" : [
null,
123,
{
"a" : 1,
"b" : "hey"
}
]
}
As you can see, it removes null
, undefined
, empty objects and empty arrays. If you need it to be more/less aggressive, it is a matter of modifying the block "NULL FIELDS - MODIFY THIS BLOCK TO ADD CONSTRAINTS".
edits welcome, especially @stennie