You should be looping over the query results and using .bulkWrite()
to commit the updates in batches. The main thing to note here is what you are actually iterating: the documents in the collection, as well as the members of the target array that you want to transform.
You can either blow away the entire array with its replacement:
// Approach 1: rebuild the whole "item2" array client-side and write it back
// with a single $set per document, sending the writes in batches of 1000.
var ops = [];

// True only for array members that carry a value for both legacy fields.
var hasLegacyFields = function(el) {
    return el !== null && typeof el === "object" &&
        el.subitem1 !== undefined && el.subitem2 !== undefined;
};

db.docs.find({
    "item2": {
        "$elemMatch": {
            "subitem1": { "$exists": true },
            "subitem2": { "$exists": true }
        }
    }
}).forEach(function(doc) {
    // $elemMatch only guarantees that at least one member has both fields, so
    // members of any other shape (e.g. already converted) are passed through
    // untouched instead of being overwritten with undefined values.
    var item2 = doc.item2.map(function(el) {
        if ( !hasLegacyFields(el) )
            return el;
        return { "subitem": { "field1": el.subitem1, "field2": el.subitem2 } };
    });

    ops.push({
        "updateOne": {
            "filter": { "_id": doc._id },
            "update": { "$set": { "item2": item2 } }
        }
    });

    // Test outside array looping: one operation is queued per document
    if ( ops.length === 1000 ) {
        db.docs.bulkWrite(ops);
        ops = [];
    }
});

// Flush whatever is left over from the last partial batch
if ( ops.length > 0 )
    db.docs.bulkWrite(ops);
Or preferably use positional matches as updates:
// Approach 2: queue one positional ("item2.$") update per array member, so
// each write touches only the element it matched. Writes are sent in batches
// of 1000.
var ops = [];

db.docs.find({
    "item2": {
        "$elemMatch": {
            "subitem1": { "$exists": true },
            "subitem2": { "$exists": true }
        }
    }
}).forEach(function(doc) {
    doc.item2.forEach(function(item) {
        // $elemMatch only guarantees that at least one member has both fields.
        // Skip members of any other shape: queuing them would build a filter
        // from undefined values and an update that writes undefined fields.
        if ( item === null || typeof item !== "object" ||
             item.subitem1 === undefined || item.subitem2 === undefined )
            return;

        var updoc = { "subitem": { "field1": item.subitem1, "field2": item.subitem2 } };

        ops.push({
            "updateOne": {
                // The $elemMatch in the filter is what tells the positional $
                // operator which array element to replace.
                "filter": {
                    "_id": doc._id,
                    "item2": {
                        "$elemMatch": {
                            "subitem1": item.subitem1,
                            "subitem2": item.subitem2
                        }
                    }
                },
                "update": { "$set": { "item2.$": updoc } }
            }
        });

        // Test inside the array looping: one operation is queued per element
        if ( ops.length === 1000 ) {
            db.docs.bulkWrite(ops);
            ops = [];
        }
    });
});

// Flush whatever is left over from the last partial batch
if ( ops.length > 0 )
    db.docs.bulkWrite(ops);
The reason why the latter case is better is that the writes are actually atomic for each element, so in high-volume environments you would not get conflicting writes from other processes.
That's the speedy and safe way to transform your current array content. The first way will run a bit faster, but I really would not recommend it on a live system. The second will still be very quick, but since it updates only one array element per operation, there is a bit more work to do.
In both cases the actual "wire communication" with the server happens only once in every one thousand operations, so this removes the overhead of sending the request and waiting for the response of every single update.