You need to loop over the cursor and update each document using the $set
update operator. To do this efficiently, send the updates as "bulk" operations rather than one request per document. The exact approach differs depending on your MongoDB server version and your PyMongo version.
From MongoDB 3.2 you need to use Bulk Write Operations and the bulkWrite()
method.
// Replace "field" on every matching document with the first element of its
// current "field" value, sending the updates to the server in batches.
var requests = [];
// Select documents where "field.1" exists and project only "field"
// (the _id is still returned and is needed for the per-document filter).
var cursor = db.collection.find( { "field.1": { "$exists": true } }, { "field": 1 } );
cursor.forEach( document => {
    requests.push({
        "updateOne": {
            "filter": { "_id": document._id },
            // The update operator is the top-level key: { "$set": { <field>: <value> } }.
            "update": { "$set": { "field": document.field[0] } }
        }
    });
    // Flush every 1000 queued operations so the pending array stays bounded.
    if (requests.length === 1000) {
        db.collection.bulkWrite(requests);
        requests = [];
    }
});
// Send whatever is left from the last, partially filled batch.
if (requests.length > 0) {
    db.collection.bulkWrite(requests);
}
With the PyMongo 3.0 driver, which provides the bulk_write()
method, the same operation looks like this:
from pymongo import UpdateOne

# Operations queued for the next bulk_write() call.
pending = []
# Documents where "field.1" exists; only "field" (plus _id) is projected.
cursor = db.collection.find({"field.1": {"$exists": True}}, {"field": 1})
for document in cursor:
    selector = {'_id': document['_id']}
    change = {'$set': {'field': document['field'][0]}}
    pending.append(UpdateOne(selector, change))
    if len(pending) == 1000:
        # Send the full batch of 1000 and start collecting a new one.
        db.collection.bulk_write(pending)
        pending = []
if len(pending) > 0:
    # Send the operations left over from the last, partial batch.
    db.collection.bulk_write(pending)
From MongoDB 2.6 you need to use the now deprecated Bulk API.
// Replace "field" on every matching document with the first element of its
// current "field" value, using the legacy Bulk API in batches of 1000.
var bulk = db.collection.initializeUnorderedBulkOp();
var count = 0;
// Same query as in the MongoDB 3.2 version; it is issued again here because
// a cursor that has already been iterated cannot be reused.
var cursor = db.collection.find( { "field.1": { "$exists": true } }, { "field": 1 } );
cursor.forEach(function(document) {
    bulk.find( { "_id": document._id } ).updateOne( { "$set": { "field": document.field[0] } } );
    count++;
    if (count % 1000 === 0) {
        // Send the full batch, then start a fresh bulk builder for the next one.
        bulk.execute();
        bulk = db.collection.initializeUnorderedBulkOp();
    }
});
// Flush only if operations were queued since the last execute(); when the
// total is an exact multiple of 1000 the current bulk is empty and
// executing it would fail.
if (count % 1000 !== 0) {
    bulk.execute();
}
Translated into Python, this gives the following:
# Replace "field" on every matching document with the first element of its
# current "field" value, using the legacy bulk API in batches of 1000.
bulk = db.collection.initialize_unordered_bulk_op()
count = 0
# Same query as in the bulk_write() version; it is issued again here because
# a cursor that has already been iterated cannot be reused.
cursor = db.collection.find({"field.1": {"$exists": True}}, {"field": 1})
for doc in cursor:
    bulk.find({'_id': doc['_id']}).update_one({'$set': {'field': doc['field'][0]}})
    count += 1
    if count % 1000 == 0:
        # Send the full batch, then start a fresh bulk builder for the next one.
        bulk.execute()
        bulk = db.collection.initialize_unordered_bulk_op()
# Flush only if operations were queued since the last execute(); when the
# total is an exact multiple of 1000 the current bulk is empty and
# executing it would fail.
if count % 1000 != 0:
    bulk.execute()