3

I've having a slight issue when trying to sort by a nested array in Mongodb using MongooseJs.

a) A product contains tasks, each task has subTasks.
b) The task has a order & so does each subTask (task.order & task.subtask.order).

Here is a sample product document:

db.products.find({_id: ObjectId("554a13d4b692088a38f01f3b")})

Result:

{
"_id" : ObjectId("554a13d4b692088a38f01f3b"),
"title" : "product title",
"order" : 3,
"description" : "Description here ",
"status" : "live",
"tasks" : [ 
    {
        "title" : "task 1",
        "description" : "task 1 desc",
        "order" : 10,
        "_id" : ObjectId("554a13d4b692088a38f01f3a"),
        "status" : "live",
        "subTasks" : [ 
            {
                "title" : "task 1 sub 1",
                "content" : "aaa",
                "order" : -2,
                "_id" : ObjectId("554a13d4b692088a38f01f5a"),
                "status" : "live"
            }, 
            {
                "title" : "task 1 sub 2",
                "content" : "aaa",
                "order" : 1,
                "_id" : ObjectId("554a13d4b692088a38f01f3a"),
                "status" : "live"
            }, 
            {
                "title" : "task 1 sub 4",
                "content" : "aaa",
                "order" : 8,
                "_id" : ObjectId("554a13d4b692088a38f01f4a"),
                "status" : "live"
            }, 
            {
                "title" : "task 1 sub 3 ",
                "content" : "aaa",
                "order" : 2,
                "_id" : ObjectId("5550d0a61662211332d9a973"),
                "status" : "live"
            }
        ]
    }, 
    {
        "title" : "task 2",
        "description" : "task desc 2",
        "order" : 1,
        "_id" : ObjectId("5550855f9ee2db4e3958d299"),
        "status" : "live",
        "subTasks" : [ 
            {
                "title" : "task 2 sub 1",
                "content" : "bbb",
                "order" : 1,
                "_id" : ObjectId("55508f459ee2db4e3958d29a"),
                "status" : "live"
            }
        ]
    }, 
    {
        "title" : "task 3",
        "description" : "task 3 desc",
        "order" : 2,
        "_id" : ObjectId("5551b844bb343a620f85f323"),
        "status" : "live",
        "subTasks" : [ 
            {
                "title" : "task 3 sub 2",
                "content" : "cccc",
                "order" : 0,
                "_id" : ObjectId("5551b88abb343a620f85f324"),
                "status" : "live"
            }, 
            {
                "title" : "task 3 sub 4",
                "content" : "cccc",
                "order" : 1,
                "_id" : ObjectId("5551b8f1bb343a620f85f325"),
                "status" : "hidden"
            }, 
            {
                "title" : "task 3 sub 3",
                "content" : "ccc",
                "order" : 2,
                "_id" : ObjectId("5551ba40bb343a620f85f327"),
                "status" : "hidden"
            }, 
            {
                "title" : "task 3 sub 1",
                "content" : "cccc",
                "order" : -1,
                "_id" : ObjectId("5551bcb8c31283c051d30b7c"),
                "status" : "hidden"
            }
        ]
    }
]

}

I'm using the Mongodb Aggregation pipeline to order the tasks & the subTasks within them. Here is what I have so far:

    db.products.aggregate([
    {
        $project: {
            "tasks" : 1
        }
    },
    {
        $match: {
            _id: ObjectId("554a13d4b692088a38f01f3b")
        }
    },
    {
        $unwind: "$tasks"
    },
    {
        $project: {
            "tasks": 1,
            "subTasks": 1
        }
    },
    {
        $unwind: "$tasks.subTasks"
    },
    {
        $sort: {
            "tasks.subTasks.order": 1
        }
    },
    {
        $sort: {
            "tasks.order": 1
        }
    }
])

Results:

{
"result": [
    {
        "_id": ObjectId("554a13d4b692088a38f01f3b"),
        "tasks": {
            "title": "task 2",
            "description": "task desc 2",
            "order": 1,
            "_id": ObjectId("5550855f9ee2db4e3958d299"),
            "status": "live",
            "subTasks": {
                "title": "task 2 sub 1",
                "content": "bbb",
                "order": 1,
                "_id": ObjectId("55508f459ee2db4e3958d29a"),
                "status": "live"
            }
        }
    },
    {
        "_id": ObjectId("554a13d4b692088a38f01f3b"),
        "tasks": {
            "title": "task 3",
            "description": "task 3 desc",
            "order": 2,
            "_id": ObjectId("5551b844bb343a620f85f323"),
            "status": "live",
            "subTasks": {
                "title": "task 3 sub 1",
                "content": "cccc",
                "order": -1,
                "_id": ObjectId("5551bcb8c31283c051d30b7c"),
                "status": "hidden"
            }
        }
    },
    {
        "_id": ObjectId("554a13d4b692088a38f01f3b"),
        "tasks": {
            "title": "task 3",
            "description": "task 3 desc",
            "order": 2,
            "_id": ObjectId("5551b844bb343a620f85f323"),
            "status": "live",
            "subTasks": {
                "title": "task 3 sub 2",
                "content": "cccc",
                "order": 0,
                "_id": ObjectId("5551b88abb343a620f85f324"),
                "status": "live"
            }
        }
    },
    {
        "_id": ObjectId("554a13d4b692088a38f01f3b"),
        "tasks": {
            "title": "task 3",
            "description": "task 3 desc",
            "order": 2,
            "_id": ObjectId("5551b844bb343a620f85f323"),
            "status": "live",
            "subTasks": {
                "title": "task 3 sub 4",
                "content": "cccc",
                "order": 1,
                "_id": ObjectId("5551b8f1bb343a620f85f325"),
                "status": "hidden"
            }
        }
    },
    {
        "_id": ObjectId("554a13d4b692088a38f01f3b"),
        "tasks": {
            "title": "task 3",
            "description": "task 3 desc",
            "order": 2,
            "_id": ObjectId("5551b844bb343a620f85f323"),
            "status": "live",
            "subTasks": {
                "title": "task 3 sub 3",
                "content": "ccc",
                "order": 2,
                "_id": ObjectId("5551ba40bb343a620f85f327"),
                "status": "hidden"
            }
        }
    },
    {
        "_id": ObjectId("554a13d4b692088a38f01f3b"),
        "tasks": {
            "title": "task 1",
            "description": "task 1 desc",
            "order": 10,
            "_id": ObjectId("554a13d4b692088a38f01f3a"),
            "status": "live",
            "subTasks": {
                "title": "task 1 sub 1",
                "content": "aaa",
                "order": -2,
                "_id": ObjectId("554a13d4b692088a38f01f5a"),
                "status": "live"
            }
        }
    },
    {
        "_id": ObjectId("554a13d4b692088a38f01f3b"),
        "tasks": {
            "title": "task 1",
            "description": "task 1 desc",
            "order": 10,
            "_id": ObjectId("554a13d4b692088a38f01f3a"),
            "status": "live",
            "subTasks": {
                "title": "task 1 sub 2",
                "content": "aaa",
                "order": 1,
                "_id": ObjectId("554a13d4b692088a38f01f3a"),
                "status": "live"
            }
        }
    },
    {
        "_id": ObjectId("554a13d4b692088a38f01f3b"),
        "tasks": {
            "title": "task 1",
            "description": "task 1 desc",
            "order": 10,
            "_id": ObjectId("554a13d4b692088a38f01f3a"),
            "status": "live",
            "subTasks": {
                "title": "task 1 sub 3 ",
                "content": "aaa",
                "order": 2,
                "_id": ObjectId("5550d0a61662211332d9a973"),
                "status": "live"
            }
        }
    },
    {
        "_id": ObjectId("554a13d4b692088a38f01f3b"),
        "tasks": {
            "title": "task 1",
            "description": "task 1 desc",
            "order": 10,
            "_id": ObjectId("554a13d4b692088a38f01f3a"),
            "status": "live",
            "subTasks": {
                "title": "task 1 sub 4",
                "content": "aaa",
                "order": 8,
                "_id": ObjectId("554a13d4b692088a38f01f4a"),
                "status": "live"
            }
        }
    }
],
"ok": 1

}

Expected result:

{
"_id": ObjectId("554a13d4b692088a38f01f3b"),
"title": "product title",
"order": 3,
"description": "Description here ",
"status": "live",
"tasks": [
    {
        "title": "task 2",
        "description": "task desc 2",
        "order": 1,
        "_id": ObjectId("5550855f9ee2db4e3958d299"),
        "status": "live",
        "subTasks": [
            {
                "title": "task 2 sub 1",
                "content": "bbb",
                "order": 1,
                "_id": ObjectId("55508f459ee2db4e3958d29a"),
                "status": "live"
            }
        ]
    },
    {
        "title": "task 3",
        "description": "task 3 desc",
        "order": 2,
        "_id": ObjectId("5551b844bb343a620f85f323"),
        "status": "live",
        "subTasks": [
            {
                "title": "task 3 sub 1",
                "content": "cccc",
                "order": -1,
                "_id": ObjectId("5551bcb8c31283c051d30b7c"),
                "status": "hidden"
            },
            {
                "title": "task 3 sub 2",
                "content": "cccc",
                "order": 0,
                "_id": ObjectId("5551b88abb343a620f85f324"),
                "status": "live"
            },
            {
                "title": "task 3 sub 3",
                "content": "ccc",
                "order": 2,
                "_id": ObjectId("5551ba40bb343a620f85f327"),
                "status": "hidden"
            }{
                "title": "task 3 sub 4",
                "content": "cccc",
                "order": 1,
                "_id": ObjectId("5551b8f1bb343a620f85f325"),
                "status": "hidden"
            }
        ]
    }{
        "title": "task 1",
        "description": "task 1 desc",
        "order": 10,
        "_id": ObjectId("554a13d4b692088a38f01f3a"),
        "status": "live",
        "subTasks": [
            {
                "title": "task 1 sub 1",
                "content": "aaa",
                "order": -2,
                "_id": ObjectId("554a13d4b692088a38f01f5a"),
                "status": "live"
            },
            {
                "title": "task 1 sub 2",
                "content": "aaa",
                "order": 1,
                "_id": ObjectId("554a13d4b692088a38f01f3a"),
                "status": "live"
            },
            {
                "title": "task 1 sub 3 ",
                "content": "aaa",
                "order": 2,
                "_id": ObjectId("5550d0a61662211332d9a973"),
                "status": "live"
            },
            {
                "title": "task 1 sub 4",
                "content": "aaa",
                "order": 8,
                "_id": ObjectId("554a13d4b692088a38f01f4a"),
                "status": "live"
            }
        ]
    }
]

I'm really close, all the ordering seems to be working. I just need some help to put the subTasks back inside the parents. Any help greatly appreciated.

Thanks

Neil Lunn
  • 148,042
  • 36
  • 346
  • 317
Sharry
  • 150
  • 1
  • 11

1 Answers1

5

You are making a mistake right from the beginning of your aggregation pipeline

$project: {
            "tasks" : 1
        }

by which you lose all your data. So first of all you need to reserve it doing:

$project: {
             tasks: 1,
             doc: {
                     title: "$title", 
                     order: "$order", 
                     description: "$description", 
                     status: "$status"
                  }
          }

Afterwards perform your $unwinds as you do in your question:

{$unwind: "$tasks"}, {$unwind: "$tasks.subTasks"}

Then do the sorting. You need to do the sorting with compound keys, otherwise the ordering by tasks.subTasks.order will not hold as soon as you sort by tasks.order. So:

{$sort: {"tasks.order": -1, "tasks.subTasks.order": 1}}

And then comes the hard part. You need to $group back the results, and the first step is to $push back the subTasks, but first of all, again you need to preserve the task attributes:

$project: {
             doc: 1, 
             task_id: "$tasks._id", 
             tasks_doc: {
                           title: "$tasks.title", 
                           description: "$tasks.description", 
                           order: "$tasks.order", 
                           status: "$tasks.status"
                        }, 
             subTasks: "$tasks.subTasks"
          }

...collect subTasks:

$group: {
           _id: {
                   _id: "$_id", 
                   task_id: "$task_id", 
                   doc: "$doc", 
                   task_doc: "$tasks_doc"
                }, 
           subTasks: {
                        $push: "$subTasks"
                     }
        }

And the same for the tasks. Pay attention that during the $grouping you also need to project back the task_doc attributes:

$group: {
           _id: {
                   _id: "$_id._id", 
                   doc: "$_id.doc"
                }, 
           tasks: {
                     $push: {
                               _id: "$_id.task_id", 
                               title: "$_id.task_doc.title", 
                               description: "$_id.task_doc.description",
                               order: "$_id.task_doc.order", 
                               status: "$_id.task_doc.status" 
                               subTasks: "$subTasks"
                            }
                  }
        }

And then project back the root doc attributes:

$project: {
             _id: "$_id._id", 
             title: "$_id.doc.title", 
             description: "$_id.doc.description", 
             order: "$_id.doc.order", 
             status: "$_id.doc.status", 
             tasks: 1
          }

That is basically it. Here is the full raw aggregation pipeline, so you can test and see whether you get the desired result:

[
 {$match: {_id: ObjectId("554a13d4b692088a38f01f3b")}}, 
 {$project: {tasks: 1, doc: {title: "$title", order: "$order", description: "$description", status: "$status"}}}, 
 {$unwind: "$tasks"}, 
 {$unwind: "$tasks.subTasks"}, 
 {$sort: {"tasks.order": -1, "tasks.subTasks.order": 1}}, 
 {$project: {doc: 1, task_id: "$tasks._id", tasks_doc: {title: "$tasks.title", description: "$tasks.description", order: "$tasks.order", status: "$tasks.status"}, subTasks: "$tasks.subTasks"}}, 
 {$group: {_id: {_id: "$_id", task_id: "$task_id", doc: "$doc", task_doc: "$tasks_doc"}, subTasks: {$push: "$subTasks"}}}, 
 {$group: {_id: {_id: "$_id._id", doc: "$_id.doc"}, tasks: {$push: {_id: "$_id.task_id", title: "$_id.task_doc.title", description: "$_id.task_doc.description", order: "$_id.task_doc.order", status: "$_id.task_doc.status", subTasks: "$subTasks"}}}}, 
 {$project: {_id: "$_id._id", title: "$_id.doc.title", description: "$_id.doc.description", order: "$_id.doc.order", status: "$_id.doc.status", tasks: 1}}
]

UPDATE

If an array field is empty or does not exist (is null) the $unwind operation on that field will return empty result. The solution to this situation is initially setting up the null/empty field to some zero value, e.g. "<empty-array>". Note, that you have to do this $projection for each array, before its $unwind.

Take a look at this answer about how to use the $ifNull operator. Also check out the $size operator here.

After dealing with this part, you need to $group back the results, and this can be achieved using the $cond operator, to check against the "<empty-array>" value

Community
  • 1
  • 1
bagrat
  • 7,158
  • 6
  • 29
  • 47
  • 2
    While this is an excellent answer(+1), I'd like to add a few things. The first mistake isn't in the pipeline, but in the modeling. The overuse of embedding, well exemplified in OPs data model may well lead to a point where a product reaches the 16MB size limit of BSON documents, aside from the fact that frequent and expensive document migrations are very probable, if not certain. – Markus W Mahlberg May 12 '15 at 17:10
  • Thanks for pointing it out. Having the guarantee that the initial documents are less than 16MB I assume that all the intermediate documents of aggregation pipeline will also fit in the size limit as we do not add any data, moreover we split the initial documents. So I think there is no problem with the size limit, or am I missing something? – bagrat May 12 '15 at 18:01
  • Or you mean the initial data model? – bagrat May 12 '15 at 18:03
  • I was referring to the initial data model ;) – Markus W Mahlberg May 12 '15 at 18:25
  • Ok, so a reasonable solution would be to move at least the `subTasks` into separate solution. – bagrat May 12 '15 at 18:53
  • 3
    I'd have a products collection and a tasks collection, and use [materialized paths](http://docs.mongodb.org/manual/tutorial/model-tree-structures-with-materialized-paths/#model-tree-structures-with-materialized-paths) to reference the parents, and a field referring to the according product. – Markus W Mahlberg May 12 '15 at 19:01
  • Agree, that's a better approach – bagrat May 12 '15 at 19:10
  • Thank you for the well details answer. I agree with the comments above. Each product will only have a maximum of 10 tasks with 3 sub tasks in each. Thanks again :) – Sharry May 13 '15 at 08:01
  • @n9code If no subTasks exist within the task it doesn't bring through the task itself within the response. Is it possible to bring them back too please? eg: If task 2 contains no subTasks: `{ "title" : "task 2", "description" : "task desc 2", "order" : 1, "_id" : ObjectId("5550855f9ee2db4e3958d299"), "status" : "live", "subTasks" : [ ] }` – Sharry May 13 '15 at 11:41
  • Yes, if an array is empty or does not exist, the `$unwind` operation on that field returns an empty result. Let me update the answer. – bagrat May 13 '15 at 11:51
  • Please, much much appreciated. – Sharry May 13 '15 at 12:36
  • @Sharry I think this case goes beyond the scope of the question, so I have explained and gave references for how to solve it, but did not post the explicit solution. If you need more details, it would be better if you post another question, specific for this situation, and we will try to work it out in detail. – bagrat May 13 '15 at 12:45
  • Let us [continue this discussion in chat](http://chat.stackoverflow.com/rooms/77715/discussion-between-sharry-and-n9code). – Sharry May 13 '15 at 12:47