1

I have a collection named test containing the documents listed at the end of this question. I need to fetch documents based on their "value" field, which I can easily do for a single value with the query below.

// Fetch every document in the "test" collection whose "value" field equals 100.
db.test.find({ value: 100 })

But the real problem is that I have a list of "value" fields, such as [100, 104, 200152, ...]. This list can be really long, and I want the result in the format given below in order to reduce the number of Mongo queries, because they are taking too much time: if the list of values is too large, I currently have to perform multiple Mongo queries to fetch all the records.

{100:[
    /* 1 */

    {
        "_id" : "C1",
        "value" : 100,
        "lastUpdatedTime" : ISODate("2019-11-04T00:00:00.000Z"),
        "timetaken" : 3.0
    }

    /* 2 */
    {
        "_id" : "C2",
        "value" : 100,
        "lastUpdatedTime" : ISODate("2019-11-04T00:00:00.000Z"),
        "timetaken" : 3.0
    }

    /* 3 */
    {
        "_id" : "C3",
        "value" : 100,
        "lastUpdatedTime" : ISODate("2019-11-04T00:00:00.000Z"),
        "timetaken" : 3.0
    }

    /* 4 */
    {
        "_id" : "C4",
        "value" : 100,
        "lastUpdatedTime" : ISODate("2019-11-04T00:00:00.000Z"),
        "timetaken" : 3.0
    }

    /* 5 */
    {
        "_id" : "CO",
        "value" : 100,
        "lastUpdatedTime" : ISODate("2019-11-04T00:00:00.000Z"),
        "timetaken" : 3.0
    }

    /* 6 */
    {
        "_id" : "DD",
        "value" : 100,
        "lastUpdatedTime" : ISODate("2019-11-04T00:00:00.000Z"),
        "timetaken" : 3.0
    }

    /* 7 */
    {
        "_id" : "EX",
        "value" : 100,
        "lastUpdatedTime" : ISODate("2019-11-04T00:00:00.000Z"),
        "timetaken" : 3.0
    }],

104:

    [{
        "_id" : "AU",
        "value" : 104,
        "lastUpdatedTime" : ISODate("2019-11-04T00:00:00.000Z"),
        "timetaken" : 3.0
    }],
200152:

    [
    {
        "_id" : "GenFile",
        "value" : 200152,
        "lastUpdatedTime" : ISODate("2019-11-04T00:00:00.000Z"),
        "timetaken" : 3.0
    }
    ]

Documents in the DB:

/* 1 */
{
    "_id" : "AU",
    "value" : 104,
    "lastUpdatedTime" : ISODate("2019-11-04T00:00:00.000Z"),
    "timetaken" : 3.0
}

/* 2 */
{
    "_id" : "C1",
    "value" : 100,
    "lastUpdatedTime" : ISODate("2019-11-04T00:00:00.000Z"),
    "timetaken" : 3.0
}

/* 3 */
{
    "_id" : "C2",
    "value" : 100,
    "lastUpdatedTime" : ISODate("2019-11-04T00:00:00.000Z"),
    "timetaken" : 3.0
}

/* 4 */
{
    "_id" : "C3",
    "value" : 100,
    "lastUpdatedTime" : ISODate("2019-11-04T00:00:00.000Z"),
    "timetaken" : 3.0
}

/* 5 */
{
    "_id" : "C4",
    "value" : 100,
    "lastUpdatedTime" : ISODate("2019-11-04T00:00:00.000Z"),
    "timetaken" : 3.0
}

/* 6 */
{
    "_id" : "CO",
    "value" : 100,
    "lastUpdatedTime" : ISODate("2019-11-04T00:00:00.000Z"),
    "timetaken" : 3.0
}

/* 7 */
{
    "_id" : "DD",
    "value" : 100,
    "lastUpdatedTime" : ISODate("2019-11-04T00:00:00.000Z"),
    "timetaken" : 3.0
}

/* 8 */
{
    "_id" : "EX",
    "value" : 100,
    "lastUpdatedTime" : ISODate("2019-11-04T00:00:00.000Z"),
    "timetaken" : 3.0
}

/* 9 */
{
    "_id" : "GS_SEG",
    "value" : 124755350,
    "lastUpdatedTime" : ISODate("2019-11-04T00:00:00.000Z"),
    "timetaken" : 3.0
}

/* 10 */
{
    "_id" : "GenFile",
    "value" : 200152,
    "lastUpdatedTime" : ISODate("2019-11-04T00:00:00.000Z"),
    "timetaken" : 3.0
}
ngShravil.py
  • 4,742
  • 3
  • 18
  • 30

2 Answers2

1

You are searching in the right direction: you will want to prevent your code from spending most of its time on network requests. The MongoDB $in operator (which you can use from pymongo) selects the documents where the value of a field equals any value in the specified array.

In your case it would look like this:

# Set or build a list of the values to look up.
list_with_values = [100, 104, 200152]

# Make one call to the DB, asking for all of the matching records.
# `db` is assumed to be a pymongo Database object (TODO confirm); pymongo
# spells the accessor get_collection(), not the mongo-shell getCollection().
# The filter matches every document whose "value" is any element of the list.
result = db.get_collection("test").find({"value": {"$in": list_with_values}})

Further reference on how the $in operator works: http://docs.mongodb.org/manual/reference/operator/query/in/

marcuse
  • 3,389
  • 3
  • 29
  • 50
  • This will give me the whole documents, and then I again have to process them in Python to separate them by value. Since the number of documents returned by this query will be huge, performing a group-by on "value" in Python will also be time-consuming. – virendra singh deora May 28 '20 at 11:07
  • I understand your problem, there is of course a difference in extracting the data and transforming it. You may take a look at this answer: https://stackoverflow.com/a/38145104/9247792 – marcuse May 28 '20 at 11:16
1

You can use the following aggregation to do the work for you. Note that the keys (100, 104, ...) will be strings instead of numbers: $arrayToObject needs string keys, so I had to convert them with the $toString operator (otherwise I was getting an error).

db.collection.aggregate([
  {
    $group: {
      _id: "$value",
      root: {
        $push: "$$ROOT"
      }
    }
  },
  {
    $project: {
      k: {
        $toString: "$_id"
      },
      v: "$root",
      _id: 0
    }
  },
  {
    $group: {
      _id: null,
      x: {
        $push: "$$ROOT"
      }
    }
  },
  {
    $project: {
      _id: 0,
      x: {
        $arrayToObject: "$x"
      }
    }
  },
  {
    $replaceRoot: {
      newRoot: "$x"
    }
  }
])

Following will be the output:

[
  {
    "1.24755e+08": [
      {
        "_id": "GS_SEG",
        "lastUpdatedTime": ISODate("2019-11-04T00:00:00Z"),
        "timetaken": 3,
        "value": 1.2475535e+08
      }
    ],
    "100": [
      {
        "_id": "C1",
        "lastUpdatedTime": ISODate("2019-11-04T00:00:00Z"),
        "timetaken": 3,
        "value": 100
      },
      {
        "_id": "C2",
        "lastUpdatedTime": ISODate("2019-11-04T00:00:00Z"),
        "timetaken": 3,
        "value": 100
      },
      {
        "_id": "C3",
        "lastUpdatedTime": ISODate("2019-11-04T00:00:00Z"),
        "timetaken": 3,
        "value": 100
      },
      {
        "_id": "C4",
        "lastUpdatedTime": ISODate("2019-11-04T00:00:00Z"),
        "timetaken": 3,
        "value": 100
      },
      {
        "_id": "CO",
        "lastUpdatedTime": ISODate("2019-11-04T00:00:00Z"),
        "timetaken": 3,
        "value": 100
      },
      {
        "_id": "DD",
        "lastUpdatedTime": ISODate("2019-11-04T00:00:00Z"),
        "timetaken": 3,
        "value": 100
      },
      {
        "_id": "EX",
        "lastUpdatedTime": ISODate("2019-11-04T00:00:00Z"),
        "timetaken": 3,
        "value": 100
      }
    ],
    "104": [
      {
        "_id": "AU",
        "lastUpdatedTime": ISODate("2019-11-04T00:00:00Z"),
        "timetaken": 3,
        "value": 104
      }
    ],
    "200152": [
      {
        "_id": "GenFile",
        "lastUpdatedTime": ISODate("2019-11-04T00:00:00Z"),
        "timetaken": 3,
        "value": 200152
      }
    ]
  }
]
ngShravil.py
  • 4,742
  • 3
  • 18
  • 30