I am trying to build custom geospatial indexes using MongoDB's B-tree indexes, as I find Mongo's native implementation limiting for my own case. In order to fulfill my geospatial queries which will effectively search Mongo using a compound index, I need to filter by the location.locIndexKey
field with multiple ranges, among other fields.
So far, the only solution I could come up with to support this kind of queryies was using Mongo's $or
operator. However, this performed badly since it's an or
query and Mongo has to examine the same keys on the index again and again. In order to overcome this inefficiency, I need a way to make Mongo use multiple index-bounds on that field instead of replicating the query with or
phrases for each defined bound in the query.
This is my query:
db.users.find({
"gender":2,
"preferences.feed.gender":1,
"age":{"$gte":18,"$lte":55},
"feedPrefChangeDay":{"$gte":1553461200,"$lte":1554066000},
"$or":[{"location.locIndexKey":{"$gte":NumberLong(1493233547543052300),"$lte":NumberLong(1493242343636074500)}},{"location.locIndexKey":{"$gte":NumberLong(1493242343636074500),"$lte":NumberLong(1493251139729096700)}},{"location.locIndexKey":{"$gte":NumberLong(1493287011295953000),"$lte":NumberLong(1493287148734906400)}}]
}).limit(20);
As you can see, in order to express multiple ranges on field location.locIndexKey
, I had to use the $or
operator. This is the shortened version of query planner's execution stats:
{
"executionSuccess" : true,
"nReturned" : 0,
"executionTimeMillis" : 17762,
"totalKeysExamined" : 196192,
"totalDocsExamined" : 0,
"executionStages" : {
"stage" : "LIMIT",
"nReturned" : 0,
"executionTimeMillisEstimate" : 351,
"works" : 196193,
"advanced" : 0,
"needTime" : 196191,
"needYield" : 0,
"saveState" : 19944,
"restoreState" : 19944,
"isEOF" : 1,
"invalidates" : 0,
"limitAmount" : 20,
"inputStage" : {
"stage" : "FETCH",
"nReturned" : 0,
"executionTimeMillisEstimate" : 351,
"works" : 196193,
"advanced" : 0,
"needTime" : 196191,
"needYield" : 0,
"saveState" : 19944,
"restoreState" : 19944,
"isEOF" : 1,
"invalidates" : 0,
"docsExamined" : 0,
"alreadyHasObj" : 0,
"inputStage" : {
"stage" : "OR",
"nReturned" : 0,
"executionTimeMillisEstimate" : 351,
"works" : 196192,
"advanced" : 0,
"needTime" : 196191,
"needYield" : 0,
"saveState" : 19944,
"restoreState" : 19944,
"isEOF" : 1,
"invalidates" : 0,
"dupsTested" : 0,
"dupsDropped" : 0,
"recordIdsForgotten" : 0,
"inputStages" : [
{
"stage" : "IXSCAN",
"nReturned" : 0,
"executionTimeMillisEstimate" : 10,
"works" : 4534,
"advanced" : 0,
"needTime" : 4533,
"needYield" : 0,
"saveState" : 19944,
"restoreState" : 19944,
"isEOF" : 1,
"invalidates" : 0,
"keyPattern" : {
"gender" : 1.0,
"preferences.feed.gender" : 1.0,
"age" : 1.0,
"feedPrefChangeDay" : 1.0,
"location.locIndexKey" : 1.0
},
"indexName" : "gender_1_preferences.feed.gender_1_age_1_feedPrefChangeDay_1_location.locIndexKey_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"gender" : [],
"preferences.feed.gender" : [],
"age" : [],
"feedPrefChangeDay" : [],
"location.locIndexKey" : []
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"gender" : [
"[2.0, 2.0]"
],
"preferences.feed.gender" : [
"[1.0, 1.0]"
],
"age" : [
"[18.0, 55.0]"
],
"feedPrefChangeDay" : [
"[1553461200.0, 1554066000.0]"
],
"location.locIndexKey" : [
"[1493569998101151700, 1493572197124407300]"
]
},
"keysExamined" : 4534,
"seeks" : 4534,
"dupsTested" : 0,
"dupsDropped" : 0,
"seenInvalidated" : 0
},
{
"stage" : "IXSCAN",
"nReturned" : 0,
"executionTimeMillisEstimate" : 0,
"works" : 4534,
"advanced" : 0,
"needTime" : 4533,
"needYield" : 0,
"saveState" : 19944,
"restoreState" : 19944,
"isEOF" : 1,
"invalidates" : 0,
"keyPattern" : {
"gender" : 1.0,
"preferences.feed.gender" : 1.0,
"age" : 1.0,
"feedPrefChangeDay" : 1.0,
"location.locIndexKey" : 1.0
},
"indexName" : "gender_1_preferences.feed.gender_1_age_1_feedPrefChangeDay_1_location.locIndexKey_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"gender" : [],
"preferences.feed.gender" : [],
"age" : [],
"feedPrefChangeDay" : [],
"location.locIndexKey" : []
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"gender" : [
"[2.0, 2.0]"
],
"preferences.feed.gender" : [
"[1.0, 1.0]"
],
"age" : [
"[18.0, 55.0]"
],
"feedPrefChangeDay" : [
"[1553461200.0, 1554066000.0]"
],
"location.locIndexKey" : [
"[1493587581697261600, 1493587590287196200]"
]
},
"keysExamined" : 4534,
"seeks" : 4534,
"dupsTested" : 0,
"dupsDropped" : 0,
"seenInvalidated" : 0
},
{
"stage" : "IXSCAN",
"nReturned" : 0,
"executionTimeMillisEstimate" : 0,
"works" : 4534,
"advanced" : 0,
"needTime" : 4533,
"needYield" : 0,
"saveState" : 19944,
"restoreState" : 19944,
"isEOF" : 1,
"invalidates" : 0,
"keyPattern" : {
"gender" : 1.0,
"preferences.feed.gender" : 1.0,
"age" : 1.0,
"feedPrefChangeDay" : 1.0,
"location.locIndexKey" : 1.0
},
"indexName" : "gender_1_preferences.feed.gender_1_age_1_feedPrefChangeDay_1_location.locIndexKey_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"gender" : [],
"preferences.feed.gender" : [],
"age" : [],
"feedPrefChangeDay" : [],
"location.locIndexKey" : []
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"gender" : [
"[2.0, 2.0]"
],
"preferences.feed.gender" : [
"[1.0, 1.0]"
],
"age" : [
"[18.0, 55.0]"
],
"feedPrefChangeDay" : [
"[1553461200.0, 1554066000.0]"
],
"location.locIndexKey" : [
"[1493981215449940000, 1493990011542962200]"
]
},
"keysExamined" : 4534,
"seeks" : 4534,
"dupsTested" : 0,
"dupsDropped" : 0,
"seenInvalidated" : 0
}
If you check indexBounds
above, you will see that each range for location.locIndexKey
is applied to a single query and combined with or
. However, if I choose to run the same query using Mongo's native geospatial operator $geoWithin
:
db.users.find({
"gender" : 2.0,
"preferences.feed.gender" : 1.0,
"age" : {
"$gte" : 18.0,
"$lte" : 55.0
},
"feedPrefChangeDay" : {
"$gte" : 1553461200.0,
"$lte" : 1554066000.0
},
"location.loc" : {
"$geoWithin" : {
"$centerSphere" : [
[
0.0,
0.0
],
0.00784806152880239
]
}
}
}).limit(20);
I get the following response from the query planner:
{
"executionSuccess" : true,
"nReturned" : 0,
"executionTimeMillis" : 7,
"totalKeysExamined" : 4506,
"totalDocsExamined" : 0,
"executionStages" : {
"stage" : "LIMIT",
"nReturned" : 0,
"executionTimeMillisEstimate" : 0,
"works" : 4506,
"advanced" : 0,
"needTime" : 4505,
"needYield" : 0,
"saveState" : 35,
"restoreState" : 35,
"isEOF" : 1,
"invalidates" : 0,
"limitAmount" : 20,
"inputStage" : {
"stage" : "FETCH",
"filter" : {
"location.loc" : {
"$geoWithin" : {
"$centerSphere" : [
[
0.0,
0.0
],
0.00784806152880239
]
}
}
},
"nReturned" : 0,
"executionTimeMillisEstimate" : 0,
"works" : 4506,
"advanced" : 0,
"needTime" : 4505,
"needYield" : 0,
"saveState" : 35,
"restoreState" : 35,
"isEOF" : 1,
"invalidates" : 0,
"docsExamined" : 0,
"alreadyHasObj" : 0,
"inputStage" : {
"stage" : "IXSCAN",
"nReturned" : 0,
"executionTimeMillisEstimate" : 0,
"works" : 4506,
"advanced" : 0,
"needTime" : 4505,
"needYield" : 0,
"saveState" : 35,
"restoreState" : 35,
"isEOF" : 1,
"invalidates" : 0,
"keyPattern" : {
"gender" : 1.0,
"preferences.feed.gender" : 1.0,
"age" : 1.0,
"feedPrefChangeDay" : 1.0,
"location.loc" : "2dsphere"
},
"indexName" : "gender_1_preferences.feed.gender_1_age_1_feedPrefChangeDay_1_location.loc_2dsphere",
"isMultiKey" : false,
"multiKeyPaths" : {
"gender" : [],
"preferences.feed.gender" : [],
"age" : [],
"feedPrefChangeDay" : [],
"location.loc" : []
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"gender" : [
"[2.0, 2.0]"
],
"preferences.feed.gender" : [
"[1.0, 1.0]"
],
"age" : [
"[18.0, 55.0]"
],
"feedPrefChangeDay" : [
"[1553461200.0, 1554066000.0]"
],
"location.loc" : [
"[360287970189639680, 360287970189639680]",
"[378302368699121664, 378302368699121664]",
"[382805968326492160, 382805968326492160]",
"[383931868233334784, 383931868233334784]",
"[384213343210045440, 384213343210045440]",
"[384230935396089856, 384230935396089856]",
"[384235333442600960, 384235333442600960]",
"[384236432954228736, 384236432954228736]",
"[384236432954228737, 384236982710042623]",
"[384266119768178688, 384266119768178688]",
"[384266119768178689, 384274915861200895]",
"[384274915861200897, 384283711954223103]",
"[384283711954223104, 384283711954223104]",
"[384283711954223105, 384318896326311935]",
"[384318896326311937, 384354080698400767]",
"[1080863910568919040, 1080863910568919040]",
"[1134907106097364992, 1134907106097364992]",
"[1148417904979476480, 1148417904979476480]",
"[1151795604700004352, 1151795604700004352]",
"[1152640029630136320, 1152640029630136320]",
"[1152789563211513857, 1152798359304536063]",
"[1152798359304536064, 1152798359304536064]",
"[1152798359304536065, 1152807155397558271]",
"[1152833543676624896, 1152833543676624896]",
"[1152846737816158208, 1152846737816158208]",
"[1152850036351041536, 1152850036351041536]",
"[1152850586106855425, 1152851135862669311]",
"[1152851135862669312, 1152851135862669312]",
"[1152851135862669313, 1152859931955691519]",
"[1152868728048713728, 1152868728048713728]",
"[1152877524141735937, 1152886320234758143]",
"[1152886320234758145, 1152921504606846975]",
"[1152921504606846977, 1152956688978935807]",
"[1152956688978935809, 1152991873351024639]",
"[1152991873351024640, 1152991873351024640]",
"[1152991873351024641, 1152992423106838527]",
"[1152992972862652416, 1152992972862652416]",
"[1152996271397535744, 1152996271397535744]",
"[1153009465537069056, 1153009465537069056]",
"[1153035853816135681, 1153044649909157887]",
"[1153044649909157888, 1153044649909157888]",
"[1153044649909157889, 1153053446002180095]",
"[1153202979583557632, 1153202979583557632]",
"[1154047404513689600, 1154047404513689600]",
"[1157425104234217472, 1157425104234217472]",
"[1170935903116328960, 1170935903116328960]",
"[1224979098644774912, 1224979098644774912]",
"[1921488928515293185, 1921524112887382015]",
"[1921524112887382017, 1921559297259470847]",
"[1921559297259470848, 1921559297259470848]",
"[1921559297259470849, 1921594481631559679]",
"[1921606026503651329, 1921606576259465215]",
"[1921606576259465216, 1921606576259465216]",
"[1921607675771092992, 1921607675771092992]",
"[1921612073817604096, 1921612073817604096]",
"[1921629666003648512, 1921629666003648512]",
"[1921911140980359168, 1921911140980359168]",
"[1923037040887201792, 1923037040887201792]",
"[1927540640514572288, 1927540640514572288]",
"[1945555039024054272, 1945555039024054272]"
]
},
"keysExamined" : 4506,
"seeks" : 4506,
"dupsTested" : 0,
"dupsDropped" : 0,
"seenInvalidated" : 0
}
}
}
}
As you can see, Mongo takes advantage of multiple index bounds to fulfill this query and its much more effective.
I believe the inefficiency in the original query happens because Mongo's query planner doesn't check what's inside the $or
expression. I think that it should be more clever to understand that there is just one field with multiple ranges inside the expression and build the query using multiple index bounds. Sadly, this is not the case.
My question: is there any way that I can force Mongo to use multiple index bounds for my query, so that it's as efficient as the native geospatial query?
Any help would be appreciated.
Thanks!