10

Let's say that we got a collection of users, each with a birthday in BSON type date format.

How can we run a query to find out all the users who got a birthday in the next 30 days ?

Neil Lunn
  • 148,042
  • 36
  • 346
  • 317
quocnguyen
  • 192
  • 2
  • 10
  • was this a homework question? I see a few questions extremely similar to this one within a few days of each other... – Asya Kamsky Mar 04 '14 at 16:57
  • No this wasn't a homework, in fact i'm building a e-commerce website that sent a happy birthday to customer with a voucher code and asking them if they want to buy something before their birthday. You know, it will increase revenue somehow. – quocnguyen Mar 05 '14 at 19:45

4 Answers4

12

Aggregation framework is definitely the right approach - anything that requires JS on the server is a performance problem, while aggregations all run in the server in native code.

While it's possible to transform the birthday into dates of upcoming birthdays and then do a range query, I prefer to do it a slightly different way myself.

The only "prerequisite is to compute today's day of the year". There are ways to do this in various languages, so this could be done in the application layer before calling the aggregation, passing this number to it. I was going to call mine todayDayOfYear but I realized you can let aggregation framework figure it out based on today, so the only variable will be today's date.

var today=new Date();

I'm assuming document that includes name and birthday, adjust appropriately for variations

var p1 = { "$project" : {
            "_id" : 0,
            "name" : 1,
            "birthday" : 1,
            "todayDayOfYear" : { "$dayOfYear" : today }, 
            "dayOfYear" : { "$dayOfYear" : "$birthday"}
} };

Now, project how many days from today till their next birthday:

var p2 = { "$project" : {
        "name" : 1,
        "birthday" : 1,
        "daysTillBirthday" : { "$subtract" : [
             { "$add" : [ 
                     "$dayOfYear",
             { "$cond" : [{"$lt":["$dayOfYear","$todayDayOfYear"]},365,0 ] }
             ] },
             "$todayDayOfYear"
        ] }
} };

Exclude all but the ones within desired range:

var m = { "$match" : { "daysTillBirthday" : { "$lt" : 31 } } };

Now run the aggregation with:

db.collection.aggregate( p1, p2, m );

to get back a list of names, birthdays and days till birthday for all lucky folks whose birthday is within 30 days.

EDIT

@Sean999 caught an interesting edge case - people who were born in a leap year after February 28th will have their calculation off by one. The following is aggregation that correctly adjusts for that:

var p1 = { "$project" : { 
            "_id" : 0,
            "name" : 1,
            "birthday" : 1, 
            "todayDayOfYear" : { "$dayOfYear" : ISODate("2014-03-09T12:30:51.515Z") },
            "leap" : { "$or" : [ 
                  { "$eq" : [ 0, { "$mod" : [ { "$year" : "$birthday" }, 400 ] } ] }, 
                  { "$and" : [ 
                        { "$eq" : [ 0, { "$mod" : [ { "$year" : "$birthday" }, 4 ] } ] }, 
                        { "$ne" : [ 0, { "$mod" : [ { "$year" : "$birthday" }, 100 ] } ] } ] } ] },
            "dayOfYear" : { "$dayOfYear" : "$birthday" } } };

var p1p = { "$project" : {
                "name" : 1,
                "birthday" : 1,
                "todayDayOfYear" : 1,
                "dayOfYear" : { "$subtract" : [ 
                      "$dayOfYear", 
                      { "$cond" : [ { "$and" : [ "$leap", { "$gt" : [ "$dayOfYear", 59 ] } ] }, 1, 0 ] } ] }
        }
}

p2 and m stay the same as above.

Test input:

db.birthdays.find({},{name:1,birthday:1,_id:0})
{ "name" : "Ally", "birthday" : ISODate("1975-06-12T00:00:00Z") }
{ "name" : "Ben", "birthday" : ISODate("1968-04-03T00:00:00Z") }
{ "name" : "Mark", "birthday" : ISODate("1949-12-23T00:00:00Z") }
{ "name" : "Paul", "birthday" : ISODate("2014-03-04T15:59:05.374Z") }
{ "name" : "Paul", "birthday" : ISODate("2011-02-07T00:00:00Z") }
{ "name" : "Sean", "birthday" : ISODate("2004-01-31T00:00:00Z") }
{ "name" : "Tim", "birthday" : ISODate("2008-02-28T00:00:00Z") }
{ "name" : "Sandy", "birthday" : ISODate("2005-01-31T00:00:00Z") }
{ "name" : "Toni", "birthday" : ISODate("2009-02-28T00:00:00Z") }
{ "name" : "Sam", "birthday" : ISODate("2005-03-31T00:00:00Z") }
{ "name" : "Max", "birthday" : ISODate("2004-03-31T00:00:00Z") }
{ "name" : "Jen", "birthday" : ISODate("1971-04-03T00:00:00Z") }
{ "name" : "Ellen", "birthday" : ISODate("1996-02-28T00:00:00Z") }
{ "name" : "Fanny", "birthday" : ISODate("1996-02-29T00:00:00Z") }
{ "name" : "Gene", "birthday" : ISODate("1996-03-01T00:00:00Z") }
{ "name" : "Edgar", "birthday" : ISODate("1997-02-28T00:00:00Z") }
{ "name" : "George", "birthday" : ISODate("1997-03-01T00:00:00Z") }

Output:

db.birthdays.aggregate( p1, p1p, p2, {$sort:{daysTillBirthday:1}});
{ "name" : "Sam", "birthday" : ISODate("2005-03-31T00:00:00Z"), "daysTillBirthday" : 22 }
{ "name" : "Max", "birthday" : ISODate("2004-03-31T00:00:00Z"), "daysTillBirthday" : 22 }
{ "name" : "Ben", "birthday" : ISODate("1968-04-03T00:00:00Z"), "daysTillBirthday" : 25 }
{ "name" : "Jen", "birthday" : ISODate("1971-04-03T00:00:00Z"), "daysTillBirthday" : 25 }
{ "name" : "Ally", "birthday" : ISODate("1975-06-12T00:00:00Z"), "daysTillBirthday" : 95 }
{ "name" : "Mark", "birthday" : ISODate("1949-12-23T00:00:00Z"), "daysTillBirthday" : 289 }
{ "name" : "Sean", "birthday" : ISODate("2004-01-31T00:00:00Z"), "daysTillBirthday" : 328 }
{ "name" : "Sandy", "birthday" : ISODate("2005-01-31T00:00:00Z"), "daysTillBirthday" : 328 }
{ "name" : "Paul", "birthday" : ISODate("2011-02-07T00:00:00Z"), "daysTillBirthday" : 335 }
{ "name" : "Tim", "birthday" : ISODate("2008-02-28T00:00:00Z"), "daysTillBirthday" : 356 }
{ "name" : "Toni", "birthday" : ISODate("2009-02-28T00:00:00Z"), "daysTillBirthday" : 356 }
{ "name" : "Ellen", "birthday" : ISODate("1996-02-28T00:00:00Z"), "daysTillBirthday" : 356 }
{ "name" : "Fanny", "birthday" : ISODate("1996-02-29T00:00:00Z"), "daysTillBirthday" : 356 }
{ "name" : "Edgar", "birthday" : ISODate("1997-02-28T00:00:00Z"), "daysTillBirthday" : 356 }
{ "name" : "Gene", "birthday" : ISODate("1996-03-01T00:00:00Z"), "daysTillBirthday" : 357 }
{ "name" : "George", "birthday" : ISODate("1997-03-01T00:00:00Z"), "daysTillBirthday" : 357 }
{ "name" : "Paul", "birthday" : ISODate("2014-03-04T15:59:05.374Z"), "daysTillBirthday" : 360 }

You can see that people with same birthday now have same number of days till birthday whether they were born on a leap year or not. Match step can now be performed for the cut-off designed.

EDIT

As of version 3.5.11 there are several date manipulation expressions in aggregation pipeline that make this significantly simpler to write. In particular, the $dateFromParts expression allows constructing a date from various parts, allowing this aggregation:

var today = new Date();
var a1 = {$addFields:{
    today:{$dateFromParts:{year:{$year:today},month:{$month:today},day:{$dayOfMonth:today}}},
    birthdayThisYear:{$dateFromParts:{year:{$year:today}, month:{$month:"$birthday"}, day:{$dayOfMonth:"$birthday"}}}, 
    birthdayNextYear:{$dateFromParts:{year:{$add:[1,{$year:today}]}, month:{$month:"$birthday"}, day:{$dayOfMonth:"$birthday"}}}
}};
var a2 = {$addFields:{
    nextBirthday:{$cond:[ {$gte:[ "$birthdayThisYear", "$today"]}, "$birthdayThisYear", "$birthdayNextYear"]}
}};
var p1 = {$project:{
    name:1, 
    birthday:1, 
    daysTillNextBirthday:{$divide:[ 
        {$subtract:["$nextBirthday", "$today"]}, 
        24*60*60*1000  /* milliseconds in a day */
     ]}, 
    _id:0
}};
var s1 = {$sort:{daysTillNextBirthday:1}};
db.birthdays.aggregate([ a1, a2, p1, s1 ]);

You can set "today" to any date (leap year or not) and see that the calculation is now always correct and much simpler.

Asya Kamsky
  • 41,784
  • 5
  • 109
  • 133
  • of course, that's why I add 365 to dayOfYear if the birthday already happened this year. (I add either 0 or 365 depending on whether dayOfYear is less than today's dayOfYear or not) – Asya Kamsky Mar 04 '14 at 16:50
  • that's elegant, but it makes the assumption that there are 365 days in every year. – code_monk Mar 04 '14 at 19:55
  • @sean9999 Yes very elegant, (personal facepalm for missing it). Determining a leap year is trivial for most languages, so just pass that in to generate. One solution for [JavaScript](http://stackoverflow.com/a/16353241/2313887). – Neil Lunn Mar 04 '14 at 21:06
  • no, @sean9999 leap years don't make a difference, unless you are trying to find who has birthday in the next 60+ days and then it only matters when you're running this query during Jan or Feb. Honestly, try it. Leap year day is more than 30 days away from the beginning of the year :) – Asya Kamsky Mar 05 '14 at 03:17
  • The only thing I guess to add is that the project *could* be only one $project stage. Getting the **current** day of year is trivial for most languages and there should be no need for a separate pipeline stage just in order to project out the same value against all documents. But as already commented, elegant and clean to solve the basic problem. I already up-voted. – Neil Lunn Mar 05 '14 at 13:10
  • yes, I checked - agg framework does *not* collapse sequential $project stages that it could, so it would technically be slightly more efficient to just do a single $project but I decided this was more readable. – Asya Kamsky Mar 05 '14 at 21:01
  • there are many cases where this would not produce the correct result. consider for instance a user born on a leap year on august 15. The value for $dayOfYear will differ from the value for August 15 for all other years. The simple fact is that birthdays are defined by a month and a dayOfMonth. – code_monk Mar 08 '14 at 12:02
  • @sean9999 got it - you were talking about the birthday year being leap year (I misunderstood your question as being about the year we query in being a leap year). You are absolutely right. There are a couple of ways to fix it NeilLunn already mentioned determining LY in JS, but since I specifically didn't want to use JS, the solution I was originally going to post but abandoned due to it being longer, and that's transforming the date of birth into this year's date in agg framework. – Asya Kamsky Mar 09 '14 at 12:38
  • I'm going to add the steps to compute leap year birthdays day of year correctly (still in agg framework) as an edit on the post. – Asya Kamsky Mar 09 '14 at 13:05
  • If I understand correct this will not work correct if **current** year is leap – gstackoverflow Aug 04 '17 at 09:35
  • You're right, looks like I applied the leap year correction to the wrong part of the original expression! :) – Asya Kamsky Aug 04 '17 at 18:41
  • To clarify, if "today" is during leap year *and* after February 29th, then the days till birthday will be off by one. I don't have time to check now, but I think the leap correction needs to be added both to the birthdate and today's date. There is a simpler way to check if it's a leap year in aggregation framework itself... and 3.6 will introduce a bunch of new date operators that will make all of this unnecessary! :) – Asya Kamsky Aug 04 '17 at 19:00
3

With the clear thing being that the birth dates are the date of birth and are in the past, but we want to search in the future right? Yeah nice trap.

But we can do, with some projection in aggregation, for one method of solving.

First a little setup for variables we need:

var start_time = new Date(),
    end_time = new Date();

end_time.setDate(end_time.getDate() + 30 );

var monthRange = [ start_time.getMonth() + 1, end_time.getMonth() + 1 ];

var start_string =  start_time.getFullYear().toString() +
    ("0" + (start_time.getMonth()+1)).slice(-2) +
    ("0" + (start_time.getDate()-1)).slice(-2);   

var end_string =  end_time.getFullYear().toString() +
    ("0" + (end_time.getMonth()+1)).slice(-2) +
    ("0" + (end_time.getDate()-1)).slice(-2); 

var start_year = start_time.getFullYear();
var end_year = end_time.getFullYear();

Then run that through aggregate:

db.users.aggregate([
    {"$project": { 
        "name": 1,
        "birthdate": 1,
        "matchYear": {"$concat":[
            // Substituting the year into the current year
            {"$substr":[{"$cond":[
                {"$eq": [{"$month": "$birthdate"}, monthRange[0]]},
                start_year,
                // Being careful to see if we moved into the next year
                {"$cond":[
                    {"$lt": monthRange},
                    start_year,
                    end_year
                ]}
            ]},0,4]},
            {"$cond":[
                {"$lt":[10, {"$month": "$birthdate"}]},
                {"$substr":[{"$month": "$birthdate"},0,2]},
                {"$concat":["0",{"$substr":[{"$month": "$birthdate"},0,2]}]}
            ]},
            {"$cond":[
                {"$lt":[10, {"$dayOfMonth": "$birthdate"}]},
                {"$substr":[{"$dayOfMonth": "$birthdate"},0,2]},
                {"$concat":["0",{"$substr":[{"$dayOfMonth": "$birthdate"},0,2]}]}
            ]}
        ]}
    }},

    // Small optimize for the match stage
    {"sort": { "matchYear": 1}},

    // And match on the range now that it's lexical
    {"$match": { "matchYear": {"$gte": start_string, "$lte": end_string } }}

])

I suppose the same applies for mapReduce if your mind works better that way. But the results would only yield true or false no matter which way you shook it. But you'd probably just need a mapper and the syntax is a bit clearer:

var mapFunction = function () {

    var mDate = new Date( this.birthdate.valueOf() );

     if ( mDate.getMonth() + 1 < monthRange[0] ) {
         mDate.setFullYear(start_year);
     } else if ( monthRange[0] < monthRange[1] ) {
         mDate.setFullYear(start_year);
     } else {
         mDate.setFullYear(end_year);
     }

     var matched = (mDate >= start_time && mDate <= end_time);

     var result = {
         name: this.name,
         birthdate: this.birthdate,
         matchDate: mDate,
         matched: matched
     };

     emit( this._id, result );
};

Then you would pass that in to mapReduce, picking up all the variables that were defined before:

db.users.mapReduce(
    mapFunction, 
    function(){},           // reducer is not called
   { 
       out: { inline: 1 },
       scope: { 
           start_year: start_year,
           end_year: end_year,
           start_time: start_time,
           end_time: end_time,
           monthRange: monthRange 
       } 
   }

)

But really, at least store the "Birth Month" in a real field as part of your user record. Because then you can narrow down the matches and not process your whole collection. Just add the additional $match at the start of the pipeline:

{"$match": "birthMonth": {"$in": monthRange }}

With the field present in the document that will save disk thrashing in the future.

Final Note

The other form that should work is just throwing raw JavaScript into find. That can be done as a shortcut where you don't provide any additional query conditions. But for the confused the documentation is under the $where operator, and essentially the same thing as passing in JavaScript to $where.

However, any attempt at this would just not produce a result. Hence the other methods. Not sure if there was a good reason or if it's a bug.

Anyhow all testing, aside from the earlier year rollover testing, was done on these documents. One result should not appear where the initial starting date was from "2014-03-03".

{ "name" : "bill",  "birthdate" : ISODate("1973-03-22T00:00:00Z") }
{ "name" : "fred",  "birthdate" : ISODate("1974-04-17T00:00:00Z") }
{ "name" : "mary",  "birthdate" : ISODate("1961-04-01T00:00:00Z") }
{ "name" : "wilma", "birthdate" : ISODate("1971-03-17T00:00:00Z") }
Neil Lunn
  • 148,042
  • 36
  • 346
  • 317
  • you shouldn't be coercing dates into strings in aggregation framework (technically you shouldn't be allowed to and it was a bug that let it slip through, so it'll stop working at some point). It's also not necessary since you can handle them as dates. – Asya Kamsky Mar 04 '14 at 15:10
  • @AsyaKamsky Believe me I'd rather the whole thing be dates. – Neil Lunn Mar 04 '14 at 20:29
  • you can do it - just use date math. I was originally going to make that my answer, but the one I posted is shorter and simpler to follow. – Asya Kamsky Mar 05 '14 at 03:15
  • @AsyaKamsky I actually **did** consider doing the date math, but that solution seemed more obtuse than the *casting* that is done mostly here. But of course, it's not great, and I constantly tell people to **not use strings for dates**. Not the point, (as I believe working from the day of year is the clearest) but to say that the $substr coercion from Int to String in this way is a bug, well it's been around for a **long** time and seems to be quite widely used. So changing that behavior **will** break a lot of peoples code. Something to consider. – Neil Lunn Mar 05 '14 at 13:34
  • @NeilLunn +1 although i give my bounty to Asya Kamsky, your explanation is very clear and helpful. Thanks. – quocnguyen Mar 05 '14 at 20:13
  • @NeilLunn as it turns out, implicitly coercing various types to strings runs into a problem with certain types, so likely when this unintentional feature is removed, a toString() conversion method will be provided. If you want to see amusing side effect of $substr on non-string type, try it with $substr:[larger float number, 0,3] or something like that :) – Asya Kamsky Mar 05 '14 at 21:03
  • @AsyaKamsky I am not doing float conversions and no-one else I have seen is doing that either. There are plenty of final projections of a truncated date though. For grouping by Year Month, or day. I would actually use a document with just those keys as a grouping key. Floating point math should be avoided like the plague anyway. – Neil Lunn Mar 05 '14 at 21:51
1

A solution would be to pass a function to the find Mongo operation. See the inline comments:

// call find
db.users.find(function () {

   // convert BSON to Date object
   var bDate   = new Date(this.birthday * 1000)

       // get the present moment
     , minDate = new Date()

       // add 30 days from this moment (days, hours, minutes, seconds, ms)
     , maxDate = new Date(minDate.getTime() + 30 * 24 * 60 * 60 * 1000);

   // modify the year of the birthday Date object
   bDate.setFullYear(minDate.getFullYear());

   // return a boolean value
   return (bDate > minDate && bDate < maxDate);
});
Ionică Bizău
  • 109,027
  • 88
  • 289
  • 474
  • This **should** work, but for the life of me I could not get it to do so. I wrote a note on it in the answer I submitted also showing the data I was testing against. And yes, I did change the name of the field. The other part was this is subject to a bug when the month is December. BTW missing comma after the first variable declaration. – Neil Lunn Mar 04 '14 at 06:20
  • @NeilLunn I am sure there are other more efficient ways. I tested this and it works fine. No comma is missing. I put the commas in the next row. Please review and test my code. – Ionică Bizău Mar 04 '14 at 06:47
  • Yes. I know that it should. And that is what I did. In fact it was the first thing that came to mind. But I just get no results. – Neil Lunn Mar 04 '14 at 06:52
  • And the weird thing is, take exactly the same code and submit to mapReduce, changing the last line to emit. And the correct true and false counts come out. So hence. Stumped. – Neil Lunn Mar 04 '14 at 06:55
  • Got it. The 1000 multiply for epoch is not needed. At least that's how it returns for me from 2.4.8 and up. – Neil Lunn Mar 04 '14 at 09:20
  • @NeilLunn I supposed that the birthday field contains a number like `1393932985` (computed using `Math.floor(new Date().getTime() / 1000)`). – Ionică Bizău Mar 04 '14 at 11:39
  • What I was saying is that if you do not multiply at all this works. With native BSON dates. With the multiplication it does not. At least in what I have tested. And that is from 2.4.8 and up to 2.6.0-rc0. If It works in this form on your platform then that is interesting indeed. It seems reasonable that when someone is asking about dates then they are actually using dates. To me that is. – Neil Lunn Mar 04 '14 at 11:51
  • this is not a good idea (the answer) - there is absolutely no reason in the world to use javascript for this - the server can handle this natively without using JS. – Asya Kamsky Mar 04 '14 at 15:08
  • @IonicăBizău +1 since i use nodejs in backend, passing a javascript function wasn't a problem for me at all. However, I think Asya Kamsky was right, for language other than javascript solve the problem without passing a js function would be better. – quocnguyen Mar 05 '14 at 20:10
1

I think the most elegant and usually the most efficient solution is to use the aggregation framework. To obtain birthdays we need to discard all information about the dates except $month and $dayOfMonth. We create new compound fields using those values, pivot on them, and away we go!

This javascript can be executed from the mongo console, and operates on a collection called users with a field called birthday. It returns a list of user IDs, grouped by birthday.

var next30days = [];
var today = Date.now();
var oneday = (1000*60*60*24);
var in30days = Date.now() + (oneday*30);

//  make an array of all the month/day combos for the next 30 days    
for (var i=today;i<in30days;i=i+oneday) {
    var thisday = new Date(i);
    next30days.push({
        "m": thisday.getMonth()+1,
        "d": thisday.getDate()
    });
}

var agg = db.users.aggregate([
    {
        '$project': {
            "m": {"$month": "$birthday"},
            "d": {"$dayOfMonth": "$birthday"}
        }
    },
    {
        "$match": {
            "$or": next30days
        }
    },
    {
        "$group": {
            "_id": {
                "month": "$m",
                "day": "$d",
            },
            "userids": {"$push":"$_id"}
        }
    }
]);

printjson(agg);
code_monk
  • 9,451
  • 2
  • 42
  • 41
  • I like the general approach of this, but I think there is a more elegant way to do this without precomputing the entire 30 day range array of d/m pairs... – Asya Kamsky Mar 04 '14 at 15:31
  • i thought about reducing that to a range query, so rather than a 30-element list, it would look like ` { "$or": [ {"month":2,"dayofmonth": {"$gte": 15}}, {"month":3,"dayofmonth": {"$lte": 14}} ] } ` – code_monk Mar 04 '14 at 15:36
  • you can actually generate the birthdays as regular dates to do a range query on, or there's another way to do this that I like - I'll write it up as my answer... – Asya Kamsky Mar 04 '14 at 15:43