1

I have the following collection in mongo:

{
    "_id" : ObjectId("506217890b50f300d020d237"),
    "o_orderkey" : NumberLong(1),
    "o_orderstatus" : "O",
    "o_totalprice" : 173665.47,
    "o_orderdate" : ISODate("1996-01-02T02:00:00Z"),
    "o_orderpriority" : "5-LOW",
    "o_clerk" : "Clerk#000000951",
    "o_shippriority" : 0,
    "o_comment" : "blithely final dolphins solve-- blithely blithe packages nag blith",
    "customer" : {
            "c_custkey" : NumberLong(36901),
            "c_name" : "Customer#000036901",
            "c_address" : "TBb1yDZcf 8Zepk7apFJ",
            "c_phone" : "23-644-998-4944",
            "c_acctbal" : 4809.84,
            "c_mktsegment" : "AUTOMOBILE",
            "c_comment" : "regular accounts after the blithely pending dependencies play blith",
            "c_nationkey" : {
                    "n_nationkey" : NumberLong(13),
                    "n_name" : "JORDAN",
                    "n_comment" : "blithe, express deposits boost carefully busy accounts. furiously pending depos",
                    "n_regioin" : {
                            "r_regionkey" : NumberLong(4),
                            "r_name" : "MIDDLE EAST",
                            "r_comment" : "furiously unusual packages use carefully above the unusual, exp"
                    }
            }
    },
    "o_lineitem" : [
        {
            "l_linenumber" : 1,
            "l_quantity" : 17,
            "l_extendedprice" : 21168.23,
            "l_discount" : 0.04,
            "l_tax" : 0.02,
            "l_returnflag" : "N",
            "l_linestatus" : "O",
            "l_shipdate" : ISODate("1996-03-13T03:00:00Z"),
            "l_commitdate" : ISODate("1996-02-12T03:00:00Z"),
            "l_receiptdate" : ISODate("1996-03-22T03:00:00Z"),
            "l_shipinstruct" : "DELIVER IN PERSON",
            "l_shipmode" : "TRUCK",
            "l_comment" : "blithely regular ideas caj",
            "partsupp" : {
                "ps_availqty" : 6157,
                "ps_supplycost" : 719.17,
                "ps_comment" : "blithely ironic packages haggle quickly silent platelets. silent packages must have to nod. slyly special theodolites along the blithely ironic packages nag above the furiously pending acc",
                "ps_partkey" : {
                    "p_partkey" : NumberLong(155190),
                    "p_name" : "slate lavender tan lime lawn",
                    "p_mfgr" : "Manufacturer#4",
                    "p_brand" : "Brand#44",
                    "p_type" : "PROMO BRUSHED NICKEL",
                    "p_size" : 9,
                    "p_container" : "JUMBO JAR",
                    "p_retailprice" : 1245.19,
                    "p_comment" : "regular, final dol"
                },
                "ps_suppkey" : {
                    "s_suppkey" : NumberLong(7706),
                    "s_name" : "Supplier#000007706",
                    "s_address" : "BlHq75VoMNCoU380SGiS9fTWbGpeI",
                    "s_phone" : "33-481-218-6643",
                    "s_acctbal" : -379.71,
                    "s_comment" : "carefully pending ideas after the instructions are alongside of the dolphins. slyly pe",
                    "s_nationkey" : {
                        "n_nationkey" : NumberLong(23),
                        "n_name" : "UNITED KINGDOM",
                        "n_comment" : "fluffily regular pinto beans breach according to the ironic dolph",
                        "n_regioin" : {
                            "r_regionkey" : NumberLong(3),
                            "r_name" : "EUROPE",
                            "r_comment" : "special, bold deposits haggle foxes. platelet"
                        }
                    }
                }
            }
        },
        .
        .
        .
    ]
}

And im trying to translate the following sql query:

select
    s_acctbal, 
    s_name, 
    n_name, 
    p_partkey, 
    p_mfgr, 
    s_address, 
    s_phone, 
    s_comment
from 
    part, 
    supplier, 
    partsupp, 
    nation, 
    region
where 
    p_partkey = ps_partkey
    and s_suppkey = ps_suppkey
    and p_size = 15
    and p_type like '%BRASS'
    and s_nationkey = n_nationkey
    and n_regionkey = r_regionkey
    and r_name = 'EUROPE'
    and ps_supplycost = (
        select 
            min(ps_supplycost)
        from 
            partsupp, supplier, 
            nation, region
        where 
            p_partkey = ps_partkey
            and s_suppkey = ps_suppkey
            and s_nationkey = n_nationkey
            and n_regionkey = r_regionkey
            and r_name = 'EUROPE'
    )
order by 
    s_acctbal desc, 
    n_name, 
    s_name, 
    p_partkey;

My function that i was trying:

db.runCommand({
    mapreduce: "ordersfull",
    query: {
    },
    map: function Map() {
        var pattern = /BRASS$/g;

        for(var i in this.o_lineitem){
            var p_size = this.o_lineitem[i].partsupp.ps_partkey.p_size;
            var p_type = this.o_lineitem[i].partsupp.ps_partkey.p_type;
            var region = this.o_lineitem[i].partsupp.ps_suppkey.s_nationkey.n_regioin.r_name;

            if(p_size==15 && p_type.match(pattern)!=null && region == "EUROPE"){
                emit("",{
                    s_acctbal: this.o_lineitem[i].partsupp.ps_suppkey.s_acctbal,
                    s_name: this.o_lineitem[i].partsupp.ps_suppkey.s_name,
                    n_name: this.o_lineitem[i].partsupp.ps_suppkey.s_nationkey.n_name,
                    p_partkey: this.o_lineitem[i].partsupp.ps_partkey.p_partkey,
                    p_mfgr: this.o_lineitem[i].partsupp.ps_partkey.p_mfgr,
                    s_address: this.o_lineitem[i].partsupp.ps_suppkey.s_address,
                    s_phone: this.o_lineitem[i].partsupp.ps_suppkey.s_phone,
                    s_comment: this.o_lineitem[i].partsupp.ps_suppkey.s_comment
                } );
            }

        }
    },
    reduce: function(key, values) {
    },
    out: 'query002'
});

In my result i got null value for all entries, what happen?

walves
  • 2,026
  • 6
  • 28
  • 46
  • I would try building up the query progressively (i.e. start with something basic that works, then add complexity in small steps). I notice you are emitting blank keys in your map() with no reduce() function, so this may be part of the problem. Depending on the size of your result set, you may also want to consider using the new [Aggregation Framework](http://docs.mongodb.org/manual/applications/aggregation/) in MongoDB 2.2. – Stennie Oct 10 '12 at 05:37
  • How many documents you need to aggregate? And what version of MongoDB are you using? – Stennie Oct 10 '12 at 13:57
  • Hi Stennie, excuse me i was on vacation. Thank you vary much for you precious tips. Im using mongo version 2.0.6. And my collection has about 6 millions objects. – walves Nov 04 '12 at 13:54

1 Answers1

0

You can debug your MapReduce output by including print() or printjson() statements in the JavaScript functions. The resulting print output will be saved in your MongoDB log.

There are several issues with your MapReduce:

  • the for .. in loop will not work as you expect .. you should instead use array.forEach(..)
  • if you are iterating an array, you will already have a reference to the array item and should not use array[index]
  • you should emit() with a unique key name if you don't want unexpected grouping
  • your reduce() should return a value matching the structure of the emitted data
  • you should ideally use a query parameter to limit the documents that need to be inspected

Given you appear to just be iterating documents without doing any grouping or reduce(), you may find it easier to fetch the documents and perform the same matching in your application code.

In any case, the map() function should actually look more like:

var map = function () {
    var pattern = /BRASS$/;

    this.o_lineitem.forEach(function(item) {
        var partKey = item.partsupp.ps_partkey;
        var suppKey = item.partsupp.ps_suppkey;

        var region = suppKey.s_nationkey.n_regioin.r_name;

        if (partKey.p_size==15 && partKey.p_type.match(pattern) !=null && region == "EUROPE") {
            emit(suppKey.s_name,
                {
                    s_acctbal: suppKey.s_acctbal,
                    s_name:    suppKey.s_name,
                    n_name:    suppKey.s_nationkey.n_name,
                    p_partkey: partKey.p_partkey,
                    p_mfgr:    partKey.p_mfgr,
                    s_address: suppKey.s_address,
                    s_phone:   suppKey.s_phone,
                    s_comment: suppKey.s_comment
                }
            );
        }
    })
}

It would be easier to translate this query into the new Aggregation Framework in MongoDB 2.2, given your data structure and the desired multiple matching and sorting.

There are some current limitations to be aware of (such as the present 16MB maximum on output from the Aggregation pipeline), but you will likely find the queries easier to create and debug.

Here is a commented example using the Aggregation Framework, including initial match criteria for order status, date, and part/supplier items of interest:

db.ordersfull.aggregate(
    // Find matching documents first (can take advantage of index)
    { $match: {
        o_orderstatus: 'O',
        o_orderdate: { $gte: new ISODate('2012-10-01') },
        $and: [
            { o_lineitem: { $elemMatch: { 'partsupp.ps_partkey.p_size': 15 }} },
            { o_lineitem: { $elemMatch: { 'partsupp.ps_partkey.p_type': { $exists : true } }} },        
            { o_lineitem: { $elemMatch: { 'partsupp.ps_suppkey.s_nationkey.n_regioin.r_name': 'EUROPE'}} }
        ]
    }},

    // Filter to fields of interest
    { $project: {
        _id: 0,
        o_lineitem: 1
    }},

    // Convert line item arrays into document stream
    { $unwind: '$o_lineitem' },

    // Match desired line items
    { $match: {
        'o_lineitem.partsupp.ps_partkey.p_size': 15,
        'o_lineitem.partsupp.ps_partkey.p_type': /BRASS$/, 
        'o_lineitem.partsupp.ps_suppkey.s_nationkey.n_regioin.r_name': 'EUROPE'
    }},

    // Final field selection
    { $project: {
        s_acctbal: '$o_lineitem.partsupp.ps_suppkey.s_acctbal',
        s_name:    '$o_lineitem.partsupp.ps_suppkey.s_name',
        n_name:    '$o_lineitem.partsupp.ps_suppkey.s_nationkey.n_name',
        p_partkey: '$o_lineitem.partsupp.ps_partkey.p_partkey',
        p_mfgr:    '$o_lineitem.partsupp.ps_partkey.p_mfgr',
        s_address: '$o_lineitem.partsupp.ps_suppkey.s_address',
        s_phone:   '$o_lineitem.partsupp.ps_suppkey.s_phone',
        s_comment: '$o_lineitem.partsupp.ps_suppkey.s_comment'
    }},

    // Sort the output
    { $sort: {
        s_acctbal: -1,
        n_name: 1,
        s_name: 1,
        p_partkey: 1
    }}
)
Community
  • 1
  • 1
Stennie
  • 63,885
  • 14
  • 149
  • 175