0

I am attempting to take in a JSON object, iterate through it, remove duplicate and redundant data and output it in order by date (the date-stamp is in the JSON object).

The specific requirements are:

  1. Input data will be in date order
  2. The data from the newest date should be preferred
  3. Duplicate IDs count as duplicates. Duplicate emails count as duplicates. Both must be unique in the dataset. Duplicate values elsewhere do not count as duplicates.
  4. If the dates are identical the data from the record provided last in the list should be preferred

This must be done in pure JavaScript. Here is the code that I have so far:

<html>
<head>
<script>

// Sample input: the lead records to de-duplicate, held under the "leads" key.
// Records are listed in ascending entryDate order; several share an _id or an email.
var Data = {
  "leads": [
    {
      "_id": "jkj238238jdsnfsj23",
      "email": "foo@bar.com",
      "firstName": "John",
      "lastName": "Smith",
      "address": "123 Street St",
      "entryDate": "2014-05-07T17:30:20+00:00"
    },
    {
      "_id": "edu45238jdsnfsj23",
      "email": "mae@bar.com",
      "firstName": "Ted",
      "lastName": "Masters",
      "address": "44 North Hampton St",
      "entryDate": "2014-05-07T17:31:20+00:00"
    },
    {
      "_id": "wabaj238238jdsnfsj23",
      "email": "bog@bar.com",
      "firstName": "Fran",
      "lastName": "Jones",
      "address": "8803 Dark St",
      "entryDate": "2014-05-07T17:31:20+00:00"
    },
    {
      "_id": "jkj238238jdsnfsj23",
      "email": "coo@bar.com",
      "firstName": "Ted",
      "lastName": "Jones",
      "address": "456 Neat St",
      "entryDate": "2014-05-07T17:32:20+00:00"
    },
    {
      "_id": "sel045238jdsnfsj23",
      "email": "foo@bar.com",
      "firstName": "John",
      "lastName": "Smith",
      "address": "123 Street St",
      "entryDate": "2014-05-07T17:32:20+00:00"
    },
    {
      "_id": "qest38238jdsnfsj23",
      "email": "foo@bar.com",
      "firstName": "John",
      "lastName": "Smith",
      "address": "123 Street St",
      "entryDate": "2014-05-07T17:32:20+00:00"
    },
    {
      "_id": "vug789238jdsnfsj23",
      "email": "foo1@bar.com",
      "firstName": "Blake",
      "lastName": "Douglas",
      "address": "123 Reach St",
      "entryDate": "2014-05-07T17:33:20+00:00"
    },
    {
      "_id": "wuj08238jdsnfsj23",
      "email": "foo@bar.com",
      "firstName": "Micah",
      "lastName": "Valmer",
      "address": "123 Street St",
      "entryDate": "2014-05-07T17:33:20+00:00"
    },
    {
      "_id": "belr28238jdsnfsj23",
      "email": "mae@bar.com",
      "firstName": "Tallulah",
      "lastName": "Smith",
      "address": "123 Water St",
      "entryDate": "2014-05-07T17:33:20+00:00"
    },
    {
      "_id": "jkj238238jdsnfsj23",
      "email": "bill@bar.com",
      "firstName": "John",
      "lastName": "Smith",
      "address": "888 Mayberry St",
      "entryDate": "2014-05-07T17:33:20+00:00"
    }
  ]
};
</script>

</head>

<body>

<script type="text/javascript">

// Debug output: dump the leads array and then its length to the console.
// Wrapped in an immediately-invoked function so no extra global is created.
(function (leads) {
  console.log(leads);        // the array of lead objects stored on Data
  console.log(leads.length); // how many lead records that array holds
})(Data.leads);


</script>


</body>
</html>

I am able to see the objects through the console.log(), but whenever I try to manipulate them I keep getting errors and undefined when I view the output. I am unclear on the precise mechanisms to do this.

Many thanks in advance, I am at my wits' end.

Ok, to expand on this I set up an account on JSFIDDLE link

dwulf
  • 3
  • 4

1 Answer

1

I have forked your jsfiddle: http://jsfiddle.net/limowankenobi/pak34wrz/

It still needs more work as I am just removing the duplicates without taking into consideration the dates. You should be able to modify it to use the dates as per your requirements.

I have created two small functions:

The first one groups an array (the parameter data) by a property of the elements of the array (the parameter column).

The result will be an object (a map) whose properties (keys) are the unique values of the selected column, and each value will be a list of records with that key.

/**
 * Groups the elements of an array by the value of one of their properties.
 *
 * @param {string} column - Name of the property whose value is used as the group key.
 * @param {Array<Object>} data - Records to group.
 * @returns {Object} A plain object whose own keys are the distinct values of
 *   `column` (coerced to strings, in first-seen order) and whose values are
 *   arrays of the records that carry that value, in input order.
 */
function groupBy(column, data) {
  var hasOwn = Object.prototype.hasOwnProperty;
  var groups = {};

  data.forEach(function (itm) {
    var key = itm[column];

    // Only look at OWN properties: a plain `groups[key] || []` would find
    // inherited members such as "constructor" or "toString" and then fail
    // when calling .push on them.
    if (!hasOwn.call(groups, key)) {
      // defineProperty (rather than assignment) guarantees an own data
      // property even for the special key "__proto__", which assignment
      // would treat as a prototype change.
      Object.defineProperty(groups, key, {
        value: [],
        enumerable: true,
        writable: true,
        configurable: true
      });
    }

    groups[key].push(itm);
  });

  return groups;
}

The second function is to flatten a map of lists. It takes an object and iterates over each of the properties of the object (keys) and chooses one element. In this case I am choosing the first element but I believe you should modify this to choose the element based on the dates.

/**
 * Flattens a map of lists into a single list holding one element per key.
 *
 * @param {Object} groups - Map whose values are arrays of records.
 * @param {function(Array, string): *} [choose] - Optional selector called with
 *   each group's array and its key; its return value represents the group.
 *   Defaults to the first element of the array, so a caller can instead pick,
 *   for example, the record with the newest date.
 * @returns {Array} One chosen element per own enumerable key of `groups`,
 *   in key enumeration order.
 */
function uniquify(groups, choose) {
  // Borrow hasOwnProperty from Object.prototype: calling groups.hasOwnProperty
  // directly breaks when a group key is itself "hasOwnProperty" or when
  // `groups` was created without a prototype.
  var hasOwn = Object.prototype.hasOwnProperty;
  var pick = typeof choose === "function"
    ? choose
    : function (group) { return group[0]; };
  var unique = [];

  for (var key in groups) {
    if (hasOwn.call(groups, key)) {
      unique.push(pick(groups[key], key));
    }
  }

  return unique;
}

In this way the function that does what you need will look something like:

/**
 * Removes duplicate records from an array in two passes: first keeping one
 * record per "_id", then keeping one record per "email" among the survivors.
 * Which record represents each group is decided by uniquify.
 *
 * @param {Array<Object>} arr - Records carrying "_id" and "email" properties.
 * @returns {Array<Object>} The records left after both passes.
 */
function arrUnique(arr) {
  // Fold over the key columns in order; each pass feeds the next one.
  return ["_id", "email"].reduce(function (records, column) {
    return uniquify(groupBy(column, records));
  }, arr);
}

For example, the result of groupBy("_id", arr) is

{
"jkj238238jdsnfsj23": [
    {
        "_id": "jkj238238jdsnfsj23",
        "email": "foo@bar.com",
        "firstName": "John",
        "lastName": "Smith",
        "address": "123 Street St",
        "entryDate": "2014-05-07T17:30:20+00:00"
    },
    {
        "_id": "jkj238238jdsnfsj23",
        "email": "coo@bar.com",
        "firstName": "Ted",
        "lastName": "Jones",
        "address": "456 Neat St",
        "entryDate": "2014-05-07T17:32:20+00:00"
    },
    {
        "_id": "jkj238238jdsnfsj23",
        "email": "bill@bar.com",
        "firstName": "John",
        "lastName": "Smith",
        "address": "888 Mayberry St",
        "entryDate": "2014-05-07T17:33:20+00:00"
    }
],
"edu45238jdsnfsj23": [
    {
        "_id": "edu45238jdsnfsj23",
        "email": "mae@bar.com",
        "firstName": "Ted",
        "lastName": "Masters",
        "address": "44 North Hampton St",
        "entryDate": "2014-05-07T17:31:20+00:00"
    }
],
"wabaj238238jdsnfsj23": [
    {
        "_id": "wabaj238238jdsnfsj23",
        "email": "bog@bar.com",
        "firstName": "Fran",
        "lastName": "Jones",
        "address": "8803 Dark St",
        "entryDate": "2014-05-07T17:31:20+00:00"
    }
],
"sel045238jdsnfsj23": [
    {
        "_id": "sel045238jdsnfsj23",
        "email": "foo@bar.com",
        "firstName": "John",
        "lastName": "Smith",
        "address": "123 Street St",
        "entryDate": "2014-05-07T17:32:20+00:00"
    }
],
"qest38238jdsnfsj23": [
    {
        "_id": "qest38238jdsnfsj23",
        "email": "foo@bar.com",
        "firstName": "John",
        "lastName": "Smith",
        "address": "123 Street St",
        "entryDate": "2014-05-07T17:32:20+00:00"
    }
],
"vug789238jdsnfsj23": [
    {
        "_id": "vug789238jdsnfsj23",
        "email": "foo1@bar.com",
        "firstName": "Blake",
        "lastName": "Douglas",
        "address": "123 Reach St",
        "entryDate": "2014-05-07T17:33:20+00:00"
    }
],
"wuj08238jdsnfsj23": [
    {
        "_id": "wuj08238jdsnfsj23",
        "email": "foo@bar.com",
        "firstName": "Micah",
        "lastName": "Valmer",
        "address": "123 Street St",
        "entryDate": "2014-05-07T17:33:20+00:00"
    }
],
"belr28238jdsnfsj23": [
    {
        "_id": "belr28238jdsnfsj23",
        "email": "mae@bar.com",
        "firstName": "Tallulah",
        "lastName": "Smith",
        "address": "123 Water St",
        "entryDate": "2014-05-07T17:33:20+00:00"
    }
]
}

Then calling uniquify on the result of the grouping by _id we get:

[
{
    "_id": "jkj238238jdsnfsj23",
    "email": "foo@bar.com",
    "firstName": "John",
    "lastName": "Smith",
    "address": "123 Street St",
    "entryDate": "2014-05-07T17:30:20+00:00"
},
{
    "_id": "edu45238jdsnfsj23",
    "email": "mae@bar.com",
    "firstName": "Ted",
    "lastName": "Masters",
    "address": "44 North Hampton St",
    "entryDate": "2014-05-07T17:31:20+00:00"
},
{
    "_id": "wabaj238238jdsnfsj23",
    "email": "bog@bar.com",
    "firstName": "Fran",
    "lastName": "Jones",
    "address": "8803 Dark St",
    "entryDate": "2014-05-07T17:31:20+00:00"
},
{
    "_id": "sel045238jdsnfsj23",
    "email": "foo@bar.com",
    "firstName": "John",
    "lastName": "Smith",
    "address": "123 Street St",
    "entryDate": "2014-05-07T17:32:20+00:00"
},
{
    "_id": "qest38238jdsnfsj23",
    "email": "foo@bar.com",
    "firstName": "John",
    "lastName": "Smith",
    "address": "123 Street St",
    "entryDate": "2014-05-07T17:32:20+00:00"
},
{
    "_id": "vug789238jdsnfsj23",
    "email": "foo1@bar.com",
    "firstName": "Blake",
    "lastName": "Douglas",
    "address": "123 Reach St",
    "entryDate": "2014-05-07T17:33:20+00:00"
},
{
    "_id": "wuj08238jdsnfsj23",
    "email": "foo@bar.com",
    "firstName": "Micah",
    "lastName": "Valmer",
    "address": "123 Street St",
    "entryDate": "2014-05-07T17:33:20+00:00"
},
{
    "_id": "belr28238jdsnfsj23",
    "email": "mae@bar.com",
    "firstName": "Tallulah",
    "lastName": "Smith",
    "address": "123 Water St",
    "entryDate": "2014-05-07T17:33:20+00:00"
}
]

If we group this result by email now, we get:

{
"foo@bar.com": [
    {
        "_id": "jkj238238jdsnfsj23",
        "email": "foo@bar.com",
        "firstName": "John",
        "lastName": "Smith",
        "address": "123 Street St",
        "entryDate": "2014-05-07T17:30:20+00:00"
    },
    {
        "_id": "sel045238jdsnfsj23",
        "email": "foo@bar.com",
        "firstName": "John",
        "lastName": "Smith",
        "address": "123 Street St",
        "entryDate": "2014-05-07T17:32:20+00:00"
    },
    {
        "_id": "qest38238jdsnfsj23",
        "email": "foo@bar.com",
        "firstName": "John",
        "lastName": "Smith",
        "address": "123 Street St",
        "entryDate": "2014-05-07T17:32:20+00:00"
    },
    {
        "_id": "wuj08238jdsnfsj23",
        "email": "foo@bar.com",
        "firstName": "Micah",
        "lastName": "Valmer",
        "address": "123 Street St",
        "entryDate": "2014-05-07T17:33:20+00:00"
    }
],
"mae@bar.com": [
    {
        "_id": "edu45238jdsnfsj23",
        "email": "mae@bar.com",
        "firstName": "Ted",
        "lastName": "Masters",
        "address": "44 North Hampton St",
        "entryDate": "2014-05-07T17:31:20+00:00"
    },
    {
        "_id": "belr28238jdsnfsj23",
        "email": "mae@bar.com",
        "firstName": "Tallulah",
        "lastName": "Smith",
        "address": "123 Water St",
        "entryDate": "2014-05-07T17:33:20+00:00"
    }
],
"bog@bar.com": [
    {
        "_id": "wabaj238238jdsnfsj23",
        "email": "bog@bar.com",
        "firstName": "Fran",
        "lastName": "Jones",
        "address": "8803 Dark St",
        "entryDate": "2014-05-07T17:31:20+00:00"
    }
],
"foo1@bar.com": [
    {
        "_id": "vug789238jdsnfsj23",
        "email": "foo1@bar.com",
        "firstName": "Blake",
        "lastName": "Douglas",
        "address": "123 Reach St",
        "entryDate": "2014-05-07T17:33:20+00:00"
    }
]
}

And finally, applying uniquify to this results in:

[
{
    "_id": "jkj238238jdsnfsj23",
    "email": "foo@bar.com",
    "firstName": "John",
    "lastName": "Smith",
    "address": "123 Street St",
    "entryDate": "2014-05-07T17:30:20+00:00"
},
{
    "_id": "edu45238jdsnfsj23",
    "email": "mae@bar.com",
    "firstName": "Ted",
    "lastName": "Masters",
    "address": "44 North Hampton St",
    "entryDate": "2014-05-07T17:31:20+00:00"
},
{
    "_id": "wabaj238238jdsnfsj23",
    "email": "bog@bar.com",
    "firstName": "Fran",
    "lastName": "Jones",
    "address": "8803 Dark St",
    "entryDate": "2014-05-07T17:31:20+00:00"
},
{
    "_id": "vug789238jdsnfsj23",
    "email": "foo1@bar.com",
    "firstName": "Blake",
    "lastName": "Douglas",
    "address": "123 Reach St",
    "entryDate": "2014-05-07T17:33:20+00:00"
}
]
Edgar Hernandez
  • 4,020
  • 1
  • 24
  • 27