1

I have a JSON list with duplicates I need to remove, but I can't find a way to do it.

This is the solution that I have.

I want to keep the first item found with a given ID, and remove the next ones with the same ID.

The problem is, it tries to remove even the first item.

var gindex = [];

function removeDuplicate(list) {

    $.each(list, function(i, val){
        console.log(val.id);
        console.log(gindex);
        if($.inArray(val.id, gindex) == -1) { //in array, so leave this item
            gindex.push(val.id);
        }
        else // found already one with the id, delete it
        {
            list.splice(i, 1);
        }

        if(val.children) {
            val.children = removeDuplicate(val.children);
        }

    });

    return list;
}

gindex = [];
list = removeDuplicate(parsed_list);
console.log(window.JSON.stringify(list));

finally, this is the original list :

[
  {
    "id": 0,
    "children": [
      {
        "id": 1,
        "children": [
          {
            "id": 2, // with my algorithm, this one get also flagged for deletion
          }
        ]
      },
      {
        "id": 2, // remove this one
      },
      {
        "id": 3,
      },
      {
        "id": 4, // with my algorithm, this one get also flagged for deletion
        "children": [
          { 
            "id": 5, // with my algorithm, this one get also flagged for deletion
            "children": [
              {
                "id": 6, // with my algorithm, this one get also flagged for deletion
              }
            ]
          }
        ]
      },
      {
        "id": 5, // remove this one
        "children": [
          {
            "id": 6, // remove this one
          }
        ]
      },
      {
        "id": 6, // remove this one
      },
      {
        "id": 7,
      }
    ]
  }
]

and this is the result I would like to obtain

[
  {
    "id": 0,
    "children": [
      {
        "id": 1,
        "children": [
          {
            "id": 2,
          }
        ]
      },
      {
        "id": 3,
      },
      {
        "id": 4,
        "children": [
          {
            "id": 5,
            "children": [
              {
                "id": 6,
              }
            ]
          }
        ]
      },
      {
        "id": 7,
      }
    ]
  }
]

thank you for your reply.

user3597823
  • 55
  • 1
  • 6
  • 2
    And what's the problem with your current solution? – Felix Kling Jul 09 '14 at 14:50
  • Uncaught TypeError: Cannot read property 'id' of undefined. – user3597823 Jul 09 '14 at 14:58
  • the problem must be in your algorithm, since the duplicate 2, 4 and 5 are being removed... if you want to remove the first 4 and 5 you must be doing something wrong elsewhere – SaintLike Jul 09 '14 at 14:58
  • I want to keep the first item found with a given ID, and remove the next ones with the same ID – user3597823 Jul 09 '14 at 14:59
  • I assume $.each doesn't like it when you mutate the array while iterating over it. Consider creating a new array instead. – Felix Kling Jul 09 '14 at 15:03
  • I think that when I try to remove an item with ID X, it removes all the objects with ID X and not just the current one. I tried to put an attribute instead of removing the item. The attribute is set even in the previous first item. – user3597823 Jul 09 '14 at 15:22
  • It looks like you want to perform a BFS instead of a DFS. Mutating the array while iterating over it is still a problem though. – Felix Kling Jul 09 '14 at 16:10
  • FYI, when I run your code, the first element with a specific ID (read from top to bottom) is **not** marked for deletion: http://jsfiddle.net/PETS2/ . If it is for you, then your are using a different code than posted here or your data structure contains multiple references to the same element. The latter is not a problem though. The issues are still: 1) You need to do BFS instead of DFS and 2) don't mutate the array while iterating over it. – Felix Kling Jul 09 '14 at 16:13

1 Answers1

0

I tried creating my own logic for this (probably more general than what you want), but it may help you debug your code. See the jsFiddle.

The core of the logic is

/**
 * Walk through an object or array and remove duplicate elements where the 'id' key is duplicated
 * Depends on a seenIds object (using it as a set)
 */
function processData(el) {
    // If the element is an array...
    if ($.isArray(el)) {
        for (var i = 0; i < el.length; i++) {
            var value = el[i];
            processData(value);

            // If the child is now empty, remove it from the array
            if (checkForEmpty(value)) {
                el.splice(i, 1);
                i--; // Fix index after splicing (http://stackoverflow.com/a/9882349/1370556)
            }
        }
    }
    // If the element is an object...
    else if ($.isPlainObject(el)) {
        for (var key in el) {
            // Make sure the key is not part of the prototype chain
            if (el.hasOwnProperty(key)) {
                var value = el[key];

                if (key == 'id') {
                    // If the key has been seen, remove it
                    if (seenIds[value]) {
                        delete el[key];
                        continue; // Skip further processing
                    } else seenIds[value] = true;
                }

                processData(value);

                // If the child is now empty, remove it from the object
                if (checkForEmpty(value)) delete el[key];
            }
        }
    }
}
lebolo
  • 2,120
  • 4
  • 29
  • 44