2

I want to merge two JSON files that contain information about movies. These files have some items in common. I want to write a third file which includes all the movies without repeating them.

Here's what I have so far:

const fs = require('fs');
const path = require('path');

const readMovies1 = () => {
  return new Promise((resolve, reject) => {
    fs.readFile(path.join(__dirname,'../models/movies.json'), 'utf8', (err, data1) => {
      if(err) reject(err);
      let data = JSON.parse(data1);
      resolve(data);
    });
  });
};

const readMovies2 = (data1) => {
  return new Promise((resolve, reject) =>{
    fs.readFile(path.join(__dirname,'../models/movies2.json'), 'utf8', (err, data) => {
      if(err) reject(err);
      data = JSON.parse(data);
      resolve([data1,data]);
    });
  });
};

const merging = (data1, data2) => {
   var args = arguments;
   var hash = {};
   var arr = [];
   for (var i = 0; i < args.length; i++) {
      for (var j = 0; j < args[i].length; j++) {
        if (hash[args[i][j]] !== true) {
          arr[arr.length] = args[i][j];
          hash[args[i][j]] = true;
        }
      }
    }
return arr;

};
readMovies1()
  .then(readMovies2)
    .catch((err) => console.error(err))
  .then((data) => console.log(merging(data[0],data[1])))
    .catch((err) => console.error(err));

But the console output gives me this:

  [ undefined,
  '/',
  'U',
  's',
  'e',
  'r',
  'g',
  'o',
  'n',
  'z',
  'P',
  'j',
  'c',
  't',
  'J',
  'S',
  'a',
  'd',
  'm',
  '-',
  'v',
  'i',
  'l',
  'b',
  '.' ]

I found the merging function in a question here on stack overflow but it was merging an array of numbers, and I am merging arrays of objects. I don't know if that has something to do with my problem.

Here's one of my source files (the other follows the same pattern but has some other movies):

[{
  "title": "Spider-Man: Homecoming",
  "usersScore": "92%",
  "criticsScore": "89%"
}, {
  "title": "Girls Trip",
  "usersScore": "89%",
  "criticsScore": "83%"
}, {
  "title": "Captain Underpants: The First Epic Movie (Captain Underpants)",
  "usersScore": "87%",
  "criticsScore": "62%"
}, {
  "title": "Guardians of the Galaxy Vol. 2",
  "usersScore": "82%",
  "criticsScore": "88%"
}, {
  "title": "Wonder Woman",
  "usersScore": "92%",
  "criticsScore": "89%"
}, {
  "title": "First They Killed My Father",
  "usersScore": "88%",
  "criticsScore": "83%"
}, {
  "title": "Baby Driver",
  "usersScore": "93%",
  "criticsScore": "87%"
}, {
  "title": "Demon",
  "usersScore": "91%",
  "criticsScore": "56%"
}, {
  "title": "The Music of Strangers: Yo-Yo Ma and the Silk Road Ensemble",
  "usersScore": "84%",
  "criticsScore": "85%"
}, {
  "title": "Colossal",
  "usersScore": "80%",
  "criticsScore": "59%"
}, {
  "title": "Certain Women",
  "usersScore": "92%",
  "criticsScore": "51%"
}, {
  "title": "Godzilla Resurgence (Shin Godzilla)",
  "usersScore": "84%",
  "criticsScore": "73%"
}, {
  "title": "My Cousin Rachel",
  "usersScore": "76%",
  "criticsScore": "47%"
}, {
  "title": "The Meyerowitz Stories (New and Selected)",
  "usersScore": "93%",
  "criticsScore": "84%"
}, {
  "title": "Raw",
  "usersScore": "90%",
  "criticsScore": "77%"
}, {
  "title": "The Wedding Plan",
  "usersScore": "86%",
  "criticsScore": "65%"
}, {
  "title": "Maudie",
  "usersScore": "88%",
  "criticsScore": "92%"
}, {
  "title": "Heal the Living (Réparer les vivants)",
  "usersScore": "90%",
  "criticsScore": "70%"
}, {
  "title": "Lady Macbeth",
  "usersScore": "89%",
  "criticsScore": "72%"
}, {
  "title": "The Exception (The Kaiser's Last Kiss)",
  "usersScore": "76%",
  "criticsScore": "67%"
}, {
  "title": "Citizen Jane: Battle for the City",
  "usersScore": "94%",
  "criticsScore": "61%"
}, {
  "title": "The Beguiled",
  "usersScore": "78%",
  "criticsScore": "50%"
}, {
  "title": "The Big Sick",
  "usersScore": "98%",
  "criticsScore": "89%"
}, {
  "title": "The Little Hours",
  "usersScore": "77%",
  "criticsScore": "53%"
}, {
  "title": "A Ghost Story",
  "usersScore": "91%",
  "criticsScore": "66%"
}, {
  "title": "The Hero",
  "usersScore": "77%",
  "criticsScore": "64%"
}, {
  "title": "Megan Leavey",
  "usersScore": "84%",
  "criticsScore": "83%"
}, {
  "title": "Band Aid",
  "usersScore": "85%",
  "criticsScore": "73%"
}, {
  "title": "It Comes At Night",
  "usersScore": "89%",
  "criticsScore": "43%"
}, {
  "title": "The Midwife (Sage femme)",
  "usersScore": "86%",
  "criticsScore": "82%"
}, {
  "title": "Brawl in Cell Block 99",
  "usersScore": "93%",
  "criticsScore": "75%"
}, {
  "title": "Gerald's Game",
  "usersScore": "89%",
  "criticsScore": "78%"
}]
medicengonzo
  • 479
  • 1
  • 7
  • 23
  • 1
    What does your source data look like? – theGleep Oct 17 '17 at 14:00
  • 1
    https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Object/assign – Andrew Lohr Oct 17 '17 at 14:04
  • A much simpler way to think of it is to just concatenate the two arrays and then filter out only the unique (or sometimes called distinct) values. Merging is usually understood as a different problem and can get much more complex than just picking unique properties. Searching for "merging" will yield mostly irrelevant results for your case. – Marko Gresak Oct 17 '17 at 14:05
  • 1
    @BCartolo OP wants to "merge" arrays, not objects. – Marko Gresak Oct 17 '17 at 14:06
  • @AndrewLohr OP wants to "merge" arrays, not objects. Also, `Object.assign` doesn't really merge, it will just overwrite the properties and it works shallowly, i.e. only on the top level. Any deeply nested values will not be merged correctly. If the intention was to merge objects, one would have to implement custom handling for the deeply nested objects or use a library that does this, e.g. `lodash.merge`. – Marko Gresak Oct 17 '17 at 14:08
  • It is useful to think about applying a kind of join operation on the JSON files, similar to this one [https://stackoverflow.com/questions/7251163/sql-style-join-on-json-data](https://stackoverflow.com/questions/7251163/sql-style-join-on-json-data) – Abdulrahman Bres Oct 17 '17 at 14:15
  • Your functions `readMovies1()` and `readMovies2()` are exactly equal. Don't copy and paste code you need two times. Make a single function `readJsonFile(path)` and call it two times. – Tomalak Oct 17 '17 at 14:57

4 Answers4

1

What you actually need is to combine the two arrays then filter them to remove the duplicates:

const merge = (data1, data2) => {
    // keeps track of already existing titles to avoid duplicates
    let existingIndexes = {};

    // check the the arguments to make sure the code does not break
    data1 = data1 instanceof Array ? data1 : [];
    data2 = data2 instanceof Array ? data2 : [];

    // return a concatenated and filtered copy result
    return data1.concat(data2).filter((movie) => {
        if (existingIndexes.hasOwnProperty(movie.title)) {
            existingIndexes[movie.title] = true;
            return true;
        }
        return false;
    });
};
M0nst3R
  • 5,186
  • 1
  • 23
  • 36
  • instead of `existingIndexes = []`, use `existingIndexes = {}` - object key lookups are more efficient than calling `.indexOf()` on an array. – Tomalak Oct 17 '17 at 14:55
  • Yeah I agree but if I use objects, I would need to add not only a key but a value as well, granted it can be a simple boolean, but still, this one looks cleaner. – M0nst3R Oct 17 '17 at 14:56
  • 1
    `.filter(movie => index.hasOwnProperty(movie.title) || index[movie.title] = true);` :) -- Admittedly, that's probably not something I'd use in production code. – Tomalak Oct 17 '17 at 15:01
  • Yeah. And it has a bug, too. It should be `!index.hasOwnProperty(movie.title)`. Saw it too late to correct it, the comment is locked now. I find it too hard to read, too. I would use a variant of this with a proper `if`. – Tomalak Oct 17 '17 at 15:09
  • I agree, but I do not think this is one of the cases where `hasOwnProprerty` trumps `indexOf` since we are dealing with a simple collection of strings. – M0nst3R Oct 17 '17 at 15:11
  • Using `hasOwnProperty()` has sustantially better runtime characteristics than `indexOf()`. Compare: https://jsperf.com/object-hasownproperty-vs-array-indexof/2 - Object keys lookups simply are much more efficient than iterating an array. – Tomalak Oct 17 '17 at 15:24
  • Although the performance is tied to the data size and I do not think it is the case of big data in here, I have to admit it is much faster indeed. I guess I can update the answer with the faster version. – M0nst3R Oct 17 '17 at 15:30
  • 1
    Sure, absolute running time is tied to the data size. With a few hundred array entries it won't matter all that much. But still, `indexOf()` is *O(n)* while `hasOwnProperty()` is effectively *O(1)*. Its something to keep in mind - and the reason why I prefer objects for things like this - it's never wrong. – Tomalak Oct 17 '17 at 16:02
0

Something like that should work.

const getTitles = movies => movies.map(movie => movie.title);
const merge = (data1, data2) => {
        const titles1 = getTitles(data1);
        return data1.concat(data2.filter(movie => !titles1.includes(movie.title)));
    }
Mateusz
  • 222
  • 1
  • 11
0

My favorite way to handle removing duplicates from arrays is to use an Object as a dictionary and then convert back to an array. This isn't the most efficient way, but I like how simple the code ends up. We need some way to track duplicate values by key (in this case your movie title), and Objects easily handle this for us.

const merge = (key, arr1, arr2) => {
    // Should probably check our parameters, but I'm leaving it out
    // Our "dictionary" that will help prevent duplicates by using keys
    let dictionary = {};

    // Values in arr2 will overwrite values in arr1 if there is a duplicate
    arr1.concat(arr2).forEach(item => {
        dictionary[item[key]] = item;
        // To always keep the first value, you could replace the previous line with this:
        // dictionary[item[key]] = dictionary[item[key]] || item;
    });

    return Object.keys(dictionary).map(i => dictionary[i]);
};
Nathan
  • 390
  • 1
  • 2
  • 10
0

You can use array#reduce to get unique movies in both of your array.

var data1 = [{"title":"Spider-Man: Homecoming","usersScore":"92%","criticsScore":"89%"},{"title":"Girls Trip","usersScore":"89%","criticsScore":"83%"},{"title":"Captain Underpants: The First Epic Movie (Captain Underpants)","usersScore":"87%","criticsScore":"62%"},{"title":"Guardians of the Galaxy Vol. 2","usersScore":"82%","criticsScore":"88%"},{"title":"Wonder Woman","usersScore":"92%","criticsScore":"89%"},{"title":"First They Killed My Father","usersScore":"88%","criticsScore":"83%"},{"title":"Baby Driver","usersScore":"93%","criticsScore":"87%"},{"title":"Demon","usersScore":"91%","criticsScore":"56%"},{"title":"The Music of Strangers: Yo-Yo Ma and the Silk Road Ensemble","usersScore":"84%","criticsScore":"85%"},{"title":"Colossal","usersScore":"80%","criticsScore":"59%"},{"title":"Certain Women","usersScore":"92%","criticsScore":"51%"},{"title":"Godzilla Resurgence (Shin Godzilla)","usersScore":"84%","criticsScore":"73%"},{"title":"My Cousin Rachel","usersScore":"76%","criticsScore":"47%"},{"title":"The Meyerowitz Stories (New and Selected)","usersScore":"93%","criticsScore":"84%"},{"title":"Raw","usersScore":"90%","criticsScore":"77%"},{"title":"The Wedding Plan","usersScore":"86%","criticsScore":"65%"},{"title":"Maudie","usersScore":"88%","criticsScore":"92%"},{"title":"Heal the Living (Réparer les vivants)","usersScore":"90%","criticsScore":"70%"},{"title":"Lady Macbeth","usersScore":"89%","criticsScore":"72%"},{"title":"The Exception (The Kaiser's Last Kiss)","usersScore":"76%","criticsScore":"67%"},{"title":"Citizen Jane: Battle for the City","usersScore":"94%","criticsScore":"61%"},{"title":"The Beguiled","usersScore":"78%","criticsScore":"50%"},{"title":"The Big Sick","usersScore":"98%","criticsScore":"89%"},{"title":"The Little Hours","usersScore":"77%","criticsScore":"53%"},{"title":"A Ghost Story","usersScore":"91%","criticsScore":"66%"},{"title":"The Hero","usersScore":"77%","criticsScore":"64%"},{"title":"Megan Leavey","usersScore":"84%","criticsScore":"83%"},{"title":"Band Aid","usersScore":"85%","criticsScore":"73%"},{"title":"It Comes At Night","usersScore":"89%","criticsScore":"43%"},{"title":"The Midwife (Sage femme)","usersScore":"86%","criticsScore":"82%"},{"title":"Brawl in Cell Block 99","usersScore":"93%","criticsScore":"75%"},{"title":"Gerald's Game","usersScore":"89%","criticsScore":"78%"}],
    data2 = [{ "title": "The Midwife (Sage femme)","usersScore": "86%","criticsScore": "82%"}, {"title": "Brawl in Cell Block 99","usersScore": "93%", "criticsScore": "75%"}, {"title": "Gerald's Game","usersScore": "89%","criticsScore": "78%" }];

const merging = (data1, data2) => {
   let unique = data1.concat(data2).reduce((map, movie) => {
      return map[movie.title] = movie, map;
   }, Object.create(null));
   return Object.values(unique);
};
console.log(merging(data1,data2));
.as-console-wrapper { max-height: 100% !important; top: 0; }
Hassan Imam
  • 21,956
  • 5
  • 41
  • 51