I'm not sure how efficient my solution is, especially as the dataset grows, but I think it's pretty close to what you're looking for.
// Sample product names that the code below groups by shared leading words.
// Declared with `const` because the binding is never reassigned.
const data = [
  'apple iphone 65gb',
  'apple ipad mini 32gb',
  'apple ipad mini 64gb',
  'apple ipad air 64gb',
  'apple ipad air 32gb',
  'panasonic gh4',
  'samsung s2 galaxy',
  'samsung s2 galaxy red',
  'samsung s3 galaxy',
];
// NOTE(review): not read by any of the visible code; every loop below
// declares its own `i`. Kept so the file-level bindings stay unchanged.
let i = 0;
// Result map (key -> matching entries of `data`), filled in by generateObject.
// Only its properties are mutated, so the binding itself is `const`.
const obj = {};
/**
 * Finds the first position at which two sequences differ.
 *
 * Only positions that exist in `arrA` are inspected, so when every element of
 * `arrA` matches the element at the same position in `arrB` (identical
 * sequences, `arrA` being a prefix of `arrB`, or `arrA` being empty) there is
 * no mismatch and `undefined` is returned.
 *
 * @param {Array} arrA - Sequence whose length bounds the comparison.
 * @param {Array} arrB - Sequence compared against `arrA`.
 * @returns {number|undefined} Index of the first mismatch, or `undefined`.
 */
function checkArrays(arrA, arrB) {
  let pos = 0;
  while (pos < arrA.length) {
    if (arrA[pos] !== arrB[pos]) {
      return pos;
    }
    pos += 1;
  }
  // Callers rely on `undefined` (not -1 or arrA.length) meaning "no mismatch".
  return undefined;
}
/**
 * Derives the grouping keys from `data` and passes them to `generateObject`.
 *
 * Each entry is compared with the entry that follows it. For both strings the
 * cut position is the last space located at or after the first differing
 * character; the text before that cut is a candidate key. A candidate is only
 * recorded when neither it nor the same string cut at the other string's
 * position is already recorded.
 *
 * @param {string[]} data - Entries to analyse, compared pairwise in order.
 * @returns {void} Nothing; the de-duplicated keys go to `generateObject`.
 */
const refineArr = (data) => {
  // Declared locally. Assigning to an undeclared `arr` creates an implicit
  // global in sloppy mode and throws a ReferenceError in strict mode/modules.
  const arr = [];

  // Intentionally declared outside the loop: when a pair yields no qualifying
  // space, the cut position from an earlier iteration is reused by the checks
  // below, and the resulting keys depend on that carry-over.
  let index1;
  let index2;

  // Index of the last space in `chars` if it sits at or after `from`,
  // otherwise `fallback`. A `from` of undefined never matches.
  const lastSpaceFrom = (chars, from, fallback) => {
    const pos = chars.lastIndexOf(" ");
    return pos !== -1 && pos >= from ? pos : fallback;
  };

  // Text of `chars` up to (not including) `end`, without surrounding blanks.
  const headOf = (chars, end) => chars.slice(0, end).join("").trim();

  for (let i = 0; i < data.length; i++) {
    const one = data[i].split("");
    const next = data[i + 1];
    if (!next) {
      // No following entry (or an empty one) to compare against.
      continue;
    }
    const two = next.split("");
    const x = checkArrays(one, two);

    index1 = lastSpaceFrom(one, x, index1);
    // At this point `index2` still holds the value from the previous pair.
    if (!arr.includes(headOf(one, index1)) && !arr.includes(headOf(one, index2))) {
      arr.push(headOf(one, index1));
    }

    index2 = lastSpaceFrom(two, x, index2);
    if (!arr.includes(headOf(two, index2)) && !arr.includes(headOf(two, index1))) {
      arr.push(headOf(two, index2));
    }
  }

  const newArr = [...new Set(arr)];
  generateObject(newArr);
};
/**
 * Fills the shared `obj` map with one entry per key and logs the map.
 *
 * For every key that does not yet have a truthy value in `obj`, the value
 * becomes the list of `data` entries that contain the key as a substring.
 *
 * @param {string[]} arr - Keys to add to `obj`.
 * @returns {void} Nothing; `obj` is mutated and printed with `console.log`.
 */
const generateObject = (arr) => {
  for (const key of arr) {
    if (obj[key]) {
      // Already populated; keep the existing list.
      continue;
    }
    obj[key] = data.filter((entry) => entry.includes(key));
  }
  console.log(obj);
};
// Run the grouping on the sample dataset; generateObject logs the result.
refineArr(data);
/* CSS rule, not JavaScript: sets `top: 0` and forces `max-height: 100%` on
   elements with class `as-console-wrapper` (presumably the snippet runner's
   console panel; confirm). */
.as-console-wrapper { top: 0; max-height: 100% !important; }
First, I tried to refine the original array so that it only contains the keys that will end up in the result. As I looped through data, I split the string at each index i into an array of characters (and did the same for the string at i+1, so long as that item was defined). Then I passed those two arrays to checkArrays, where I compare them character by character and return the index at which they stop being the same.
Example: apple ipad mini ...
as an array is
["a", "p", "p", "l", "e", " ", "i", "p", "a", "d", " ", "m", "i", "n", "i", ...]
and apple ipad air...
as an array is
["a", "p", "p", "l", "e", " ", "i", "p", "a", "d", " ", "a", "i", "r",...]
The index at which they stop being the same is 11.
Then I need to find, for both arrays, the position of a space at or after that difference index, because I want to make sure I slice each array at a whole word. So I look for an element that is a space AND has an index greater than or equal to the difference index (when there are several such spaces, the code ends up using the last one).
I do this for both arrays as the indices will be different.
For ["a", "p", "p", "l", "e", " ", "i", "p", "a", "d", " ", "m", "i", "n", "i", ...] it is 15.
For ["a", "p", "p", "l", "e", " ", "i", "p", "a", "d", " ", "a", "i", "r", ...] it is 14.
There was a situation where I'd end up with something like apple ipad m being pushed into arr after apple ipad mini, but that was because I needed to test both indices for each array (in the first loop iteration apple ipad mini ... was the second string, but in the second iteration it was the first string). I compensated for this with these lines:
if (!arr.includes(one.slice(0, index1).join("").trim())
&& !arr.includes(one.slice(0, index2).join("").trim())){
arr.push(one.slice(0, index1).join("").trim())
}
and
if (!arr.includes(two.slice(0, index2).join("").trim())
&& !arr.includes(two.slice(0, index1).join("").trim())){
arr.push(two.slice(0, index2).join("").trim())
}
After we're done with that, I build a new array using var newArr = [... new Set(arr)] to ensure that any repeated values are omitted. At this point you end up with an array like ["apple iphone", "apple ipad mini", "apple ipad air", "panasonic", "samsung s2", "samsung s3"]. These will be the keys in our object.
Finally, generateObject loops through the new array and assigns to each key a filtered collection of the items that include that key. So for the key apple ipad mini you would get a filtered collection of ["apple ipad mini 32gb", "apple ipad mini 64gb"].
Again, I think this solution needs refinement for efficiency's sake, but I think it could at least help get you started logic-wise.