As others and I have said in the comments, your approach is brute force, which in this case is quadratic in the input size. There are several optimisation possibilities: for binary values (i.e. booleans), splitting the array into categories is trivial; numeric values like age can be clustered, e.g. into ranges; and you should definitely adopt early abort, as mentioned by mm759 (a sketch follows the reference code below). TL;DR: there is a table and a conclusion at the bottom.
Consider the bruteforce approach (for reference):
// The result is a list of matches [[candidate.id, match.id (or -1)]]
function bruteforce(arr) {
  var matches = [];
  for (var i = 0; i < arr.length; ++i) {
    var candidate = arr[i], num = 12;
    var isCandidate = !candidate.isSingle && candidate.isAvailable && candidate.sex == 1;
    var cSex = candidate.sex;
    var cSexPref = candidate.sexPref;
    var cAgeMin = candidate.age - num;
    var cAgeMax = candidate.age + num;
    var result = !isCandidate ? -1 : arr.reduce(function (p, c, k, a) {
      return k != i &&
             c.sex == cSexPref &&
             c.sexPref == cSex &&
             !c.isSingle && c.isAvailable &&
             c.age <= cAgeMax &&
             c.age >= cAgeMin ? k : p;
    }, -1);
    if (isCandidate)
      matches.push([i, result]);
  }
  return matches;
}
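Note that reduce always walks the whole array, even after a match has been found. The early-abort idea mentioned above replaces it with a plain loop that stops at the first hit; a minimal sketch of that inner search (same conditions as the reduce, findFirstMatch is just an illustrative name):

function findFirstMatch(arr, i, cSex, cSexPref, cAgeMin, cAgeMax) {
  // plain loop instead of reduce so we can stop at the first hit
  for (var k = 0; k < arr.length; ++k) {
    var c = arr[k];
    if (k != i &&
        c.sex == cSexPref &&
        c.sexPref == cSex &&
        !c.isSingle && c.isAvailable &&
        c.age <= cAgeMax &&
        c.age >= cAgeMin)
      return k; // early abort: no need to scan the rest
  }
  return -1;
}

The reduce keeps the last match it encounters while the loop returns the first one; if any match is acceptable, that difference doesn't matter, and on average the loop scans far fewer entries.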
The category approach might look like this:
function useTheCategory(arr) {
  // preprocessing the data
  var wmNonsingleAvailables = [];
  var wwNonsingleAvailables = [];
  var mwNonsingleAvailables = [];
  var mmNonsingleAvailables = [];
  // split the data into categories
  arr.forEach(function (c) {
    if (!c.isSingle && c.isAvailable) {
      if (c.sex == 0) {
        if (c.sexPref == 1)
          wmNonsingleAvailables.push(c);
        else
          wwNonsingleAvailables.push(c);
      } else {
        if (c.sexPref == 0)
          mwNonsingleAvailables.push(c);
        else
          mmNonsingleAvailables.push(c);
      }
    }
  });
  var matches = [];
  for (var i = 0; i < arr.length; ++i) {
    var candidate = arr[i], num = 12;
    var isCandidate = !candidate.isSingle && candidate.isAvailable && candidate.sex == 1;
    var cSex = candidate.sex;
    var cSexPref = candidate.sexPref;
    var cAgeMin = candidate.age - num;
    var cAgeMax = candidate.age + num;
    if (isCandidate) {
      var category = null;
      // find the relevant category (a single one in this case);
      // a more complex approach/split might include multiple categories here
      if (cSex == 0) {
        if (cSexPref == 1)
          category = mwNonsingleAvailables;
        else if (cSexPref == 0)
          category = wwNonsingleAvailables;
      } else if (cSex == 1) {
        if (cSexPref == 0)
          category = wmNonsingleAvailables;
        else if (cSexPref == 1)
          category = mmNonsingleAvailables;
      }
      var result = -1;
      if (category == null) {
        // always handle the error case...
        console.log("logic error: missing category!");
        console.log("candidate: " + JSON.stringify(candidate));
      } else {
        // the tests for matching sex/single/availability are leftovers and not strictly required;
        // they are kept here to show that the reduce is not the culprit of your slowdown
        var match = category.reduce(function (p, c, k, a) {
          return c.id != i &&
                 c.sex == cSexPref &&
                 c.sexPref == cSex &&
                 !c.isSingle && c.isAvailable &&
                 c.age <= cAgeMax &&
                 c.age >= cAgeMin ? k : p;
        }, -1);
        // translate the category index back to an id in arr
        if (match != -1)
          result = category[match].id;
      }
      matches.push([i, result]);
    }
  }
  return matches;
}
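The four explicit arrays work because sex and sexPref are binary. If more attributes join the split, hand-writing one variable per combination stops scaling; one possible generalisation (a sketch; categoryKey and buildCategories are illustrative helpers, not part of the code above) is to key a plain object by the combined attribute values:

function categoryKey(sex, sexPref) {
  return sex + ":" + sexPref;
}

function buildCategories(arr) {
  var categories = {};
  arr.forEach(function (c) {
    if (c.isSingle || !c.isAvailable)
      return; // only non-single, available entries can ever match
    var key = categoryKey(c.sex, c.sexPref);
    (categories[key] = categories[key] || []).push(c);
  });
  return categories;
}

// a candidate then looks up the mirrored key:
// var category = categories[categoryKey(cSexPref, cSex)] || [];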
And the age range bucket approach might look like this:
function useAgeRange(arr) {
  // preprocessing the data
  var ranges = [1, 2, 3, 4, 5]; // find appropriate ranges to spread the entries evenly (analyse your data, more below...)
  var ageBuckets = [];
  // find the range of age values
  var ageRange = arr.length == 0 ? [0, 0] : arr.reduce(function (p, c) {
    var min = c.age < p[0] ? c.age : p[0];
    var max = c.age > p[1] ? c.age : p[1];
    return [min, max];
  }, [arr[0].age, arr[0].age]);
  // build the buckets (floor for nicer values)
  for (var age = Math.floor(ageRange[0]), maxAge = ageRange[1], step = 0; age <= maxAge; age += step) {
    // update step size
    if (step == 0)
      step = ranges[0];
    else
      step = ranges[Math.min(ranges.length - 1, ranges.indexOf(step) + 1)];
    ageBuckets.push({
      nextAge: age + step,
      bucket: [],
    });
  }
  // smallest i with age < ageBuckets[i].nextAge, -1 if age lies past the last bucket;
  // insertion and lookup must use the same (monotonic) mapping
  function findBucketIndex(age) {
    for (var i = 0, len = ageBuckets.length; i < len; ++i)
      if (age < ageBuckets[i].nextAge)
        return i;
    return -1;
  }
  arr.forEach(function (c) {
    ageBuckets[findBucketIndex(c.age)].bucket.push(c);
  });
  var matches = [];
  for (var i = 0; i < arr.length; ++i) {
    var candidate = arr[i], num = 12;
    var isCandidate = !candidate.isSingle && candidate.isAvailable && candidate.sex == 1;
    var cSex = candidate.sex;
    var cSexPref = candidate.sexPref;
    var cAgeMin = candidate.age - num;
    var cAgeMax = candidate.age + num;
    if (isCandidate) {
      // find the range of buckets intersecting [cAgeMin, cAgeMax]
      var startBucket = findBucketIndex(cAgeMin);
      var endBucket = findBucketIndex(cAgeMax);
      if (startBucket < 0) startBucket = 0;
      if (endBucket < 0) endBucket = ageBuckets.length - 1;
      var result = -1;
      // now only search those candidate buckets
      for (var b = startBucket; b <= endBucket; ++b) {
        var bucket = ageBuckets[b].bucket;
        var match = bucket.reduce(function (p, c, k, a) {
          return c.id != i &&
                 c.sex == cSexPref &&
                 c.sexPref == cSex &&
                 !c.isSingle && c.isAvailable &&
                 c.age <= cAgeMax &&
                 c.age >= cAgeMin ? k : p;
        }, -1);
        // translate the bucket index back to an id in arr
        if (match >= 0)
          result = bucket[match].id;
      }
      matches.push([i, result]);
    }
  }
  return matches;
}
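For completeness, a small usage sketch with the data shape all three functions assume; note that id must equal the array index, which is what the c.id != i checks rely on:

// sample data: id equals the array index
var people = [
  { id: 0, sex: 1, sexPref: 0, age: 25, isSingle: false, isAvailable: true },
  { id: 1, sex: 0, sexPref: 1, age: 28, isSingle: false, isAvailable: true },
  { id: 2, sex: 1, sexPref: 0, age: 60, isSingle: false, isAvailable: true },
];
console.log(bruteforce(people));     // [[0, 1], [2, -1]]
console.log(useTheCategory(people)); // same pairs via category lookup
console.log(useAgeRange(people));    // same pairs via bucket search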
I created a benchmark on jsfiddle showing the improvements of the two approaches. Both are effective on their own, even including the preprocessing (values will differ across systems and browsers):
      N    Search space    Brute force    Categories    Range buckets
           (#matches)          (relative timing values)
  20000         2500            200             34            140
  40000         5000           1400            180            556
  80000        10000           5335            659           2582
 160000        20000          17000           2450          16900
Analysing your data to decide which approach is appropriate is everything: my benchmark generates an exponential distribution (ages 18-20 make up 28% of the data points, 21-32 another 27%, 33-52 another 27%, and 53-77 the remaining ~18%). The range approach doesn't deal well with this distribution, as the timings above show (measured with a fixed num = 12 years and 14 buckets), because for most queries an age range of 24 years spans 55% of the data.
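If your data is that skewed, the fixed step sizes in ranges are a poor fit; one remedy (a sketch, quantileBoundaries is an illustrative helper, not part of the benchmark) is to derive equal-frequency boundaries from the data itself, so every bucket holds roughly the same number of entries:

function quantileBoundaries(arr, bucketCount) {
  if (arr.length == 0)
    return [];
  var ages = arr.map(function (c) { return c.age; })
                .sort(function (a, b) { return a - b; });
  var boundaries = [];
  for (var i = 1; i <= bucketCount; ++i) {
    // last age that should still fall into bucket i-1
    var last = ages[Math.ceil(i * ages.length / bucketCount) - 1];
    // +1 because the buckets are half-open (age < boundary);
    // with many duplicate ages, equal boundaries should be deduplicated
    boundaries.push(last + 1);
  }
  return boundaries;
}

These boundaries play the role of nextAge above and implement the "spread the entries evenly" advice from the comment in useAgeRange. They limit wasted scanning from oversized buckets, though no bucketing scheme can shrink the 55% of entries that genuinely fall inside such a wide query range.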