According to this SO answer, we have:
SELECT DISTINCT s.id
FROM students s
LEFT JOIN grades g ON g.student_id = s.id
WHERE g.student_id IS NOT NULL
Which uses a LEFT JOIN
. But it can be implemented with a "semijoin" like this:
SELECT s.id
FROM students s
WHERE EXISTS (SELECT 1 FROM grades g
WHERE g.student_id = s.id)
How do you write these two queries in JavaScript, given two collections students
and grades
? The first one I think is implemented with a "nested loop join" (or could be optimized with an index join data structure, but I don't know how to implement that just yet).
const query1Results = nestedJoin(students, grades, (s, g) => {
return Boolean(g.student_id && g.student_id === s.id)
}).map(([s]) => s.id)
function nestedJoin(R, S, compare) {
const out = []
for (const r of R) {
for (const s of S) {
if (compare(r, s)) {
out.push([ r, s ])
}
}
}
return out
}
How would you do the second one though?
const query2Results = semiJoin(students, grades, (s, g) => {
return g.student_id === s.id
}).map(([s]) => s.id)
function semiJoin(R, S) {
const out = []
for (const r of R) {
for (const s of S) {
if (compare(r, s)) {
out.push([ r, s ])
}
}
}
return out
}
Basically, I implemented it the same way.
const students = [
{ id: 1 },
{ id: 2 },
{ id: 3 },
{ id: 4 },
{ id: 5 },
{ id: 6 },
]
const grades = [
{ id: 1, student_id: 1 },
{ id: 2, student_id: 1 },
{ id: 3, student_id: 1 },
{ id: 4, student_id: 1 },
{ id: 5, student_id: 1 },
{ id: 6, student_id: 1 },
{ id: 10, student_id: 2 },
{ id: 20, student_id: 2 },
{ id: 30, student_id: 2 },
{ id: 40, student_id: 2 },
{ id: 50, student_id: 2 },
{ id: 60, student_id: 2 },
{ id: 100, student_id: 3 },
{ id: 200, student_id: 3 },
{ id: 300, student_id: 3 },
{ id: 400, student_id: 3 },
{ id: 500, student_id: 3 },
{ id: 600, student_id: 3 },
]
const expectedOutput = [
1, 2, 3
]