I'd go with the three table setup like so
CREATE TABLE languages
(
`id` int not null auto_increment primary key,
`language` varchar(32) unique
);
CREATE TABLE users
(
`id` int not null auto_increment primary key,
`name` varchar(32)
);
CREATE TABLE user_language
(
`user_id` int,
`language_id` int,
primary key (user_id, language_id)
);
IMHO if you're not having millions of users and all possible languages and looking for flexibility and not fighting for miliseconds, especially if you check for more than 2 languages at a time, you can achieve it by using MAX()
or SUM()
aggregates in HAVING
clause.
Here some sample queries for you:
-- Speaks both French AND Spanish
SELECT u.name
FROM user_language ul JOIN languages l
ON ul.language_id = l.id JOIN users u
ON ul.user_id = u.id
GROUP BY u.id
HAVING MAX(l.language = 'french') = 1
AND MAX(l.language = 'spanish') = 1;
Output:
| NAME |
|------|
| John |
| Jack |
-- Speaks both French OR Spanish
SELECT u.name
FROM user_language ul JOIN languages l
ON ul.language_id = l.id JOIN users u
ON ul.user_id = u.id
GROUP BY u.id
HAVING MAX(l.language = 'french') +
MAX(l.language = 'spanish') > 0;
Output:
| NAME |
|------|
| John |
| Jack |
| Jill |
-- Speaks any language French OR Spanish BUT NOT English
SELECT u.name
FROM user_language ul JOIN languages l
ON ul.language_id = l.id JOIN users u
ON ul.user_id = u.id
GROUP BY u.id
HAVING MAX(l.language = 'french') +
MAX(l.language = 'spanish') > 0
AND MAX(l.language = 'english') = 0;
Output:
| NAME |
|------|
| Jack |
-- Speaks any language but English
SELECT u.name
FROM user_language ul JOIN languages l
ON ul.language_id = l.id JOIN users u
ON ul.user_id = u.id
GROUP BY u.id
HAVING MAX(l.language = 'english') = 0;
Output:
| NAME |
|------|
| Jack |
-- What languages does Jack speak
SELECT l.language
FROM user_language ul JOIN languages l
ON ul.language_id = l.id JOIN users u
ON ul.user_id = u.id
WHERE u.name = 'Jack';
Output:
| LANGUAGE |
|----------|
| french |
| spanish |
-- How many languages do users speak
SELECT u.name, COUNT(*) no_of_languages
FROM users u LEFT JOIN user_language ul
ON u.id = ul.user_id
GROUP BY u.id;
Output:
| NAME | NO_OF_LANGUAGES |
|------|-----------------|
| John | 3 |
| Jack | 2 |
| Jill | 2 |
-- How many users do speak a particular language
SELECT l.language, COUNT(*) no_of_users
FROM languages l LEFT JOIN user_language ul
ON l.id = ul.language_id
GROUP BY l.id;
Output:
| LANGUAGE | NO_OF_USERS |
|----------|-------------|
| english | 2 |
| french | 2 |
| spanish | 3 |
Now in a real application you most likely won't be using language or user names but rather deal with ids that are coming from your UI (drop down boxes or whatever). Therefore you'll be able to eliminate one join from the equation and your query would look rather like this
-- Speaks both French AND Spanish with Ids
SELECT u.name
FROM user_language ul JOIN users u
ON ul.user_id = u.id
GROUP BY u.id
HAVING MAX(ul.language_id = 2) = 1
AND MAX(ul.language_id = 3) = 1;
Here is SQLFiddle demo