is there any function in mysql similar to php's iconv to convert accented utf8 characters to ascii string?
I want SQL query which looks like this:
SELECT some_func("LĄBĄS VĄKĄRŪŠ")
and returns labas vakarus
is there any function in mysql similar to php's iconv to convert accented utf8 characters to ascii string?
I want SQL query which looks like this:
SELECT some_func("LĄBĄS VĄKĄRŪŠ")
and returns labas vakarus
I found this MySQL transliteration function which may be what you're looking for.
You can read more about it at the associated blog post from the author.
My advice would be not to trust both MySQL and PHP iconv()
transliterations to produce the same results as they are both guessworks. It would be best if you simply stick to one, either MySQL or PHP's side only.
Function for creating search engine friendly URL addresses in MySQL.
DELIMITER ;;
CREATE FUNCTION `cool_url` (`original` varchar(512) CHARSET utf8mb4 COLLATE utf8mb4_bin) RETURNS varchar(512) CHARSET utf8mb4
BEGIN
DECLARE translit VARCHAR(512) DEFAULT '';
DECLARE len INT(3) DEFAULT 0;
DECLARE pos INT(3) DEFAULT 1;
DECLARE letter VARCHAR(2);
SET original = TRIM(LOWER(original));
SET len = CHAR_LENGTH(original);
WHILE (pos <= len) DO
# get one letter from original
SET letter = SUBSTRING(original, pos, 1) COLLATE utf8mb4_bin;
CASE TRUE
# basic chars
WHEN letter IN ('a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z','0','1','2','3','4','5','6','7','8','9' COLLATE utf8mb4_bin) THEN SET letter = letter;
# chars with diacritics and azbuka
WHEN letter IN('á','à','â','ä','å','ā','ą','ă' COLLATE utf8mb4_bin) THEN SET letter = 'a';
WHEN letter IN('б' COLLATE utf8mb4_bin) THEN SET letter = 'b';
WHEN letter IN('č','ć','ç','ć' COLLATE utf8mb4_bin) THEN SET letter = 'c';
WHEN letter IN('ď','đ','д' COLLATE utf8mb4_bin) THEN SET letter = 'd';
WHEN letter IN('é','ě','ë','ё','è','ê','ē','ę','ь','э','œ' COLLATE utf8mb4_bin) THEN SET letter = 'e';
WHEN letter IN('ф','ѳ' COLLATE utf8mb4_bin) THEN SET letter = 'f';
WHEN letter IN('ģ','ğ','г' COLLATE utf8mb4_bin) THEN SET letter = 'g';
WHEN letter IN('í','î','ï','ī','î','і','и','й' COLLATE utf8mb4_bin) THEN SET letter = 'i';
WHEN letter IN('ķ','к' COLLATE utf8mb4_bin) THEN SET letter = 'k';
WHEN letter IN('ľ','ĺ','ļ','ł','л' COLLATE utf8mb4_bin) THEN SET letter = 'l';
WHEN letter IN('м' COLLATE utf8mb4_bin) THEN SET letter = 'm';
WHEN letter IN('ň','ņ','ń','ñ','н' COLLATE utf8mb4_bin) THEN SET letter = 'n';
WHEN letter IN('ó','ö','ø','õ','ô','ő','ơ','о' COLLATE utf8mb4_bin) THEN SET letter = 'o';
WHEN letter IN('п' COLLATE utf8mb4_bin) THEN SET letter = 'p';
WHEN letter IN('ŕ','ř','р' COLLATE utf8mb4_bin) THEN SET letter = 'r';
WHEN letter IN('š','ś','ș','ş','с','ß' COLLATE utf8mb4_bin) THEN SET letter = 's';
WHEN letter IN('ť','ț','т' COLLATE utf8mb4_bin) THEN SET letter = 't';
WHEN letter IN('ú','ů','ü','ù','û','ū','ű','ư','у' COLLATE utf8mb4_bin) THEN SET letter = 'u';
WHEN letter IN('в' COLLATE utf8mb4_bin) THEN SET letter = 'v';
WHEN letter IN('ý','ы','ѵ' COLLATE utf8mb4_bin) THEN SET letter = 'y';
WHEN letter IN('ž','ź','ż','з' COLLATE utf8mb4_bin) THEN SET letter = 'z';
WHEN letter = 'х' COLLATE utf8mb4_bin THEN SET letter = 'kh'; -- is not X
WHEN letter = 'ю' COLLATE utf8mb4_bin THEN SET letter = 'ju';
WHEN letter = 'я' COLLATE utf8mb4_bin THEN SET letter = 'ja';
WHEN letter = 'ж' COLLATE utf8mb4_bin THEN SET letter = 'zh';
WHEN letter = 'ч' COLLATE utf8mb4_bin THEN SET letter = 'ch';
WHEN letter = 'ш' COLLATE utf8mb4_bin THEN SET letter = 'sh';
WHEN letter = 'щ' COLLATE utf8mb4_bin THEN SET letter = 'shch';
WHEN letter = 'ѣ' COLLATE utf8mb4_bin THEN SET letter = 'ie';
WHEN letter = 'ц' COLLATE utf8mb4_bin THEN SET letter = 'ts';
WHEN letter = 'ъ' COLLATE utf8mb4_bin THEN SET letter = ''; -- hard sign
WHEN letter = 'ь' COLLATE utf8mb4_bin THEN SET letter = ''; -- soft sign
ELSE SET letter = '-';
END CASE;
# joining new string
SET translit = CONCAT(translit, letter);
SET pos = pos + 1;
END WHILE;
# replacing more dashes by one
WHILE (translit REGEXP '\-{2,}') DO
SET translit = REPLACE(translit, '--', '-');
END WHILE;
RETURN TRIM(BOTH '-' FROM translit);
END;;
DELIMITER ;
Example (test):
mysql> SELECT cool_url('BŒautiful day áàâäåāąă б čćçć ďđд éěëёèêēęьэœ фѳ ģğг íîïīîіий ķк ľĺļłл м ňņńñн óöøõôőơо п ŕřр šśșşсß ťțт úůüùûūűưу в ýыѵ žźżз х ю я ж ч ш щ ѣ ц ъ ь');
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| beautiful-day-aaaaaaaa-b-cccc-ddd-eeeeeeeeeee-ff-ggg-iiiiiiii-kk-lllll-m-nnnnn-oooooooo-p-rrr-ssssss-ttt-uuuuuuuuu-v-yyy-zzzz-kh-ju-ja-zh-ch-sh-sh-ie-ts-e |
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
1 row in set (0.01 sec)
I needed a dirty but quick solution to create several thousand of slugs for post tags in wp. So this is the code taken from the answers above that I modified for vowels and numbers. Hope it'll help someone someday as it help me today. :)
DELIMITER $$
DROP FUNCTION IF EXISTS `transliterate` $$
CREATE FUNCTION `transliterate` (original VARCHAR(512)) RETURNS VARCHAR(512)
BEGIN
DECLARE translit VARCHAR(512) DEFAULT '';
DECLARE len INT(3) DEFAULT 0;
DECLARE pos INT(3) DEFAULT 1;
DECLARE letter CHAR(2);
SET original = TRIM(LOWER(original));
SET len = CHAR_LENGTH(original);
WHILE (pos <= len) DO
SET letter = SUBSTRING(original, pos, 1);
CASE TRUE
WHEN letter IN('á','à','â','ä','å','ā','ą','ă') THEN SET letter = 'a';
WHEN letter IN('č','ć','ç','ć') THEN SET letter = 'c';
WHEN letter IN('ď','đ') THEN SET letter = 'd';
WHEN letter IN('é','ě','ë','è','ê','ē','ę') THEN SET letter = 'e';
WHEN letter IN('ģ','ğ') THEN SET letter = 'g';
WHEN letter IN('í','î','ï','ī','î') THEN SET letter = 'i';
WHEN letter IN('ķ') THEN SET letter = 'k';
WHEN letter IN('ľ','ĺ','ļ','ł') THEN SET letter = 'l';
WHEN letter IN('ň','ņ','ń','ñ') THEN SET letter = 'n';
WHEN letter IN('ó','ö','ø','õ','ô','ő','ơ') THEN SET letter = 'o';
WHEN letter IN('ŕ','ř') THEN SET letter = 'r';
WHEN letter IN('š','ś','ș','ş') THEN SET letter = 's';
WHEN letter IN('ť','ț') THEN SET letter = 't';
WHEN letter IN('ú','ů','ü','ù','û','ū','ű','ư') THEN SET letter = 'u';
WHEN letter IN('ý') THEN SET letter = 'y';
WHEN letter IN('ž','ź','ż') THEN SET letter = 'z';
WHEN letter = 'а' THEN SET letter = 'a';
WHEN letter = 'б' THEN SET letter = 'b';
WHEN letter = 'в' THEN SET letter = 'v';
WHEN letter = 'г' THEN SET letter = 'g';
WHEN letter = 'д' THEN SET letter = 'd';
WHEN letter = 'е' THEN SET letter = 'e';
WHEN letter = 'ё' THEN SET letter = 'e';
WHEN letter = 'ж' THEN SET letter = 'z';
WHEN letter = 'з' THEN SET letter = 'z';
WHEN letter = 'и' THEN SET letter = 'i';
WHEN letter = 'й' THEN SET letter = 'i';
WHEN letter = 'к' THEN SET letter = 'k';
WHEN letter = 'л' THEN SET letter = 'l';
WHEN letter = 'м' THEN SET letter = 'm';
WHEN letter = 'н' THEN SET letter = 'n';
WHEN letter = 'о' THEN SET letter = 'o';
WHEN letter = 'п' THEN SET letter = 'p';
WHEN letter = 'р' THEN SET letter = 'r';
WHEN letter = 'с' THEN SET letter = 's';
WHEN letter = 'т' THEN SET letter = 't';
WHEN letter = 'у' THEN SET letter = 'u';
WHEN letter = 'ф' THEN SET letter = 'f';
WHEN letter = 'х' THEN SET letter = 'ch';
WHEN letter = 'ц' THEN SET letter = 'c';
WHEN letter = 'ч' THEN SET letter = 'c';
WHEN letter = 'ш' THEN SET letter = 's';
WHEN letter = 'щ' THEN SET letter = 's';
WHEN letter = 'ъ' THEN SET letter = '';
WHEN letter = 'ы' THEN SET letter = 'y';
WHEN letter = 'ь' THEN SET letter = 'e';
WHEN letter = 'э' THEN SET letter = 'e';
WHEN letter = 'ю' THEN SET letter = 'ju';
WHEN letter = 'я' THEN SET letter = 'ja';
WHEN letter = '0' THEN SET letter = '0';
WHEN letter = '1' THEN SET letter = '1';
WHEN letter = '2' THEN SET letter = '2';
WHEN letter = '3' THEN SET letter = '3';
WHEN letter = '4' THEN SET letter = '4';
WHEN letter = '5' THEN SET letter = '5';
WHEN letter = '6' THEN SET letter = '6';
WHEN letter = '7' THEN SET letter = '7';
WHEN letter = '8' THEN SET letter = '8';
WHEN letter = '9' THEN SET letter = '9';
WHEN letter IN ('a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','x','y','z')
THEN SET letter = letter;
ELSE
SET letter = '-';
END CASE;
SET translit = CONCAT(translit, letter);
SET pos = pos + 1;
END WHILE;
WHILE (translit REGEXP '\-{2,}') DO
SET translit = REPLACE(translit, '--', '-');
END WHILE;
RETURN TRIM(BOTH '-' FROM translit);
END $$
DELIMITER ;
A modification on @sr9yar 's answer to include Greek characters:
DELIMITER $$
DROP FUNCTION IF EXISTS `transliterate` $$
CREATE FUNCTION `transliterate` (original VARCHAR(512)) RETURNS VARCHAR(512)
BEGIN
DECLARE translit VARCHAR(512) DEFAULT '';
DECLARE len INT(3) DEFAULT 0;
DECLARE pos INT(3) DEFAULT 1;
DECLARE letter CHAR(2);
SET original = TRIM(LOWER(original));
SET len = CHAR_LENGTH(original);
WHILE (pos <= len) DO
SET letter = SUBSTRING(original, pos, 1);
CASE TRUE
WHEN letter IN('á','à','â','ä','å','ā','ą','ă') THEN SET letter = 'a';
WHEN letter IN('č','ć','ç','ć') THEN SET letter = 'c';
WHEN letter IN('ď','đ') THEN SET letter = 'd';
WHEN letter IN('é','ě','ë','è','ê','ē','ę') THEN SET letter = 'e';
WHEN letter IN('ģ','ğ') THEN SET letter = 'g';
WHEN letter IN('í','î','ï','ī','î') THEN SET letter = 'i';
WHEN letter IN('ķ') THEN SET letter = 'k';
WHEN letter IN('ľ','ĺ','ļ','ł') THEN SET letter = 'l';
WHEN letter IN('ň','ņ','ń','ñ') THEN SET letter = 'n';
WHEN letter IN('ó','ö','ø','õ','ô','ő','ơ') THEN SET letter = 'o';
WHEN letter IN('ŕ','ř') THEN SET letter = 'r';
WHEN letter IN('š','ś','ș','ş') THEN SET letter = 's';
WHEN letter IN('ť','ț') THEN SET letter = 't';
WHEN letter IN('ú','ů','ü','ù','û','ū','ű','ư') THEN SET letter = 'u';
WHEN letter IN('ý') THEN SET letter = 'y';
WHEN letter IN('ž','ź','ż') THEN SET letter = 'z';
WHEN letter = 'а' THEN SET letter = 'a';
WHEN letter = 'б' THEN SET letter = 'b';
WHEN letter = 'в' THEN SET letter = 'v';
WHEN letter = 'г' THEN SET letter = 'g';
WHEN letter = 'д' THEN SET letter = 'd';
WHEN letter = 'е' THEN SET letter = 'e';
WHEN letter = 'ё' THEN SET letter = 'e';
WHEN letter = 'ж' THEN SET letter = 'z';
WHEN letter = 'з' THEN SET letter = 'z';
WHEN letter = 'и' THEN SET letter = 'i';
WHEN letter = 'й' THEN SET letter = 'i';
WHEN letter = 'к' THEN SET letter = 'k';
WHEN letter = 'л' THEN SET letter = 'l';
WHEN letter = 'м' THEN SET letter = 'm';
WHEN letter = 'н' THEN SET letter = 'n';
WHEN letter = 'о' THEN SET letter = 'o';
WHEN letter = 'п' THEN SET letter = 'p';
WHEN letter = 'р' THEN SET letter = 'r';
WHEN letter = 'с' THEN SET letter = 's';
WHEN letter = 'т' THEN SET letter = 't';
WHEN letter = 'у' THEN SET letter = 'u';
WHEN letter = 'ф' THEN SET letter = 'f';
WHEN letter = 'х' THEN SET letter = 'ch';
WHEN letter = 'ц' THEN SET letter = 'c';
WHEN letter = 'ч' THEN SET letter = 'c';
WHEN letter = 'ш' THEN SET letter = 's';
WHEN letter = 'щ' THEN SET letter = 's';
WHEN letter = 'ъ' THEN SET letter = '';
WHEN letter = 'ы' THEN SET letter = 'y';
WHEN letter = 'ь' THEN SET letter = 'e';
WHEN letter = 'э' THEN SET letter = 'e';
WHEN letter = 'ю' THEN SET letter = 'ju';
WHEN letter = 'я' THEN SET letter = 'ja';
WHEN letter IN ('α','ά') THEN SET letter = 'a';
WHEN letter IN ('β') THEN SET letter = 'b';
WHEN letter IN ('γ') THEN SET letter = 'g';
WHEN letter IN ('δ') THEN SET letter = 'd';
WHEN letter IN ('ε','έ') THEN SET letter = 'e';
WHEN letter IN ('ζ') THEN SET letter = 'z';
WHEN letter IN ('η','ή') THEN SET letter = 'h';
WHEN letter IN ('θ') THEN SET letter = 'th';
WHEN letter IN ('ι','ί','ϊ','ΐ') THEN SET letter = 'i';
WHEN letter IN ('κ') THEN SET letter = 'k';
WHEN letter IN ('λ') THEN SET letter = 'l';
WHEN letter IN ('μ') THEN SET letter = 'm';
WHEN letter IN ('ν') THEN SET letter = 'n';
WHEN letter IN ('ξ') THEN SET letter = 'j';
WHEN letter IN ('ο','ό') THEN SET letter = 'o';
WHEN letter IN ('π') THEN SET letter = 'p';
WHEN letter IN ('ρ') THEN SET letter = 'r';
WHEN letter IN ('σ','ς') THEN SET letter = 's';
WHEN letter IN ('τ') THEN SET letter = 't';
WHEN letter IN ('υ','ύ','ϋ','ΰ') THEN SET letter = 'y';
WHEN letter IN ('φ') THEN SET letter = 'f';
WHEN letter IN ('χ') THEN SET letter = 'x';
WHEN letter IN ('ψ') THEN SET letter = 'c';
WHEN letter IN ('ω','ώ') THEN SET letter = 'w';
WHEN letter = '0' THEN SET letter = '0';
WHEN letter = '1' THEN SET letter = '1';
WHEN letter = '2' THEN SET letter = '2';
WHEN letter = '3' THEN SET letter = '3';
WHEN letter = '4' THEN SET letter = '4';
WHEN letter = '5' THEN SET letter = '5';
WHEN letter = '6' THEN SET letter = '6';
WHEN letter = '7' THEN SET letter = '7';
WHEN letter = '8' THEN SET letter = '8';
WHEN letter = '9' THEN SET letter = '9';
WHEN letter IN ('a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','x','y','z')
THEN SET letter = letter;
ELSE
SET letter = '-';
END CASE;
SET translit = CONCAT(translit, letter);
SET pos = pos + 1;
END WHILE;
WHILE (translit REGEXP '\-{2,}') DO
SET translit = REPLACE(translit, '--', '-');
END WHILE;
RETURN TRIM(BOTH '-' FROM translit);
END
Be warned though, performance is abysmal. I get anything from 0,06s to 5s (!) per record, on a run of the mill shared host.