I am trying to create a search function in mysql that searches simular unicode character,
So for example when I search for a city 'Amsterdam' by search-string 'amster', I want the following all to return the same results:
> - select * from table where city like '%amster%' (latin)
> - select * from table where city like '%ΆμσtER%' (greek mixed with latin combined)
> - select * from table where city like '%Амстер%' (full unicode)
> - select * from table where city like '%àмstÉr%' (with accents)
I tried to create a mysql function that translates all characters into their ASCII counterparts. For example 'σ'(greek), 'с'(cyrillic), š, ś, Ş all become a latin 's'. The function works fine but is so slow that it renders large searches useless. The function is :
CREATE FUNCTION englishfyString(INPUT varchar(250))
RETURNS VARCHAR(250)
BEGIN
DECLARE OUTPUT VARCHAR(250);
DECLARE len1 INT(4) DEFAULT 0;
DECLARE pos1 INT(4) DEFAULT 1;
DECLARE letter1 CHAR(4);
DECLARE len2 INT(4) DEFAULT 0;
DECLARE pos2 INT(4) DEFAULT 0;
DECLARE letter2 CHAR(4);
DECLARE string_a CHAR(250);
DECLARE string_ae CHAR(250);
DECLARE string_b CHAR(250);
DECLARE string_c CHAR(250);
DECLARE string_ch CHAR(250);
DECLARE string_d CHAR(250);
DECLARE string_dz CHAR(250);
DECLARE string_e CHAR(250);
DECLARE string_f CHAR(250);
DECLARE string_g CHAR(250);
DECLARE string_h CHAR(250);
DECLARE string_i CHAR(250);
DECLARE string_j CHAR(250);
DECLARE string_k CHAR(250);
DECLARE string_l CHAR(250);
DECLARE string_ll CHAR(250);
DECLARE string_m CHAR(250);
DECLARE string_n CHAR(250);
DECLARE string_nj CHAR(250);
DECLARE string_ny CHAR(250);
DECLARE string_o CHAR(250);
DECLARE string_p CHAR(250);
DECLARE string_ps CHAR(250);
DECLARE string_r CHAR(250);
DECLARE string_s CHAR(250);
DECLARE string_sh CHAR(250);
DECLARE string_ss CHAR(250);
DECLARE string_sz CHAR(250);
DECLARE string_t CHAR(250);
DECLARE string_th CHAR(250);
DECLARE string_ts CHAR(250);
DECLARE string_u CHAR(250);
DECLARE string_v CHAR(250);
DECLARE string_w CHAR(250);
DECLARE string_x CHAR(250);
DECLARE string_y CHAR(250);
DECLARE string_ya CHAR(250);
DECLARE string_ye CHAR(250);
DECLARE string_yo CHAR(250);
DECLARE string_yu CHAR(250);
DECLARE string_z CHAR(250);
DECLARE string_null CHAR(250);
SET OUTPUT = '';
SET INPUT = TRIM(LOWER(INPUT));
SET len1 = CHAR_LENGTH(INPUT);
SET string_null= FROM_BASE64('0YzRig==');
SET string_a = FROM_BASE64('0LDOsc6syZnDocOgw6LHjsSDw6PhuqPIp+G6ocOkw6XhuIHEgcSF4baP4rGlyIHhuqXhuqfhuqvhuqnhuq3huq/hurHhurXhurPhurfHu8ehx5/IgciDyZHhtIDJkA==');
SET string_ae = FROM_BASE64('w6bHvcej');
SET string_b = FROM_BASE64('0LHOsmLhuIPhuIXhuIfGgMmTxoPhtoDIuA==');
SET string_c = FROM_BASE64('xIfEicSNxItjzITDp+G4ici8xojhtIQ=');
SET string_ch = FROM_BASE64('0YfRhc+H0Zs=');
SET string_d = FROM_BASE64('0LTOtMSRxI/huIvhuJHhuI3huJPhuI/EkcOwZMymyZbJl8aM4bWt4baB4baR');
SET string_dz = FROM_BASE64('0Z8=');
SET string_e = FROM_BASE64('w6nDqMOq4biZxJvEleG6veG4m+G6u8SXw6vEk8ipxJnhtpLJh8iF4bq/4buB4buF4buD4bid4biX4biVyIfhurnhu4fFk+G1q9C10Y3OtdGUzq3EleKxuOKCrA==');
SET string_f = FROM_BASE64('0YTPhuG4n8aS4bWu4baC');
SET string_g = FROM_BASE64('0LPOs9KRx7XEn8Sdx6fEocSj4bihx6XJoOG2g8mixYs=');
SET string_h = FROM_BASE64('xKXIn+G4p+G4o+G4qeG4peG4q+G6lsSn4rGoyaY=');
SET string_i = FROM_BASE64('0LjRi9GWzrfOrsOtw6zErcOux5DDr+G4r8SpxK/Eq+G7iciJyIvhu4vhuK3JqOG1u+G2lmnEscmqzq/OuQ==');
SET string_j = FROM_BASE64('0ZLRmMS1yYnHsMi3yp3Jn8qE4bSK');
SET string_k = FROM_BASE64('0LrOusS34bixx6nEt+G4s+G4tcaZ4rGq4baE6p2B');
SET string_l = FROM_BASE64('0LvOu8S6xL7EvOG4t+G4ueG4veG4u8WCxYDGmuKxoQ==');
SET string_ll = FROM_BASE64('0Zk=');
SET string_m = FROM_BASE64('0LzOvOG4v+G5geG5g+G1r+G2hsmx');
SET string_n = FROM_BASE64('0L3OvcWIw7HFhMe5xYjDseG5hcWG4bmH4bmL4bmJbsyIybLGnuG1sOG2h8mzyLU=');
SET string_nj = FROM_BASE64('0Zo=');
SET string_ny = FROM_BASE64('xYQ=');
SET string_o = FROM_BASE64('0L7Ov8+Jz47Ds8OyxY/DtOG7keG7k+G7l+G7lceSw7bIq8WRw7XhuY3huY/IrcivyLHDuMe/x6vHrcWN4bmT4bmR4buPyI3Ij8ah4bub4bud4buh4buf4buj4buN4buZybXisbrhtI/PjA==');
SET string_p = FROM_BASE64('0L/PgOG5leG5l+G1vcalcMyD4bWx4baI4bSY');
SET string_ps = FROM_BASE64('z4g=');
SET string_r = FROM_BASE64('0YDPgcWZxZXFmeG5mcWXyJHIk+G5m+G5neG5n8mN');
SET string_s = FROM_BASE64('0YHPg8+Cxb/Fm+G5pcWdxaHhuafhuaHhupvFn+G5o+G5qciZc8yp4bW04baKyoLIv+qcsQ==');
SET string_sh = FROM_BASE64('0YjRiQ==');
SET string_ss = FROM_BASE64('w58=');
SET string_sz = FROM_BASE64('0LY=');
SET string_t = FROM_BASE64('0YLPhMibxaXhuavFo+G5rcib4bmx4bmvxafisabGrcqI4bqX4bW1xqvItg==');
SET string_th = FROM_BASE64('zrjDkMO+w54=');
SET string_ts = FROM_BASE64('0YY=');
SET string_u = FROM_BASE64('0YPPhcO6w7nFrcO7x5TFr8O8x5jHnMeax5bFscWp4bm5xbPFq+G5u+G7p8iVyJfGsOG7qeG7q+G7r+G7reG7seG7peG5s+G5t+G5tcqJ4bW+4baZ4bScz40=');
SET string_v = FROM_BASE64('0LLhub3hub/Ki+G2jOKxseKxtOG0oNCy');
SET string_w = FROM_BASE64('xYLhuoPhuoHFteG6heG6h+G6ieG6mOKxs+G0oQ==');
SET string_x = FROM_BASE64('zr4=');
SET string_y = FROM_BASE64('0LnDvcO94buzxbfhupnDv+G7ueG6j8iz4bu34bu1yY/GtMqP');
SET string_ya = FROM_BASE64('0Y8=');
SET string_ye = FROM_BASE64('0Zc=');
SET string_yo = FROM_BASE64('0ZE=');
SET string_yu = FROM_BASE64('0Y4=');
SET string_z = FROM_BASE64('0LfOtsW8xbrhupHFvsW84bqT4bqVxrbIpeKxrOG1tuG2jsqQypHJgOG0og==');
WHILE(pos1 <= len1) DO
SET letter1 = SUBSTRING(INPUT, pos1, 1);
CASE
WHEN length(trim(letter1)) > 0 THEN
set letter1 =if(LOCATE(letter1,string_null)>0,'',letter1);
set letter1 =if(LOCATE(letter1,string_a)>0,'a',letter1);
set letter1 =if(LOCATE(letter1,string_ae)>0,'ae',letter1);
set letter1 =if(LOCATE(letter1,string_b)>0,'b',letter1);
set letter1 =if(LOCATE(letter1,string_c)>0,'c',letter1);
set letter1 =if(LOCATE(letter1,string_ch)>0,'ch',letter1);
set letter1 =if(LOCATE(letter1,string_d)>0,'d',letter1);
set letter1 =if(LOCATE(letter1,string_dz)>0,'dz',letter1);
set letter1 =if(LOCATE(letter1,string_e)>0,'e',letter1);
set letter1 =if(LOCATE(letter1,string_f)>0,'f',letter1);
set letter1 =if(LOCATE(letter1,string_g)>0,'g',letter1);
set letter1 =if(LOCATE(letter1,string_h)>0,'h',letter1);
set letter1 =if(LOCATE(letter1,string_i)>0,'i',letter1);
set letter1 =if(LOCATE(letter1,string_j)>0,'j',letter1);
set letter1 =if(LOCATE(letter1,string_k)>0,'k',letter1);
set letter1 =if(LOCATE(letter1,string_l)>0,'l',letter1);
set letter1 =if(LOCATE(letter1,string_ll)>0,'ll',letter1);
set letter1 =if(LOCATE(letter1,string_m)>0,'m',letter1);
set letter1 =if(LOCATE(letter1,string_n)>0,'n',letter1);
set letter1 =if(LOCATE(letter1,string_nj)>0,'nj',letter1);
set letter1 =if(LOCATE(letter1,string_ny)>0,'ny',letter1);
set letter1 =if(LOCATE(letter1,string_o)>0,'o',letter1);
set letter1 =if(LOCATE(letter1,string_p)>0,'p',letter1);
set letter1 =if(LOCATE(letter1,string_ps)>0,'ps',letter1);
set letter1 =if(LOCATE(letter1,string_r)>0,'r',letter1);
set letter1 =if(LOCATE(letter1,string_s)>0,'s',letter1);
set letter1 =if(LOCATE(letter1,string_sh)>0,'sh',letter1);
set letter1 =if(LOCATE(letter1,string_ss)>0,'ss',letter1);
set letter1 =if(LOCATE(letter1,string_sz)>0,'sz',letter1);
set letter1 =if(LOCATE(letter1,string_t)>0,'t',letter1);
set letter1 =if(LOCATE(letter1,string_th)>0,'th',letter1);
set letter1 =if(LOCATE(letter1,string_ts)>0,'ts',letter1);
set letter1 =if(LOCATE(letter1,string_u)>0,'u',letter1);
set letter1 =if(LOCATE(letter1,string_v)>0,'v',letter1);
set letter1 =if(LOCATE(letter1,string_w)>0,'w',letter1);
set letter1 =if(LOCATE(letter1,string_x)>0,'x',letter1);
set letter1 =if(LOCATE(letter1,string_y)>0,'y',letter1);
set letter1 =if(LOCATE(letter1,string_ya)>0,'ya',letter1);
set letter1 =if(LOCATE(letter1,string_ye)>0,'ye',letter1);
set letter1 =if(LOCATE(letter1,string_yo)>0,'yo',letter1);
set letter1 =if(LOCATE(letter1,string_yu)>0,'yu',letter1);
set letter1 =if(LOCATE(letter1,string_z)>0,'z',letter1);
ELSE
set letter1 = '';
END CASE;
SET pos1 = pos1 + 1;
SET OUTPUT = concat(OUTPUT,letter1,'');
END WHILE;
RETURN (OUTPUT);
END
Is there a correct way to perform a search on similar characters in mysql? If not, is there a way to increase the performance of the function?
thank you