2

I have a function that removes accents from words written in languages other than English:

/**
 * Replaces accented Latin and Greek letters with their unaccented base letters.
 *
 * The lookup table below is written in UTF-8, so it can only match UTF-8 input.
 * Text that is not valid UTF-8 (typically ISO-8859-1 / Windows-1252, which is
 * what spreadsheet programs commonly write into CSV files) is therefore
 * re-encoded to UTF-8 first; otherwise a header such as "UBICACIÓN" read from
 * such a file would pass through unchanged. Input that already is valid UTF-8
 * is processed exactly as before.
 *
 * @param string|string[] $str Text to clean. Arrays are handed straight to
 *                             str_replace(); the encoding repair only applies
 *                             to plain strings.
 * @return string|string[] The text with every listed character replaced. String
 *                         results are UTF-8.
 */
public static function removeAccents($str) {
    // Byte sequences that are not valid UTF-8 cannot match the UTF-8 needles
    // below, so bring them into UTF-8 first. Windows-1252 is assumed because it
    // is a superset of the printable part of ISO-8859-1.
    if (is_string($str) && !mb_check_encoding($str, 'UTF-8')) {
        $str = mb_convert_encoding($str, 'UTF-8', 'Windows-1252');
    }

    // "\xC4\xB2" / "\xC4\xB3" are the IJ ligatures (U+0132 / U+0133) and the
    // trailing "\xC5\x89" is U+0149 (n preceded by apostrophe). They are spelled
    // as byte escapes so they cannot be flattened into plain ASCII look-alikes,
    // which would turn the entries into no-ops.
    $a = array('À', 'Á', 'Â', 'Ã', 'Ä', 'Å', 'Æ', 'Ç', 'È', 'É', 'Ê', 'Ë', 'Ì', 'Í', 'Î', 'Ï', 'Ð', 'Ñ', 'Ò', 'Ó', 'Ô', 'Õ', 'Ö', 'Ø', 'Ù', 'Ú', 'Û', 'Ü', 'Ý', 'ß', 'à', 'á', 'â', 'ã', 'ä', 'å', 'æ', 'ç', 'è', 'é', 'ê', 'ë', 'ì', 'í', 'î', 'ï', 'ñ', 'ò', 'ó', 'ô', 'õ', 'ö', 'ø', 'ù', 'ú', 'û', 'ü', 'ý', 'ÿ', 'Ā', 'ā', 'Ă', 'ă', 'Ą', 'ą', 'Ć', 'ć', 'Ĉ', 'ĉ', 'Ċ', 'ċ', 'Č', 'č', 'Ď', 'ď', 'Đ', 'đ', 'Ē', 'ē', 'Ĕ', 'ĕ', 'Ė', 'ė', 'Ę', 'ę', 'Ě', 'ě', 'Ĝ', 'ĝ', 'Ğ', 'ğ', 'Ġ', 'ġ', 'Ģ', 'ģ', 'Ĥ', 'ĥ', 'Ħ', 'ħ', 'Ĩ', 'ĩ', 'Ī', 'ī', 'Ĭ', 'ĭ', 'Į', 'į', 'İ', 'ı', "\xC4\xB2", "\xC4\xB3", 'Ĵ', 'ĵ', 'Ķ', 'ķ', 'Ĺ', 'ĺ', 'Ļ', 'ļ', 'Ľ', 'ľ', 'Ŀ', 'ŀ', 'Ł', 'ł', 'Ń', 'ń', 'Ņ', 'ņ', 'Ň', 'ň', 'ʼn', 'Ō', 'ō', 'Ŏ', 'ŏ', 'Ő', 'ő', 'Œ', 'œ', 'Ŕ', 'ŕ', 'Ŗ', 'ŗ', 'Ř', 'ř', 'Ś', 'ś', 'Ŝ', 'ŝ', 'Ş', 'ş', 'Š', 'š', 'Ţ', 'ţ', 'Ť', 'ť', 'Ŧ', 'ŧ', 'Ũ', 'ũ', 'Ū', 'ū', 'Ŭ', 'ŭ', 'Ů', 'ů', 'Ű', 'ű', 'Ų', 'ų', 'Ŵ', 'ŵ', 'Ŷ', 'ŷ', 'Ÿ', 'Ź', 'ź', 'Ż', 'ż', 'Ž', 'ž', 'ſ', 'ƒ', 'Ơ', 'ơ', 'Ư', 'ư', 'Ǎ', 'ǎ', 'Ǐ', 'ǐ', 'Ǒ', 'ǒ', 'Ǔ', 'ǔ', 'Ǖ', 'ǖ', 'Ǘ', 'ǘ', 'Ǚ', 'ǚ', 'Ǜ', 'ǜ', 'Ǻ', 'ǻ', 'Ǽ', 'ǽ', 'Ǿ', 'ǿ', 'Ά', 'ά', 'Έ', 'έ', 'Ό', 'ό', 'Ώ', 'ώ', 'Ί', 'ί', 'ϊ', 'ΐ', 'Ύ', 'ύ', 'ϋ', 'ΰ', 'Ή', 'ή', "\xC5\x89");
    // Replacement for the entry at the same position in $a.
    $b = array('A', 'A', 'A', 'A', 'A', 'A', 'AE', 'C', 'E', 'E', 'E', 'E', 'I', 'I', 'I', 'I', 'D', 'N', 'O', 'O', 'O', 'O', 'O', 'O', 'U', 'U', 'U', 'U', 'Y', 's', 'a', 'a', 'a', 'a', 'a', 'a', 'ae', 'c', 'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i', 'n', 'o', 'o', 'o', 'o', 'o', 'o', 'u', 'u', 'u', 'u', 'y', 'y', 'A', 'a', 'A', 'a', 'A', 'a', 'C', 'c', 'C', 'c', 'C', 'c', 'C', 'c', 'D', 'd', 'D', 'd', 'E', 'e', 'E', 'e', 'E', 'e', 'E', 'e', 'E', 'e', 'G', 'g', 'G', 'g', 'G', 'g', 'G', 'g', 'H', 'h', 'H', 'h', 'I', 'i', 'I', 'i', 'I', 'i', 'I', 'i', 'I', 'i', 'IJ', 'ij', 'J', 'j', 'K', 'k', 'L', 'l', 'L', 'l', 'L', 'l', 'L', 'l', 'l', 'l', 'N', 'n', 'N', 'n', 'N', 'n', 'n', 'O', 'o', 'O', 'o', 'O', 'o', 'OE', 'oe', 'R', 'r', 'R', 'r', 'R', 'r', 'S', 's', 'S', 's', 'S', 's', 'S', 's', 'T', 't', 'T', 't', 'T', 't', 'U', 'u', 'U', 'u', 'U', 'u', 'U', 'u', 'U', 'u', 'U', 'u', 'W', 'w', 'Y', 'y', 'Y', 'Z', 'z', 'Z', 'z', 'Z', 'z', 's', 'f', 'O', 'o', 'U', 'u', 'A', 'a', 'I', 'i', 'O', 'o', 'U', 'u', 'U', 'u', 'U', 'u', 'U', 'u', 'U', 'u', 'A', 'a', 'AE', 'ae', 'O', 'o', 'Α', 'α', 'Ε', 'ε', 'Ο', 'ο', 'Ω', 'ω', 'Ι', 'ι', 'ι', 'ι', 'Υ', 'υ', 'υ', 'υ', 'Η', 'η', 'n');
    return str_replace($a, $b, $str);
}

it works:

// removeAccents() is declared as a static method, so it is called through its class.
echo Utils::removeAccents("ÁNDALE ÁNDALE!");
// Echoes: ANDALE ANDALE!

-------- THE REAL ISSUE IS: I have an associative array in which some of the keys have accents, because it is built from a CSV file and all the CSV files have some column names with accents. For example, a column name could be "UBICACIÓN".

The thing is that when I try to remove the accent from such a key, it does not work:

 // Print each column of the current row: the header with accents stripped and
 // surrounding whitespace trimmed, followed by the cell value.
 foreach ($row as $header => $cell) {
     $label = trim(Utils::removeAccents($header));
     echo "KEY: " . $label . " - VALUE: " . $cell . "<br/>";
 }

Echoes something like this for each row:

KEY: REFERENCIA - VALUE: 3500000000
KEY: DESCRIPCIÓN - VALUE: LLANTA 160/60ZR-17 PILOT ROAD2
KEY: SALDO - VALUE: #N/A
KEY: UBICACIÓN - VALUE: #N/A
KEY: COST - VALUE: 243.815

And it SHOULD be:

KEY: REFERENCIA - VALUE: 3500000000
KEY: DESCRIPCION - VALUE: LLANTA 160/60ZR-17 PILOT ROAD2
KEY: SALDO - VALUE: #N/A
KEY: UBICACION - VALUE: #N/A
KEY: COST - VALUE: 243.815

As you can see, two of those keys still have accents. I don't know what the problem is: the function works on any accented string that does not come from the array I want to process.

Maybe it has to do with the charset or something?

EDIT 1 - This is what var_dump shows if I use it with the main array (Just the first rows obviously, there are thousands there):

array (size=38688)
  0 => 
    array (size=8)
      'REFERENCIA' => string '' (length=0)
      'DESCRIPCIÓN' => string '' (length=0)
      'SALDO' => string '' (length=0)
      'UBICACIÓN' => string '' (length=0)
      'COST' => string '' (length=0)
      ' COSTO MAS IVA ' => string '' (length=0)
      ' PRECIO P ' => string '' (length=0)
      '' => string '' (length=0)
  1 => 
    array (size=8)
      'REFERENCIA' => string '3500000000' (length=10)
      'DESCRIPCIÓN' => string 'LLANTA 160/60ZR-17 PILOT ROAD2' (length=30)
      'SALDO' => string '#N/A' (length=4)
      'UBICACIÓN' => string '#N/A' (length=4)
      'COST' => string '243.815' (length=7)
      ' COSTO MAS IVA ' => string ' $ 331.588,40 ' (length=14)
      ' PRECIO P ' => string ' $ 384.642,54 ' (length=14)
      '' => string '' (length=0)
  2 => 
    array (size=8)
      'REFERENCIA' => string '10779000000' (length=11)
      'DESCRIPCIÓN' => string 'LLANTA 120/70ZR18 59W PIL ROA2' (length=30)
      'SALDO' => string '#N/A' (length=4)
      'UBICACIÓN' => string '#N/A' (length=4)
      'COST' => string '218.900' (length=7)
      ' COSTO MAS IVA ' => string ' $ 297.704,00 ' (length=14)
      ' PRECIO P ' => string ' $ 345.336,64 ' (length=14)
      '' => string '' (length=0)

EDIT 2 -- This is how part of the CSV file looks

REFERENCIA;DESCRIPCION;SALDO;UBICACIÓN;COST; COSTO MAS IVA ; PRECIO P ;
;;;;;;;
3500000000;LLANTA 160/60ZR-17 PILOT ROAD2;#N/A;#N/A;243.815; $ 331.588,40 ; $ 384.642,54 ;
10779000000;LLANTA 120/70ZR18 59W PIL ROA2;#N/A;#N/A;218.900; $ 297.704,00 ; $ 345.336,64 ;
01CD1ML00000;BUSO CREMALLERA D LASER M T-L;#N/A;#N/A;50.000; $ 68.000,00 ; $ 78.880,00 ;
01CD1MM00000;BUSO CREMALLERA D LASER M T-M;#N/A;#N/A;50.000; $ 68.000,00 ; $ 78.880,00 ;
01CD1MS00000;BUSO CREMALLERA D LASER M T-S;#N/A;#N/A;50.000; $ 68.000,00 ; $ 78.880,00 ;
jww
  • 97,681
  • 90
  • 411
  • 885
JuanBonnett
  • 776
  • 3
  • 8
  • 26
  • did you try to isolate that same key string, and use your function? did it work? hard to see whats not working on our side – Kevin Sep 30 '14 at 01:06
  • What are the values that are not displaying correctly ? – Adam Sinclair Sep 30 '14 at 01:06
  • that doesn't seem to help your case, we can just shoot in the dark here, there is nothing there that we can replicate – Kevin Sep 30 '14 at 01:10
  • Well. I don't know, the array is the product of an algorithm that takes an uploaded CSV file and converts it into an associative array... that would be a lot of code to put here :S – JuanBonnett Sep 30 '14 at 01:12
  • Can you try to var_dump the array and show us the specific part that is not working ? – Adam Sinclair Sep 30 '14 at 01:14
  • 1
    here is an attempt you what you're trying to do http://codepad.viper-7.com/NMHnjm – Kevin Sep 30 '14 at 01:14
  • Question updated so you can see what the associative array looks like – JuanBonnett Sep 30 '14 at 01:18
  • @Ghost yes, there it works, but remember that the array we're working with comes from a CSV file. It's not working in my case and I don't know if it has to do with something of Charsets or stuff like that. – JuanBonnett Sep 30 '14 at 01:20
  • http://stackoverflow.com/questions/3371697/replacing-accented-characters-php – Evadecaptcha Sep 30 '14 at 01:21
  • Remember that the problem here is not the Function that replaces the characters. It has to do with something else, because it's not replacing this array that is made from an uploaded CSV file. – JuanBonnett Sep 30 '14 at 01:24
  • sorry, i can attempt to make an answer, but most likely just be guesses – Kevin Sep 30 '14 at 01:31
  • Yep. I'll have to solve this problem in some way. May be I'll use the numeric indexes to process this array, instead of the String Keys. That's what we're made for. – JuanBonnett Sep 30 '14 at 01:32
  • 2
    If this is UTF-8 you may need to normalize the strings before doing naive string replace operations. There are multiple ways to represent `Ó` in bytes. – Steve Clay Sep 30 '14 at 04:35

1 Answers1

0

I found a function that may help you:

/**
 * Lowercases a string and replaces the accented letters of the Latin-1 range
 * (U+00C0 - U+00FF) with plain ASCII letters, optionally producing a slug.
 *
 * The input may be UTF-8 or ISO-8859-1. It is treated as UTF-8 when it is a
 * valid UTF-8 byte sequence (pure ASCII qualifies) and as ISO-8859-1 otherwise,
 * and the result stays in that same encoding. Lowercasing and matching are done
 * on explicit byte values, so the result does not depend on the process locale.
 * Only ASCII letters and the letters listed in the table are lowercased.
 *
 * @param string       $string Text to clean.
 * @param string|false $slug   When truthy, every run of characters other than
 *                             a-z / 0-9 is replaced by this separator and
 *                             leading/trailing separators are dropped.
 * @return string The cleaned, lowercased text.
 */
function removeAcentos($string, $slug = false) {
    $string = (string) $string;

    // ASCII-only lowercase. strtolower() is avoided because on older PHP
    // versions it follows the locale and may rewrite bytes >= 0x80, which would
    // break multi-byte UTF-8 sequences.
    $string = strtr($string, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz');

    // ISO-8859-1 codes of the lowercase accented letters, grouped by replacement.
    $codes = array(
        // Vowels
        'a'  => range(224, 230),
        'e'  => range(232, 235),
        'i'  => range(236, 239),
        'o'  => array_merge(range(242, 246), array(248)),
        'u'  => range(249, 252),
        // Other characters
        'c'  => array(231),
        'd'  => array(240), // eth; its uppercase form (208) is derived below
        'n'  => array(241),
        'y'  => array(253, 255),
        'ss' => array(223), // sharp s
    );

    $isUtf8 = preg_match('//u', $string) === 1;

    $map = array();
    foreach ($codes as $replacement => $list) {
        foreach ($list as $code) {
            $variants = array($code);
            // In ISO-8859-1 the uppercase form of codes 224-254 sits exactly 32
            // below; 223 (sharp s) and 255 (y diaeresis) have no uppercase there.
            if ($code >= 224 && $code <= 254) {
                $variants[] = $code - 32;
            }
            foreach ($variants as $cp) {
                // U+00C0 - U+00FF are encoded in UTF-8 as 0xC3 followed by (code - 0x40).
                $needle = $isUtf8 ? "\xC3" . chr($cp - 64) : chr($cp);
                $map[$needle] = $replacement;
            }
        }
    }

    // Single pass over whole characters, so bytes inside unrelated multi-byte
    // UTF-8 sequences are never touched.
    $string = strtr($string, $map);

    // Slug?
    if ($slug) {
        // Splitting on runs of non-alphanumerics and re-joining replaces them
        // with the separator, collapses repeats and trims both ends in one step,
        // without ever using $slug as a regex fragment or a trim() character list.
        $words = preg_split('/[^a-z0-9]+/', $string, -1, PREG_SPLIT_NO_EMPTY);
        $string = implode((string) $slug, $words);
    }

    return $string;
}
// Demo: read the CSV example from the question (semicolon-delimited) and print
// every cell with its accents removed.
$file = fopen("test.csv", "r");
if ($file === false) {
    // Without this guard every later call would operate on `false` instead of a stream.
    throw new RuntimeException('Unable to open test.csv');
}

// Loop on the result of fgetcsv() rather than on feof(): feof() only becomes
// true after a read has already failed, so the old loop handed the final
// `false` to count(). Enclosure and escape are passed explicitly with their
// default values.
while (($csv_array = fgetcsv($file, 4096, ";", '"', "\\")) !== false) {
    foreach ($csv_array as $cell) {
        echo removeAcentos($cell)."<br>";
    }
}
fclose($file);

It returns:

referencia
descripcion
saldo
ubicacion
cost
costo mas iva
precio p
.
.
.

3500000000
llanta 160/60zr-17 pilot road2
#n/a
#n/a
243.815
$ 331.588,40
$ 384.642,54 
.
.
kraysak
  • 1,746
  • 1
  • 13
  • 14