ucfirst() with accents when mbstring is not enabled

Question

I'm trying to use ucfirst() to capitalize the first letter of a string, but when the string contains accented characters such as é, à, ç, î or ô, it doesn't change them.

String

GÉNIE CIVIL

Should return

Génie civil

but it returns instead

GÉnie civil

As you can see, the second letter "É" was not lowercased. I know it's some kind of encoding issue (I'm using UTF-8). Does anyone know how to fix this?

Edit

I don't have mbstring enabled; are there any other solutions?

score 1 · Answer 1 · answered Jun 27 '13 at 00:19

First, ucfirst() does exactly what its name says: it upper-cases the first character and nothing else. The second and following characters are therefore not adjusted the way you want. You'll need to strtolower() the remaining characters yourself (you may run into a similar problem with special characters when doing so, but read on: my example covers that case too).

Now, for an mb_ucfirst() that uses the native PHP functions if they exist and emulates them otherwise, you could define your own mb_strtoupper() and mb_substr() when they do not exist; for this question, however, I wrapped everything in one function.

So my recommended function is:

<?php 
if (!function_exists('mb_ucfirst'))
{
    /**
     * Upper-case the first character of a UTF-8 string; the rest is left untouched.
     *
     * When mbstring is loaded the work is delegated to mb_strtoupper()/mb_substr().
     * Otherwise a byte-level fallback is used that only knows ASCII letters and the
     * two-byte Latin-1 Supplement letters (à..þ and ÿ). Any other multi-byte
     * character is returned unchanged rather than being corrupted.
     *
     * The definition is skipped entirely if an mb_ucfirst() already exists.
     *
     * @param string $string UTF-8 encoded text
     * @return string the text with its first character upper-cased where a mapping is known
     */
    function mb_ucfirst($string)
    {
        $string = (string) $string;

        if (function_exists('mb_strtoupper') && function_exists('mb_substr'))
        {
            // Name the encoding on every call so the result does not depend on
            // whatever mb_internal_encoding() currently is.
            return mb_strtoupper(mb_substr($string, 0, 1, 'UTF-8'), 'UTF-8')
                .mb_substr($string, 1, null, 'UTF-8');
        }

        // Fallback without mbstring.
        // Based on Quicker's note at http://php.net/manual/en/function.ucfirst.php
        if ($string === '')
        {
            return $string;
        }

        $lead = ord($string[0]);
        if ($lead < 0x80)
        {
            // Plain ASCII first character: the native function is enough.
            return ucfirst($string);
        }

        if ($lead === 0xC3 && isset($string[1]))
        {
            $trail = ord($string[1]);
            if ($trail === 0xBF)
            {
                // "ÿ" is the odd one out: its capital "Ÿ" (U+0178) encodes as C5 B8.
                return "\xc5\xb8".substr($string, 2);
            }
            if ($trail >= 0xA0 && $trail <= 0xBE && $trail !== 0xB7)
            {
                // à..þ sit exactly 0x20 above À..Þ in the second byte;
                // 0xB7 is the division sign and has no upper-case form.
                return $string[0].chr($trail - 32).substr($string, 2);
            }
        }

        // Already upper-case, or a character this fallback has no mapping for.
        return $string;
    }
}
?>

Additional notes

You might also be interested in this library: https://github.com/fluxbb/utf8 -- most of the functions are implemented in /core/native.php, and /functions contains ucfirst() and others. That library is probably better tested than my code, even though I like how small mine is for what you need.

Full example with lot of different functions

Warning: if you do not have the mbstring extension, you might have to tweak the code a little.

<?php
// Make UTF-8 the default encoding for mbstring calls, but only when the
// extension is actually loaded: calling it unconditionally is a fatal
// "undefined function" error on installations without mbstring.
if (function_exists('mb_internal_encoding'))
{
    mb_internal_encoding('UTF-8');
}

/**
 * Escape a string for safe output inside HTML text or attribute values.
 *
 * ENT_SUBSTITUTE makes htmlspecialchars() replace invalid UTF-8 sequences with
 * U+FFFD instead of returning an empty string for the whole input, so a
 * partially broken value stays visible instead of silently disappearing.
 *
 * @param string $string raw text, expected to be UTF-8
 * @return string the text with HTML special characters and both quote styles encoded
 */
function escape_html($string)
{
    $charset = 'UTF-8';
    return htmlspecialchars($string, ENT_QUOTES | ENT_SUBSTITUTE, $charset);
}
/*
function mb_ucfirst($str)
{
    $currentEncoding = mb_internal_encoding();
    mb_internal_encoding('UTF-8');
    preg_match_all("~^(.)(.*)$~u", $str, $arr);
    $str = mb_strtoupper($arr[1][0]).$arr[2][0];
    mb_internal_encoding($currentEncoding);
    return $str;
}*/
if (!function_exists('mb_ucfirst'))
{
    /**
     * Upper-case the first character of a UTF-8 string; the rest is left untouched.
     *
     * When mbstring is loaded the work is delegated to mb_strtoupper()/mb_substr().
     * Otherwise a byte-level fallback is used that only knows ASCII letters and the
     * two-byte Latin-1 Supplement letters (à..þ and ÿ). Any other multi-byte
     * character is returned unchanged rather than being corrupted.
     *
     * The definition is skipped entirely if an mb_ucfirst() already exists.
     *
     * @param string $string UTF-8 encoded text
     * @return string the text with its first character upper-cased where a mapping is known
     */
    function mb_ucfirst($string)
    {
        $string = (string) $string;

        if (function_exists('mb_strtoupper') && function_exists('mb_substr'))
        {
            // Name the encoding on every call so the result does not depend on
            // whatever mb_internal_encoding() currently is.
            return mb_strtoupper(mb_substr($string, 0, 1, 'UTF-8'), 'UTF-8')
                .mb_substr($string, 1, null, 'UTF-8');
        }

        // Fallback without mbstring.
        // Based on Quicker's note at http://php.net/manual/en/function.ucfirst.php
        // If it does not cover your alphabet, replace it with your favorite utf8 ucfirst function
        if ($string === '')
        {
            return $string;
        }

        $lead = ord($string[0]);
        if ($lead < 0x80)
        {
            // Plain ASCII first character: the native function is enough.
            return ucfirst($string);
        }

        if ($lead === 0xC3 && isset($string[1]))
        {
            $trail = ord($string[1]);
            if ($trail === 0xBF)
            {
                // "ÿ" is the odd one out: its capital "Ÿ" (U+0178) encodes as C5 B8.
                return "\xc5\xb8".substr($string, 2);
            }
            if ($trail >= 0xA0 && $trail <= 0xBE && $trail !== 0xB7)
            {
                // à..þ sit exactly 0x20 above À..Þ in the second byte;
                // 0xB7 is the division sign and has no upper-case form.
                return $string[0].chr($trail - 32).substr($string, 2);
            }
        }

        // Already upper-case, or a character this fallback has no mapping for.
        return $string;
    }
}
// Sample inputs fed to every ucfirst variant in the comparison table.
$testTexts = [
    // multi-word phrases with an accented second letter
    'GÉNIE CIVIL',
    'gÉnie civil',
    'Génie civil',
    // same phrase without any accent
    'GENIE CIVIL',
    // single words, with and without an accented first letter
    'Epinard',
    'ÉPinard',
    'épinard',
    'epinard',
    // single accented characters
    'é',
    'Ç',
    'ç',
];

// Credit to leha_grobov php.net/strtolower, I inverted it ;-)
/**
 * Upper-case every letter of a UTF-8 string using a fixed lookup table.
 *
 * Only the letters listed below are converted (ASCII, a set of Western
 * European accented letters, and the Russian alphabet); any other character
 * passes through unchanged. No mbstring function is used.
 *
 * @param string $string UTF-8 encoded text
 * @return string the text with every listed lower-case letter replaced by its capital
 */
function strtoupper_utf8($string)
{
  // The two arrays are matched by position: entry N of $convert_from is
  // replaced by entry N of $convert_to, so they must stay the same length
  // and in the same order.
  $convert_from = array(
    "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u",
    "v", "w", "x", "y", "z", "à", "á", "â", "ã", "ä", "å", "æ", "ç", "è", "é", "ê", "ë", "ì", "í", "î", "ï",
    "ð", "ñ", "ò", "ó", "ô", "õ", "ö", "ø", "ù", "ú", "û", "ü", "ý", "а", "б", "в", "г", "д", "е", "ё", "ж",
    "з", "и", "й", "к", "л", "м", "н", "о", "п", "р", "с", "т", "у", "ф", "х", "ц", "ч", "ш", "щ", "ъ", "ы",
    "ь", "э", "ю", "я"
  );
  $convert_to = array(
    "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U",
    "V", "W", "X", "Y", "Z", "À", "Á", "Â", "Ã", "Ä", "Å", "Æ", "Ç", "È", "É", "Ê", "Ë", "Ì", "Í", "Î", "Ï",
    "Ð", "Ñ", "Ò", "Ó", "Ô", "Õ", "Ö", "Ø", "Ù", "Ú", "Û", "Ü", "Ý", "А", "Б", "В", "Г", "Д", "Е", "Ё", "Ж",
    // "ы" must map to "Ы"; the table previously repeated "Ъ" in that slot.
    "З", "И", "Й", "К", "Л", "М", "Н", "О", "П", "Р", "С", "Т", "У", "Ф", "Х", "Ц", "Ч", "Ш", "Щ", "Ъ", "Ы",
    "Ь", "Э", "Ю", "Я"
  );

  return str_replace($convert_from, $convert_to, $string);
}

// Credit to leha_grobov php.net/strtolower, I tweaked it ;-)
/**
 * Upper-case the first character of a UTF-8 string using a fixed lookup table.
 *
 * Only the letters listed below are converted (ASCII, a set of Western
 * European accented letters, and the Russian alphabet). The first character
 * is isolated with a UTF-8 aware regular expression, so the function works
 * without mbstring. Empty input, and input that is not valid UTF-8, is
 * returned unchanged.
 *
 * @param string $string UTF-8 encoded text
 * @return string the text with its first character upper-cased where a mapping is listed
 */
function ucfirst_utf8_1($string)
{
  // The two arrays are matched by position: entry N of $convert_from is
  // replaced by entry N of $convert_to.
  $convert_from = array(
    "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u",
    "v", "w", "x", "y", "z", "à", "á", "â", "ã", "ä", "å", "æ", "ç", "è", "é", "ê", "ë", "ì", "í", "î", "ï",
    "ð", "ñ", "ò", "ó", "ô", "õ", "ö", "ø", "ù", "ú", "û", "ü", "ý", "а", "б", "в", "г", "д", "е", "ё", "ж",
    "з", "и", "й", "к", "л", "м", "н", "о", "п", "р", "с", "т", "у", "ф", "х", "ц", "ч", "ш", "щ", "ъ", "ы",
    "ь", "э", "ю", "я"
  );
  $convert_to = array(
    "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U",
    "V", "W", "X", "Y", "Z", "À", "Á", "Â", "Ã", "Ä", "Å", "Æ", "Ç", "È", "É", "Ê", "Ë", "Ì", "Í", "Î", "Ï",
    "Ð", "Ñ", "Ò", "Ó", "Ô", "Õ", "Ö", "Ø", "Ù", "Ú", "Û", "Ü", "Ý", "А", "Б", "В", "Г", "Д", "Е", "Ё", "Ж",
    // "ы" must map to "Ы"; the table previously repeated "Ъ" in that slot.
    "З", "И", "Й", "К", "Л", "М", "Н", "О", "П", "Р", "С", "Т", "У", "Ф", "Х", "Ц", "Ч", "Ш", "Щ", "Ъ", "Ы",
    "Ь", "Э", "Ю", "Я"
  );

  $string = (string) $string;

  // With the "u" modifier "." matches one whole UTF-8 character ("s" lets it
  // match a newline too). preg_match() yields 0 for an empty string and
  // false for invalid UTF-8; in both cases there is nothing to convert.
  if (!preg_match('/^./us', $string, $match))
  {
    return $string;
  }
  $first = $match[0];

  // strlen() is the byte length of the first character, so substr() resumes
  // exactly at the start of the second character.
  return str_replace($convert_from, $convert_to, $first).substr($string, strlen($first));
}

// Credit to Quicker at http://php.net/manual/en/function.ucfirst.php
/**
 * Upper-case the first character of a UTF-8 string with byte arithmetic only.
 *
 * Handles ASCII letters (via ucfirst()) and the two-byte Latin-1 Supplement
 * letters à..þ and ÿ. Any other multi-byte first character is returned
 * unchanged. No mbstring function is used.
 *
 * @param string $stri UTF-8 encoded text
 * @return string the text with its first character upper-cased where a mapping is known
 */
function ucfirst_utf8_2($stri)
{
    $stri = (string) $stri;
    if ($stri === '')
    {
        return $stri;
    }

    $lead = ord($stri[0]);
    if ($lead < 0x80)
    {
        // Plain ASCII first character: the native function is enough.
        return ucfirst($stri);
    }

    // Restrict the byte trick to lead byte C3. Applying it to every lead byte
    // >= C3 rewrites the second byte of unrelated characters (CJK, Cyrillic,
    // Latin Extended) into a different character.
    if ($lead === 0xC3 && isset($stri[1]))
    {
        $trail = ord($stri[1]);
        if ($trail === 0xBF)
        {
            // "ÿ" is the odd one out: its capital "Ÿ" (U+0178) encodes as C5 B8.
            return "\xc5\xb8".substr($stri, 2);
        }
        if ($trail >= 0xA0 && $trail <= 0xBE && $trail !== 0xB7)
        {
            // à..þ sit exactly 0x20 above À..Þ in the second byte;
            // 0xB7 is the division sign and has no upper-case form.
            return $stri[0].chr($trail - 32).substr($stri, 2);
        }
    }

    // Already upper-case, or a character this routine has no mapping for.
    return $stri;
}

?>
<!DOCTYPE html>
<html>
<head>
<title>Test ucfirst()</title>
<meta http-equiv="content-type" content="text/html; charset=UTF-8">
<style>
table {
    border-collapse: collapse;
}
td, th {
    border: 1px solid black;
    padding: 5px;
}
</style>
</head>
<body>
<?php
// Comparison table: one row per entry of $testTexts, one column per
// capitalisation strategy. Every cell goes through escape_html() before output.
?>
<table>
        <tr>
            <th>$t</th>
            <th>ucfirst($t)</th>
            <th>ucfirst(strtolower($t))</th>
            <th>mb_ucfirst($t)</th>
            <th>strtoupper_utf8($t)</th>
            <th>ucfirst_utf8_1($t);</th>
            <th>ucfirst_utf8_2($t);</th>
        </tr>
    <?php foreach ($testTexts as $t): ?>
        <tr>
            <td><?php echo escape_html($t); ?></td>
            <td><?php echo escape_html(ucfirst($t)); ?></td>
            <td><?php echo escape_html(ucfirst(strtolower($t))); ?></td>
            <td><?php echo escape_html(mb_ucfirst($t)); ?></td>
            <td><?php echo escape_html(strtoupper_utf8($t)); ?></td>
            <td><?php echo escape_html(ucfirst_utf8_1($t)); ?></td>
            <td><?php echo escape_html(ucfirst_utf8_2($t)); ?></td>
        </tr>
    <?php endforeach; ?>    
</table>
</body>
</html>

ucfirst () with accent without mbstring enable

1 Answers1