-2

My setup: glibc 2.24, gcc 6.2.0, UTF-8 environment.

When we call islower() in the following test.c, the program exits with the expected status:

#include <locale.h>
#include <ctype.h>
#include <stdio.h>
int main (void)
{
  setlocale(LC_ALL, "fr_FR.ISO-8859-1");
  if (islower(0xff)) return 0;
  return 1;
}
$ gcc -g -o test test.c
$ ./test; echo $?
0

Now we change 0xff to 'ÿ', and get test2.c:

#include <locale.h>
#include <ctype.h>
#include <stdio.h>
int main (void)
{
  setlocale(LC_ALL, "fr_FR.ISO-8859-1");
  if (islower('ÿ')) return 0;
  return 1;
}

Although the output is supposed to be 0, as in test.c, it is different:

$ gcc -g -fexec-charset=iso8859-1 -o test2 test2.c
$ ./test2; echo $?
1

The iso8859-1 locale is properly installed:

$ locale -a
C
C.UTF-8
en_US.utf8
french
fr_FR
fr_FR.iso88591
POSIX

Neither strace nor gdb reveals anything useful.

Below is strace output for test and test2.


execve("./test", ["./test"], [/* 43 vars */]) = 0
brk(NULL)                               = 0x557a2ad1a000
access("/etc/ld.so.nohwcap", F_OK)      = -1 ENOENT (No such file or directory)
mmap(NULL, 12288, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f13c7e91000
access("/etc/ld.so.preload", R_OK)      = -1 ENOENT (No such file or directory)
open("/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3
fstat(3, {st_mode=S_IFREG|0644, st_size=98552, ...}) = 0
mmap(NULL, 98552, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f13c7e78000
close(3)                                = 0
access("/etc/ld.so.nohwcap", F_OK)      = -1 ENOENT (No such file or directory)
open("/lib/x86_64-linux-gnu/libc.so.6", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\2\1\1\3\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\320\3\2\0\0\0\0\0"..., 832) = 832
fstat(3, {st_mode=S_IFREG|0755, st_size=1685264, ...}) = 0
mmap(NULL, 3791264, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f13c78d4000
mprotect(0x7f13c7a69000, 2093056, PROT_NONE) = 0
mmap(0x7f13c7c68000, 24576, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x194000) = 0x7f13c7c68000
mmap(0x7f13c7c6e000, 14752, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x7f13c7c6e000
close(3)                                = 0
mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f13c7e76000
arch_prctl(ARCH_SET_FS, 0x7f13c7e76700) = 0
mprotect(0x7f13c7c68000, 16384, PROT_READ) = 0
mprotect(0x557a29abe000, 4096, PROT_READ) = 0
mprotect(0x7f13c7e94000, 4096, PROT_READ) = 0
munmap(0x7f13c7e78000, 98552)           = 0
brk(NULL)                               = 0x557a2ad1a000
brk(0x557a2ad3b000)                     = 0x557a2ad3b000
open("/usr/lib/locale/locale-archive", O_RDONLY|O_CLOEXEC) = 3
fstat(3, {st_mode=S_IFREG|0644, st_size=3517584, ...}) = 0
mmap(NULL, 3517584, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f13c7579000
close(3)                                = 0
exit_group(0)                           = ?
+++ exited with 0 +++
execve("./test2", ["./test2"], [/* 43 vars */]) = 0
brk(NULL)                               = 0x5603a66f6000
access("/etc/ld.so.nohwcap", F_OK)      = -1 ENOENT (No such file or directory)
mmap(NULL, 12288, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f24d23c4000
access("/etc/ld.so.preload", R_OK)      = -1 ENOENT (No such file or directory)
open("/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3
fstat(3, {st_mode=S_IFREG|0644, st_size=98552, ...}) = 0
mmap(NULL, 98552, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f24d23ab000
close(3)                                = 0
access("/etc/ld.so.nohwcap", F_OK)      = -1 ENOENT (No such file or directory)
open("/lib/x86_64-linux-gnu/libc.so.6", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\2\1\1\3\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\320\3\2\0\0\0\0\0"..., 832) = 832
fstat(3, {st_mode=S_IFREG|0755, st_size=1685264, ...}) = 0
mmap(NULL, 3791264, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f24d1e07000
mprotect(0x7f24d1f9c000, 2093056, PROT_NONE) = 0
mmap(0x7f24d219b000, 24576, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x194000) = 0x7f24d219b000
mmap(0x7f24d21a1000, 14752, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x7f24d21a1000
close(3)                                = 0
mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f24d23a9000
arch_prctl(ARCH_SET_FS, 0x7f24d23a9700) = 0
mprotect(0x7f24d219b000, 16384, PROT_READ) = 0
mprotect(0x5603a560b000, 4096, PROT_READ) = 0
mprotect(0x7f24d23c7000, 4096, PROT_READ) = 0
munmap(0x7f24d23ab000, 98552)           = 0
brk(NULL)                               = 0x5603a66f6000
brk(0x5603a6717000)                     = 0x5603a6717000
open("/usr/lib/locale/locale-archive", O_RDONLY|O_CLOEXEC) = 3
fstat(3, {st_mode=S_IFREG|0644, st_size=3517584, ...}) = 0
mmap(NULL, 3517584, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f24d1aac000
close(3)                                = 0
exit_group(1)                           = ?
+++ exited with 1 +++

Why does test2 not return 0?

Igor Liferenko
  • 1,499
  • 1
  • 13
  • 28

1 Answer

1

The question boils down to:

If I write this:

int x = 'ÿ';

x will be -1 but I'd like it to be 255.

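It's -1 because the value of the character constant 'ÿ' is that of a char holding the byte 0xff (under -fexec-charset=iso8859-1) converted to int. As char is signed on your platform, sign extension takes place in that conversion, so the result is -1 rather than 255. And -1 is the value of EOF, for which islower() returns 0.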

The solution is to write:

int x = (unsigned char)'ÿ';

So instead of islower('ÿ') write islower((unsigned char)'ÿ').

Jabberwocky
  • 48,281
  • 17
  • 65
  • 115
  • From this answer it follows that the `char` type must be unsigned by default, since, according to its name, it is used to store **char**acters. Can you think of any specific reason which prevented gcc developers from using unsigned as the default for `char`? – Igor Liferenko Nov 08 '16 at 07:51
  • @IgorLiferenko look at [this SO question](http://stackoverflow.com/questions/2054939/is-char-signed-or-unsigned-by-default). – Jabberwocky Nov 08 '16 at 07:53