For anyone who does want to stick to the standard library, I wanted some actual
data on this. I pulled the full list of two-byte characters that fail with
`to_lowercase` or `to_uppercase`, and then ran this test:
/// Reports whether the lowercase mappings of `left` and `right` have at least
/// one character in common.
///
/// `char::to_lowercase` yields an iterator of characters, so each character
/// produced for `left` is checked against each character produced for `right`.
fn lowercase(left: char, right: char) -> bool {
    left.to_lowercase()
        .any(|a| right.to_lowercase().any(|b| a == b))
}
/// Reports whether the uppercase mappings of `left` and `right` have at least
/// one character in common.
///
/// `char::to_uppercase` yields an iterator of characters, so each character
/// produced for `left` is checked against each character produced for `right`.
fn uppercase(left: char, right: char) -> bool {
    left.to_uppercase()
        .any(|a| right.to_uppercase().any(|b| a == b))
}
/// Runs every character pair in the table through `uppercase` and `lowercase`,
/// printing one line per pair that names the comparisons which matched, and
/// finishes with the total number of matches for each comparison.
fn main() {
    // Code-point pairs under test; each inner array is `[first, second]`.
    let pairs = &[
        &['\u{00E5}', '\u{212B}'], &['\u{00C5}', '\u{212B}'], &['\u{0399}', '\u{1FBE}'],
        &['\u{03B9}', '\u{1FBE}'], &['\u{03B2}', '\u{03D0}'], &['\u{03B5}', '\u{03F5}'],
        &['\u{03B8}', '\u{03D1}'], &['\u{03B8}', '\u{03F4}'], &['\u{03D1}', '\u{03F4}'],
        &['\u{03B9}', '\u{1FBE}'], &['\u{0345}', '\u{03B9}'], &['\u{0345}', '\u{1FBE}'],
        &['\u{03BA}', '\u{03F0}'], &['\u{00B5}', '\u{03BC}'], &['\u{03C0}', '\u{03D6}'],
        &['\u{03C1}', '\u{03F1}'], &['\u{03C2}', '\u{03C3}'], &['\u{03C6}', '\u{03D5}'],
        &['\u{03C9}', '\u{2126}'], &['\u{0392}', '\u{03D0}'], &['\u{0395}', '\u{03F5}'],
        &['\u{03D1}', '\u{03F4}'], &['\u{0398}', '\u{03D1}'], &['\u{0398}', '\u{03F4}'],
        &['\u{0345}', '\u{1FBE}'], &['\u{0345}', '\u{0399}'], &['\u{0399}', '\u{1FBE}'],
        &['\u{039A}', '\u{03F0}'], &['\u{00B5}', '\u{039C}'], &['\u{03A0}', '\u{03D6}'],
        &['\u{03A1}', '\u{03F1}'], &['\u{03A3}', '\u{03C2}'], &['\u{03A6}', '\u{03D5}'],
        &['\u{03A9}', '\u{2126}'], &['\u{0398}', '\u{03F4}'], &['\u{03B8}', '\u{03F4}'],
        &['\u{03B8}', '\u{03D1}'], &['\u{0398}', '\u{03D1}'], &['\u{0432}', '\u{1C80}'],
        &['\u{0434}', '\u{1C81}'], &['\u{043E}', '\u{1C82}'], &['\u{0441}', '\u{1C83}'],
        &['\u{0442}', '\u{1C84}'], &['\u{0442}', '\u{1C85}'], &['\u{1C84}', '\u{1C85}'],
        &['\u{044A}', '\u{1C86}'], &['\u{0412}', '\u{1C80}'], &['\u{0414}', '\u{1C81}'],
        &['\u{041E}', '\u{1C82}'], &['\u{0421}', '\u{1C83}'], &['\u{1C84}', '\u{1C85}'],
        &['\u{0422}', '\u{1C84}'], &['\u{0422}', '\u{1C85}'], &['\u{042A}', '\u{1C86}'],
        &['\u{0463}', '\u{1C87}'], &['\u{0462}', '\u{1C87}'],
    ];

    let mut upper_hits = 0;
    let mut lower_hits = 0;

    for &&[first, second] in pairs.iter() {
        // Line prefix: both code points in hexadecimal.
        print!("U+{:04X} U+{:04X} pass: ", first as u32, second as u32);

        let upper_ok = uppercase(first, second);
        let lower_ok = lowercase(first, second);

        // The uppercase column is always written; a miss leaves a single space.
        print!("{}", if upper_ok { "to_uppercase " } else { " " });
        if lower_ok {
            print!("to_lowercase");
        }
        println!();

        upper_hits += i32::from(upper_ok);
        lower_hits += i32::from(lower_ok);
    }

    println!("upper pass: {}, lower pass: {}", upper_hits, lower_hits);
}
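The results are below. Interestingly, one of the pairs (U+03D1 and U+03F4) fails
with both methods. Based on these results, `to_uppercase` is the better option:
it passes 46 of the test lines, versus 8 for `to_lowercase`.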
U+00E5 U+212B pass: to_lowercase
U+00C5 U+212B pass: to_lowercase
U+0399 U+1FBE pass: to_uppercase
U+03B9 U+1FBE pass: to_uppercase
U+03B2 U+03D0 pass: to_uppercase
U+03B5 U+03F5 pass: to_uppercase
U+03B8 U+03D1 pass: to_uppercase
U+03B8 U+03F4 pass: to_lowercase
U+03D1 U+03F4 pass:
U+03B9 U+1FBE pass: to_uppercase
U+0345 U+03B9 pass: to_uppercase
U+0345 U+1FBE pass: to_uppercase
U+03BA U+03F0 pass: to_uppercase
U+00B5 U+03BC pass: to_uppercase
U+03C0 U+03D6 pass: to_uppercase
U+03C1 U+03F1 pass: to_uppercase
U+03C2 U+03C3 pass: to_uppercase
U+03C6 U+03D5 pass: to_uppercase
U+03C9 U+2126 pass: to_lowercase
U+0392 U+03D0 pass: to_uppercase
U+0395 U+03F5 pass: to_uppercase
U+03D1 U+03F4 pass:
U+0398 U+03D1 pass: to_uppercase
U+0398 U+03F4 pass: to_lowercase
U+0345 U+1FBE pass: to_uppercase
U+0345 U+0399 pass: to_uppercase
U+0399 U+1FBE pass: to_uppercase
U+039A U+03F0 pass: to_uppercase
U+00B5 U+039C pass: to_uppercase
U+03A0 U+03D6 pass: to_uppercase
U+03A1 U+03F1 pass: to_uppercase
U+03A3 U+03C2 pass: to_uppercase
U+03A6 U+03D5 pass: to_uppercase
U+03A9 U+2126 pass: to_lowercase
U+0398 U+03F4 pass: to_lowercase
U+03B8 U+03F4 pass: to_lowercase
U+03B8 U+03D1 pass: to_uppercase
U+0398 U+03D1 pass: to_uppercase
U+0432 U+1C80 pass: to_uppercase
U+0434 U+1C81 pass: to_uppercase
U+043E U+1C82 pass: to_uppercase
U+0441 U+1C83 pass: to_uppercase
U+0442 U+1C84 pass: to_uppercase
U+0442 U+1C85 pass: to_uppercase
U+1C84 U+1C85 pass: to_uppercase
U+044A U+1C86 pass: to_uppercase
U+0412 U+1C80 pass: to_uppercase
U+0414 U+1C81 pass: to_uppercase
U+041E U+1C82 pass: to_uppercase
U+0421 U+1C83 pass: to_uppercase
U+1C84 U+1C85 pass: to_uppercase
U+0422 U+1C84 pass: to_uppercase
U+0422 U+1C85 pass: to_uppercase
U+042A U+1C86 pass: to_uppercase
U+0463 U+1C87 pass: to_uppercase
U+0462 U+1C87 pass: to_uppercase
upper pass: 46, lower pass: 8