I need to convert a Unicode NSString to an ASCII NSString, replacing all local characters with their plain equivalents: Ą to A, Ś to S, Ó to O, ü to u, and so on.
What is the simplest way to do it?
I need to convert a Unicode NSString to an ASCII NSString, replacing all local characters with their plain equivalents: Ą to A, Ś to S, Ó to O, ü to u, and so on.
What is the simplest way to do it?
-[NSString dataUsingEncoding:NSASCIIStringEncoding allowLossyConversion:YES]
.
All of the examples you gave are handled as you want. Looks like characters with no obvious analog, such as ☃, go to '?'.
// Sample input containing accented Vietnamese letters.
NSString *original = @"Chào mừng đến với Việt Nam.";
// Replace the stroked d/D explicitly before the lossy conversion
// (presumably because the conversion does not map them to plain letters).
NSString *folded = [[original stringByReplacingOccurrencesOfString:@"đ" withString:@"d"]
                              stringByReplacingOccurrencesOfString:@"Đ" withString:@"D"];
// Lossy ASCII encoding, then decode the resulting bytes back into a string.
NSData *asciiBytes = [folded dataUsingEncoding:NSASCIIStringEncoding allowLossyConversion:YES];
NSString *asciiText = [[NSString alloc] initWithData:asciiBytes encoding:NSASCIIStringEncoding];
NSLog(@"ANSI: %@", asciiText);
Ken's answer will replace "æ" with "ae" and "ß" with "s", but won't replace ligatures such as œ, ij, ff, fi, fl, ffi, ffl, ſt, st, ...
An improved solution is to first add explicit replacements for the characters that are not handled, and then apply compatibility normalization before the lossy ASCII conversion:
// Explicit replacements applied, in this order, before normalization.
NSArray<NSArray<NSString *> *> *replacements = @[
    @[@"Œ", @"OE"],
    @[@"œ", @"oe"],
    @[@"Đ", @"D"],
    @[@"đ", @"d"],
];
for (NSArray<NSString *> *pair in replacements) {
    string = [string stringByReplacingOccurrencesOfString:pair[0] withString:pair[1]];
}
// Compatibility normalization, then a lossy round-trip through ASCII bytes.
string = [string precomposedStringWithCompatibilityMapping];
NSData *asciiData = [string dataUsingEncoding:NSASCIIStringEncoding allowLossyConversion:YES];
NSString *newString = [[NSString alloc] initWithData:asciiData encoding:NSASCIIStringEncoding];
Objective-C's NSASCIIStringEncoding only supports values up to 127; the characters you are looking for lie beyond 127 in the ASCII table.
NSASCIIStringEncoding Strict 7-bit ASCII encoding within 8-bit chars; ASCII values 0…127 only. Available in Mac OS X v10.0 and later. Declared in NSString.h.
// Encode `decode` with the default C-string encoding, then rebuild it by
// interpreting those bytes as UTF-8.
// NOTE(review): the two encodings only agree when the default C-string
// encoding is UTF-8 — confirm this mismatch is intended.
NSStringEncoding cStringEncoding = [NSString defaultCStringEncoding];
NSData *encodedBytes = [decode dataUsingEncoding:cStringEncoding];
decode = [[NSString alloc] initWithData:encodedBytes encoding:NSUTF8StringEncoding];
NSUTF8StringEncoding and -[NSString UTF8String] were not working for me under Xcode 12.2.
Here is my custom NSString category, which works for all ASCII values 0..255 in Objective-C:
Header file
#import <Cocoa/Cocoa.h>
/// Category that converts a string into a single-byte, NUL-terminated C string.
@interface NSString (ASCIIEncode)
/// Encodes the receiver as a C string, producing one byte per UTF-16 unit of
/// the receiver according to the 0..255 character table in the implementation.
///
/// @return A pointer to a NUL-terminated buffer. The storage is owned by the
///         category and reused by later calls, so copy the bytes if they must
///         outlive the next call.
- (const char*)ASCIIEncode;
@end
Implementation
#import "NSString+ASCIIEncode.h"
@implementation NSString (ASCIIEncode)

// Character table references:
// https://tools.piex.at/ascii-tabelle/
// https://www.ionos.de/digitalguide/server/knowhow/ascii-american-standard-code-for-information-interchange/

/// Unicode code points for the byte values 0x80..0x9F of the target table
/// (index 0 is byte 0x80). Slots that have no printable character assigned
/// (0x81, 0x8D, 0x8F, 0x90, 0x9D) hold the code point equal to the byte value,
/// so every byte 0..255 has exactly one source character.
static const unichar kASCIIEncodeHighCodePoints[32] = {
    0x20AC, 0x0081, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, // 80..87  € . ‚ ƒ „ … † ‡
    0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0x008D, 0x017D, 0x008F, // 88..8F  ˆ ‰ Š ‹ Œ . Ž .
    0x0090, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, // 90..97  . ‘ ’ “ ” • – —
    0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x009D, 0x017E, 0x0178, // 98..9F  ˜ ™ š › œ . ž Ÿ
};

/// Byte emitted for characters that have no single-byte equivalent.
static const unsigned char kASCIIEncodeReplacementByte = '?';

/// Maps one UTF-16 unit to its byte in the 0..255 table, or to
/// kASCIIEncodeReplacementByte when the table has no entry for it.
static unsigned char ASCIIEncodeByteForCharacter(unichar character) {
    // 0x00..0x7F and 0xA0..0xFF share their value with the Unicode code point.
    if (character < 0x80 || (character >= 0xA0 && character <= 0xFF)) {
        return (unsigned char)character;
    }
    // 0x80..0x9F is the only range that needs an actual table lookup.
    for (NSUInteger offset = 0; offset < 32; offset++) {
        if (kASCIIEncodeHighCodePoints[offset] == character) {
            return (unsigned char)(0x80 + offset);
        }
    }
    return kASCIIEncodeReplacementByte;
}

/// Encodes the receiver as a NUL-terminated single-byte C string.
///
/// Each UTF-16 unit of the receiver produces exactly one output byte; units
/// without an entry in the 0..255 table (including each half of a surrogate
/// pair) become '?'. A U+0000 inside the receiver is written as byte 0 and
/// therefore ends the C string early for C-string consumers.
///
/// @return A pointer to a buffer owned by this category. The buffer is reused
///         (and may be reallocated) by the next call, so copy the bytes if they
///         must be kept. Concurrent calls from several threads are not
///         synchronized.
- (const char*)ASCIIEncode {
    // A growable buffer replaces the former fixed 1024-byte array, which was
    // overrun by receivers of 1024 or more characters.
    static NSMutableData *buffer = nil;
    if (buffer == nil) {
        buffer = [[NSMutableData alloc] init];
    }

    NSUInteger count = self.length;
    buffer.length = count + 1; // room for the terminating NUL
    unsigned char *output = (unsigned char *)buffer.mutableBytes;

    for (NSUInteger i = 0; i < count; i++) {
        // Every position is written, so no stale byte from an earlier call
        // can leak into the result.
        output[i] = ASCIIEncodeByteForCharacter([self characterAtIndex:i]);
    }

    // Don't forget string termination
    output[count] = 0;
    return (const char *)output;
}
@end