0

I have to decode a file encoded in ASCII. However, when I decode the file, I get a "?" instead of "é", which shows that the decoding didn't happen properly.

This is the code that I am using. Which condition should I add to properly decode ASCII?

 /**
  * Guesses the text encoding of the stream from its first chunk and stores the
  * result in _streamEncoding.
  *
  * Reads up to CHUNK_SIZE bytes from _stream, appends them to _stringBuffer and
  * adds the count to totalBytesRead. The encoding is then chosen as follows:
  *   1. A recognised leading signature (UTF-32 BE/LE byte-order mark, the
  *      ISO-2022-KR escape sequence, UTF-16 BE/LE byte-order mark, UTF-8
  *      byte-order mark) selects that encoding, and the signature bytes are
  *      removed from _stringBuffer.
  *   2. Otherwise, if the chunk decodes as UTF-8 (ignoring up to three trailing
  *      bytes), UTF-8 is used.
  *   3. Otherwise MacOSRoman is assumed and a message is logged.
  *
  * If nothing could be read, _streamEncoding is set to UTF-8.
  */
 - (void)_sniffEncoding {
    NSStringEncoding encoding = NSUTF8StringEncoding;

    uint8_t bytes[CHUNK_SIZE];
    NSInteger readLength = [_stream read:bytes maxLength:CHUNK_SIZE];
    if (readLength > 0 && readLength <= CHUNK_SIZE) {
        [_stringBuffer appendBytes:bytes length:readLength];
        [self setTotalBytesRead:[self totalBytesRead] + readLength];

        NSInteger bomLength = 0;

        // The 4-byte signatures are tested before the 2-byte ones because the
        // UTF-32 LE mark (FF FE 00 00) begins with the UTF-16 LE mark (FF FE).
        if (readLength > 3 && bytes[0] == 0x00 && bytes[1] == 0x00 && bytes[2] == 0xFE && bytes[3] == 0xFF) {
            encoding = NSUTF32BigEndianStringEncoding;
            bomLength = 4;
        } else if (readLength > 3 && bytes[0] == 0xFF && bytes[1] == 0xFE && bytes[2] == 0x00 && bytes[3] == 0x00) {
            encoding = NSUTF32LittleEndianStringEncoding;
            bomLength = 4;
        } else if (readLength > 3 && bytes[0] == 0x1B && bytes[1] == 0x24 && bytes[2] == 0x29 && bytes[3] == 0x43) {
            // ESC $ ) C
            encoding = CFStringConvertEncodingToNSStringEncoding(kCFStringEncodingISO_2022_KR);
            bomLength = 4;
        } else if (readLength > 1 && bytes[0] == 0xFE && bytes[1] == 0xFF) {
            encoding = NSUTF16BigEndianStringEncoding;
            bomLength = 2;
        } else if (readLength > 1 && bytes[0] == 0xFF && bytes[1] == 0xFE) {
            encoding = NSUTF16LittleEndianStringEncoding;
            bomLength = 2;
        } else if (readLength > 2 && bytes[0] == 0xEF && bytes[1] == 0xBB && bytes[2] == 0xBF) {
            encoding = NSUTF8StringEncoding;
            bomLength = 3;
        } else {
            NSString *bufferAsUTF8 = nil;

            // The chunk may end in the middle of a multi-byte UTF-8 sequence,
            // so retry with up to three trailing bytes dropped. Never shrink
            // the probe to zero (or fewer) bytes: an empty buffer is trivially
            // valid UTF-8 and would hide a real decoding failure on very
            // short reads.
            for (NSInteger triedLength = 0; triedLength < 4 && triedLength < readLength; ++triedLength) {
                NSUInteger probeLength = (NSUInteger)(readLength - triedLength);
                bufferAsUTF8 = [[NSString alloc] initWithBytes:bytes length:probeLength encoding:NSUTF8StringEncoding];
                if (bufferAsUTF8 != nil) {
                    break;
                }
            }

            if (bufferAsUTF8 != nil) {
                encoding = NSUTF8StringEncoding;
            } else {
                // Everything that has no signature and is not valid UTF-8 ends
                // up here, including Latin-1 / Windows-1252 text (where e.g.
                // "é" is the single byte 0xE9). Such input is decoded as
                // MacOSRoman regardless of its real encoding.
                NSLog(@"unable to determine stream encoding; assuming MacOSRoman");
                encoding = NSMacOSRomanStringEncoding;
            }
        }

        if (bomLength > 0) {
            // Drop the signature so it does not appear in the decoded text.
            [_stringBuffer replaceBytesInRange:NSMakeRange(0, bomLength) withBytes:NULL length:0];
        }
    }
    _streamEncoding = encoding;
}
Blisko
  • 107
  • 2
  • 9
  • 1
    `é` cannot be encoded in ASCII. If you have é, then you don't have an ASCII file. You will need to determine the actual format. The most common is Latin1, but there are many encodings and different encodings are popular in different parts of the world. Use `[NSString initWithData:encoding:]` to decode it. The above code only tries to pick between UTF encodings and MacRoman. I don't know why you're using it if you know you have ASCII. (All ASCII is UTF-8, but not all UTF-8 is ASCII.) – Rob Napier Aug 01 '21 at 14:46
  • Does this answer your question? [NSString : easy way to remove UTF-8 accents from a string?](https://stackoverflow.com/questions/10932405/nsstring-easy-way-to-remove-utf-8-accents-from-a-string) – JosefZ Aug 01 '21 at 15:06

0 Answers0