This took far longer than I thought it would.
My final approach converts each Unicode escape sequence to its equivalent UTF-8 bytes, encodes those bytes as percent-escaped sequences, and then uses `-[NSString stringByRemovingPercentEncoding]` to yield the actual Unicode character.
You have to perform the bit shifting manually to complete the conversion, but the rest is trivial after that. For reference, you can look at this example gist: for each ASCII character in a larger string, it resolves the decimal code point of the equivalent Unicode monospace character and then converts that code point. The logic is written in JXA-ObjectiveC, but carries over perfectly into Objective-C.
Update:
Here's what a native Objective-C implementation would look like:
/// Converts one escaped Unicode code point (for example `\U0001D670`) into a
/// string containing that character.
///
/// The first two characters are treated as the escape prefix and skipped
/// without being inspected. The remaining one to eight characters must all be
/// hexadecimal digits naming a Unicode scalar value.
///
/// The code point is encoded as UTF-8 with manual bit shifting, each byte is
/// written as a percent escape, and the result is decoded with
/// `-stringByRemovingPercentEncoding`.
///
/// @param escapedUnicharString The escaped code point, e.g. `\U0001D670`.
/// @return The decoded string, or `nil` when the input is `nil`, is not a
///         two-character prefix followed by 1-8 hex digits, or names a
///         surrogate or a value above U+10FFFF.
NSString* escapedUnicharToString(NSString* escapedUnicharString) {
    static const NSUInteger prefixLength = 2;
    static const NSUInteger maxHexDigits = 8;

    /// Messaging `nil` yields a length of 0, so a `nil` argument is rejected
    /// here together with inputs that are too short or too long.
    NSUInteger length = [escapedUnicharString length];
    if (length <= prefixLength || length > prefixLength + maxHexDigits) {
        return nil;
    }

    /// Parse the hex digits directly. At most eight digits are accepted, so
    /// the value always fits in 32 bits and cannot overflow `unsigned long`.
    unsigned long codePoint = 0;
    for (NSUInteger i = prefixLength; i < length; i++) {
        unichar digitChar = [escapedUnicharString characterAtIndex:i];
        unsigned long digit;
        if (digitChar >= '0' && digitChar <= '9') {
            digit = (unsigned long)(digitChar - '0');
        } else if (digitChar >= 'a' && digitChar <= 'f') {
            digit = (unsigned long)(digitChar - 'a') + 10;
        } else if (digitChar >= 'A' && digitChar <= 'F') {
            digit = (unsigned long)(digitChar - 'A') + 10;
        } else {
            return nil;
        }
        codePoint = (codePoint << 4) | digit;
    }

    /// Only Unicode scalar values can be encoded as UTF-8: reject the
    /// surrogate range and anything beyond the last code point.
    if (codePoint > 0x10FFFF || (codePoint >= 0xD800 && codePoint <= 0xDFFF)) {
        return nil;
    }

    /// Pick the shortest UTF-8 form for the code point. Padding a small code
    /// point into a longer form would be an overlong encoding, which is not
    /// valid UTF-8.
    ///
    ///   U+0000  - U+007F   : 0xxxxxxx
    ///   U+0080  - U+07FF   : 110xxxxx 10xxxxxx
    ///   U+0800  - U+FFFF   : 1110xxxx 10xxxxxx 10xxxxxx
    ///   U+10000 - U+10FFFF : 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
    unsigned char utf8Bytes[4];
    NSUInteger byteCount;
    if (codePoint < 0x80) {
        utf8Bytes[0] = (unsigned char)codePoint;
        byteCount = 1;
    } else if (codePoint < 0x800) {
        utf8Bytes[0] = (unsigned char)(0xC0 | (codePoint >> 6));
        utf8Bytes[1] = (unsigned char)(0x80 | (codePoint & 0x3F));
        byteCount = 2;
    } else if (codePoint < 0x10000) {
        utf8Bytes[0] = (unsigned char)(0xE0 | (codePoint >> 12));
        utf8Bytes[1] = (unsigned char)(0x80 | ((codePoint >> 6) & 0x3F));
        utf8Bytes[2] = (unsigned char)(0x80 | (codePoint & 0x3F));
        byteCount = 3;
    } else {
        utf8Bytes[0] = (unsigned char)(0xF0 | (codePoint >> 18));
        utf8Bytes[1] = (unsigned char)(0x80 | ((codePoint >> 12) & 0x3F));
        utf8Bytes[2] = (unsigned char)(0x80 | ((codePoint >> 6) & 0x3F));
        utf8Bytes[3] = (unsigned char)(0x80 | (codePoint & 0x3F));
        byteCount = 4;
    }

    /// Write each UTF-8 byte as a `%XX` percent escape.
    NSMutableString *percentEncoded = [NSMutableString stringWithCapacity:byteCount * 3];
    for (NSUInteger i = 0; i < byteCount; i++) {
        [percentEncoded appendFormat:@"%%%02X", (unsigned int)utf8Bytes[i]];
    }

    /// Decode the percent-encoded UTF-8 char, and return to caller.
    return [percentEncoded stringByRemovingPercentEncoding];
}
/// Demo entry point: decodes the escaped code point `\U0001D670` and logs the
/// resulting string.
int main(int argc, const char * argv[]) {
    @autoreleasepool {
        NSString *decoded = escapedUnicharToString(@"\\U0001D670");
        NSLog(@"%@", decoded);
    }
    return 0;
}