As I tried to explain in a comment, the real problem here is that you have a %-encoded string value, but it was produced using a different text encoding from the one you expected; to fix this, you need to:
- identify the %-encoded tokens in the source data
- parse out the bytes from the source %-encoded blocks
- decode those bytes into text using the source encoding
- re-encode that text into bytes using the destination encoding
- re-apply %-encoding of those bytes
- substitute those values back into the original string
For example (which changes "C:/Users/%C5%92ser" to "C:/Users/%8Cser"):
using System;
using System.Text;
using System.Text.RegularExpressions;
/// <summary>
/// Demonstrates rewriting the %-encoded parts of a string from one text
/// encoding to another, leaving everything else in the string untouched.
/// </summary>
static class P
{
    // Upper-case hex digits, indexed by nibble value (0-15).
    private const string HexDigits = "0123456789ABCDEF";

    private static void Main()
    {
        // NOTE(review): on .NET Core / .NET 5+ code page 1252 is presumably only
        // available after Encoding.RegisterProvider(CodePagesEncodingProvider.Instance)
        // has been called - confirm for your target runtime.
        var result = RewriteUrlPercentEncoding("C:/Users/%C5%92ser",
            Encoding.UTF8, Encoding.GetEncoding(1252));
        Console.WriteLine(result);
    }

    /// <summary>
    /// Rewrites every run of <c>%XX</c> tokens in <paramref name="value"/>: the
    /// bytes they represent are decoded as text using <paramref name="from"/>,
    /// that text is encoded again using <paramref name="to"/>, and the resulting
    /// bytes are written back as upper-case <c>%XX</c> tokens.
    /// </summary>
    /// <param name="value">The string containing %-encoded tokens.</param>
    /// <param name="from">The encoding the existing %-encoded bytes are in.</param>
    /// <param name="to">The encoding the rewritten %-encoded bytes should be in.</param>
    /// <returns>
    /// <paramref name="value"/> with each run of %-encoded tokens replaced; text
    /// outside those runs is returned unchanged.
    /// </returns>
    /// <exception cref="ArgumentNullException">Any argument is <c>null</c>.</exception>
    internal static string RewriteUrlPercentEncoding(string value, Encoding from, Encoding to)
    {
        if (value is null)
        {
            throw new ArgumentNullException(nameof(value));
        }
        if (from is null)
        {
            throw new ArgumentNullException(nameof(from));
        }
        if (to is null)
        {
            throw new ArgumentNullException(nameof(to));
        }

        // #1: match whole runs of consecutive %XX tokens, so that the bytes of a
        // multi-byte character are always decoded together.
        return Regex.Replace(value, @"(\%[0-9a-fA-F]{2})+", match =>
        {
            var s = match.Value;

            // #2: every token is exactly three chars ('%', high digit, low digit).
            var bytes = new byte[s.Length / 3];
            for (int i = 0; i < bytes.Length; i++)
            {
                byte hi = ParseNibble(s[(i * 3) + 1]),
                    lo = ParseNibble(s[(i * 3) + 2]);
                bytes[i] = (byte)((hi << 4) | lo);
            }

            // #3 and #4: bytes -> text (source encoding) -> bytes (destination encoding)
            var reencoded = to.GetBytes(from.GetString(bytes));

            // #5: write each byte back out as a %XX token
            var chars = new char[3 * reencoded.Length];
            int index = 0;
            for (int i = 0; i < reencoded.Length; i++)
            {
                var b = reencoded[i];
                chars[index++] = '%';
                chars[index++] = WriteNibble((byte)(b >> 4));
                chars[index++] = WriteNibble((byte)(b & 0b1111));
            }

            // #6: the returned string replaces the matched run in the input
            return new string(chars);
        });
    }

    /// <summary>Converts one hex digit (0-9, A-F or a-f) to its value, 0-15.</summary>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="c"/> is not a hex digit.</exception>
    private static byte ParseNibble(char c)
    {
        // Range arithmetic instead of a per-digit table: a table previously
        // mapped 'e' to 0xF by mistake, corrupting lower-case escapes.
        if (c >= '0' && c <= '9')
        {
            return (byte)(c - '0');
        }
        if (c >= 'A' && c <= 'F')
        {
            return (byte)(c - 'A' + 10);
        }
        if (c >= 'a' && c <= 'f')
        {
            return (byte)(c - 'a' + 10);
        }
        throw new ArgumentOutOfRangeException(nameof(c));
    }

    /// <summary>Converts a value in the range 0-15 to its upper-case hex digit.</summary>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="b"/> is greater than 15.</exception>
    private static char WriteNibble(byte b)
    {
        if (b > 0xF)
        {
            throw new ArgumentOutOfRangeException(nameof(b));
        }
        return HexDigits[b];
    }
}
Note that the above is intended for simplicity rather than efficiency; for high volume work, there are many ways to improve this.
Similarly, reversing the encodings allows us to get from things like "C:/Users/%DCser" to "C:/Users/%C3%9Cser":
// Same helper, opposite direction: the %-encoded bytes are read using
// code page 1252 and written back out as UTF-8.
string rewritten = RewriteUrlPercentEncoding(
    "C:/Users/%DCser",
    Encoding.GetEncoding(1252),
    Encoding.UTF8);
Console.WriteLine(rewritten);