2

I'm working with French strings in .NET. When decoding a mail body, I receive "Chasn=C3=A9 sur illet" and I would like to get "Chasné sur illet", but I haven't found any solution after 2 days of web searching.

C# or VB.NET: can anyone help me?

thanks

  • Can you post the bit of your code where you set the string? – Robin Mackenzie May 31 '16 at 08:15
  • Hello, my string comes from an IMAP server. I read the message and get the message body text with the IMAP command FETCH BODY[TEXT], which returns a string encoded in quoted-printable format, and I have no idea how to write a converter for it. – Marc Collin Jun 02 '16 at 09:08
  • @MarcCollin See code below with full code. – ib11 Jun 03 '16 at 06:12

5 Answers5

6

Or the easiest of all, just use the QuotedPrintableDecoder from my MimeKit library:

/// <summary>
/// Decodes quoted-printable text with MimeKit's QuotedPrintableDecoder and
/// converts the decoded bytes to a string using the named charset.
/// </summary>
/// <param name="input">Quoted-printable text; it is turned into bytes with Encoding.ASCII before decoding.</param>
/// <param name="charset">Charset name passed to Encoding.GetEncoding to interpret the decoded bytes.</param>
/// <returns>The decoded text.</returns>
static string DecodeQuotedPrintable (string input, string charset)
{
    var qp = new QuotedPrintableDecoder ();
    byte[] encoded = Encoding.ASCII.GetBytes (input);

    // Size the output buffer from the decoder's own estimate for this input length.
    byte[] decoded = new byte[qp.EstimateOutputLength (encoded.Length)];
    int decodedLength = qp.Decode (encoded, 0, encoded.Length, decoded);

    // Only the first decodedLength bytes of the buffer hold decoded data.
    return Encoding.GetEncoding (charset).GetString (decoded, 0, decodedLength);
}

Note that the other answers above assume the decoded content will be ASCII or UTF-8, but that isn't necessarily the case. You'll need to get the charset parameter from the Content-Type header of the MIME part that you are decoding.

Of course... if you don't know how to get that info, you could simply use my awesome MailKit library to get the MIME part from IMAP and have it do all of this work for you.

jstedfast
  • 35,744
  • 5
  • 97
  • 110
  • MimeKit looks great, but it expects to be able to load stream objects. I get why that's better overall, but AFAICT there's no function for using it with strings if your existing code base doesn't make it easy to drop MimeKit in at the file loading step. – TheAtomicOption Mar 21 '19 at 16:54
  • 1
    FWIW, not all email messages can be properly converted into a C# string. The problem is that email messages *can* have multiple text parts each using a different charset encoding which means that by converting it to a string, it will be corrupted. Just something for you to be aware of long-term. – jstedfast Mar 21 '19 at 16:59
1

This is UTF8 encoding.

Using this post:

http://www.dpit.co.uk/decoding-quoted-printable-email-in-c/

Here is the code (don't forget to accept the answer if helped):

using System;
using System.Text;
using System.Text.RegularExpressions;

namespace ConsoleApplication1
{
    class Program
    {
        /// <summary>
        /// Decodes the sample string from the question, prints the result and
        /// waits for a key press before exiting.
        /// </summary>
        static void Main(string[] args)
        {
            const string sample = "Chasn=C3=A9 sur illet";
            string decoded = DecodeQuotedPrintable(sample);

            Console.WriteLine(decoded);
            Console.ReadKey();
        }

        /// <summary>
        /// Decodes quoted-printable text whose escaped octets are UTF-8.
        /// </summary>
        /// <param name="input">Quoted-printable text, e.g. "Chasn=C3=A9 sur illet".</param>
        /// <returns>
        /// The decoded text. Soft line breaks ("=" followed by CRLF) are removed and
        /// every run of "=XX" escapes is decoded as one UTF-8 byte sequence. An "="
        /// that is not followed by two hex digits is left untouched. Null or empty
        /// input is returned as is.
        /// </returns>
        static string DecodeQuotedPrintable(string input)
        {
            if (string.IsNullOrEmpty(input))
            {
                return input;
            }

            // Remove soft line breaks BEFORE decoding so that a multi-byte character
            // whose escapes were wrapped across two lines is decoded as one sequence.
            string unfolded = input.Replace("=\r\n", "");

            // Only real hex digits qualify; a wider class such as [0-9A-Z] would match
            // "=CM" and make the hex conversion throw. Lowercase digits are accepted
            // for robustness. The evaluator decodes each run in place, so text that was
            // already decoded is never re-scanned or re-substituted.
            return Regex.Replace(unfolded, "(=[0-9A-Fa-f]{2})+", match =>
            {
                string run = match.Value;
                byte[] octets = new byte[run.Length / 3];
                for (int i = 0; i < octets.Length; i++)
                {
                    // Each escape is 3 chars: '=' plus two hex digits.
                    octets[i] = Convert.ToByte(run.Substring(i * 3 + 1, 2), 16);
                }
                return Encoding.UTF8.GetString(octets);
            });
        }
    }
}
ib11
  • 2,530
  • 3
  • 22
  • 55
  • I used this, however there's a bug - the Regex needs to be `@"(=[0-9A-F][0-9A-F])+"` as I found it was incorrectly matching on invalid characters like =CM – Tom Mar 14 '22 at 10:02
0

From : https://stackoverflow.com/a/36803911/6403521 My solution :

    /// <summary>
    /// Checks that quotedprintable decodes UTF-8 escapes for a few French sample strings.
    /// </summary>
    [TestMethod]
    public void TestMethod1()
    {
        const string charset = "utf-8";

        // Each entry is { expected text, quoted-printable input }.
        string[][] samples =
        {
            new string[] { "La Bouichère", "La Bouich=C3=A8re" },
            new string[] { "Chasné sur illet", "Chasn=C3=A9 sur illet" },
            new string[] { "é è", "=C3=A9 =C3=A8" },
        };

        foreach (string[] sample in samples)
        {
            Assert.AreEqual(sample[0], quotedprintable(sample[1], charset));
        }
    }
    /// <summary>
    /// Decodes a quoted-printable string in a single pass.
    /// </summary>
    /// <param name="pStrIn">Quoted-printable text, e.g. "La Bouich=C3=A8re".</param>
    /// <param name="encoding">Charset name (e.g. "utf-8") used to turn each run of escaped bytes into text.</param>
    /// <returns>
    /// The decoded text. Soft line breaks ("=" followed by CRLF) are removed, each
    /// run of consecutive "=XX" escapes is decoded as one byte sequence in the given
    /// charset, and an "=" that is not followed by two hex digits is copied through
    /// unchanged instead of throwing. Null or empty input is returned as is.
    /// </returns>
    private string quotedprintable(string pStrIn, string encoding)
    {
        if (string.IsNullOrEmpty(pStrIn))
        {
            return pStrIn;
        }

        string text = pStrIn.Replace("=\r\n", "");
        System.Text.StringBuilder decoded = new System.Text.StringBuilder(text.Length);
        System.Collections.Generic.List<byte> pending = new System.Collections.Generic.List<byte>();

        // Resolved lazily, so input without any escape never touches the charset name.
        System.Text.Encoding charset = null;

        int index = 0;
        while (index < text.Length)
        {
            char current = text[index];

            bool isEscape = current == '='
                && index + 2 < text.Length
                && System.Uri.IsHexDigit(text[index + 1])
                && System.Uri.IsHexDigit(text[index + 2]);

            if (isEscape)
            {
                // Collect the byte; consecutive escapes are decoded together so that
                // multi-byte characters (e.g. =C3=A9) come out as a single character.
                int high = System.Uri.FromHex(text[index + 1]);
                int low = System.Uri.FromHex(text[index + 2]);
                pending.Add((byte)((high << 4) | low));
                index += 3;
                continue;
            }

            // A literal character ends the current escape run: flush it first.
            if (pending.Count > 0)
            {
                if (charset == null)
                {
                    charset = System.Text.Encoding.GetEncoding(encoding);
                }
                decoded.Append(charset.GetString(pending.ToArray()));
                pending.Clear();
            }

            decoded.Append(current);
            index++;
        }

        // Flush a run that reaches the end of the input.
        if (pending.Count > 0)
        {
            if (charset == null)
            {
                charset = System.Text.Encoding.GetEncoding(encoding);
            }
            decoded.Append(charset.GetString(pending.ToArray()));
        }

        return decoded.ToString();
    }
Community
  • 1
  • 1
0

We had an issue with this method: it is VERY slow. The following improved performance A LOT:

/// <summary>
/// Undoes a mail Content-Transfer-Encoding on a message text. Only
/// "quoted-printable" is handled; any other value returns the text unchanged.
/// </summary>
/// <param name="messageText">The encoded message text.</param>
/// <param name="enc">Encoding used to turn each run of "=XX" escaped bytes into text.</param>
/// <param name="transferEncoding">
/// Transfer-encoding name, compared case-insensitively. Null or empty means
/// "no encoding" and returns <paramref name="messageText"/> as is.
/// </param>
/// <returns>The decoded text, or the original text when no decoding applies.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="messageText"/> is null and quoted-printable decoding was requested.
/// </exception>
public static string FromMailTransferEncoding(this string messageText, Encoding enc, string transferEncoding)
{
    if (string.IsNullOrEmpty(transferEncoding))
    {
        return messageText;
    }

    // Ordinal comparison: ToLower() is culture-sensitive and fails to match
    // "QUOTED-PRINTABLE" under e.g. the Turkish culture.
    if (!string.Equals(transferEncoding, "quoted-printable", StringComparison.OrdinalIgnoreCase))
    {
        return messageText;
    }

    if (messageText == null)
    {
        throw new ArgumentNullException("messageText");
    }

    // Remove soft line breaks first, so an escape run that was wrapped across two
    // lines (e.g. "=C3=" CRLF "=A9") is decoded as one byte sequence.
    string unfolded = messageText.Replace("=\r\n", string.Empty);

    // A match evaluator decodes every run exactly where it was found. Decoded
    // output is never re-scanned (so a decoded "=" cannot form a bogus escape with
    // the following text) and is never interpreted as a replacement pattern (so a
    // decoded "$0" stays "$0"). Only real hex digits are matched, which keeps
    // sequences such as "=CM" from reaching the hex conversion.
    return Regex.Replace(unfolded, "(=[0-9A-Fa-f]{2})+", match =>
    {
        string run = match.Value;
        byte[] octets = new byte[run.Length / 3];
        for (int i = 0; i < octets.Length; i++)
        {
            // Each escape is 3 chars: '=' plus two hex digits.
            octets[i] = Convert.ToByte(run.Substring(i * 3 + 1, 2), 16);
        }
        return enc.GetString(octets);
    });
}
Michael
  • 3,093
  • 7
  • 39
  • 83
-1
    /// <summary>
    /// Decodes quoted-printable "=XX" escapes in <paramref name="source"/>.
    /// </summary>
    /// <param name="source">Quoted-printable text, e.g. "Chasn=C3=A9 sur illet".</param>
    /// <param name="encoding">
    /// Encoding used to turn each run of escaped bytes into text; UTF-8 when omitted.
    /// </param>
    /// <returns>
    /// The decoded text. Consecutive escapes are decoded together as one byte
    /// sequence, so "=C3=A9" yields a single character rather than one character per
    /// byte. Soft line breaks ("=" followed by CRLF) are removed, and an "=" that is
    /// not followed by two hex digits is copied through unchanged. Null or empty
    /// input is returned as is.
    /// </returns>
    static string ConverFromHex(string source, System.Text.Encoding encoding = null)
    {
        if (string.IsNullOrEmpty(source))
        {
            return source;
        }

        System.Text.Encoding charset = encoding ?? System.Text.Encoding.UTF8;
        System.Text.StringBuilder target = new System.Text.StringBuilder(source.Length);
        System.Collections.Generic.List<byte> pending = new System.Collections.Generic.List<byte>();

        int pos = 0;
        while (pos < source.Length)
        {
            char current = source[pos];

            // Both constructs below need two more characters after the '='.
            if (current == '=' && pos + 2 < source.Length)
            {
                char first = source[pos + 1];
                char second = source[pos + 2];

                if (first == '\r' && second == '\n')
                {
                    // Soft line break: skip it without ending the current byte run,
                    // since a multi-byte character may be wrapped across lines.
                    pos += 3;
                    continue;
                }

                if (System.Uri.IsHexDigit(first) && System.Uri.IsHexDigit(second))
                {
                    int value = (System.Uri.FromHex(first) << 4) | System.Uri.FromHex(second);
                    pending.Add((byte)value);
                    pos += 3;
                    continue;
                }
            }

            // Literal character: flush the bytes collected so far, then copy it.
            if (pending.Count > 0)
            {
                target.Append(charset.GetString(pending.ToArray()));
                pending.Clear();
            }

            target.Append(current);
            pos++;
        }

        // Flush a byte run that reaches the end of the input.
        if (pending.Count > 0)
        {
            target.Append(charset.GetString(pending.ToArray()));
        }

        return target.ToString();
    }
GuineaPig
  • 3
  • 4
  • 1
    This code does not handle the OP's string correctly; the result is "ChasnÃ© sur illet". Your code treats each hex number as a separate character, but "=C3=A9" is supposed to represent the single character "é". – Blackwood May 31 '16 at 17:11