38

I have this extension method

    /// <summary>
    /// Serializes <paramref name="value"/> to a single-line XML string (with an XML
    /// declaration) using an <see cref="XmlSerializer"/> built for <typeparamref name="T"/>.
    /// </summary>
    /// <typeparam name="T">Static type used to construct the serializer.</typeparam>
    /// <param name="value">The object to serialize.</param>
    /// <returns>The XML text accumulated by the underlying <see cref="StringWriter"/>.</returns>
    public static string SerializeObject<T>(this T value)
    {
        var writerSettings = new XmlWriterSettings();
        writerSettings.Encoding = new UTF8Encoding(true);
        writerSettings.Indent = false;
        writerSettings.OmitXmlDeclaration = false;
        writerSettings.NewLineHandling = NewLineHandling.None;

        var xmlSerializer = new XmlSerializer(typeof(T));

        using (var output = new StringWriter())
        {
            // The inner writer must be disposed before the text is read back,
            // so the nested using blocks are kept deliberately.
            using (var writer = XmlWriter.Create(output, writerSettings))
            {
                xmlSerializer.Serialize(writer, value);
            }

            return output.ToString();
        }
    }

But whenever I call this, the output specifies an encoding of UTF-16, i.e. <?xml version="1.0" encoding="utf-16"?>. What am I doing wrong?

Cœur
  • 37,241
  • 25
  • 195
  • 267
Glenn Slaven
  • 33,720
  • 26
  • 113
  • 165
  • possible duplicate of [How to put an encoding attribute to xml other that utf-16 with XmlWriter?](http://stackoverflow.com/questions/427725/how-to-put-an-encoding-attribute-to-xml-other-that-utf-16-with-xmlwriter) – Glenn Slaven Feb 28 '12 at 00:24
  • I've realised that this is a dupe, so vote to close – Glenn Slaven Feb 28 '12 at 00:25

6 Answers6

26

Strings are UTF-16, so writing to a StringWriter will always use UTF-16. If that's not what you want, then use some other TextWriter derived class, with the encoding you like.

John Saunders
  • 160,644
  • 26
  • 247
  • 397
  • 12
    Shakes head. So what's the point then of having an encoding property on the XmlWriterSettings. Yes a string is UTF-16, but if we're serializing to a string, its because we're about to write to a file or something, and we want the encoding attribute of xml declaration to match the real encoding of the file we create, and this is unlikely to be UTF-16. – bbsimonbb Aug 30 '16 at 08:38
  • @user don't serialize to a string go directly to a Stream. – John Saunders Aug 30 '16 at 08:42
  • OK. That makes more sense. – bbsimonbb Aug 30 '16 at 08:46
  • Yeah, you're using `StringWriter`, so by default it's Unicode (UTF-16) . If I do `using(var xmlWriter = XmlWriter.Create("MyFile.xml", settings)` and do a manual `xmlWriter.WriteStartElement("SomeRootElement"); xmlWriter.WriteEndElement();`, then load it back in: `XmlDocument xml = new XmlDocument(); xml.Load("MyFile.xml"); byte[] bytes = Encoding.Default.GetBytes(xml.OuterXml); string xmlDoc = Encoding.Default.GetString(bytes);`, it is UTF-8 - just FYI that you can dump it to a file and read it back in, then delete the file. Alternatively, use `StringWriter`, just specify `Encoding.UTF8`. – vapcguy Jan 09 '17 at 18:13
24

As far as I know, the StringWriter class always reports UTF-16 as its encoding when serializing to a string. You can write your own derived class that accepts a different encoding:

/// <summary>
/// A <see cref="StringWriter"/> whose reported <see cref="Encoding"/> can be supplied
/// by the caller. Every constructor of <see cref="StringWriter"/> is mirrored, each
/// with and without an extra encoding argument.
/// </summary>
public class StringWriterWithEncoding : StringWriter
{
    // Stays null when no encoding was supplied; the base class value is reported then.
    private readonly Encoding _encoding;

    // --- Constructors that keep the base class encoding ---

    public StringWriterWithEncoding()
        : base()
    {
    }

    public StringWriterWithEncoding(IFormatProvider formatProvider)
        : base(formatProvider)
    {
    }

    public StringWriterWithEncoding(StringBuilder sb)
        : base(sb)
    {
    }

    public StringWriterWithEncoding(StringBuilder sb, IFormatProvider formatProvider)
        : base(sb, formatProvider)
    {
    }

    // --- Constructors that report a caller-chosen encoding ---

    public StringWriterWithEncoding(Encoding encoding)
        : this()
    {
        _encoding = encoding;
    }

    public StringWriterWithEncoding(IFormatProvider formatProvider, Encoding encoding)
        : this(formatProvider)
    {
        _encoding = encoding;
    }

    public StringWriterWithEncoding(StringBuilder sb, Encoding encoding)
        : this(sb)
    {
        _encoding = encoding;
    }

    public StringWriterWithEncoding(StringBuilder sb, IFormatProvider formatProvider, Encoding encoding)
        : this(sb, formatProvider)
    {
        _encoding = encoding;
    }

    /// <summary>
    /// The encoding passed to the constructor, or the base class encoding when none
    /// (or null) was given.
    /// </summary>
    public override Encoding Encoding
    {
        get { return _encoding ?? base.Encoding; }
    }
}

So you can use this instead:

using(var stringWriter = new StringWriterWithEncoding( Encoding.UTF8))
{
   ...
}
Bojin Li
  • 5,769
  • 2
  • 24
  • 37
4

As the accepted answer says, StringWriter is UTF-16 (Unicode) by default and design. If you want to do it by getting a UTF-8 string in the end, there are 2 ways I can give you to get it done:

Solution #1 (not very efficient, bad practice, but gets the job done): Dump it to a text file and read it back in, delete the file (probably only suitable for small files, if you even want to do this at all - just wanted to show it could be done!)

/// <summary>
/// Serializes <paramref name="value"/> to XML by writing it to a temporary file as
/// UTF-8, loading that file back into an <see cref="XmlDocument"/> and returning its
/// outer XML.
/// </summary>
/// <typeparam name="T">Static type used to construct the serializer.</typeparam>
/// <param name="value">The object to serialize.</param>
/// <returns>The document's outer XML, including the XML declaration.</returns>
public static string SerializeObject<T>(this T value)
{
    var serializer = new XmlSerializer(typeof(T));
    var settings = new XmlWriterSettings
                   {
                    Encoding = new UTF8Encoding(true),
                    Indent = false,
                    OmitXmlDeclaration = false,
                    NewLineHandling = NewLineHandling.None
                   };

    // Use a unique temp file rather than a fixed name in the working directory:
    // a fixed name would overwrite (and then delete) an unrelated file with the
    // same name and would collide between concurrent calls.
    string tempPath = Path.GetTempFileName();
    try
    {
        using (var xmlWriter = XmlWriter.Create(tempPath, settings))
        {
            serializer.Serialize(xmlWriter, value);
        }

        var xml = new XmlDocument();
        xml.Load(tempPath);

        // OuterXml is already a .NET string; encoding it to UTF-8 bytes and decoding
        // it again would yield the same text, so it is returned directly.
        return xml.OuterXml;
    }
    finally
    {
        // Remove the temp file even when serialization or loading throws.
        File.Delete(tempPath);
    }
}

Solution #2 (better, easier, more elegant solution!): Do it like you have it, using StringWriter, but derive a small subclass that overrides its Encoding property so that it reports UTF-8:

/// <summary>
/// Serializes <paramref name="value"/> to a single-line XML string, writing through a
/// <c>UTF8StringWriter</c> instead of a plain <see cref="StringWriter"/>.
/// </summary>
/// <typeparam name="T">Static type used to construct the serializer.</typeparam>
/// <param name="value">The object to serialize.</param>
/// <returns>The XML text accumulated by the string writer.</returns>
public static string SerializeObject<T>(this T value)
{
    var writerSettings = new XmlWriterSettings();
    writerSettings.Encoding = new UTF8Encoding(true);
    writerSettings.Indent = false;
    writerSettings.OmitXmlDeclaration = false;
    writerSettings.NewLineHandling = NewLineHandling.None;

    var xmlSerializer = new XmlSerializer(typeof(T));

    using (var output = new UTF8StringWriter())
    {
        // Dispose the XML writer first so everything is flushed before reading the text.
        using (var writer = XmlWriter.Create(output, writerSettings))
        {
            xmlSerializer.Serialize(writer, value);
        }

        return output.ToString();
    }
}

/// <summary>
/// A <see cref="StringWriter"/> that reports UTF-8 as its encoding.
/// </summary>
public class UTF8StringWriter : StringWriter
{
    /// <summary>Always returns <see cref="System.Text.Encoding.UTF8"/>.</summary>
    public override Encoding Encoding => Encoding.UTF8;
}
vapcguy
  • 7,097
  • 1
  • 56
  • 52
4

as @john-saunders mentioned in his answer:

StringWriter will always use UTF-16

So I used a MemoryStream for this purpose.

In my case I am using the windows-1251 encoding.

// Writes `doc` through an XmlWriter into a MemoryStream using windows-1251, then
// decodes the bytes back into a string.
// NOTE(review): `doc` is defined outside this snippet; presumably an XmlDocument — confirm.
// NOTE(review): on .NET Core / .NET 5+, code page 1251 is presumably only available
// after registering CodePagesEncodingProvider — confirm for the target runtime.
var xmlString = "";
using (var ms = new MemoryStream())
{
    var encoding = Encoding.GetEncoding(1251);
    var settings = new XmlWriterSettings
    {
        Indent = true,
        Encoding = encoding
    };

    using (var xmlTextWriter = XmlWriter.Create(ms, settings))
    {
        doc.Save(xmlTextWriter);
    }

    // Decode only after the writer has been disposed, so all buffered output
    // has reached the stream.
    xmlString = encoding.GetString(ms.ToArray());
}
aleha_84
  • 8,309
  • 2
  • 38
  • 46
1

You should derive a new class from StringWriter that has an overridden Encoding property.

ahmet
  • 646
  • 6
  • 14
0

If you do not want to use a class that derives from StringWriter, then in your case you could simply set OmitXmlDeclaration to true and prepend your own declaration, just as I do below:

 /// <summary>
 /// Serializes <paramref name="value"/> to indented XML, replacing the XML declaration
 /// that the writer would emit with <paramref name="xmlDeclaration"/>.
 /// </summary>
 /// <typeparam name="T">Static type used to construct the serializer.</typeparam>
 /// <param name="value">The object to serialize; <c>null</c> yields an empty string.</param>
 /// <param name="xmlDeclaration">
 /// Declaration text placed on its own first line. Pass <c>null</c> to keep the
 /// declaration generated by the <see cref="XmlWriter"/> instead.
 /// </param>
 /// <returns>The XML text, or <see cref="string.Empty"/> when <paramref name="value"/> is null.</returns>
 public static string Serialize<T>(this T value, string xmlDeclaration = "<?xml version=\"1.0\"?>") where T : class, new()
        {
            if (value == null) return string.Empty;

            var settings = new XmlWriterSettings
            {
                Indent = true,
                // Suppress the writer's own declaration only when a custom one is supplied.
                OmitXmlDeclaration = xmlDeclaration != null,
            };

            using (var stringWriter = new StringWriter())
            {
                using (var xmlWriter = XmlWriter.Create(stringWriter, settings))
                {
                    var xmlSerializer = new XmlSerializer(typeof(T));

                    xmlSerializer.Serialize(xmlWriter, value);
                }

                // Read the text only after the XML writer has been disposed, so
                // everything it buffered has reached the StringWriter.
                var body = stringWriter.ToString();

                // Without a custom declaration the writer's own declaration must stay at
                // the very start of the output, so nothing is prepended.
                if (xmlDeclaration == null) return body;

                return xmlDeclaration + Environment.NewLine + body;
            }
        }
Phil C
  • 3,687
  • 4
  • 29
  • 51