0

I have a bunch of XML files that I need to deserialize and store in a database. I found a generic implementation of an XML deserializer that does a great job. The only problem is that I cannot deserialize the degree sign (°), as used in °C.

The following XML samples contain the Celsius degree sign, which I cannot deserialize:

<pt>
          <ch>1</ch>
          <type>Analog</type>
          <chtype>Temperature</chtype>
          <chunits>°C</chunits>
          <time>2020-05-03 22:10:00</time>
          <value>0</value>
        </pt>

or

 <pt>
          <ch>5</ch>
          <type>Analog</type>
          <chtype>Wind Direction</chtype>
          <chunits>°</chunits>
          <time>2020-05-03 22:10:00</time>
          <value>0</value>
        </pt>

My Deserializer implementation is the following:

/// <summary>
/// Generic helpers that convert objects to and from XML text using <see cref="XmlSerializer"/>.
/// Both methods are best-effort: any failure is swallowed and a neutral fallback value is returned.
/// </summary>
public class XmlConvert
{
    /// <summary>
    /// Serializes <paramref name="dataObject"/> to an XML string.
    /// </summary>
    /// <typeparam name="T">Type handed to the <see cref="XmlSerializer"/>.</typeparam>
    /// <param name="dataObject">Object to serialize; may be null.</param>
    /// <returns>
    /// The XML text, or <see cref="string.Empty"/> when <paramref name="dataObject"/> is null
    /// or serialization throws.
    /// </returns>
    public static string SerializeObject<T>(T dataObject)
    {
        if (dataObject == null)
        {
            return string.Empty;
        }

        try
        {
            using (var stringWriter = new StringWriter())
            {
                var serializer = new XmlSerializer(typeof(T));
                serializer.Serialize(stringWriter, dataObject);
                return stringWriter.ToString();
            }
        }
        catch (Exception)
        {
            // Deliberate best-effort: callers receive an empty string instead of an exception.
            return string.Empty;
        }
    }

    /// <summary>
    /// Deserializes an instance of <typeparamref name="T"/> from XML text.
    /// </summary>
    /// <typeparam name="T">Target type; must have a public parameterless constructor.</typeparam>
    /// <param name="xml">
    /// XML document as text. The text has already been decoded by the caller, so any
    /// byte-level decoding has to be done correctly before this method is called.
    /// </param>
    /// <returns>
    /// The deserialized object, or a freshly constructed <typeparamref name="T"/> when
    /// <paramref name="xml"/> is null/empty or deserialization throws. A failure is therefore
    /// indistinguishable from an empty document for the caller.
    /// </returns>
    public static T DeserializeObject<T>(string xml)
        where T : new()
    {
        if (string.IsNullOrEmpty(xml))
        {
            return new T();
        }

        try
        {
            using (var stringReader = new StringReader(xml))
            {
                var serializer = new XmlSerializer(typeof(T));
                return (T)serializer.Deserialize(stringReader);
            }
        }
        catch (Exception)
        {
            // Deliberate best-effort: fall back to a default instance rather than propagating.
            return new T();
        }
    }
}

The class I use to convert is the following:

  /// <summary>
    /// Container for a single data point.
    /// </summary>
    public class MessageDataPoint
    {
        /// <summary>
        /// Channel number for data point. Mapped to the <c>ch</c> element.
        /// </summary>
        [System.Xml.Serialization.XmlElement("ch")]
        public string Ch { get; set; }

        /// <summary>
        /// Point type (Analog, Flow). Mapped to the <c>type</c> element.
        /// </summary>
        [System.Xml.Serialization.XmlElement("type")]
        public string Type { get; set; }

        /// <summary>
        /// Channel type (for example Temperature or Wind Direction). Mapped to the <c>chtype</c> element.
        /// </summary>
        [System.Xml.Serialization.XmlElement("chtype")]
        public string ChType { get; set; }

        /// <summary>
        /// Channel units (for example °C). Mapped to the <c>chunits</c> element.
        /// </summary>
        [System.Xml.Serialization.XmlElement("chunits")]
        public string ChUnits { get; set; }

        /// <summary>
        /// Point timestamp as raw text. Mapped to the <c>time</c> element.
        /// </summary>
        [System.Xml.Serialization.XmlElement("time")]
        public string Time { get; set; }

        /// <summary>
        /// <see cref="Time"/> parsed with the invariant culture, or <c>null</c> when
        /// <see cref="Time"/> is missing or cannot be parsed. Not part of the XML.
        /// </summary>
        [System.Xml.Serialization.XmlIgnore]
        public DateTime? TimeParsed
        {
            get
            {
                // Honour the TryParse result: a failed parse must surface as null,
                // not as default(DateTime) (0001-01-01), which looks like a real timestamp.
                if (DateTime.TryParse(Time, CultureInfo.InvariantCulture, DateTimeStyles.None, out var parsed))
                {
                    return parsed;
                }

                return null;
            }
            // No-op setter kept so the property's shape (get + private set) is unchanged.
            private set { }
        }

        /// <summary>
        /// Channel value as raw text. Mapped to the <c>value</c> element.
        /// </summary>
        [System.Xml.Serialization.XmlElement("value")]
        public string Value { get; set; }

    }

The deserializer implementation is the following:

 /// <summary>
 /// Requests the object identified by <paramref name="key"/> in <c>bucketName</c> through
 /// <c>client.GetObjectAsync</c>, reads the whole response stream as text and deserializes
 /// that text into a <c>Logger</c>.
 /// </summary>
 /// <param name="key">Key of the object to fetch.</param>
 // NOTE(review): the snippet is abridged - the try block has no visible catch/finally and
 // no return statement is shown, so the returned value cannot be documented from here.
 public async Task<string> GetFileAsStream(string key)
 {
            
 var content = string.Empty;
            
try
{
   GetObjectRequest request = new GetObjectRequest
                {
                    BucketName = bucketName,
                    Key = key
                };

   using (GetObjectResponse response = await client.GetObjectAsync(request))
   {
    // No encoding is passed to StreamReader here, so the bytes are decoded with its
    // default encoding (UTF-8 per the discussion below) rather than the encoding named
    // in the XML declaration; this is where the degree sign turns into U+FFFD.
    using (StreamReader reader = new StreamReader(response.ResponseStream))
    {
        content = await reader.ReadToEndAsync();
                        
        // The XML is handed over as already-decoded text.
        var logger = XmlConvert.DeserializeObject<Logger>(content);

        // code removed for brevity
     }
  }
 }
}

The result is the following:

<pt>
          <ch>1</ch>
          <type>Analog</type>
          <chtype>Temperature</chtype>
          <chunits>�C</chunits>
          <time>2020-05-03 22:10:00</time>
          <value>0</value>
        </pt>
        <pt>
          <ch>2</ch>
          <type>Analog</type>
          <chtype>Wind Chill</chtype>
          <chunits>�C</chunits>
          <time>2020-05-03 22:10:00</time>
          <value>0</value>
        </pt>

Any ideas?

This is a sample file:

<?xml version="1.0" encoding="ISO-8859-15"?>
<logger>
   <id>111111</id>
   <mobileNumber>12345676</mobileNumber>
   <serialNumber>01404</serialNumber>
   <siteName>abcdef</siteName>
   <siteId>abcdef</siteId>
   <maintenanceflag>False</maintenanceflag>
   <messages>
     <message>
       <id>123456789</id>
       <number>123456</number>
       <dateReceived>2020-05-04 00:02:25.0</dateReceived>
       <dateCredited>2020-05-04 00:02:25.0</dateCredited>
       <message>        <siteId>abcdef</siteId>
        <type>data</type>
        <RST>2020-01-18 08:50:00</RST>
        <RTC>2020-05-03 23:02:24</RTC>
        <DST>2020-05-03 22:00:00</DST>
        <mode>0</mode>
        <SR>600</SR>
        <pt>
          <ch>1</ch>
          <type>Analog</type>
          <chtype>Temperature</chtype>
          <chunits>°C</chunits>
          <time>2020-05-03 23:00:00</time>
          <value>0</value>
        </pt>
      
        <pt>
          <ch>5</ch>
          <type>Analog</type>
          <chtype>Wind Direction</chtype>
          <chunits>°</chunits>
          <time>2020-05-03 23:00:00</time>
          <value>0</value>
        </pt>
     
     
      </message>
       <format>BINARY</format>
       <source>FTP</source>
       <batteryCondition>123</batteryCondition>
       <signalStrength>12</signalStrength>
    </message>
  </messages>
</logger>
  • 1
    What is the error you are getting? – Alexander Haas Aug 07 '20 at 10:55
  • 2
    Do you have the correct character set and XML doctype to make the "°"-sign a valid character? – Alexander Haas Aug 07 '20 at 10:56
  • Hi Alexander. Thanks for looking into it. I don't get any errors. I have this encoding on xml – Enrico Acampora Aug 07 '20 at 11:11
  • 1
    `StreamReader` does not do encoding detection, and its encoding defaults to UTF-8. Your best bet is probably to read the start of the file as ASCII, get the `encoding=` bit out of the `<?xml ... ?>` declaration, and use that to parse the whole file with `StreamReader`. – Nyerguds Aug 07 '20 at 11:22
  • Actually, since you're starting from a Stream... first receive the whole thing as bytes. That'll make it a whole lot easier to do any kind of detections on the content. – Nyerguds Aug 07 '20 at 11:29
  • Hi @Nyerguds. Thanks for looking into it. I will try as you suggested. – Enrico Acampora Aug 07 '20 at 11:39
  • Small side note on `DeserializeObject`: isn't it more logical for a failed deserialisation, if you're not actually handling and logging the exception, to at least return null to indicate the result isn't actually the deserialised document? Returning a new pristine object seems like a scarily silent way to fail, and a cause for massive headaches in debugging if anything goes wrong in that part later. In fact, a catch-all `catch (Exception)` is almost _never_ a good idea. – Nyerguds Aug 19 '20 at 08:29

2 Answers

1

Feed the stream to the serializer. This will greatly simplify the code.
Moreover, you can completely delete your XmlConvert class.

using (GetObjectResponse response = await client.GetObjectAsync(request))
using (var stream = response.ResponseStream)
{
    // Pass the raw response stream to the serializer so the XML reader, not a
    // StreamReader with a guessed encoding, decodes the bytes.
    var serializer = new XmlSerializer(typeof(Logger));
    // XmlSerializer exposes Deserialize(Stream); it has no DeserializeObject method.
    var logger = (Logger)serializer.Deserialize(stream);
}

But if you think that the XmlConvert class is necessary, for example, it does some extra work, then at least use Stream in it instead of string.

/// <summary>
/// Deserializes an instance of <typeparamref name="T"/> directly from a stream of XML bytes.
/// </summary>
/// <typeparam name="T">Target type; must have a public parameterless constructor.</typeparam>
/// <param name="stream">Stream positioned at the start of the XML document; may be null.</param>
/// <returns>
/// The deserialized object, or a freshly constructed <typeparamref name="T"/> when
/// <paramref name="stream"/> is null or deserialization throws.
/// </returns>
public static T DeserializeObject<T>(Stream stream)
    where T : new()
{
    // Explicit guard, mirroring the empty-input check of the string overload,
    // instead of relying on the catch block below to absorb a null argument.
    if (stream == null)
    {
        return new T();
    }

    try
    {
        var serializer = new XmlSerializer(typeof(T));
        return (T)serializer.Deserialize(stream);
    }
    catch (Exception)
    {
        // Deliberate best-effort: fall back to a default instance rather than propagating.
        return new T();
    }
}
Alexander Petrov
  • 13,457
  • 2
  • 20
  • 49
0

I found the solution by looking at the following post on Stackoverflow

Using .NET how to convert ISO 8859-1 encoded text files that contain Latin-1 accented characters to UTF-8

Thanks to Nyerguds and Alexander Haas for pointing me in the right direction

The solution for me was

/// <summary>
/// Requests the object identified by <paramref name="key"/> in <c>bucketName</c> through
/// <c>client.GetObjectAsync</c>, decodes the response stream as ISO-8859-1 text and
/// deserializes that text into a <c>Logger</c>.
/// </summary>
/// <param name="key">Key of the object to fetch.</param>
// NOTE(review): the snippet is abridged - the try block has no visible catch/finally and
// no return statement is shown, so the returned value cannot be documented from here.
public async Task<string> GetFileAsStream(string key)
 {
            
 var content = string.Empty;
            
try
{
   GetObjectRequest request = new GetObjectRequest
                {
                    BucketName = bucketName,
                    Key = key
                };

   using (GetObjectResponse response = await client.GetObjectAsync(request))
   {
    // The encoding is passed explicitly so the reader does not fall back to its default.
    // NOTE(review): the value is hard-coded to iso-8859-1, while the sample file's XML
    // declaration says ISO-8859-15 - confirm the two agree for every character in use.
    using (StreamReader reader = new StreamReader(response.ResponseStream, Encoding.GetEncoding("iso-8859-1")))
    {
        content = await reader.ReadToEndAsync();
                        
        // The XML is handed over as already-decoded text.
        var logger = XmlConvert.DeserializeObject<Logger>(content);

        // code removed for brevity
     }
  }
 }
}
  • A link to a solution is welcome, but please ensure your answer is useful without it: [add context around the link](//meta.stackexchange.com/a/8259) so your fellow users will have some idea what it is and why it’s there, then quote the most relevant part of the page you're linking to in case the target page is unavailable. [Answers that are little more than a link may be deleted.](//stackoverflow.com/help/deleted-answers) –  Aug 07 '20 at 11:57
  • You don't need to convert stuff between text encodings. You just need to make sure it gets interpreted using the encoding set in the xml header in the first place. – Nyerguds Aug 07 '20 at 11:59
  • Thanks @Daniil for your feedback. I have updated my answer as you suggested – Enrico Acampora Aug 07 '20 at 13:05