-2

I have a problem when trying to read a file. The content of the XML file has invalid characters. The character is '&'.

I tried to resolve the problem by replacing the invalid characters and with try/catch but I have other exception, because the file is in use by another program.

Below is an example of my XML:

<?xml version="1.0" encoding="ISO-8859-1"?>
<ALABAMAOGANYWHERE file="" LSP="73089617000107" datetime="20170201|094839" records="1">
    <NEWSHIPMENT id="701781" type="OUT&BOUND" dtRequest="20170201|000000">
        <ORIGIN name="Sousas & Sousas LTDA" address="RUA PAUL GROUND , 1415" country="BRASIL" state="PR" city="CURITIBA" cdPostal="81460040"/>  
    </NEWSHIPMENT> 
</ALABAMAOGANYWHERE>

This is how I try to read it:

    public void lerArquivoXML()
    {

        string[] arquivos = Directory.GetFiles(@"C:\Users\la\Documents\teste");

        foreach (string arq in arquivos)
        {             
            XmlDocument doc = new XmlDocument();
            try
            {
                doc.Load(arq);
            }
            catch (System.Xml.XmlException)
            {
                //replace---------------------------------------------------------------------------
                Replace replace = new Replace();
                replace.ReplaceTextInFile(arq, arq, "&", "");
                doc.Load(arq);
                //end_replace------------------------------------------------------------------------
           }

            String ORIGIN_Name, ORIGIN_Address, ORIGIN_Country, ORIGIN_State, ORIGIN_City, ORIGIN_cdPostal, ORIGIN_cnpj, ORIGIN_IE, DESTINY_Name, DESTINY_Address, DESTINY_Country, DESTINY_State, DESTINY_City, DESTINY_CD_Postal, DESTINY_CNPJ, DESTINY_IE, DESTINY_PHONE, DESTINY_PHONE2, MODAL, DELLINSTRUCTION, SALESREPMAIL, SALESREPCODIGO, NF_VALUE, NF_DAISSUE, NFServico, PO, DESCRIPTION_VALUE, PRODUCT_PAYMENTCC, PRODUCT_LOB, PRODUCT_TYPE, PRODUCT_qtTotal, PRODUCT_wgTotal, PRODUCT_vlTotal, DELIVERYPLUS_dpservicename, DELIVERYPLUS_dpcomplementaryinfo, NFSerie, SEGMENT, SA3, EDD, SHIFT_VALUE, Customer_Number;
            ORIGIN_Name = ORIGIN_Address = ORIGIN_Country = ORIGIN_State = ORIGIN_City = ORIGIN_cdPostal = ORIGIN_cnpj = ORIGIN_IE = DESTINY_Name = DESTINY_Address = DESTINY_Country = DESTINY_State = DESTINY_City = DESTINY_CD_Postal = DESTINY_CNPJ = DESTINY_IE = DESTINY_PHONE = DESTINY_PHONE2 = MODAL = DELLINSTRUCTION = SALESREPMAIL = SALESREPCODIGO = NF_VALUE = NF_DAISSUE = NFServico = PO = DESCRIPTION_VALUE = PRODUCT_PAYMENTCC = PRODUCT_LOB = PRODUCT_TYPE = PRODUCT_qtTotal = PRODUCT_wgTotal = PRODUCT_vlTotal = DELIVERYPLUS_dpservicename = DELIVERYPLUS_dpcomplementaryinfo = NFSerie = SEGMENT = SA3 = EDD = SHIFT_VALUE = Customer_Number = "";

            try
            {
                XmlNodeList node = doc.GetElementsByTagName("NEWSHIPMENT");

                foreach (XmlElement nodo in node)
                {
                    ORIGIN_Name = nodo.GetElementsByTagName("ORIGIN")[0].Attributes["name"].Value;
                    ORIGIN_Address = nodo.GetElementsByTagName("ORIGIN")[0].Attributes["address"].Value;
                    ORIGIN_Country = nodo.GetElementsByTagName("ORIGIN")[0].Attributes["country"].Value;
                    ORIGIN_State = nodo.GetElementsByTagName("ORIGIN")[0].Attributes["state"].Value;
                    ORIGIN_City = nodo.GetElementsByTagName("ORIGIN")[0].Attributes["city"].Value;
                    ORIGIN_cdPostal = nodo.GetElementsByTagName("ORIGIN")[0].Attributes["cdPostal"].Value;
                    ORIGIN_cnpj = nodo.GetElementsByTagName("ORIGIN")[0].Attributes["cnpj"].Value;
                    ORIGIN_IE = nodo.GetElementsByTagName("ORIGIN")[0].Attributes["ie"].Value;
                    DESTINY_Name = nodo.GetElementsByTagName("DESTINY")[0].Attributes["name"].Value;
                    DESTINY_Address = nodo.GetElementsByTagName("DESTINY")[0].Attributes["address"].Value;
                    DESTINY_Country = nodo.GetElementsByTagName("DESTINY")[0].Attributes["country"].Value;
                    DESTINY_State = nodo.GetElementsByTagName("DESTINY")[0].Attributes["state"].Value;
                    DESTINY_City = nodo.GetElementsByTagName("DESTINY")[0].Attributes["city"].Value;
                    DESTINY_CD_Postal = nodo.GetElementsByTagName("DESTINY")[0].Attributes["cdPostal"].Value;
                    DESTINY_CNPJ = nodo.GetElementsByTagName("DESTINY")[0].Attributes["cnpj"].Value;
                    DESTINY_IE = nodo.GetElementsByTagName("DESTINY")[0].Attributes["ie"].Value;
                    DESTINY_PHONE = nodo.GetElementsByTagName("DESTINY")[0].Attributes["phone"].Value;
                    DESTINY_PHONE2 = nodo.GetElementsByTagName("DESTINY")[0].Attributes["phone2"].Value;
                    DESTINY_PHONE = nodo.GetElementsByTagName("DESTINY")[0].Attributes["phone"].Value;
                    DESTINY_PHONE2 = nodo.GetElementsByTagName("DESTINY")[0].Attributes["phone2"].Value;
                    MODAL = nodo.GetElementsByTagName("MODAL")[0].Attributes["value"].Value;
                    DELLINSTRUCTION = nodo.GetElementsByTagName("DELLINSTRUCTION")[0].Attributes["value"].Value;
                    SALESREPMAIL = nodo.GetElementsByTagName("SALESREPMAIL")[0].Attributes["value"].Value;
                    SALESREPCODIGO = nodo.GetElementsByTagName("SALESREPCODIGO")[0].Attributes["value"].Value;


                    XmlNodeList node_product = (nodo).GetElementsByTagName("PRODUCTS");

                    foreach (XmlElement nodo_product in node_product)
                    {
                        NF_VALUE = nodo_product.GetElementsByTagName("NF")[0].Attributes["value"].Value;
                        NF_DAISSUE = nodo_product.GetElementsByTagName("NF")[0].Attributes["daIssue"].Value;
                        PO = nodo_product.GetElementsByTagName("PO")[0].Attributes["value"].Value;
                        DESCRIPTION_VALUE = nodo_product.GetElementsByTagName("DESCRIPTION")[0].Attributes["value"].Value;
                        PRODUCT_PAYMENTCC = nodo_product.GetElementsByTagName("PRODUCT")[0].Attributes["paymentcc"].Value;
                        PRODUCT_LOB = nodo_product.GetElementsByTagName("PRODUCT")[0].Attributes["lob"].Value;
                        PRODUCT_TYPE = nodo_product.GetElementsByTagName("PRODUCT")[0].Attributes["type"].Value;
                        PRODUCT_qtTotal = nodo_product.GetElementsByTagName("PRODUCT")[0].Attributes["qtTotal"].Value;
                        PRODUCT_wgTotal = nodo_product.GetElementsByTagName("PRODUCT")[0].Attributes["wgTotal"].Value;
                        PRODUCT_vlTotal = nodo_product.GetElementsByTagName("PRODUCT")[0].Attributes["vlTotal"].Value;
                        DELIVERYPLUS_dpservicename = nodo_product.GetElementsByTagName("DELIVERYPLUS")[0].Attributes["dpservicename"].Value;
                        DELIVERYPLUS_dpcomplementaryinfo = nodo_product.GetElementsByTagName("DELIVERYPLUS")[0].Attributes["dpcomplementaryinfo"].Value;
                        NFSerie = nodo_product.GetElementsByTagName("NFSerie")[0].Attributes["value"].Value;
                        SEGMENT = nodo_product.GetElementsByTagName("SEGMENT")[0].Attributes["value"].Value;
                        SA3 = nodo_product.GetElementsByTagName("SA3")[0].Attributes["value"].Value;
                        EDD = nodo_product.GetElementsByTagName("EDD")[0].Attributes["value"].Value;
                        SHIFT_VALUE = nodo_product.GetElementsByTagName("SHIFT")[0].Attributes["value"].Value;
                    }
                }
            }
            catch (ArgumentOutOfRangeException) { }
            counterXML++;
            try
            {
                dadosXML.GravarXML(ORIGIN_Name, ORIGIN_Address, ORIGIN_Country, ORIGIN_State, ORIGIN_City, ORIGIN_cdPostal, ORIGIN_cnpj, ORIGIN_IE, DESTINY_Name, DESTINY_Address, DESTINY_Country, DESTINY_State, DESTINY_City,
                                           DESTINY_CD_Postal, DESTINY_CNPJ, DESTINY_IE, DESTINY_PHONE, DESTINY_PHONE2, MODAL,
                                           DELLINSTRUCTION, SALESREPMAIL, SALESREPCODIGO, NF_VALUE, NF_DAISSUE, NFServico, PO,
                                           DESCRIPTION_VALUE, PRODUCT_PAYMENTCC, PRODUCT_LOB, PRODUCT_TYPE, PRODUCT_qtTotal, PRODUCT_wgTotal,
                                           PRODUCT_vlTotal, DELIVERYPLUS_dpservicename, DELIVERYPLUS_dpcomplementaryinfo, NFSerie, SEGMENT, SA3, EDD, SHIFT_VALUE, Customer_Number);
            }
            catch (Exception ex)
            {

            }

            System.Console.WriteLine("There were {0} lines,", counterXML);
        }
    }

Class Replace

class Replace
{

    #region
    public void ReplaceTextInFile(string originalFile, string outputFile, string searchTerm, string replaceTerm)
    {
        string tempLineValue;
        using (FileStream inputStream = File.OpenRead(originalFile))
        {
            using (StreamReader inputReader = new StreamReader(inputStream))
            {
                using (StreamWriter outputWriter = File.AppendText(outputFile))
                {
                    while (null != (tempLineValue = inputReader.ReadLine()))
                    {
                        outputWriter.WriteLine(tempLineValue.Replace(searchTerm, replaceTerm));
                    }                     
                    outputWriter.Close();
                }
            }
        }
    }
    #endregion
}
Manfred Radlwimmer
  • 13,257
  • 13
  • 53
  • 62
  • See this: http://stackoverflow.com/questions/12524908/how-to-escape-in-xml – VDN Feb 07 '17 at 11:18
  • 2
    The problem is, what you're working with *isn't* XML. It's text that looks somewhat like XML. If at all possible, work with whatever is *generating* this text so that it starts generating XML rather than XML-ish. Some issues may be solvable by simple text replacement but if they're not escaping properly, sooner or later you'll probably get examples that it's just plain not easy to write a *program* that can recover the data and make it parsable as XML. There's a *reason* for XML escaping. – Damien_The_Unbeliever Feb 07 '17 at 11:21

1 Answers1

0

Just try

doc.LoadXml(File.ReadAllText(arq).Replace("&", "&amp;"));

instead of

doc.Load(arq);

inside your try block

Rifky
  • 1,444
  • 11
  • 26
  • 4
    this might lead you to some unwanted result, like if there is &, or < or > in xml file, you will get something like &amp; – VDN Feb 07 '17 at 11:23
  • yes but in the example he has only &. and in the sample the xml should not have a single & if the xml is written properly. it should have an & instead of & – Rifky Feb 07 '17 at 11:28
  • Thank you Rifky for your help fast. I go test if this code solve the problem and comment here. – felipe gomes Feb 07 '17 at 16:59
  • Hi Guys, the solution proposed for Rifky is very well and resolved the problem. Thank you all. – felipe gomes Feb 17 '17 at 19:11