0

I have an XML file with the structure shown below that I need to convert to CSV using a C# Azure Function. The XML file is stored in Azure Data Lake. The structure of the file is as follows.

<root id="1" created_date="01/01/2023" asof_date="01/01/2023">
    <level1>
        <data1>sdfs</data1>
        <data2>true</data2>
        <level2 rec="4">
            <level_record>
                <groupid>1</groupid>
                <groupname>somegroup</groupname>
                <groupdate>01/01/2023</groudate>
                <groupvalue>5</groupvalue>
                <groupkey>ag55</groupkey>
            </level_record>  
            <level_record>
                <groupid>2</groupid>
                <groupname>somegroup1</groupname>
                <groupdate>02/01/2023</groudate>
                <groupvalue>6</groupvalue>
                <groupkey>ag56</groupkey>
            </level_record> 
       </level2> 
    </level1>
</root> 

How do I read the file from Azure Data Lake and convert it to a CSV file?

Sandeep T
  • 421
  • 8
  • 22
  • By doing the work. You need to come here with a specific issue, otherwise, we're just doing the work for you. – Skin Jun 01 '23 at 10:22

2 Answers2

0

Here is an example of an Azure Function in C# that reads an XML file from Azure Data Lake Storage (via a blob trigger) and converts it to a CSV file:

using System;
using System.IO;
using System.Text;
using System.Xml.Linq;
using Microsoft.Azure.Functions.Worker;
using Microsoft.Azure.Storage;
using Microsoft.Azure.Storage.Auth;
using Microsoft.Azure.Storage.Blob;
using Microsoft.Extensions.Logging;

namespace YourNamespace
{
    /// <summary>
    /// Blob-triggered function that converts the <c>level_record</c> entries of an
    /// uploaded XML document into a CSV file written to the local temp directory.
    /// </summary>
    public static class ConvertXmlToCsvFunction
    {
        // Child elements of each <level_record>, in the order they appear as CSV columns.
        private static readonly string[] ColumnNames =
        {
            "groupid", "groupname", "groupdate", "groupvalue", "groupkey"
        };

        // Characters that force a CSV field to be quoted (RFC 4180).
        private static readonly char[] CsvSpecialChars = { ',', '"', '\r', '\n' };

        /// <summary>
        /// Parses the triggering blob as XML, flattens <c>root/level1/level2/level_record</c>
        /// into CSV rows and writes the result under <see cref="Path.GetTempPath"/>.
        /// </summary>
        /// <param name="xmlStream">Content of the blob that triggered the function.</param>
        /// <param name="name">Blob name captured from the trigger path; used to derive the CSV file name.</param>
        /// <param name="context">Invocation context, used here only to obtain a logger.</param>
        /// <exception cref="InvalidDataException">The document does not contain <c>root/level1/level2</c>.</exception>
        /// <exception cref="System.Xml.XmlException">The blob content is not well-formed XML.</exception>
        [Function("ConvertXmlToCsvFunction")]
        public static void Run([BlobTrigger("your-container/{name}", Connection = "AzureWebJobsStorage")] Stream xmlStream, string name, FunctionContext context)
        {
            var logger = context.GetLogger("ConvertXmlToCsvFunction");
            logger.LogInformation("Processing file: {Name}", name);

            try
            {
                // Load straight from the stream so the encoding declared in the XML prolog is honoured.
                XDocument xDoc = XDocument.Load(xmlStream);
                string csv = BuildCsv(xDoc);

                // Save the CSV content to a file
                string csvFileName = Path.ChangeExtension(name, "csv");
                string csvFilePath = Path.Combine(Path.GetTempPath(), csvFileName);

                // Blob names may contain virtual folders ("a/b/file.xml"); make sure the
                // matching local directory exists before writing.
                string directory = Path.GetDirectoryName(csvFilePath);
                if (!string.IsNullOrEmpty(directory))
                {
                    Directory.CreateDirectory(directory);
                }

                File.WriteAllText(csvFilePath, csv);

                logger.LogInformation("CSV file created: {Path}", csvFilePath);
            }
            catch (Exception ex)
            {
                // Pass the exception itself so the stack trace reaches the log sink.
                logger.LogError(ex, "An error occurred: {Message}", ex.Message);
                throw;
            }
        }

        // Builds the CSV text (header + one '\n'-terminated row per level_record).
        private static string BuildCsv(XDocument document)
        {
            XElement level2 = document.Element("root")?.Element("level1")?.Element("level2");
            if (level2 == null)
            {
                throw new InvalidDataException("Expected XML structure root/level1/level2 was not found.");
            }

            var csv = new StringBuilder();
            csv.Append(string.Join(",", ColumnNames)).Append('\n');

            foreach (XElement record in level2.Elements("level_record"))
            {
                for (int i = 0; i < ColumnNames.Length; i++)
                {
                    if (i > 0)
                    {
                        csv.Append(',');
                    }

                    // The explicit cast yields null for a missing element instead of throwing.
                    csv.Append(EscapeCsvField((string)record.Element(ColumnNames[i])));
                }

                csv.Append('\n');
            }

            return csv.ToString();
        }

        // Quotes a field when it contains a delimiter, quote or line break; null becomes empty.
        private static string EscapeCsvField(string value)
        {
            if (string.IsNullOrEmpty(value))
            {
                return string.Empty;
            }

            if (value.IndexOfAny(CsvSpecialChars) < 0)
            {
                return value;
            }

            return "\"" + value.Replace("\"", "\"\"") + "\"";
        }
    }
}
Krishna Varma
  • 4,238
  • 2
  • 10
  • 25
0

Try the following. Note that the XML is not valid, since the start and end tags of the groupdate element do not match (`<groupdate>` is closed with `</groudate>`).

using System;
using System.Collections;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.IO;
using System.Xml;
using System.Xml.Linq;

namespace ConsoleApplication52
{
    /// <summary>
    /// Console tool that flattens every <c>level_record</c> element of an XML file into a CSV file.
    /// </summary>
    class Program
    {
        // Defaults used when no paths are supplied on the command line.
        const string InputFileName = @"c:\temp\test.xml";
        const string OutputFileName = @"c:\temp\test.csv";

        // Characters that force a CSV field to be quoted (RFC 4180).
        static readonly char[] CsvSpecialChars = { ',', '"', '\r', '\n' };

        /// <summary>
        /// Converts the input XML to CSV.
        /// </summary>
        /// <param name="args">
        /// Optional: <c>args[0]</c> is the input XML path and <c>args[1]</c> the output CSV path.
        /// Missing values fall back to the built-in defaults.
        /// </param>
        static void Main(string[] args)
        {
            string inputPath = args != null && args.Length > 0 ? args[0] : InputFileName;
            string outputPath = args != null && args.Length > 1 ? args[1] : OutputFileName;

            // Load first so a missing or malformed input does not truncate an existing output file.
            XDocument doc = XDocument.Load(inputPath);

            // 'using' guarantees the writer is flushed and closed even if writing throws.
            using (StreamWriter writer = new StreamWriter(outputPath))
            {
                WriteCsv(doc, writer);
            }
        }

        // Writes a header (taken from the first record's element names) followed by one row per record.
        static void WriteCsv(XDocument doc, TextWriter writer)
        {
            bool headerWritten = false;
            foreach (XElement record in doc.Descendants("level_record"))
            {
                if (!headerWritten)
                {
                    //write csv header row
                    writer.WriteLine(string.Join(",", record.Elements().Select(x => x.Name.LocalName).ToArray()));
                    headerWritten = true;
                }

                //assume elements are in same order all the time.
                writer.WriteLine(string.Join(",", record.Elements().Select(x => EscapeCsvField((string)x)).ToArray()));
            }
        }

        // Quotes a field when it contains a delimiter, quote or line break; null becomes empty.
        static string EscapeCsvField(string value)
        {
            if (string.IsNullOrEmpty(value))
            {
                return string.Empty;
            }

            if (value.IndexOfAny(CsvSpecialChars) < 0)
            {
                return value;
            }

            return "\"" + value.Replace("\"", "\"\"") + "\"";
        }
    }
}
jdweng
  • 33,250
  • 2
  • 15
  • 20