0

I have a XML file size 35 GB. I tried to load this file using xmldocument and got out of memory exception. So, using xmlreader to parse the xml data to load it to database. But, I am not able to read the child nodes within a parent node.

Example XML file content:

File name : wcproduction.xml

<root xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<xsd:schema targetNamespace="urn:schemas-microsoft-com:sql:SqlRowSet1" xmlns:schema="urn:schemas-microsoft-com:sql:SqlRowSet1" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:sqltypes="http://schemas.microsoft.com/sqlserver/2004/sqltypes" elementFormDefault="qualified">
    <xsd:import namespace="http://schemas.microsoft.com/sqlserver/2004/sqltypes" schemaLocation="http://schemas.microsoft.com/sqlserver/2004/sqltypes/sqltypes.xsd"/>
    <xsd:element name="wcproduction">
        <xsd:complexType>
            <xsd:sequence>
                <xsd:element name="api_st_cde" type="sqltypes:smallint" nillable="1"/>
                <xsd:element name="api_cnty_cde" type="sqltypes:smallint" nillable="1"/>
                <xsd:element name="api_well_idn" type="sqltypes:int" nillable="1"/>
                <xsd:element name="pool_idn" type="sqltypes:int" nillable="1"/>
                <xsd:element name="prodn_mth" type="sqltypes:smallint" nillable="1"/>
                <xsd:element name="prodn_yr" type="sqltypes:int" nillable="1"/>
                <xsd:element name="ogrid_cde" type="sqltypes:int" nillable="1"/>
                <xsd:element name="prd_knd_cde" nillable="1">
                    <xsd:simpleType>
                        <xsd:restriction base="sqltypes:char" sqltypes:localeId="1033" sqltypes:sqlCompareOptions="IgnoreCase IgnoreKanaType IgnoreWidth" sqltypes:sqlSortId="52">
                            <xsd:maxLength value="2"/>
                        </xsd:restriction>
                    </xsd:simpleType>
                </xsd:element>
                <xsd:element name="eff_dte" type="sqltypes:datetime" nillable="1"/>
                <xsd:element name="amend_ind" nillable="1">
                    <xsd:simpleType>
                        <xsd:restriction base="sqltypes:char" sqltypes:localeId="1033" sqltypes:sqlCompareOptions="IgnoreCase IgnoreKanaType IgnoreWidth" sqltypes:sqlSortId="52">
                            <xsd:maxLength value="1"/>
                        </xsd:restriction>
                    </xsd:simpleType>
                </xsd:element>
                <xsd:element name="c115_wc_stat_cde" nillable="1">
                    <xsd:simpleType>
                        <xsd:restriction base="sqltypes:char" sqltypes:localeId="1033" sqltypes:sqlCompareOptions="IgnoreCase IgnoreKanaType IgnoreWidth" sqltypes:sqlSortId="52">
                            <xsd:maxLength value="1"/>
                        </xsd:restriction>
                    </xsd:simpleType>
                </xsd:element>
                <xsd:element name="prod_amt" type="sqltypes:int" nillable="1"/>
                <xsd:element name="prodn_day_num" type="sqltypes:smallint" nillable="1"/>
                <xsd:element name="mod_dte" type="sqltypes:datetime" nillable="1"/>
            </xsd:sequence>
        </xsd:complexType>
    </xsd:element>
</xsd:schema>
<wcproduction xmlns="urn:schemas-microsoft-com:sql:SqlRowSet1">
    <api_st_cde>30</api_st_cde>
    <api_cnty_cde>5</api_cnty_cde>
    <api_well_idn>20178</api_well_idn>
    <pool_idn>10540</pool_idn>
    <prodn_mth>7</prodn_mth>
    <prodn_yr>1973</prodn_yr>
    <ogrid_cde>12437</ogrid_cde>
    <prd_knd_cde>G </prd_knd_cde>
    <eff_dte>1973-07-31T00:00:00</eff_dte>
    <amend_ind>N</amend_ind>
    <c115_wc_stat_cde>F</c115_wc_stat_cde>
    <prod_amt>53612</prod_amt>
    <prodn_day_num>99</prodn_day_num>
    <mod_dte>2015-04-07T07:31:00.173</mod_dte>
</wcproduction>
</root>

VB.net code that is try to read parent node wcproduction and its child nodes ( api_st_cde, api_cnty_cde, ...)

Dim settings As XmlReaderSettings = New XmlReaderSettings()
        settings.IgnoreWhitespace = True

        Using reader As XmlReader = XmlReader.Create("D:\\wcproduction.xml", settings)

            reader.ReadToFollowing("wcproduction")
            Do

                Dim inner As XmlReader = reader.ReadSubtree()
                Dim str As String = ""

                inner.ReadToDescendant("api_st_cde")
                str = inner.ReadInnerXml
                inner.ReadToDescendant("api_cnty_cde")
                str = str & ", " & inner.ReadInnerXml
                inner.ReadToDescendant("api_well_idn")
                str = str & ", " & inner.ReadInnerXml
                inner.ReadToDescendant("pool_idn")
                str = str & ", " & inner.ReadInnerXml
                inner.ReadToDescendant("prodn_mth")
                str = str & ", " & inner.ReadInnerXml
                inner.ReadToDescendant("prodn_yr")
                str = str & ", " & inner.ReadInnerXml
                inner.ReadToDescendant("ogrid_cde")
                str = str & ", " & inner.ReadInnerXml
                inner.ReadToDescendant("prd_knd_cde")
                str = str & ", " & inner.ReadInnerXml
                inner.ReadToDescendant("eff_dte")
                str = str & ", " & inner.ReadInnerXml
                inner.ReadToDescendant("amend_ind")
                str = str & ", " & inner.ReadInnerXml
                inner.ReadToDescendant("c115_wc_stat_cde")
                str = str & ", " & inner.ReadInnerXml
                inner.ReadToDescendant("prod_amt")
                str = str & ", " & inner.ReadInnerXml
                inner.ReadToDescendant("prodn_day_num")
                str = str & ", " & inner.ReadInnerXml
                inner.ReadToDescendant("mod_dte")
                str = str & ", " & inner.ReadInnerXml
                MsgBox(str)
                inner.Close()
            Loop While (reader.ReadToNextSibling("wcproduction"))

        End Using

I want to read and upload all nodes (wcproduction) and its child nodes to SQL server.

user10987050
  • 75
  • 10
  • I can help. Do this lots of times in c# and willing to help in vb.net. I have two questions. 1) The xml has namespace. The best way is to do serialization. This data appear to come from a SQL Database. The xml shows 1 schema location for the low level xml. Is there any other schema locations in the file? 2) If you do not use serialization can you give me the repeating element you are trying to extract. With XML reader I usually work with the repeating elements. I can't tell from posted xml which repeating elements you are trying to extract. – jdweng Feb 02 '19 at 13:25
  • The sql database uses a standard schema that is available on the web. MSDN has a tool xsd.exe which you can download that take a xsd schema and converts to VB classes. You can then use XML Serialization to take your database and convert to classes. I probably will cause memory error with a 35GB file. Can you explain what you are trying to do? You should import SQL data into a SQL Server. There are command line tools that are designed to handle very large files. See : https://docs.microsoft.com/en-us/sql/tools/command-prompt-utility-reference-database-engine?view=sql-server-2017 – jdweng Feb 02 '19 at 14:10
  • @jdweng, the repeating element is wcproduction and its child elements. I would like to extract all fields from wcproduction. I am ready to implement any solution. – user10987050 Feb 03 '19 at 15:29
  • I have edited the answer. – preciousbetine Feb 04 '19 at 10:08

1 Answers1

2

Try following code which uses combination of XmlReader and xml linq

Imports System.Xml
Imports System.Xml.Linq
Module Module1
    Const FILENAME As String = "c:\temp\test.xml"
    Sub Main()
        Dim wcProdcutions As New List(Of WCProduction)
        Dim reader As XmlReader = XmlReader.Create(FILENAME)
        While (Not reader.EOF)
            If reader.Name <> "wcproduction" Then
                reader.ReadToFollowing("wcproduction")
            End If
            If Not reader.EOF Then
                Dim xWcproduction As XElement = XElement.ReadFrom(reader)
                Dim ns As XNamespace = xWcproduction.GetDefaultNamespace()
                Dim wcproduction As New WCProduction
                wcProdcutions.Add(wcproduction)

                wcproduction.api_st_cde = CType(xWcproduction.Element(ns + "api_st_cde"), Integer)
                wcproduction.api_well_idn = CType(xWcproduction.Element(ns + "api_well_idn"), Integer)
                wcproduction.pool_idn = CType(xWcproduction.Element(ns + "pool_idn"), Integer)
                wcproduction.prodn_mth = CType(xWcproduction.Element(ns + "prodn_mth"), Integer)
                wcproduction.prodn_yr = CType(xWcproduction.Element(ns + "prodn_yr"), Integer)
                wcproduction.ogrid_cde = CType(xWcproduction.Element(ns + "ogrid_cde"), Integer)
                wcproduction.prd_knd_cde = CType(xWcproduction.Element(ns + "prd_knd_cde"), String)
                wcproduction.eff_dte = CType(xWcproduction.Element(ns + "eff_dte"), DateTime)
                wcproduction.amend_ind = CType(xWcproduction.Element(ns + "amend_ind"), String)
                wcproduction.c115_wc_stat_cde = CType(xWcproduction.Element(ns + "c115_wc_stat_cde"), String)
                wcproduction.prod_amt = CType(xWcproduction.Element(ns + "prod_amt"), Integer)
                wcproduction.prodn_day_num = CType(xWcproduction.Element(ns + "prodn_day_num"), Integer)
                wcproduction.mod_dte = CType(xWcproduction.Element(ns + "mod_dte"), DateTime)

            End If

        End While
    End Sub

End Module
Public Class WCProduction
    Public api_st_cde As Integer
    Public api_cnty_cde As Integer
    Public api_well_idn As Integer
    Public pool_idn As Integer
    Public prodn_mth As Integer
    Public prodn_yr As Integer
    Public ogrid_cde As Integer
    Public prd_knd_cde As String
    Public eff_dte As DateTime
    Public amend_ind As String
    Public c115_wc_stat_cde As String
    Public prod_amt As Integer
    Public prodn_day_num As Integer
    Public mod_dte As DateTime
End Class
jdweng
  • 33,250
  • 2
  • 15
  • 20
  • I think wcProdcutions.Add(wcproduction) line of code should appear after assigning the values. – user10987050 Feb 05 '19 at 06:06
  • Doesn't matter. I like showing it immediately after wcproduction is created by calling the constructor. I don't like putting it at the end because I often forget to include the line. – jdweng Feb 05 '19 at 09:49