1

I have 12 XML files which I have to import in single Table in SQL Server, one of it is below, Please help me to put those files in a Table format with their column names.

<?xml version="1.0" encoding="UTF-16"?>
<Data>
<!-- IntelliScan - USC  -->
<BatchNumber>1</BatchNumber>
<ComputerName>PC-XDS1</ComputerName>
<StartTime>2/16/2016 7:44:38 AM</StartTime>
<EndTime>2/16/2016 8:14:47 AM</EndTime>
<IdleTime>672</IdleTime>
<ImagesDroppedFront>0</ImagesDroppedFront>
<ImagesDroppedBack>205</ImagesDroppedBack>
<JobName>Landscape Production NO MONEY</JobName>
<JobNumber>001</JobNumber>
<Operator>abwhite</Operator>
<OutputFolderName>6604773004001</OutputFolderName>
<PagesOperatorReplaced>2</PagesOperatorReplaced>
<PagesOperatorDeleted>1</PagesOperatorDeleted>
<PagesOperatorInserted>0</PagesOperatorInserted>
<ProcessingDate>2/16/2016</ProcessingDate>
<ProfileName> Multis Landscape</ProfileName>
<RecoveredTime></RecoveredTime>
<Status></Status>
<SystemDate>2/16/2016</SystemDate>
<TotalItems>318</TotalItems>
<TotalLevel1>0</TotalLevel1>
<TotalLevel2>64</TotalLevel2>
<TotalLevel3>0</TotalLevel3>
<TotalImages>506</TotalImages>
<TotalFrontIJP>321</TotalFrontIJP>
<TotalBackIJP>0</TotalBackIJP>
<TotalPostIJP>0</TotalPostIJP>
<TransportType>DocuScan</TransportType>
<TotalAutoFeed>298</TotalAutoFeed>
<TotalManualFeed>23</TotalManualFeed>
<TotalFeedOnTime>0</TotalFeedOnTime>
<TotalSpeed1>0</TotalSpeed1>
<TotalSpeed2>0</TotalSpeed2>
<TotalSpeed3>321</TotalSpeed3>
<XPTNumber>73</XPTNumber>
<IR_Marked>0</IR_Marked>
<IR_Reviewed>39</IR_Reviewed>
<Start_DLN>6604773004001</Start_DLN>
<End_DLN>6604773004061</End_DLN>
<DocTypes>
<Doctype ID="400">
<!-- [ DocType400 ]  - DOCSEP -->
<Name>DOCSEP</Name>
<Count>64</Count>
</Doctype>
<Doctype ID="001">
<!-- [ DocType001 ]  - Page -->
<Name>Page</Name>
<Count>254</Count>
</Doctype>
</DocTypes>
<Jams>
<Jam>
<Message>5.10 Document Late To MultiPocket STK1 PKT1 Entrance Sensor</Message>
<Count>3</Count>
<Time>206</Time>
</Jam>
<Jam>
<Message>1.6 Document Late to PostScan IJP Entrance sensor.</Message>
<Count>2</Count>
<Time>13</Time>
</Jam>
<Jam>
<Message>1.4 Document Late to Reader On Doc sensor.</Message>
<Count>3</Count>
<Time>18</Time>
</Jam>
<Jam>
<Message>0.125 Double feed detected. Document held at the handfeed track</Message>
<Count>1</Count>
<Time>16</Time>
</Jam>
<Jam>
<Message>1.5 Document Late to PreScan IJP Entrance sensor.</Message>
<Count>1</Count>
<Time>3</Time>
</Jam>
<Jam>
<Message>3.2 Unexpected Item Detected at Dbl Feed exit sensor.</Message>
<Count>1</Count>
<Time>43</Time>
</Jam>
</Jams>
</Data>

I am already running one query but the problem is that, In Data/DoctTypes there are multiple ID and in Data/Jams/Jam there are multiple Message,Count,time columns. I have already loaded xml files in table name XMLFiles and column name XMLData and now need to retrieve the columns inside xml files from xml files with their values in a single table. So how can I retrieve those multiple Columns or data? My query is below,

Select  XMLData.value('(/Data/BatchNumber)[1]','int' ) BatchNumber,
    XMLData.value('(/Data/ComputerName)[1]','varchar(50)' ) ComputerName,
    XMLData.value('(/Data/StartTime)[1]','DateTime' ) StartTime,
    XMLData.value('(/Data/EndTime)[1]','DateTime' ) EndTime,
    XMLData.value('(/Data/IdleTime)[1]','int' ) IdleTime,
    XMLData.value('(/Data/ImagesDroppedFront)[1]','int' ) ImagesDroppedFront,
    XMLData.value('(/Data/ImagesDroppedBack)[1]','int' ) ImagesDroppedBack,
    XMLData.value('(/Data/JobName)[1]','varchar(100)' ) JobName,
    XMLData.value('(/Data/JobNumber)[1]','varchar(10)' ) JobNumber,
    XMLData.value('(/Data/Operator)[1]','varchar(50)' ) Operator,
    XMLData.value('(/Data/OutputFolderName)[1]','varchar(20)' ) OutputFolderName,
    XMLData.value('(/Data/PagesOperatorReplaced)[1]','int' ) PagesOperatorReplaced,
    XMLData.value('(/Data/PagesOperatorDeleted)[1]','int' ) PagesOperatorDeleted,
    XMLData.value('(/Data/PagesOperatorInserted)[1]','int' ) PagesOperatorInserted,
    XMLData.value('(/Data/ProcessingDate)[1]','Date' ) ProcessingDate,
    XMLData.value('(/Data/ProfileName)[1]','varchar(50)' ) ProfileName,
    XMLData.value('(/Data/RecoveredTime)[1]','Time' ) RecoveredTime,
    XMLData.value('(/Data/Status)[1]','varchar(50)' ) [Status],
    XMLData.value('(/Data/SystemDate)[1]','Date' ) SystemDate,
    XMLData.value('(/Data/TotalItems)[1]','int' ) TotalItems,
    XMLData.value('(/Data/TotalLevel1)[1]','int' ) TotalLevel1,
    XMLData.value('(/Data/TotalLevel2)[1]','int' ) TotalLevel2,
    XMLData.value('(/Data/TotalLevel3)[1]','int' ) TotalLevel3,
    XMLData.value('(/Data/TotalImages)[1]','int' ) TotalImages,
    XMLData.value('(/Data/TotalFrontIJP)[1]','int' ) TotalFrontIJP,
    XMLData.value('(/Data/TotalBackIJP)[1]','int' ) TotalBackIJP,
    XMLData.value('(/Data/TotalPostIJP)[1]','int' ) TotalPostIJP,
    XMLData.value('(/Data/TransportType)[1]','varchar(50)' ) TransportType,
    XMLData.value('(/Data/TotalAutoFeed)[1]','int' ) TotalAutoFeed,
    XMLData.value('(/Data/TotalManualFeed)[1]','int' ) TotalManualFeed,
    XMLData.value('(/Data/TotalFeedOnTime)[1]','int' ) TotalFeedOnTime,
    XMLData.value('(/Data/TotalSpeed1)[1]','int' ) TotalSpeed1,
    XMLData.value('(/Data/TotalSpeed2)[1]','int' ) TotalSpeed2,
    XMLData.value('(/Data/TotalSpeed3)[1]','int' ) TotalSpeed3,
    XMLData.value('(/Data/XPTNumber)[1]','int' ) XPTNumber,
    XMLData.value('(/Data/IR_Marked)[1]','int' ) IR_Marked,
    XMLData.value('(/Data/IR_Reviewed)[1]','int' ) IR_Reviewed,
    ISNULL(XMLData.value('(/Data/Start_DLN)[1]','varchar(20)'),'' ) Start_DLN,
    ISNULL(XMLData.value('(/Data/End_DLN)[1]','varchar(20)'),'' ) End_DLN,
    XMLData.value('(/Data/DocTypes/Doctype/@ID)[1]','int') ID,
    XMLData.value('(/Data/DocTypes/Doctype/Name)[1]','varchar(50)' ) Name,
    XMLData.value('(/Data/DocTypes/Doctype/Count)[1]','int' ) [Count],
    ISNULL(XMLData.value('(/Data/Jams/Jam/Message)[1]','varchar(1000)'),'') [Message],
    ISNULL(XMLData.value('(/Data/Jams/Jam/Count)[1]','int' ),'') [Count],
    ISNULL(XMLData.value('(/Data/Jams/Jam/Time)[1]','int' ),'') [Time]
    From XMLFiles X
Ruhaan
  • 176
  • 13

1 Answers1

0

Your XML has nested data with 1:n relation. To put this in properly designed data structures needs separate tables.

With this code you would get three derived tables with generated IDs to define their relation:

DECLARE @x XML=
N'<Data>
  <!-- IntelliScan - USC  -->
  <BatchNumber>1</BatchNumber>
  <ComputerName>PC-XDS1</ComputerName>
  <StartTime>2/16/2016 7:44:38 AM</StartTime>
  <EndTime>2/16/2016 8:14:47 AM</EndTime>
  <IdleTime>672</IdleTime>
  <ImagesDroppedFront>0</ImagesDroppedFront>
  <ImagesDroppedBack>205</ImagesDroppedBack>
  <JobName>Landscape Production NO MONEY</JobName>
  <JobNumber>001</JobNumber>
  <Operator>abwhite</Operator>
  <OutputFolderName>6604773004001</OutputFolderName>
  <PagesOperatorReplaced>2</PagesOperatorReplaced>
  <PagesOperatorDeleted>1</PagesOperatorDeleted>
  <PagesOperatorInserted>0</PagesOperatorInserted>
  <ProcessingDate>2/16/2016</ProcessingDate>
  <ProfileName> Multis Landscape</ProfileName>
  <RecoveredTime />
  <Status />
  <SystemDate>2/16/2016</SystemDate>
  <TotalItems>318</TotalItems>
  <TotalLevel1>0</TotalLevel1>
  <TotalLevel2>64</TotalLevel2>
  <TotalLevel3>0</TotalLevel3>
  <TotalImages>506</TotalImages>
  <TotalFrontIJP>321</TotalFrontIJP>
  <TotalBackIJP>0</TotalBackIJP>
  <TotalPostIJP>0</TotalPostIJP>
  <TransportType>DocuScan</TransportType>
  <TotalAutoFeed>298</TotalAutoFeed>
  <TotalManualFeed>23</TotalManualFeed>
  <TotalFeedOnTime>0</TotalFeedOnTime>
  <TotalSpeed1>0</TotalSpeed1>
  <TotalSpeed2>0</TotalSpeed2>
  <TotalSpeed3>321</TotalSpeed3>
  <XPTNumber>73</XPTNumber>
  <IR_Marked>0</IR_Marked>
  <IR_Reviewed>39</IR_Reviewed>
  <Start_DLN>6604773004001</Start_DLN>
  <End_DLN>6604773004061</End_DLN>
  <DocTypes>
    <Doctype ID="400">
      <!-- [ DocType400 ]  - DOCSEP -->
      <Name>DOCSEP</Name>
      <Count>64</Count>
    </Doctype>
    <Doctype ID="001">
      <!-- [ DocType001 ]  - Page -->
      <Name>Page</Name>
      <Count>254</Count>
    </Doctype>
  </DocTypes>
  <Jams>
    <Jam>
      <Message>5.10 Document Late To MultiPocket STK1 PKT1 Entrance Sensor</Message>
      <Count>3</Count>
      <Time>206</Time>
    </Jam>
    <Jam>
      <Message>1.6 Document Late to PostScan IJP Entrance sensor.</Message>
      <Count>2</Count>
      <Time>13</Time>
    </Jam>
    <Jam>
      <Message>1.4 Document Late to Reader On Doc sensor.</Message>
      <Count>3</Count>
      <Time>18</Time>
    </Jam>
    <Jam>
      <Message>0.125 Double feed detected. Document held at the handfeed track</Message>
      <Count>1</Count>
      <Time>16</Time>
    </Jam>
    <Jam>
      <Message>1.5 Document Late to PreScan IJP Entrance sensor.</Message>
      <Count>1</Count>
      <Time>3</Time>
    </Jam>
    <Jam>
      <Message>3.2 Unexpected Item Detected at Dbl Feed exit sensor.</Message>
      <Count>1</Count>
      <Time>43</Time>
    </Jam>
  </Jams>
</Data>';

--Here the query starts:

WITH DataNode AS
(
    SELECT ROW_NUMBER() OVER(ORDER BY (SELECT NULL)) AS Data_ID
          ,Data.value('BatchNumber[1]','int') AS BatchNumber
          ,Data.value('ComputerName[1]','nvarchar(max)') AS ComputerName
          --add all columns of the first level below Data
          ,Data.query('DocTypes') AS Doctype
          ,Data.query('Jams') AS Jam
    FROM @x.nodes('Data') AS A(Data)
)
,Doctypes AS
(
    SELECT DataNode.Data_ID
          ,ROW_NUMBER() OVER(PARTITION BY Data_ID ORDER BY (SELECT NULL)) AS Doctype_ID
          ,dt.value('@ID','int') AS DoctypeID
          ,dt.value('Name[1]','nvarchar(max)') AS DoctypeName
          ,dt.value('Count[1]','int') AS DoctypeCount
    FROM DataNode
    CROSS APPLY Doctype.nodes('DocTypes/Doctype') AS A(dt)
)
,Jams AS
(
    SELECT DataNode.Data_ID
          ,ROW_NUMBER() OVER(PARTITION BY Data_ID ORDER BY (SELECT NULL)) AS Jam_ID
          ,dt.value('Message[1]','nvarchar(max)') AS JamMessage
          ,dt.value('Count[1]','int') AS JamCount
          ,dt.value('Time[1]','int') AS JamTime
    FROM DataNode
    CROSS APPLY Jam.nodes('Jams/Jam') AS A(dt)
)
SELECT dn.*
      --add all Data columns here
      ,dt.Doctype_ID
      ,dt.DoctypeCount
      ,dt.DoctypeID
      ,dt.DoctypeName
      ,jm.Jam_ID
      ,jm.JamCount
      ,jm.JamMessage
      ,jm.JamTime
INTO #tmpResult
FROM DataNode AS dn
INNER JOIN DocTypes AS dt ON dt.Data_ID=dn.Data_ID
INNER JOIN Jams AS jm ON dt.Data_ID=jm.Data_ID;

SELECT DISTINCT Data_ID,BatchNumber,ComputerName INTO #MyDataTable FROM #tmpResult;
SELECT DISTINCT Data_ID,Doctype_ID,DoctypeID,DoctypeName,DoctypeCount INTO #MyDoctypeTable FROM #tmpResult;
SELECT DISTINCT Data_ID,Jam_ID,JamCount,JamMessage,JamTime INTO #MyJamTable FROM #tmpResult;
GO

SELECT * FROM #MyDataTable;
SELECT * FROM #MyDoctypeTable;
SELECT * FROM #MyJamTable;

GO
DROP TABLE #MyDataTable;
DROP TABLE #MyDoctypeTable;
DROP TABLE #MyJamTable;
DROP TABLE #tmpResult;
Shnugo
  • 66,100
  • 9
  • 53
  • 114
  • Thank you Shnugo, its working.. But I am unable to load all 12 Xml files into a single table..can u help me? – Ruhaan Jun 08 '16 at 07:38
  • In my 2nd XML file, there is no Jam tags and Messages,so unable to get output,how can we get such result? and there are some xmls without DoctypeID and Jams,Plz suggest some Query,@Shnugo. – Ruhaan Jul 01 '16 at 11:22
  • @Ruhaan please start a new question with a reduced sample and a clear description. Add the question's link here and I will pop up there soon. – Shnugo Jul 01 '16 at 11:29
  • yeah, new question posted.please suggest some solution. – Ruhaan Jul 01 '16 at 12:17