-1

I am Trying to convert an XML file into CSV file. (basically get data from XML into tabular format) But I need XSL file for that particular XML file for csv conversion. It's a complex XML. I am noob here.

This is the sample XML file. I need to extract relevant all the fields like 1. MetaversionOID 2. StudyOID 3. LocationOID

<ODM
    xmlns="http://www.cdisc.org/ns/odm/v1.3"
    xmlns:p1="https://www.protocolfirst.com/ns/odm/v1.3.2" CreationDateTime="2019-03-25T06:33:43.806Z" FileOID="9c94b49a-0110-418b-a8e9-adb5d557b106" ODMVersion="1.3.2" FileType="Snapshot" AsOfDateTime="2019-03-25T06:33:43.806Z" SourceSystem="ProtocolFirst EDC">
    <ClinicalData MetaDataVersionOID="1.0" StudyOID="BAML-S16 AGI-IDH1">
        <SubjectData SubjectKey="101-155-16">
            <StudyEventData StudyEventOID="1.01" p1:Name="Screening (Master)" p1:CreationDateTime="2018-11-01T14:45:12.997Z" p1:Branch="1.0" p1:NotDone="N" p1:VisitDate="2018-10-18T04:00:00.000Z">
                <FormData FormOID="demo" p1:Name="Demographics" p1:Started="Y" p1:NotDone="N">
                    <ItemGroupData ItemGroupOID="demo">
                        <ItemData ItemOID="2a48d0b6-de96-4da9-8b90-c9d555ccbc45" p1:FieldName="Date of Birth" p1:EntryType="Transcription" Value="1950-08-24" p1:TimezoneOffset="-04:00">
                            <AuditRecord>
                                <UserRef UserOID="molly.vittorio@osumc.edu"/>
                                <DateTimeStamp>2018-11-05T16:30:42.220Z</DateTimeStamp>
                            </AuditRecord>
                        </ItemData>
                        <ItemData ItemOID="73bce803-1540-479f-8022-1a814f5bfa8e" p1:FieldName="Sex" p1:EntryType="Transcription" Value="M" p1:DisplayValue="Male">
                            <AuditRecord>
                                <UserRef UserOID="molly.vittorio@osumc.edu"/>
                                <DateTimeStamp>2018-11-05T16:30:43.007Z</DateTimeStamp>
                            </AuditRecord>
                        </ItemData>
                        <ItemData ItemOID="bc160779-263c-40ca-97ce-72c8f07f907c" p1:FieldName="Ethnicity" p1:EntryType="Transcription" Value="NOT HISPANIC OR LATINO" p1:DisplayValue="Not Hispanic or Latino">
                            <AuditRecord>
                                <UserRef UserOID="molly.vittorio@osumc.edu"/>
                                <DateTimeStamp>2018-11-05T16:30:46.151Z</DateTimeStamp>
                            </AuditRecord>
                        </ItemData>
                        <ItemData ItemOID="8f064011-8e2b-486b-8b60-c2f744ca5235" p1:FieldName="Race" p1:EntryType="Transcription" Value="CAUCASIAN" p1:DisplayValue="Caucasian">
                            <AuditRecord>
                                <UserRef UserOID="molly.vittorio@osumc.edu"/>
                                <DateTimeStamp>2018-11-05T16:30:45.366Z</DateTimeStamp>
                            </AuditRecord>
                        </ItemData>
                    </ItemGroupData>
                    <AuditRecord EditPoint="Monitoring">
                        <p1:Review DateTimeStamp="2019-03-12T16:59:47.139Z" UserOID="lia.zevallos@syneoshealth.com" Action="query" Comment="Birth recorded in the SD 24 August 1950. Please verify and correct the CRF page, thanks."/>
                        <p1:Review DateTimeStamp="2018-11-05T16:30:51.928Z" UserOID="molly.vittorio@osumc.edu" Action="submitted"/>
                        <p1:Review DateTimeStamp="2018-11-01T14:45:12.997Z" UserOID="molly.vittorio@osumc.edu" Action="open"/>
                    </AuditRecord>
                </FormData>
            </StudyEventData>
        </SubjectData>
    </ClinicalData>
</ODM>

THis is the code i am using to XML into CSV format.

import java.io.File;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.Result;
import javax.xml.transform.Source;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.stream.StreamSource;

import org.w3c.dom.Document;

public class Temp {

    public static void main(String args[]) throws Exception {

        Document document;

        File stylesheet = new File("C:/Users/mmahajan/Desktop/Input/style.xsl");
        File xmlSource = new File("C:/Users/mmahajan/Desktop/Input/subject-beataml-BAML-S8 AST-FLT3-20190325114820225683361888824.xml");

        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        DocumentBuilder builder;
        try {
            builder = factory.newDocumentBuilder();
            document = builder.parse(xmlSource);
            StreamSource stylesource = new StreamSource(stylesheet);
            Transformer transformer = TransformerFactory.newInstance().newTransformer(stylesource);
            Source source = new DOMSource(document);
            Result outputTarget = new StreamResult(new File("C:/Users/mmahajan/Desktop/Input/x.csv"));
            transformer.transform(source, outputTarget);

        } catch (ParserConfigurationException e) {
            e.printStackTrace();
        }

    }

}

So far, I have written following XSL file.

<?xml version="1.0"?>
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:fo="http://www.w3.org/1999/XSL/Format" >
<xsl:output method="text" omit-xml-declaration="yes" indent="no"/>
<xsl:template match="/">
Metaversion,StudyOID,SubjectKey,CreationDateTime,VisitDate,NotDone,Branch,Name,StudyEventOID,Name,Started,FormOID,ItemGroupOID,TimezoneOffset,Value,EntryType,FieldName,ItemOID,UserOID,DateTimeStamp
<xsl:for-each select="//AuditRecord">
<xsl:value-of select="concat(//ClinicalData/@MetaDataVersionOID,',',//ClinicalData/@StudyOID,',',//ClinicalData/SubjectData/@SubjectKey,',',//ClinicalData/SubjectData/StudyEventData/@CreationDateTime,',',//ClinicalData/SubjectData/StudyEventData/@CreationDateTime,',',//ClinicalData/SubjectData/StudyEventData/@VisitDate,',',//ClinicalData/SubjectData/StudyEventData/@NotDone,',',//ClinicalData/SubjectData/StudyEventData/@Branch,',',//ClinicalData/SubjectData/StudyEventData/@Name,',',//ClinicalData/SubjectData/StudyEventData/@StudyEventOID,',',//ClinicalData/SubjectData/StudyEventData/FormData/',',//ClinicalData/SubjectData/StudyEventData/FormData/@Started,',',//ClinicalData/SubjectData/StudyEventData/FormData/@FormOID,',',//ClinicalData/SubjectData/StudyEventData/FormData/ItemGroupData/@ItemGroupOID,',',//ClinicalData/SubjectData/StudyEventData/FormData/ItemGroupData/ItemData/@TimezoneOffset,',',//ClinicalData/SubjectData/StudyEventData/FormData/ItemGroupData/ItemData/@Value,',',//ClinicalData/SubjectData/StudyEventData/FormData/ItemGroupData/ItemData/@EntryType,',',//ClinicalData/SubjectData/StudyEventData/FormData/ItemGroupData/ItemData/@FieldName,',',//ClinicalData/SubjectData/StudyEventData/FormData/ItemGroupData/ItemData/@ItemOID,',',//ClinicalData/SubjectData/StudyEventData/FormData/ItemGroupData/ItemData/AuditRecord/UserRef/@UserOID,',',//ClinicalData/SubjectData/StudyEventData/FormData/ItemGroupData/ItemData/AuditRecord/UserRef/@DateTimeStamp,
'&#xA;')"/>
</xsl:for-each>
</xsl:template>
</xsl:stylesheet>

But this is wrong. Any sort of help for correct XSL will be very much appreciated.

Update: I have modified XML File and XSL file, But still not able to generate correct XSL file for it.

  • expected output? – Rupesh_Kr Mar 27 '19 at 06:50
  • I need to extract information from XML file into CSV file. I am trying this approach. [link]https://stackoverflow.com/a/21415506/10884684 – Mayur Mahajan Mar 27 '19 at 06:55
  • 2
    The XML sample in your question seems to be slightly badly formed, but it suggests you actually have a root element with a default namespace (the `xmlns="...." ' partially shown in your fragment). If this is indeed the case, you should check out this question; https://stackoverflow.com/questions/1344158/xslt-with-xml-source-that-has-a-default-namespace-set-to-xmlns . Thanks! – Tim C Mar 27 '19 at 07:45
  • 1
    You may also a problem with your template match; `match="/ClinicalData"` would only match anything if the root element was `ClinicalData`, but in your case it is (probably) `ODM` (or maybe something above this). Thanks! – Tim C Mar 27 '19 at 07:47
  • If you want CSV output, why is your XSLT written to output `` and other HTML elements? – michael.hor257k Mar 27 '19 at 09:42
  • @michael.hor257k Like I said, I am noob here. I have corrected it now. – Mayur Mahajan Mar 27 '19 at 10:03
  • I see no such correction. More importantly, you haven't edited your question to show the cut-off part at the beginning of your XML **and** the expected output of transforming the given example. – michael.hor257k Mar 27 '19 at 10:10
  • @michael.hor257k I have changed it now. Can u please help me with correct XSL ? – Mayur Mahajan Mar 27 '19 at 10:20
  • Clearly a duplicate of [XSLT with XML source that has a default namespace set to xmlns](https://stackoverflow.com/questions/1344158/xslt-with-xml-source-that-has-a-default-namespace-set-to-xmlns) . You need to properly use namespaces. – Alejandro Mar 27 '19 at 12:46

2 Answers2

0
<xsl:template match="/">
<html>
<head>
</head>
<body>
    <xsl:for-each select="/ClinicalData">
        <td><xsl:value-of select="@MetaDataVersionOID"/></td>
        <td><xsl:value-of select="@StudyOID"/></td>
        <td><xsl:value-of select="SubjectData/SiteRef/@LocationOID"/></td>
    </xsl:for-each>
</body>
</html>
</xsl:template>
Ajeet Singh
  • 1,056
  • 1
  • 6
  • 21
-1

You can check this it creates csv output

<xsl:value-of select="concat(@MetaDataVersionOID, ',', @StudyOID, ',', SubjectData/SiteRef/@LocationOID)"/>

you can see transform at https://xsltfiddle.liberty-development.net/6r5Gh3b

XSLT

    <?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
    xmlns:xs="http://www.w3.org/2001/XMLSchema"
    xmlns:a="http://www.cdisc.org/ns/odm/v1.3"
    xmlns:p1="https://www.protocolfirst.com/ns/odm/v1.3.2"
    xpath-default-namespace="http://www.cdisc.org/ns/odm/v1.3"
    exclude-result-prefixes="xs"
    version="2.0">

    <xsl:template match="/">
        <xsl:for-each select="//AuditRecord">
            <xsl:text>&#xa;</xsl:text>
            <xsl:value-of select="concat(//ClinicalData/@MetaDataVersionOID,','
                ,//ClinicalData/@StudyOID,','
                ,//ClinicalData/SubjectData/@SubjectKey,','
                ,//ClinicalData/SubjectData/StudyEventData/@p1:CreationDateTime,','
                ,//ClinicalData/SubjectData/StudyEventData/@p1:CreationDateTime,','
                ,//ClinicalData/SubjectData/StudyEventData/@p1:VisitDate,','
                ,//ClinicalData/SubjectData/StudyEventData/@p1:NotDone,','
                ,//ClinicalData/SubjectData/StudyEventData/@p1:Branch,','
                ,//ClinicalData/SubjectData/StudyEventData/@p1:Name,','
                ,//ClinicalData/SubjectData/StudyEventData/@StudyEventOID,','
                ,//ClinicalData/SubjectData/StudyEventData/FormData/','
                ,//ClinicalData/SubjectData/StudyEventData/FormData/@p1:Started,','
                ,//ClinicalData/SubjectData/StudyEventData/FormData/@FormOID,','
                ,//ClinicalData/SubjectData/StudyEventData/FormData/ItemGroupData/@ItemGroupOID,','
                ,parent::ItemData/@TimezoneOffset,','
                ,parent::ItemData/@Value,','
                ,parent::ItemData/@p1:EntryType,','
                ,parent::ItemData/@p1:FieldName,','
                ,parent::ItemData/@ItemOID,','
                ,UserRef/@UserOID,','
                ,UserRef/@DateTimeStamp,','
                ,UserOID,','
                ,DateTimeStamp,
                '&#xA;')"/>
        </xsl:for-each>
    </xsl:template>


</xsl:stylesheet>
Rupesh_Kr
  • 3,395
  • 2
  • 17
  • 32
  • Hi, I have slightly updated my xml. Your approach is correct. Can u please tell me where I am going wrong as correct data has not resulted as output. I need to fetch all the fields from Above XML. Thanks @Rupesh_Kr – Mayur Mahajan Mar 27 '19 at 10:18
  • @MayurMahajan you are missing namespace in your approach. see updated transformation at [https://xsltfiddle.liberty-development.net/6r5Gh3b/1](https://xsltfiddle.liberty-development.net/6r5Gh3b/1) – Rupesh_Kr Mar 27 '19 at 11:16
  • based on your shared xsl file, I am getting error as p1 is undefined. – Mayur Mahajan Mar 27 '19 at 11:33
  • you have to define `xmlns:p1="https://www.protocolfirst.com/ns/odm/v1.3.2"` in xslt – Rupesh_Kr Mar 27 '19 at 11:41
  • Can u please share your xsl file somewhere else, I am not able to access above site. Thanks – Mayur Mahajan Mar 27 '19 at 11:54
  • Its not giving correct output. Not sure though why ? IN the link that u shared, Some of the values were missing from XML also – Mayur Mahajan Mar 27 '19 at 12:32
  • Specifically, when i am trying to access tags like : FormData/@p1:NotDone FormData/@p1:Name, FormData/@p1:Started FormData/@FormOID,', Its giving incorrect data or no data at all. – Mayur Mahajan Mar 27 '19 at 12:44