34

I'm using the Java 5 javax.xml.validation.Validator to validate an XML file. I've done it for one schema that uses only imports, and everything works fine. Now I'm trying to validate against another schema that uses one import and one include. The problem is that the elements declared in the main schema are ignored: the validator says it cannot find their declarations.

Here is how I build the Schema:

// Open each of the three XSD files as a classpath stream via this class's class loader.
InputStream includeInputStream = getClass().getClassLoader().getResource("include.xsd").openStream();
InputStream importInputStream = getClass().getClassLoader().getResource("import.xsd").openStream();
InputStream mainInputStream = getClass().getClassLoader().getResource("main.xsd").openStream();
// Hand all three schemas to the factory at once, included and imported files first, main schema last.
// NOTE(review): the array elements are raw InputStreams although the array type is SAXSource[];
// presumably the real code wraps each stream in a Source - confirm against the original project.
// NOTE(review): a bare stream carries no location (system id); per the answers in this thread that is
// why the relative schemaLocation of the xsd:include cannot be resolved - verify.
Source[] sourceSchema = new SAXSource[]{includeInputStream , importInputStream, 
mainInputStream };
Schema schema = factory.newSchema(sourceSchema);

Now here is the extract of the declaration in main.xsd

<xsd:schema xmlns="http://schema.omg.org/spec/BPMN/2.0" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:import="http://www.foo.com/import" targetNamespace="http://main/namespace" elementFormDefault="qualified" attributeFormDefault="unqualified">
    <xsd:import namespace="http://www.foo.com/import" schemaLocation="import.xsd"/>
    <xsd:include schemaLocation="include.xsd"/>
    <xsd:element name="element" type="tElement"/>
    <...>
</xsd:schema>

If I copy the code of my included XSD in the main.xsd, it works fine. If I don't, validation doesn't find the declaration of "Element".

Line
  • 1,529
  • 3
  • 18
  • 42
Melanie
  • 1,198
  • 2
  • 17
  • 42

11 Answers11

65

you need to use an LSResourceResolver for this to work. please take a look at the sample code below.

a validate method:

/**
 * Validates an XML document against an XSD schema that is loaded from the classpath.
 *
 * <p>The schema factory is given a {@link ResourceResolver}, which is responsible for locating the
 * schemas referenced through {@code xsd:import} / {@code xsd:include}.
 *
 * <p>Note from the original sample: if the XML already declares the XSD to which it has to
 * conform, there is no need to pass a schema name or to create the validator from a
 * {@code Schema} object.
 *
 * @param xml        the XML document to validate
 * @param schemaName name of the main XSD, looked up through this class's class loader
 * @throws IllegalArgumentException if no resource named {@code schemaName} is found
 * @throws Exception if the XML cannot be parsed, the schema cannot be compiled, or the document
 *                   does not conform to the schema
 */
void validate(String xml, String schemaName) throws Exception {

    DocumentBuilderFactory builderFactory = DocumentBuilderFactory.newInstance();
    builderFactory.setNamespaceAware(true);

    DocumentBuilder parser = builderFactory.newDocumentBuilder();

    // parse the XML into a document object
    Document document = parser.parse(new StringInputStream(xml));

    SchemaFactory factory = SchemaFactory
            .newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI);

    // associate the schema factory with the resource resolver, which is responsible for resolving the imported XSD's
    factory.setResourceResolver(new ResourceResolver());

    java.io.InputStream schemaStream = getClass().getClassLoader()
            .getResourceAsStream(schemaName);
    if (schemaStream == null) {
        // Fail early with the offending name instead of an opaque parser error on a null stream.
        throw new IllegalArgumentException("Schema not found on the classpath: " + schemaName);
    }
    try {
        Source schemaFile = new StreamSource(schemaStream);
        Schema schema = factory.newSchema(schemaFile);

        Validator validator = schema.newValidator();
        validator.validate(new DOMSource(document));
    } finally {
        // The schema is compiled by newSchema(), so the stream can be released here in all cases.
        schemaStream.close();
    }
}

the resource resolver implementation:

/**
 * Resolves schemas referenced through {@code xsd:import} / {@code xsd:include} by loading them
 * from the classpath.
 */
public class ResourceResolver implements LSResourceResolver {

    /**
     * Looks up {@code systemId} as a classpath resource.
     *
     * <p>Note: in this sample, the XSD's are expected to be in the root of the classpath, because
     * the system id is used as the resource name unchanged.
     *
     * @param type         the type of the resource being resolved (unused)
     * @param namespaceURI the namespace of the resource being resolved (unused)
     * @param publicId     the public identifier, passed through to the returned input
     * @param systemId     the schema location as written in the referencing schema
     * @param baseURI      the URI of the referencing schema (unused)
     * @return an {@link Input} for the resource, or {@code null} when {@code systemId} is
     *         {@code null} or no such resource exists, which leaves resolution to the parser's
     *         default behaviour
     */
    public LSInput resolveResource(String type, String namespaceURI,
            String publicId, String systemId, String baseURI) {
        if (systemId == null) {
            // e.g. an xsd:import without a schemaLocation; there is nothing to look up
            return null;
        }

        InputStream resourceAsStream = this.getClass().getClassLoader()
                .getResourceAsStream(systemId);
        if (resourceAsStream == null) {
            // Do not hand the parser an input that can never be read.
            return null;
        }
        return new Input(publicId, systemId, resourceAsStream);
    }

}

The Input implementation returned by the resource resolver:

/**
 * {@link LSInput} backed by an {@link InputStream}.
 *
 * <p>The content is exposed to the parser only through {@link #getStringData()}; the byte-stream
 * and character-stream accessors deliberately return {@code null}, and the corresponding setters
 * are no-ops.
 */
public class Input implements LSInput {

    private String publicId;

    private String systemId;

    private BufferedInputStream inputStream;

    /**
     * @param publicId the public identifier of the resource, may be {@code null}
     * @param sysId    the system identifier of the resource
     * @param input    the stream holding the resource content
     */
    public Input(String publicId, String sysId, InputStream input) {
        this.publicId = publicId;
        this.systemId = sysId;
        this.inputStream = new BufferedInputStream(input);
    }

    public String getPublicId() {
        return publicId;
    }

    public void setPublicId(String publicId) {
        this.publicId = publicId;
    }

    public String getBaseURI() {
        return null;
    }

    public InputStream getByteStream() {
        return null;
    }

    public boolean getCertifiedText() {
        return false;
    }

    public Reader getCharacterStream() {
        return null;
    }

    public String getEncoding() {
        return null;
    }

    /**
     * Reads the remaining content of the stream and returns it as text.
     *
     * <p>The stream is read until end-of-file; {@code available()} is only an estimate and a
     * single {@code read} may return fewer bytes than requested, so neither is relied upon. The
     * bytes are decoded as UTF-8 (the XML default) and a leading byte-order mark is dropped,
     * because a parser reading character data would reject it as content in the prolog.
     *
     * @return the content as a string, or {@code null} if there is no stream or reading fails
     */
    public String getStringData() {
        BufferedInputStream stream = inputStream;
        if (stream == null) {
            return null;
        }
        synchronized (stream) {
            try {
                java.io.ByteArrayOutputStream contents = new java.io.ByteArrayOutputStream();
                byte[] chunk = new byte[4096];
                int read;
                while ((read = stream.read(chunk)) != -1) {
                    contents.write(chunk, 0, read);
                }
                String text = contents.toString("UTF-8");
                if (text.length() > 0 && text.charAt(0) == '\uFEFF') {
                    text = text.substring(1);
                }
                return text;
            } catch (IOException e) {
                e.printStackTrace();
                System.out.println("Exception " + e);
                return null;
            }
        }
    }

    public void setBaseURI(String baseURI) {
    }

    public void setByteStream(InputStream byteStream) {
    }

    public void setCertifiedText(boolean certifiedText) {
    }

    public void setCharacterStream(Reader characterStream) {
    }

    public void setEncoding(String encoding) {
    }

    public void setStringData(String stringData) {
    }

    public String getSystemId() {
        return systemId;
    }

    public void setSystemId(String systemId) {
        this.systemId = systemId;
    }

    public BufferedInputStream getInputStream() {
        return inputStream;
    }

    public void setInputStream(BufferedInputStream inputStream) {
        this.inputStream = inputStream;
    }
}
Stefan De Boey
  • 2,344
  • 16
  • 14
  • Thanks a lot for this comprehensive answer! I'll implement that this afternoon and let you know how it worked. I do need to create the Schema object since I have no idea how the file that are going to be validated will be built. I don't want to rely on their declaration. – Melanie Feb 26 '10 at 16:23
  • 1
    no prob, the sample code is taken from a unit test, so you'll probably need to change some bits to suit your needs – Stefan De Boey Feb 27 '10 at 02:06
  • I'm almost there. Now my the validator does include the included file and the content of the main file. But I have an exception when loading an import file, content not allowed in prolog... It's with a file that is imported. If I load that file directly (build the schema from it instead of the main), I don't get this error. Any idea what can cause this kind of exception in that condition? – Melanie Mar 01 '10 at 14:08
  • Everything works fine now... I just removed an empty line at the end of my file and that fixed the content not allowed in Prolog exception! Again, thanks a lot! – Melanie Mar 01 '10 at 14:16
  • The documentation says that adding a resourece resolver on the SchemaFactory does not affect the validator. This solution is not perfect. https://docs.oracle.com/javase/7/docs/api/javax/xml/validation/SchemaFactory.html#setResourceResolver(org.w3c.dom.ls.LSResourceResolver) – Atul Rai Nov 25 '14 at 08:26
  • The code in `Input#getStringData()` is (1) not correct as it does not handle encoding properly (2) not needed as providing the `InputStream` via `getInputStream()` is enough. – dma_k Mar 30 '16 at 12:45
  • this [answer](http://stackoverflow.com/a/19853877/2979310) resolution is simpler. – ulab Jul 29 '16 at 15:33
  • 2
    @ulab just using an `URL` isn't always the solution. I am using an `URL` in my project, but I still need an `LSResourceResolver`. My best guess, is that your files use import paths relative to your working directory, while my files use import paths relative to the root of my classpath, which are not automatically resolved. – bvdb Oct 26 '16 at 13:25
  • 1
    "note: in this sample, the XSD's are expected to be in the root of the classpath" - could you please describe how to ensure this? – Line Jul 28 '17 at 09:20
  • I'm definitely not understanding what is meant by ` // note: in this sample, the XSD's are expected to be in the root of the classpath` I'm getting the console message stating it can't find 'second.xsd' file which means it finds the first one. What am I missing in order to get it to point to the imported one? Thanks – dweeb Dec 06 '18 at 05:58
12

As user "ulab" points out in a comment on another answer the solution described in this answer (to a separate stackoverflow question) will work for many. Here's the rough outline of that approach:

SchemaFactory schemaFactory = SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI);
// Look the XSD up through the Class object: plain objects have no getResource() method, and
// with Class#getResource the leading slash makes the name absolute instead of package-relative.
URL xsdURL = getClass().getResource("/xsd/my-schema.xsd");
// Passing a URL (not a stream) tells the factory where the schema lives, so relative
// schemaLocation values in include/import elements can be resolved against it.
Schema schema = schemaFactory.newSchema(xsdURL);

The key to this approach is avoiding handing the schema factory a stream and instead giving it a URL. This way it gets information about the location of the XSD file.

One thing to keep in mind here is that the "schemaLocation" attribute on include and/or import elements will be treated as relative to the classpath location of the XSD file whose URL you've handed to the validator when you use simple file paths in the form "my-common.xsd" or "common/some-concept.xsd".

Notes: (1) In the example above I've placed the schema file into a jar file under an "xsd" folder. (2) The leading slash in the "getResource" argument tells Java to start at the root of the classloader instead of at the "this" object's package name.

8

The accepted answer is perfectly ok, but does not work with Java 8 without some modifications. It would also be nice to be able to specify a base path from which the imported schemas are read.

I have used in my Java 8 the following code which allows to specify an embedded schema path other than the root path:

import com.sun.org.apache.xerces.internal.dom.DOMInputImpl;
import org.w3c.dom.ls.LSInput;
import org.w3c.dom.ls.LSResourceResolver;

import java.io.InputStream;
import java.util.Objects;

/**
 * Resolves schemas referenced through {@code xsd:import} / {@code xsd:include} from the
 * classpath, optionally below a fixed base path.
 */
public class ResourceResolver implements LSResourceResolver {

    private final String basePath;

    /**
     * @param basePath classpath folder that contains the schemas, with or without a trailing
     *                 slash; {@code null} or empty means the root of the classpath
     */
    public ResourceResolver(String basePath) {
        this.basePath = basePath;
    }

    /**
     * Loads {@code systemId} as a classpath resource located under the base path.
     *
     * @return the input for the resource, or {@code null} when {@code systemId} is {@code null}
     *         so that the parser applies its default behaviour
     * @throws NullPointerException if the resource cannot be found; the message names both the
     *                              system id and the classpath location that was tried
     */
    @Override
    public LSInput resolveResource(String type, String namespaceURI, String publicId, String systemId, String baseURI) {
        if (systemId == null) {
            // e.g. an xsd:import without a schemaLocation; there is nothing to look up
            return null;
        }
        String resourcePath = buildPath(systemId);
        InputStream resourceAsStream = this.getClass().getClassLoader()
                .getResourceAsStream(resourcePath);
        // The message is built lazily, i.e. only when the lookup actually failed.
        Objects.requireNonNull(resourceAsStream, () -> String.format(
                "Could not find the specified xsd file: %s (classpath location: %s)", systemId, resourcePath));
        return new DOMInputImpl(publicId, systemId, baseURI, resourceAsStream, "UTF-8");
    }

    /**
     * Joins the base path and the system id with exactly one slash. Class-loader resource names
     * must not start with a slash, so an empty base path adds no prefix at all.
     */
    private String buildPath(String systemId) {
        if (basePath == null || basePath.isEmpty()) {
            return systemId;
        }
        return basePath.endsWith("/") ? basePath + systemId : basePath + "/" + systemId;
    }
}

This implementation also gives to the user a meaningful message in case the schema cannot be read.

gil.fernandes
  • 12,978
  • 5
  • 63
  • 76
5

I had to make some modifications to this post by AMegmondoEmber

My main schema file had some includes from sibling folders, and the included files also had some includes from their local folders. I also had to track down the base resource path and relative path of the current resource. This code works for me now, but please keep in mind that it assumes all xsd files have a unique name. If you have some xsd files with same name, but different content at different paths, it will probably give you problems.

import java.io.ByteArrayInputStream;
import java.io.InputStream;
import java.util.HashMap;
import java.util.Map;
import java.util.Scanner;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.ls.LSInput;
import org.w3c.dom.ls.LSResourceResolver;

/**
 * The Class ResourceResolver.
 */
/**
 * {@link LSResourceResolver} that loads included/imported XSD files from the classpath and
 * follows relative schema locations ({@code ./}, {@code ../}, sub-folders) starting from a base
 * path.
 *
 * <p>Limitations: every XSD file name is assumed to be unique (see the warning logged by the
 * constructor), and the schema text is whitespace-normalized and stripped of all characters
 * outside printable ASCII before it is handed to the parser.
 */
public class ResourceResolver implements LSResourceResolver {

    /** The logger. */
    private final Logger logger = LoggerFactory.getLogger(this.getClass());

    /** The schema base path. */
    private final String schemaBasePath;

    /** Maps the file name of each resolved XSD to the classpath folder it was loaded from. */
    private final Map<String, String> pathMap = new HashMap<String, String>();

    /**
     * Instantiates a new resource resolver.
     *
     * @param schemaBasePath the schema base path; it is concatenated directly with the schema
     *                       location, so it has to end with a slash
     */
    public ResourceResolver(String schemaBasePath) {
        this.schemaBasePath = schemaBasePath;
        logger.warn("This LSResourceResolver implementation assumes that all XSD files have a unique name. "
                + "If you have some XSD files with same name but different content (at different paths) in your schema structure, "
                + "this resolver will fail to include the other XSD files except the first one found.");
    }

    /**
     * Resolves {@code systemId} relative to the folder of the schema that references it.
     *
     * @return the input for the resource, or {@code null} when {@code systemId} is {@code null},
     *         the resource has already been resolved once, or it cannot be found on the classpath
     * @see org.w3c.dom.ls.LSResourceResolver#resolveResource(java.lang.String, java.lang.String, java.lang.String, java.lang.String, java.lang.String)
     */
    @Override
    public LSInput resolveResource(String type, String namespaceURI,
            String publicId, String systemId, String baseURI) {
        if (systemId == null) {
            // e.g. an xsd:import without a schemaLocation; leave it to the parser
            return null;
        }

        // The base resource that includes this current resource
        String baseResourceName = null;
        String baseResourcePath = null;
        // Extract the current resource name
        String currentResourceName = systemId.substring(systemId
                .lastIndexOf("/") + 1);

        // If this resource hasn't been added yet
        if (!pathMap.containsKey(currentResourceName)) {
            if (baseURI != null) {
                baseResourceName = baseURI
                        .substring(baseURI.lastIndexOf("/") + 1);
            }

            // we dont need "./" since getResourceAsStream cannot understand it
            if (systemId.startsWith("./")) {
                systemId = systemId.substring(2, systemId.length());
            }

            // If the baseResourcePath has already been discovered, get that
            // from pathMap
            if (pathMap.containsKey(baseResourceName)) {
                baseResourcePath = pathMap.get(baseResourceName);
            } else {
                // The baseResourcePath should be the schemaBasePath
                baseResourcePath = schemaBasePath;
            }

            // Read the resource as input stream
            String normalizedPath = getNormalizedPath(baseResourcePath, systemId);
            InputStream resourceAsStream = this.getClass().getClassLoader()
                    .getResourceAsStream(normalizedPath);
            if (resourceAsStream == null) {
                // Nothing to read: report it and let the parser fall back to its default
                // resolution. The path map is left untouched so a later request is not
                // mistaken for an already resolved resource.
                logger.warn("Could not find schema resource '{}' on the classpath", normalizedPath);
                return null;
            }

            // if the current resource is not in the same path with base
            // resource, add current resource's path to pathMap
            if (systemId.contains("/")) {
                pathMap.put(currentResourceName, normalizedPath.substring(0,normalizedPath.lastIndexOf("/")+1));
            } else {
                // The current resource should be at the same path as the base
                // resource
                pathMap.put(systemId, baseResourcePath);
            }

            // Read the whole resource in one token; closing the scanner also closes the stream.
            String rawContent;
            Scanner scanner = new Scanner(resourceAsStream).useDelimiter("\\A");
            try {
                rawContent = scanner.hasNext() ? scanner.next() : "";
            } finally {
                scanner.close();
            }

            String sanitized = rawContent
                    .replaceAll("\\n", " ") // the parser cannot understand elements broken down multiple lines e.g. (<xs:element \n name="buxing">)
                    .replace("\t", " ") // real tab characters; a literal backslash-t (e.g. inside an xs:pattern) is kept
                    .replaceAll("\\s+", " ") // collapsing whitespace is only for decoration
                    .replaceAll("[^\\x20-\\x7e]", ""); // some files has a special character as a first character indicating utf-8 file
            // NOTE(review): the last step also drops every non-ASCII character of the schema,
            // not only a leading byte-order mark - confirm that this is acceptable for your schemas.
            InputStream is = new ByteArrayInputStream(sanitized.getBytes());

            return new LSInputImpl(publicId, systemId, is); // same as Input class
        }

        // If this resource has already been added, do not add the same resource again. It throws
        // "org.xml.sax.SAXParseException: sch-props-correct.2: A schema cannot contain two global components with the same name; this schema contains two occurrences of ..."
        // return null instead.
        return null;
    }

    /**
     * Gets the normalized path: every leading {@code ../} of the relative path removes the last
     * folder of the base path.
     *
     * @param basePath the base path, expected to end with a slash
     * @param relativePath the relative path
     * @return the normalized path
     */
    private String getNormalizedPath(String basePath, String relativePath){
        if(!relativePath.startsWith("../")){
            return basePath + relativePath;
        }
        else{
            while(relativePath.startsWith("../")){
                basePath = basePath.substring(0,basePath.substring(0, basePath.length()-1).lastIndexOf("/")+1);
                relativePath = relativePath.substring(3);
            }
            return basePath+relativePath;
        }
    }
}
burcakulug
  • 517
  • 6
  • 17
  • 1
    Thanks for sharing :-) I confirm that this has worked for us since first xsd import other xsd using relative paths like ../../otherSchema.xsd – A. Masson Nov 27 '14 at 16:35
  • 1
    the `Input` in this answer http://stackoverflow.com/a/2342859/2733462 is pretty much it, only thing I would recommend is to modify `public String getStringData()` in a way that it can handle reading large schema files, so it will be more resilient. – burcakulug Feb 12 '16 at 17:06
  • Premature end of file when it parses dependencies :( – nllsdfx Jun 27 '18 at 12:44
2

The accepted answer is very verbose and builds a DOM in memory first. Includes seem to work out of the box for me, including relative references:

    // Both the schema and the document are given as files, so the parser knows their
    // locations and no in-memory DOM has to be built first.
    File xsdFile = new File("../foo.xsd");
    File xmlFile = new File("./foo.xml");
    SchemaFactory xsdFactory = SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI);
    Validator xmlValidator = xsdFactory.newSchema(xsdFile).newValidator();
    xmlValidator.validate(new StreamSource(xmlFile));
teknopaul
  • 6,505
  • 2
  • 30
  • 24
1

All you need is to use StreamSource with 2-arg signature like this:

val schemaResource = Thread.currentThread().contextClassLoader.getResource("path/to/main.xsd")
    ?: throw RuntimeException("todo")
// The second StreamSource argument is the system id: it tells the parser where the schema
// lives, so relative include/import locations can be resolved against it.
// `use` closes the stream once the schema has been compiled, even if compilation fails.
val schema = schemaResource.openStream().use { stream ->
    schemaFactory.newSchema(StreamSource(stream, schemaResource.toExternalForm()))
}
schema.newValidator().validate(source)
gdomo
  • 1,650
  • 1
  • 9
  • 18
0

For us, resolveResource ended up looking like the code below. We arrived at it after hitting a prolog exception and some weird errors such as: Element type "xs:schema" must be followed by either attribute specifications, ">" or "/>". and Element type "xs:element" must be followed by either attribute specifications, ">" or "/>". (The latter were caused by elements being broken across multiple lines.)

The path history was needed because of the structure of includes

main.xsd (this has include "includes/subPart.xsd")
/includes/subPart.xsd (this has include "./subSubPart.xsd")
/includes/subSubPart.xsd

So the code looks like:

/** Classpath folder of the most recent resource that was found directly under its system id. */
String pathHistory = "";

/**
 * Loads {@code systemId} from the class loader of {@code Message}, first as given and, if that
 * fails, relative to the folder of the last resource that was found directly.
 *
 * @return the input for the resource, or {@code null} when {@code systemId} is {@code null} or
 *         the resource cannot be found, which leaves resolution to the parser
 */
@Override
public LSInput resolveResource(String type, String namespaceURI, String publicId, String systemId, String baseURI) {
    if (systemId == null) {
        // e.g. an xsd:import without a schemaLocation; there is nothing to look up
        return null;
    }
    // we dont need a leading "./" since getResourceAsStream cannot understand it; only the
    // prefix is removed so that "../" and dots inside the path are left intact
    while (systemId.startsWith("./")) {
        systemId = systemId.substring(2);
    }
    InputStream resourceAsStream = Message.class.getClassLoader().getResourceAsStream(systemId);
    if (resourceAsStream == null) {
        resourceAsStream = Message.class.getClassLoader().getResourceAsStream(pathHistory + systemId);
    } else {
        pathHistory = getNormalizedPath(systemId);
    }
    if (resourceAsStream == null) {
        // Not found in either place: let the parser fall back to its default resolution.
        return null;
    }

    // Read the whole resource in one token; closing the scanner also closes the stream.
    String rawContent;
    Scanner scanner = new Scanner(resourceAsStream).useDelimiter("\\A");
    try {
        rawContent = scanner.hasNext() ? scanner.next() : "";
    } finally {
        scanner.close();
    }
    String s1 = rawContent
            .replaceAll("\\n"," ") //the parser cannot understand elements broken down multiple lines e.g. (<xs:element \n name="buxing">) 
            .replace("\t", " ") //real tab characters; a literal backslash-t (e.g. inside an xs:pattern) is kept
            .replaceAll("\\s+", " ") //collapsing whitespace is only for decoration
            .replaceAll("[^\\x20-\\x7e]", ""); //some files has a special character as a first character indicating utf-8 file
    InputStream is = new ByteArrayInputStream(s1.getBytes());

    return new LSInputImpl(publicId, systemId, is);
}

/**
 * Returns the folder part of a classpath resource name, including the trailing slash, or an
 * empty string when the name contains no folder.
 *
 * <p>Resource names passed to a class loader are always separated by {@code '/'}, whatever the
 * platform's file separator is, so the split must not depend on {@code file.separator}.
 *
 * @param baseURI the resource name, e.g. {@code includes/subPart.xsd}
 * @return the folder prefix, e.g. {@code includes/}
 */
private String getNormalizedPath(String baseURI) {
    return baseURI.substring(0, baseURI.lastIndexOf('/') + 1);
}
0

This thread was very useful for parsing complex xml schemas in multiple files .

I also had to add:

        // Create a factory for W3C XML Schema (XSD) documents.
        SchemaFactory factory = SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI);
        // Enable the "honour-all-schemaLocations" feature; according to the author this is what
        // makes multiple schema files with the same target namespace work.
        // NOTE(review): the feature URI is in the apache.org namespace, so it is presumably
        // specific to Xerces-based factories - confirm for the JAXP implementation in use.
        factory.setFeature("http://apache.org/xml/features/honour-all-schemaLocations", true);

to handle multiple files with same targetnamespace.

M123
  • 11
  • 1
0

Me and a colleague were fighting this for quite some hours and nothing seemed to be working. We would be getting errors like

cvc-elt.1: Cannot find the declaration of element 'Login'.

The most important thing in our case was to use a URL as the resource instead of an InputStream when creating the schema with SchemaFactory's newSchema(...) method. Our XSDs reside on the classpath, so we use ClassPathResource to get the XSD files. See the XSDs, XML and Java example code below. The XSDs were delivered to us as is.

The main XSD (Login.xsd):

<?xml version="1.0" encoding="UTF-8"?>
<xs:schema xmlns:ns0="xmnls"
           xmlns:xs="http://www.w3.org/2001/XMLSchema"
           ns0:xs="http://www.w3.org/2001/XMLSchema"
           elementFormDefault="qualified"
           attributeFormDefault="unqualified">
   <xs:include schemaLocation="CustomTypes.xsd"/>
   <xs:element name="Login">      
      <xs:complexType>
         <xs:sequence>
            <xs:element name="Username" type="Char20">               
            </xs:element>            
         </xs:sequence>
      </xs:complexType>
   </xs:element>
</xs:schema>

The XSD (CustomTypes.xsd) being included from the main XSD (Login.xsd):

<?xml version="1.0" encoding="UTF-8"?>
<xs:schema xmlns:ns0="xmnls"
           xmlns:xs="http://www.w3.org/2001/XMLSchema"
           xmlns:xjc="http://java.sun.com/xml/ns/jaxb/xjc"
           xmlns:jaxb="http://java.sun.com/xml/ns/jaxb"
           jaxb:extensionBindingPrefixes="xjc"
           jaxb:version="2.0"
           ns0:xs="http://www.w3.org/2001/XMLSchema"
           elementFormDefault="qualified"
           attributeFormDefault="unqualified">   
   <xs:simpleType name="Char20">      
      <xs:restriction base="xs:string">
         <xs:pattern value=".{1,20}">            
         </xs:pattern>
      </xs:restriction>
   </xs:simpleType>   
</xs:schema>

The XML to validate:

<?xml version="1.0" encoding="UTF-8"?>
<Login>
 <Username>This username exceeds 20 character</Username>
</Login>

The Java-code to validate the XML-string against the XSD:

class XsdXmlJaxbValidationExample {
     validateXml() {

          String loginXml = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>"
              + "<Login>"
              + "<Username>This username exceeds 20 character</Username>"
              + "</Login>";
 
          SchemaFactory schemaFactory = SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI);
          URL mainXsd = new ClassPathResource("Login.xsd").getURL();
          Schema mainSchema = schemaFactory.newSchema(mainXsd);
          Validator validator = mainSchema.newValidator();
          validator.validate(new StreamSource(new StringReader(loginXml)));

     }
 }
tjeerdnet
  • 344
  • 1
  • 6
  • 14
-2

If you wont find an element in xml you will get xml:lang exception. Elements are case sensitive

Ramakrishna
  • 1,170
  • 2
  • 10
  • 17
-4
SchemaFactory schemaFactory = SchemaFactory
                                .newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI);
// Identify the schema by its URL instead of a bare stream: a stream carries no location, so
// relative schemaLocation values of xsd:include / xsd:import inside the XSD could not be
// resolved. It also avoids leaving an opened stream behind.
java.net.URL schemaUrl = getClass().getClassLoader().getResource("cars-fleet.xsd");
if (schemaUrl == null) {
    throw new IllegalStateException("cars-fleet.xsd not found on the classpath");
}
Source schemaFile = new StreamSource(schemaUrl.toExternalForm());
Schema schema = schemaFactory.newSchema(schemaFile);
Validator validator = schema.newValidator();
StreamSource source = new StreamSource(xml);
validator.validate(source);
ncenerar
  • 1,517
  • 12
  • 25
valerian
  • 79
  • 1
  • 1