Schema aware processing with XSLT 1.0

I wrote some extension functions for the Xalan-J XSLT (1.0) processor, to showcase how we can implement a few of the schema-awareness features in XSLT 1.0 based applications. The examples are presented below. Using these techniques, some of the schema-awareness features of XSLT 2.0 can be simulated in an XSLT 1.0 environment.

[1] schmval.xsl

<?xml version="1.0" encoding="UTF-8"?>
<!--
    schmval.xsl

    Builds a PEOPLE fragment inside the stylesheet, validates it against an
    inline XML Schema through the Java extension function
    SchemaExt.isInstanceValid, and then emits either the fragment itself
    (when it is valid) or an error message (when it is not).
    No content is taken from the input document.
-->
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
                xmlns:java="http://xml.apache.org/xalan/java"
                xmlns:exslt="http://exslt.org/common"
                exclude-result-prefixes="java exslt"
                version="1.0">

    <xsl:output method="xml" indent="yes" omit-xml-declaration="yes" />

    <xsl:template match="/">
        <!-- Inline XML Schema definition, held as text. The CDATA section
             lets the schema be written as ordinary markup; its string value
             is passed through normalize-space() before it reaches the
             extension function. -->
        <xsl:variable name="schemaDoc"><![CDATA[
            <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
                <xs:element name="PEOPLE">
                    <xs:complexType>
                        <xs:sequence>
                            <xs:element name="PERSON" maxOccurs="unbounded">
                                <xs:complexType>
                                    <xs:sequence>
                                        <xs:element name="FNAME" type="xs:string" />
                                        <xs:element name="LNAME" type="xs:string" />
                                        <xs:element name="DOB" type="xs:date" />
                                        <xs:element name="SEX">
                                            <xs:simpleType>
                                                <xs:restriction base="xs:string">
                                                    <xs:enumeration value="M" />
                                                    <xs:enumeration value="F" />
                                                </xs:restriction>
                                            </xs:simpleType>
                                        </xs:element>
                                    </xs:sequence>
                                </xs:complexType>
                            </xs:element>
                        </xs:sequence>
                    </xs:complexType>
                </xs:element>
            </xs:schema>
        ]]></xsl:variable>

        <!-- The XML fragment to validate (a result tree fragment). -->
        <xsl:variable name="xmlDoc">
            <PEOPLE>
                <PERSON>
                    <FNAME>Mukul</FNAME>
                    <LNAME>Gandhi</LNAME>
                    <DOB>2006-01-02</DOB>
                    <SEX>M</SEX>
                </PERSON>
            </PEOPLE>
        </xsl:variable>

        <!-- The third argument 'false' tells the extension function that the
             second argument is the schema text itself, not a URI. The result
             is the string 'true' on success, otherwise an error message. -->
        <xsl:variable name="validationResult"
                      select="java:SchemaExt.isInstanceValid(exslt:node-set($xmlDoc), normalize-space($schemaDoc), 'false')" />

        <xsl:choose>
            <xsl:when test="$validationResult = 'true'">
                <xsl:copy-of select="$xmlDoc" />
            </xsl:when>
            <xsl:otherwise>
                <!-- xsl:text keeps the stylesheet's indentation whitespace
                     out of the emitted message. -->
                <xsl:text>Invalid content generated from the stylesheet&#xa;</xsl:text>
                <xsl:value-of select="$validationResult" />
            </xsl:otherwise>
        </xsl:choose>

    </xsl:template>

</xsl:stylesheet>

The above stylesheet:

1) Invokes a Java extension function ('isInstanceValid'), which validates an XML fragment using an inline Schema. The XML fragment is generated by the stylesheet (in this example, stored in a variable, 'xmlDoc').

2) In this example, if the XML fragment is valid according to the given Schema, some action is taken (like generating some content as output). If the XML fragment is invalid, an error message is generated.

The Java extension class is:

SchemaExt.java 

import javax.xml.XMLConstants;
import javax.xml.validation.Schema;
import javax.xml.validation.SchemaFactory;
import javax.xml.validation.Validator;
import javax.xml.transform.stream.StreamSource;
import javax.xml.transform.dom.DOMSource;
import org.xml.sax.SAXException;
import org.w3c.dom.NodeList;
import org.w3c.dom.Node;
import java.io.StringReader;
import java.io.IOException;

public class SchemaExt {

    private static boolean valid = true;

    public static boolean isSchemaValid(String schemaDoc) {
        try {
            SchemaFactory sFactory = SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI);
            Schema schema = sFactory.newSchema(new StreamSource(new StringReader(schemaDoc)));
            return true;
        }
        catch(SAXException ex) {
            return false;
        }
    }

    public static String isInstanceValid(NodeList nodeList, String schemaDoc, String isUri) {
        try {
            Node node = nodeList.item(0);

            if (node != null) {
                SchemaFactory sFactory = SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI);
                Schema schema = null;

                if (isUri.equals("true")) {
                    schema = sFactory.newSchema(new StreamSource(schemaDoc));
                }
                else {
                    schema = sFactory.newSchema(new StreamSource(new StringReader(schemaDoc)));
                }

                Validator validator = schema.newValidator();
                MyErrorHandler errorHandler = new MyErrorHandler();
                validator.setErrorHandler(errorHandler);
                validator.validate(new DOMSource(node));

                if (errorHandler.isValid()) {
                    return "true";
                }
                else {
                    return errorHandler.getErrorMessage();
                }
            }
        }
        catch(SAXException ex) {
            ex.printStackTrace();
        }
        catch(IOException ex) {
            ex.printStackTrace();
        }
        catch(Exception ex) {
            ex.printStackTrace();
        }

        return "Not a valid result tree fragment to process";
    }

}

The Error handler Java class (MyErrorHandler.java) is:

import org.xml.sax.ErrorHandler;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;

class MyErrorHandler implements ErrorHandler {

    private boolean valid = true;
    private String errorMessage = "";

    public void fatalError(SAXParseException e) throws SAXException {
        valid = false;
        errorMessage = e.getMessage();
    }

    public void error(SAXParseException e) throws SAXException {
        valid = false;
        errorMessage = e.getMessage();
    }

    public void warning(SAXParseException e) throws SAXException {
        // noop
    }

    public boolean isValid() {
        return valid;
    }

    public String getErrorMessage() {
        return errorMessage;
    }

}

Output from the stylesheet:

1) When the XML fragment is valid

java org.apache.xalan.xslt.Process -in schmval.xsl -xsl schmval.xsl

<PEOPLE>
    <PERSON>
        <FNAME>Mukul</FNAME>
        <LNAME>Gandhi</LNAME>
        <DOB>2006-01-02</DOB>
        <SEX>M</SEX>
    </PERSON>
</PEOPLE>

2) When the XML fragment is invalid (for example, when the DOB value in 'xmlDoc' is changed to 006-01-02)

java org.apache.xalan.xslt.Process -in schmval.xsl -xsl schmval.xsl

Invalid content generated from the stylesheet
cvc-type.3.1.3: The value '006-01-02' of element 'DOB' is not valid.

Now let's consider two more stylesheet examples, which showcase how an XML Schema can be supplied for validation as a URI (and not inline, as in the previous example).

[2] schmvalUri1.xsl

<?xml version="1.0" encoding="UTF-8"?>
<!--
    schmvalUri1.xsl

    Builds a PEOPLE fragment inside the stylesheet and validates it, through
    the Java extension function SchemaExt.isInstanceValid, against an XML
    Schema that is referenced by URI. Emits the fragment when it is valid,
    otherwise an error message.
-->
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
                xmlns:java="http://xml.apache.org/xalan/java"
                xmlns:exslt="http://exslt.org/common"
                exclude-result-prefixes="java exslt"
                version="1.0">

    <xsl:output method="xml" indent="yes" omit-xml-declaration="yes" />

    <!-- URI reference to the XML Schema -->
    <xsl:variable name="schemaURI" select="'personSchema.xsd'" />

    <xsl:template match="/">
        <!-- The XML fragment to validate (a result tree fragment). -->
        <xsl:variable name="xmlDoc">
            <PEOPLE>
                <PERSON>
                    <FNAME>Mukul</FNAME>
                    <LNAME>Gandhi</LNAME>
                    <DOB>2006-01-02</DOB>
                    <SEX>M</SEX>
                </PERSON>
            </PEOPLE>
        </xsl:variable>

        <!-- The third argument 'true' tells the extension function that the
             second argument is a URI of the schema, not the schema text. The
             result is the string 'true' on success, otherwise an error
             message. -->
        <xsl:variable name="validationResult"
                      select="java:SchemaExt.isInstanceValid(exslt:node-set($xmlDoc), $schemaURI, 'true')" />

        <xsl:choose>
            <xsl:when test="$validationResult = 'true'">
                <xsl:copy-of select="$xmlDoc" />
            </xsl:when>
            <xsl:otherwise>
                <!-- xsl:text keeps the stylesheet's indentation whitespace
                     out of the emitted message. -->
                <xsl:text>Invalid content generated from the stylesheet&#xa;</xsl:text>
                <xsl:value-of select="$validationResult" />
            </xsl:otherwise>
        </xsl:choose>

    </xsl:template>

</xsl:stylesheet>

The Schema definition file is:

personSchema.xsd

<?xml version="1.0" encoding="UTF-8"?>
<!-- XML Schema for a PEOPLE document: a list of PERSON records.
     No target namespace is declared, so the elements are in no namespace. -->
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">

    <!-- Root element: a sequence of PERSON elements with no upper limit
         (minOccurs is not given, so at least one is required). -->
    <xs:element name="PEOPLE">
        <xs:complexType>
            <xs:sequence>
                <xs:element name="PERSON" maxOccurs="unbounded">
                    <xs:complexType>
                        <!-- Each child must appear exactly once, in this order. -->
                        <xs:sequence>
                            <xs:element name="FNAME" type="xs:string" />
                            <xs:element name="LNAME" type="xs:string" />
                            <!-- xs:date lexical form, e.g. 2006-01-02 -->
                            <xs:element name="DOB" type="xs:date" />
                            <!-- SEX is restricted to the literal values M or F. -->
                            <xs:element name="SEX">
                                <xs:simpleType>
                                    <xs:restriction base="xs:string">
                                        <xs:enumeration value="M" />
                                        <xs:enumeration value="F" />
                                    </xs:restriction>
                                </xs:simpleType>
                            </xs:element>
                        </xs:sequence>
                    </xs:complexType>
                </xs:element>
            </xs:sequence>
        </xs:complexType>
    </xs:element>

</xs:schema>

Stylesheet [2] produces the same output as stylesheet [1], except that the Schema is now stored in a physical file and is accessed from the stylesheet through a URI reference.

[3] schmvalUri2.xsl

<?xml version="1.0" encoding="UTF-8"?>
<!--
    schmvalUri2.xsl

    Copies the document element of the input document into a variable and
    validates it, through the Java extension function
    SchemaExt.isInstanceValid, against an XML Schema that is referenced by
    URI. Emits the copied content when it is valid, otherwise an error
    message.
-->
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
                xmlns:java="http://xml.apache.org/xalan/java"
                xmlns:exslt="http://exslt.org/common"
                exclude-result-prefixes="java exslt"
                version="1.0">

    <xsl:output method="xml" indent="yes" omit-xml-declaration="yes" />

    <!-- URI reference to the XML Schema -->
    <xsl:variable name="schemaURI" select="'personSchema.xsd'" />

    <xsl:template match="/">
        <!-- The content to validate is read from the input document rather
             than being written out in the stylesheet. -->
        <xsl:variable name="xmlDoc">
            <xsl:copy-of select="*" />
        </xsl:variable>

        <!-- The third argument 'true' tells the extension function that the
             second argument is a URI of the schema, not the schema text. The
             result is the string 'true' on success, otherwise an error
             message. -->
        <xsl:variable name="validationResult"
                      select="java:SchemaExt.isInstanceValid(exslt:node-set($xmlDoc), $schemaURI, 'true')" />

        <xsl:choose>
            <xsl:when test="$validationResult = 'true'">
                <xsl:copy-of select="$xmlDoc" />
            </xsl:when>
            <xsl:otherwise>
                <!-- xsl:text keeps the stylesheet's indentation whitespace
                     out of the emitted message. -->
                <xsl:text>Invalid content generated from the stylesheet&#xa;</xsl:text>
                <xsl:value-of select="$validationResult" />
            </xsl:otherwise>
        </xsl:choose>

    </xsl:template>

</xsl:stylesheet>

The above stylesheet ([3]) is similar to stylesheet [2], except that here the XML content is read dynamically from an external XML file (the input document, via the <xsl:copy-of select="*" /> instruction) instead of being written out in the stylesheet.

I hope that this write-up is useful.


Home


Last Updated: Dec 31, 2008