Merging XML Documents

1. Merging XML documents

The following question was asked on XSL-List.

I have the source XML that looks like this:

<employee>
  <name>
    <first>Bob</first>
  </name>
</employee>

I have another XML (updates.xml) that contains information about how to update the above source. Notice that this updates.xml is dynamically generated, and its contents vary.

<updates>
  <elem xpath="/employee/address/country" xvalue="USA" />
  <elem xpath="/employee/name/first" xvalue="Bill" />
</updates>

I want to write an XSLT that reads information from updates.xml, and updates source.xml based on these criteria:
- if the xpath in updates.xml exists in source.xml, replace the content of that source node with xvalue;
- otherwise, create the XML node in the source (recursively if necessary), with the xvalue defined in updates.xml.

Basically here is the result XML that I need:

<employee>
  <name>
    <first>Bill</first>
  </name>
  <address>
    <country>USA</country>
  </address>
</employee>

The stylesheet for this problem is:

<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
                                            xmlns:exsl="http://exslt.org/common"
                                            exclude-result-prefixes="exsl">
  
   <xsl:output method="xml" indent="yes" />
  
   <xsl:variable name="updates" select="document('updates.xml')" />
  
   <!-- store XPath values of all elements, of source.xml in a variable -->
   <xsl:variable name="all-xpaths">    
     <xsl:for-each select="//*">
       <xpath>
          <xsl:call-template name="constructXPathExpr">
             <xsl:with-param name="node" select="." />
             <xsl:with-param name="xpath" select="name(.)" />
          </xsl:call-template>
        </xpath>  
     </xsl:for-each>
   </xsl:variable>
  
   <!-- a template rule that will match to any element node -->
   <xsl:template match="*">    
      <xsl:variable name="curr-node" select="." />
      <xsl:variable name="xpath-expr">
        <xsl:call-template name="constructXPathExpr">
           <xsl:with-param name="node" select="." />
           <xsl:with-param name="xpath" select="name(.)" />
        </xsl:call-template>
     </xsl:variable>
     <xsl:element name="{name()}">       
          <xsl:choose>
            <xsl:when test="$xpath-expr = $updates/updates/elem/@xpath">
              <xsl:value-of select="$updates/updates/elem[@xpath = $xpath-expr]/@xvalue" />          
            </xsl:when> 
            <xsl:when test="not(*)">         
              <xsl:value-of select="text()" />               
            </xsl:when>
            <xsl:otherwise>
              <!-- code to create xml node in source -->
              <xsl:for-each select="$updates/updates/elem">
                   <xsl:if test="not(@xpath = exsl:node-set($all-xpaths)/xpath)">
                        <xsl:variable name="temp" select="." />                           
                        <xsl:variable name="check">
                           <xsl:for-each select="$curr-node//*">
                              <xsl:variable name="expr">
                                  <xsl:call-template name="constructXPathExpr">
                                      <xsl:with-param name="node" select="." />
                                      <xsl:with-param name="xpath" select="name(.)" />
                                  </xsl:call-template>
                             </xsl:variable>
                             <xsl:if test="starts-with($temp/@xpath,$expr)">
                                1
                             </xsl:if>
                       </xsl:for-each>
                  </xsl:variable>          
                  <xsl:if test="not(contains($check,'1')) and (substring-after(substring-after(@xpath,$xpath-expr),'/') != '')">          
                         <xsl:call-template name="constructXmlFragment">
                             <xsl:with-param name="path" select="substring-after(substring-after(@xpath,$xpath-expr),'/')" />
                             <xsl:with-param name="value" select="@xvalue" />
                          </xsl:call-template>
                  </xsl:if>
              </xsl:if>    
              </xsl:for-each>           
           </xsl:otherwise>
         </xsl:choose>       

         <xsl:apply-templates select="*" />           

     </xsl:element>    
   </xsl:template>

   <!-- a template to construct an XPath expression, for a given node -->
   <xsl:template name="constructXPathExpr">
     <xsl:param name="node" />
     <xsl:param name="xpath" />
    
     <xsl:choose>      
       <xsl:when test="$node/parent::*">
         <xsl:call-template name="constructXPathExpr">
            <xsl:with-param name="node" select="$node/parent::*" />
            <xsl:with-param name="xpath" select="concat(name($node/parent::*),'/',$xpath)" />
         </xsl:call-template>
       </xsl:when>
       <xsl:otherwise>
         <xsl:value-of select="concat('/',$xpath)" />
       </xsl:otherwise>
     </xsl:choose>
   </xsl:template>
  
   <!-- a template to generate a XML fragment -->
   <xsl:template name="constructXmlFragment">
     <xsl:param name="path" />
     <xsl:param name="value" />
    
     <xsl:choose>
       <xsl:when test="contains($path,'/')">
         <xsl:element name="{substring-before($path,'/')}">          
             <xsl:call-template name="constructXmlFragment">
               <xsl:with-param name="path" select="substring-after($path,'/')" />
               <xsl:with-param name="value" select="$value" />
             </xsl:call-template>        
         </xsl:element>
       </xsl:when>
       <xsl:otherwise>        
         <xsl:element name="{$path}">
            <xsl:value-of select="$value" />
         </xsl:element>
       </xsl:otherwise>
     </xsl:choose>
   </xsl:template>
  
</xsl:stylesheet>

Assume that this stylesheet is named main.xsl.

source.xml is:

<?xml version="1.0" encoding="UTF-8"?>
<employee>
  <name>
    <first>Bob</first>   
  </name> 
</employee>

updates.xml is:

<?xml version="1.0" encoding="UTF-8"?>
<updates> 
  <elem xpath="/employee/address/country" xvalue="USA" />
  <elem xpath="/employee/name/first" xvalue="Bill" />
</updates>

When the XSLT transformation is run as (using Saxon 8.1.1):

java net.sf.saxon.Transform source.xml main.xsl

The output received is:

<?xml version="1.0" encoding="UTF-8"?>
<employee>
   <address>
      <country>USA</country>
   </address>
   <name>
      <first>Bill</first>
   </name>
</employee>

Following is another test case.

source.xml is the same as before, while updates.xml is:

<?xml version="1.0" encoding="UTF-8"?>
<updates>
  <elem xpath="/employee/name/first" xvalue="Mukul" />
  <elem xpath="/employee/name/last" xvalue="Gandhi" />
  <elem xpath="/employee/address/country" xvalue="India" />
</updates>

The output received is:

<?xml version="1.0" encoding="UTF-8"?>
<employee>
    <address>
         <country>India</country>
    </address>
    <name>
        <last>Gandhi</last>
        <first>Mukul</first>
    </name>
</employee>

2. Merging XML documents

The following question was asked on XSL-List.

How can I combine two XML files into one assuming that I have the same structure in both files like this:

(The first file)

<bookshelf>
 <book><title>1st Book</title></book>
 <book><title>2nd Book</title></book>
 <book><title>3rd Book</title></book>
</bookshelf>

(The second file)

<bookshelf>
 <book><title>4th Book</title></book>
 <book><title>5th Book</title></book>
 <book><title>6th Book</title></book>
</bookshelf>


I would like to have the following file:

<bookshelf>
 <book><title>1st Book</title></book>
 <book><title>2nd Book</title></book>
 <book><title>3rd Book</title></book>
 <book><title>4th Book</title></book>
 <book><title>5th Book</title></book>
 <book><title>6th Book</title></book>
</bookshelf>

The stylesheet for this problem is:

<?xml version="1.0"?>
<!--
  Appends the content of file2.xml's bookshelf to the bookshelf of the
  input document. The result is a single <bookshelf> that holds deep copies
  of the input bookshelf's child elements, followed by deep copies of the
  child elements of the bookshelf in file2.xml.
-->
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">

  <xsl:output indent="yes" method="xml" />

  <!-- the second document, loaded once for the whole transformation -->
  <xsl:variable name="file2" select="document('file2.xml')" />

  <xsl:template match="/bookshelf">
    <xsl:element name="bookshelf">
      <!-- books of the input document first, then those of file2.xml -->
      <xsl:copy-of select="child::*" />
      <xsl:copy-of select="$file2/child::bookshelf/child::*" />
    </xsl:element>
  </xsl:template>

</xsl:stylesheet>

Charles Knell provided the following answer:

-- no sorting --
<?xml version="1.0" encoding="UTF-8" ?>
<!--
  Merges two bookshelves without sorting. The result is a single
  <bookshelf> that lists the books of kkoch3a.xml first and then the
  content of the input document's bookshelf.
-->
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">

  <xsl:output encoding="UTF-8" indent="yes" method="xml" />
  <xsl:strip-space elements="*" />

  <!-- the other bookshelf, loaded once for the whole transformation -->
  <xsl:variable name="kkoch3a" select="document('kkoch3a.xml')" />

  <!-- every book, from either document, is copied as it is -->
  <xsl:template match="book">
    <xsl:copy-of select="self::node()" />
  </xsl:template>

  <!-- the input bookshelf becomes the container for both sets of books -->
  <xsl:template match="bookshelf">
    <xsl:element name="bookshelf">
      <xsl:apply-templates select="$kkoch3a/child::bookshelf/child::book" />
      <xsl:apply-templates select="child::node()" />
    </xsl:element>
  </xsl:template>

  <!-- start at the document node and descend into its children -->
  <xsl:template match="/">
    <xsl:apply-templates select="child::node()" />
  </xsl:template>

</xsl:stylesheet>

-- sorting --
<?xml version="1.0" encoding="UTF-8" ?>
<!--
  Merges the books of kkoch3a.xml and kkoch3b.xml into a single
  <bookshelf>, ordered by title. Both bookshelves are read with document();
  nothing is selected from the input document itself.
-->
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">

  <xsl:output encoding="UTF-8" indent="yes" method="xml" />
  <xsl:strip-space elements="*" />

  <xsl:variable name="kkoch3a" select="document('kkoch3a.xml')" />
  <xsl:variable name="kkoch3b" select="document('kkoch3b.xml')" />

  <!-- each selected book is copied as it is -->
  <xsl:template match="book">
    <xsl:copy-of select="self::node()" />
  </xsl:template>

  <!-- writes the merged bookshelf; books are processed in order of their title -->
  <xsl:template name="sort-books">
    <xsl:element name="bookshelf">
      <xsl:apply-templates select="$kkoch3a/bookshelf/book | $kkoch3b/bookshelf/book">
        <xsl:sort select="title" />
      </xsl:apply-templates>
    </xsl:element>
  </xsl:template>

  <!-- entry point: the whole output comes from sort-books -->
  <xsl:template match="/">
    <xsl:call-template name="sort-books" />
  </xsl:template>

</xsl:stylesheet>