/* * $Id$ * * Copyright (C) INRIA, 2010 * * Modifications to the initial code base are copyright of their * respective authors, or their employers as appropriate. Authorship * of the modifications may be determined from the ChangeLog placed at * the end of this file. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public License * as published by the Free Software Foundation; either version 2.1 of * the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 * USA. */ package org.ontologymatching.book; // Alignment API classes import org.semanticweb.owl.align.Alignment; import org.semanticweb.owl.align.AlignmentException; import org.semanticweb.owl.align.AlignmentProcess; import org.semanticweb.owl.align.AlignmentVisitor; // Alignment API implementation classes import fr.inrialpes.exmo.align.impl.BasicParameters; import fr.inrialpes.exmo.align.impl.ObjectAlignment; import fr.inrialpes.exmo.align.impl.URIAlignment; import fr.inrialpes.exmo.align.impl.BasicAlignment; import fr.inrialpes.exmo.align.impl.DistanceAlignment; import fr.inrialpes.exmo.align.impl.InstanceBasedMatrixMeasure; import fr.inrialpes.exmo.align.impl.method.StringDistAlignment; import fr.inrialpes.exmo.align.impl.renderer.OWLAxiomsRendererVisitor; import fr.inrialpes.exmo.align.util.NullStream; import fr.inrialpes.exmo.align.parser.AlignmentParser; import fr.inrialpes.exmo.ontowrap.OntologyFactory; import fr.inrialpes.exmo.ontowrap.HeavyLoadedOntology; import 
fr.inrialpes.exmo.ontowrap.LoadedOntology; import fr.inrialpes.exmo.ontowrap.OntowrapException; import fr.inrialpes.exmo.ontosim.string.StringDistances; // OWL API import org.semanticweb.owlapi.model.IRI; import org.semanticweb.owlapi.model.OWLOntologyManager; import org.semanticweb.owlapi.model.OWLOntology; import org.semanticweb.owlapi.model.OWLOntologyCreationException; import org.semanticweb.owlapi.model.OWLClass; import org.semanticweb.owlapi.model.OWLDataProperty; import org.semanticweb.owlapi.model.OWLDataPropertyExpression; import org.semanticweb.owlapi.model.OWLLiteral; import org.semanticweb.owlapi.model.OWLClassExpression; import org.semanticweb.owlapi.model.OWLNamedIndividual; import org.semanticweb.owlapi.model.OWLIndividual; import org.semanticweb.owlapi.apibinding.OWLManager; import org.semanticweb.owlapi.reasoner.OWLReasoner; // SAX standard classes import org.xml.sax.SAXException; // DOM Standard classes import org.w3c.dom.Document; import org.w3c.dom.NodeList; import javax.xml.transform.TransformerFactory; import javax.xml.transform.Transformer; import javax.xml.transform.TransformerException; import javax.xml.transform.OutputKeys; import javax.xml.transform.dom.DOMSource; import javax.xml.transform.stream.StreamResult; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.ParserConfigurationException; import javax.xml.xpath.XPathFactory; import javax.xml.xpath.XPath; import javax.xml.xpath.XPathExpression; import javax.xml.xpath.XPathExpressionException; import javax.xml.xpath.XPathConstants; // Java standard classes import java.lang.Character; import java.util.Set; import java.util.Properties; import java.util.Vector; import java.io.PrintWriter; import java.io.StringWriter; import java.io.BufferedWriter; import java.io.OutputStreamWriter; import java.io.BufferedReader; import java.io.InputStreamReader; import java.io.InputStream; import java.io.FileInputStream; import 
java.io.PrintStream; // completes the "import" keyword left at the end of the previous line
import java.io.FileNotFoundException;
import java.io.ByteArrayInputStream;
import java.io.StringReader;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.io.File;
import java.io.FileWriter;
import java.net.URI;
import java.net.URL;
import java.net.MalformedURLException;
import java.net.URISyntaxException;

/**
 * InstanceBasedAlignment
 *
 * Matcher that compares the individuals of two OWL ontologies through a string
 * distance between their names: the {@code firstname}/{@code lastname} data
 * properties of admin.owl on one side and the {@code name} data property of
 * lab.owl on the other side.
 *
 * This should be put in a generic instance-based matcher in which
 * - a distance between instances is computed and stored in the table
 * - the classical distance between classes is computed from the distance between
 *   instances through classical measure (linkage, etc.).
 * - the extraction is as usual (both for classes and instances)
 */
public class InstanceBasedAlignment extends DistanceAlignment { // implements AlignmentProcess

    /**
     * When true, raw attribute values are compared with the substring distance;
     * otherwise normalised names are compared with the Levenshtein distance.
     * Switched on by {@link #align} when the "imeasure" parameter is "substring".
     */
    public boolean substring = false;

    // NOTE(review): these two fields are never assigned in this class;
    // OMBookDistanceMatrix.initialize() works on its own local variables.
    HeavyLoadedOntology ontology1 = null;
    HeavyLoadedOntology ontology2 = null;

    // JE 2010: documentation
    // Similarity has the process:
    // initialize( o1, o2, al )
    // compute( prop)
    // This is what must be done with a new kind of measure
    /**
     * Instance distance for the book exercise: one name per individual is
     * collected at initialisation time, and the instance matrix is filled with
     * the string distance between these names.
     */
    protected class OMBookDistanceMatrix extends InstanceBasedMatrixMeasure {

        /** Name of each individual of the first ontology, at the individual's index in indlist1. */
        Vector<String> nametab1 = null;
        /** Name of each individual of the second ontology, at the individual's index in indlist2. */
        Vector<String> nametab2 = null;

        /**
         * Creates the matrices (through the superclass) and builds both name tables.
         *
         * @param onto1 first ontology; must wrap an OWL API {@code OWLOntology}
         * @param onto2 second ontology; must wrap an OWL API {@code OWLOntology}
         * @param align initial alignment, passed to the superclass
         * @throws IllegalStateException if the ontologies are not OWL API ontologies
         *         or cannot be read; the original cause is attached
         */
        public void initialize( LoadedOntology onto1, LoadedOntology onto2, Alignment align ) {
            // create the matrices and all structures
            super.initialize( onto1, onto2, align );
            try {
                // It should be tested that we are in OWLOntologies...
                // And this is only because we are dealing with instances and we must get the value of attributes
                if ( !(onto1.getOntology() instanceof OWLOntology)
                     || !(onto2.getOntology() instanceof OWLOntology) )
                    throw new AlignmentException( "OMBookDistanceMatrix requires OWL API 3.0 or above");
                HeavyLoadedOntology heavy1 = (HeavyLoadedOntology)onto1;
                HeavyLoadedOntology heavy2 = (HeavyLoadedOntology)onto2;
                OWLOntology owl1 = (OWLOntology)heavy1.getOntology();
                OWLOntology owl2 = (OWLOntology)heavy2.getOntology();
                OWLOntologyManager manager1 = owl1.getOWLOntologyManager();
                OWLOntologyManager manager2 = owl2.getOWLOntologyManager();

                // Normalise instance comparators
                OWLDataProperty firstAtt = manager1.getOWLDataFactory().getOWLDataProperty( IRI.create( "http://book.ontologymatching.org/exercise/admin.owl#firstname" ) );
                OWLDataProperty lastAtt = manager1.getOWLDataFactory().getOWLDataProperty( IRI.create( "http://book.ontologymatching.org/exercise/admin.owl#lastname" ) );
                // The table is pre-sized so that names can be stored at their index
                // whatever the iteration order of the individuals.
                Vector<String> names1 = emptyNames( nbind1 );
                for ( Object in1 : heavy1.getIndividuals() ) {
                    if ( !(in1 instanceof OWLNamedIndividual) ) continue;
                    OWLNamedIndividual ind = (OWLNamedIndividual)in1;
                    String name;
                    if ( substring ) {
                        name = nullToEmpty( getValue( ind, lastAtt, owl1 ) );
                    } else {
                        String first = nullToEmpty( getValue( ind, firstAtt, owl1 ) );
                        String last = normalizeString1( getValue( ind, lastAtt, owl1 ) );
                        name = first+" "+last;
                    }
                    names1.set( indlist1.get( in1 ).intValue(), name );
                }

                OWLDataProperty nameAtt = manager2.getOWLDataFactory().getOWLDataProperty( IRI.create( "http://book.ontologymatching.org/exercise/lab.owl#name" ) );
                Vector<String> names2 = emptyNames( nbind2 );
                for ( Object in2 : heavy2.getIndividuals() ) {
                    if ( !(in2 instanceof OWLNamedIndividual) ) continue;
                    OWLNamedIndividual ind = (OWLNamedIndividual)in2;
                    String name;
                    if ( substring ) {
                        name = nullToEmpty( getValue( ind, nameAtt, owl2 ) );
                    } else {
                        name = normalizeString2( getValue( ind, nameAtt, owl2 ) );
                    }
                    names2.set( indlist2.get( in2 ).intValue(), name );
                }

                // Publish the tables only once both have been completely built
                nametab1 = names1;
                nametab2 = names2;
            } catch (OntowrapException owex) {
                // Without the name tables no distance can be computed: fail here
                // rather than with an obscure error in computeInstanceDistance()
                throw new IllegalStateException( "Cannot initialise the instance name tables", owex );
            } catch (AlignmentException alex) {
                throw new IllegalStateException( "Cannot initialise the instance name tables", alex );
            }
        }

        /**
         * Fills indmatrix[i][j] with the distance between the i-th name of the
         * first ontology and the j-th name of the second one, using the
         * substring distance when {@code substring} is set and the Levenshtein
         * distance otherwise.
         *
         * @param params not used by this implementation
         */
        public void computeInstanceDistance( Properties params ) {
            StringDistances dist = new StringDistances();
            for ( int i=0; i < nbind1; i++ ) {
                String name1 = nametab1.get(i);
                for ( int j=0; j < nbind2; j++ ) {
                    String name2 = nametab2.get(j);
                    if ( substring ) {
                        indmatrix[i][j] = dist.subStringDistance( name1, name2 );
                    } else {
                        indmatrix[i][j] = dist.levenshteinDistance( name1, name2 );
                    }
                }
            }
        }

        /** Returns a vector holding {@code size} empty strings. */
        private Vector<String> emptyNames( int size ) {
            Vector<String> names = new Vector<String>( size );
            for ( int i=0; i < size; i++ ) names.add( "" );
            return names;
        }
    }

    /** Creation **/
    public InstanceBasedAlignment() {
        setSimilarity( new OMBookDistanceMatrix() );
        setType("**");
    }

    /**
     * Processing: loads the initial alignment, initialises and computes the
     * similarity, optionally prints the matrix and extracts the alignment.
     *
     * @param alignment initial alignment handed to {@code loadInit}
     * @param params parameters; "imeasure" equal to "substring" selects the
     *        substring measure, "printMatrix" (any value) prints the matrix;
     *        "algName" is set by this method
     * @throws AlignmentException if one of the processing steps fails
     */
    public void align( Alignment alignment, Properties params ) throws AlignmentException {
        // Get function from params (if we need some like substring)
        String im = params.getProperty("imeasure");
        if ( im != null && im.equals("substring") ) substring = true;
        // JE2010: Strange: why is it not equivalent to call
        // super.align( alignment, params )
        // Load initial alignment
        loadInit( alignment );
        // Initialize matrix
        getSimilarity().initialize( ontology1(), ontology2(), alignment );
        // Compute similarity/dissimilarity
        getSimilarity().compute( params );
        // Print matrix if asked
        params.setProperty( "algName", getClass()+"/"+im );
        if ( params.getProperty("printMatrix") != null ) printDistanceMatrix( params );
        // Extract alignment
        extract( type, params );
    }

    /**
     * Normalises a name of the second ontology. The trimmed string is cut into
     * words at spaces, at the separators {@code / : _ \ + & . -} and where an
     * upper-case letter follows a lower-case one. A word ended by a space or a
     * separator is reduced to its upper-cased initial; a word ended by a case
     * change keeps all its letters with the first one upper-cased; the last
     * word is copied unchanged. Each emitted word is followed by one space.
     * For example "jerome euzenat" gives "J euzenat".
     *
     * @param s the string to normalise; null is treated as the empty string
     * @return the normalised string, never null
     */
    public String normalizeString2( String s ) {
        return normalize( s, true );
    }

    /**
     * Normalises a last name of the first ontology. The trimmed string is cut
     * into words at spaces, at the separators {@code / : _ \ + & . -} and where
     * an upper-case letter follows a lower-case one. Every word but the last
     * one is emitted with its first letter upper-cased and followed by one
     * space; the last word is copied unchanged.
     * For example "van der berg" gives "Van Der berg".
     *
     * @param s the string to normalise; null is treated as the empty string
     * @return the normalised string, never null
     */
    public String normalizeString1( String s ) {
        return normalize( s, false );
    }

    /**
     * Shared implementation of both normalisations.
     *
     * @param s the string to normalise, may be null
     * @param initialsOnly whether words ended by a space or a separator are
     *        reduced to their initial
     */
    private String normalize( String s, boolean initialsOnly ) {
        if ( s == null ) return "";
        String sp = s.trim();
        StringBuilder result = new StringBuilder();
        int len = sp.length();
        int beginWord = 0;
        for ( int pos = 0; pos < len; pos++ ) {
            char c = sp.charAt( pos );
            if ( c == ' ' || isSeparator( c ) ) {
                // Empty words (two break characters in a row) produce nothing
                if ( pos > beginWord ) {
                    result.append( upperCharAt( sp, beginWord ) );
                    if ( !initialsOnly ) result.append( sp, beginWord+1, pos );
                    result.append( ' ' );
                }
                beginWord = pos+1;
            } else if ( pos != 0 && Character.isUpperCase( c )
                        && Character.isLowerCase( sp.charAt( pos-1 ) ) ) {
                // add a space if connected minusMajus
                result.append( upperCharAt( sp, beginWord ) );
                result.append( sp, beginWord+1, pos );
                result.append( ' ' );
                beginWord = pos;
            }
        }
        // beginWord may be len when the string ends with a separator
        result.append( sp, Math.min( beginWord, len ), len );
        return result.toString();
    }

    /** Tells whether c is one of the word separators {@code / : _ \ + & . -}. */
    private static boolean isSeparator( char c ) {
        return c == '/' || c == ':' || c == '_' || c == '\\'
            || c == '+' || c == '&' || c == '.' || c == '-';
    }

    /** Replaces null by the empty string. */
    private static String nullToEmpty( String s ) {
        return ( s == null ) ? "" : s;
    }

    /**
     * Returns the character of s at position pos, converted to upper case when
     * it is a lower-case character and unchanged otherwise.
     */
    public char upperCharAt( String s, int pos ) {
        char c = s.charAt( pos );
        return Character.isLowerCase( c ) ? Character.toUpperCase( c ) : c;
    }

    /**
     * Returns the literal of one value of data property p for individual ind
     * in ontology o (the first one delivered by the set iterator), or null
     * when the individual has no value for this property.
     */
    public String getValue( OWLIndividual ind, OWLDataPropertyExpression p, OWLOntology o ) {
        Set<OWLLiteral> values = ind.getDataPropertyValues( p, o );
        if ( values.isEmpty() ) return null;
        return values.iterator().next().getLiteral();
    }

    /**
     * Loads the ontology identified by ref through the default OntologyFactory.
     *
     * @throws AlignmentException wrapping the OntowrapException raised when loading fails
     */
    static LoadedOntology loadOntology( URI ref ) throws AlignmentException {
        OntologyFactory factory = OntologyFactory.getFactory();
        try {
            return factory.loadOntology( ref );
        } catch ( OntowrapException owex ) {
            throw new AlignmentException( "Cannot load ontology "+ref, owex );
        }
    }
}