From d9aa9b48434b90dbaf05bdefd400b065d12799ef Mon Sep 17 00:00:00 2001 From: Marko Luukkainen Date: Mon, 22 Oct 2018 12:19:25 +0300 Subject: [PATCH] Improved attribute namespace handling + element characters data parsing gitlab #3 Change-Id: Ia5a27bfb304b863b9e8ba30a968fd3558fd5d8b6 --- .../simantics/xml/data/XmlDataConverter.java | 264 +++++++++++++----- 1 file changed, 195 insertions(+), 69 deletions(-) diff --git a/org.simantics.xml.sax/src/org/simantics/xml/data/XmlDataConverter.java b/org.simantics.xml.sax/src/org/simantics/xml/data/XmlDataConverter.java index 7a2a5c5..83cd7af 100644 --- a/org.simantics.xml.sax/src/org/simantics/xml/data/XmlDataConverter.java +++ b/org.simantics.xml.sax/src/org/simantics/xml/data/XmlDataConverter.java @@ -37,8 +37,11 @@ import org.w3._2001.xmlschema.ExplicitGroup; import org.w3._2001.xmlschema.Import; import org.w3._2001.xmlschema.LocalComplexType; import org.w3._2001.xmlschema.LocalElement; +import org.w3._2001.xmlschema.LocalSimpleType; import org.w3._2001.xmlschema.OpenAttrs; +import org.w3._2001.xmlschema.Restriction; import org.w3._2001.xmlschema.Schema; +import org.w3._2001.xmlschema.SimpleType; import org.w3._2001.xmlschema.TopLevelElement; /** @@ -56,6 +59,9 @@ public class XmlDataConverter { String pluginName; + private boolean useElementNSforAttributes = true; // If true, attributes with undefined namespace are written to the same namespace as the element. If false, the attributes are written to the root namespace. + private boolean ignoreAttributeNS = true; // Completely ignore attribute namespaces. When true, all attributes are written to the elements. 
+ private String[] header; public XmlDataConverter(List inputFiles, File conversionFile, File outputPlugin) { @@ -146,6 +152,7 @@ public class XmlDataConverter { Map> elementMap = new HashMap<>(); Map elementNsMap = new HashMap<>(); Map> attributeMap = new HashMap<>(); + String defaultNS; protected void doConvert() throws IOException, XMLStreamException, JAXBException { XMLInputFactory input = XMLInputFactory.newInstance(); @@ -164,7 +171,7 @@ public class XmlDataConverter { XMLEvent event = reader.nextEvent(); if (event.isStartElement()) { StartElement parseElement = event.asStartElement(); -// System.out.println("Start " + parseElement.getName()); + Element schemaElement = null; String currentNS = parseElement.getName().getNamespaceURI(); Schema s = schemaMap.get(currentNS); @@ -174,6 +181,7 @@ public class XmlDataConverter { } else { schemaElement = elementMap.get(s).get(elementName); } + Element parentElement = elementStack.peek(); boolean newElement = false; @@ -187,6 +195,8 @@ public class XmlDataConverter { addSchemaDependency(ps, s); } + } else { + defaultNS = currentNS; } if (schemaElement == null) { if (elementStack.isEmpty()) { @@ -204,7 +214,7 @@ public class XmlDataConverter { newElement = true; } if (parentElement != null) { - ComplexType complexType = parentElement.getComplexType(); + ComplexType complexType = getOrCreateComplexType(parentElement); ExplicitGroup choice = complexType.getChoice(); if (choice == null) { choice = new ExplicitGroup(); @@ -226,42 +236,65 @@ public class XmlDataConverter { // System.out.println("Attribute " + attribute.getName() + " " + attribute.getValue()); // } if (newElement) { - LocalComplexType complexType = new LocalComplexType(); - schemaElement.setComplexType(complexType); attributeIterator = parseElement.getAttributes(); - while (attributeIterator.hasNext()) { - Attribute attribute = attributeIterator.next(); - if ("http://www.w3.org/XML/1998/namespace".equals(attribute.getName().getNamespaceURI())) - continue; - 
addAttribute(attribute, complexType, currentNS); + if (attributeIterator.hasNext()) { + ComplexType complexType = getOrCreateComplexType(schemaElement); + while (attributeIterator.hasNext()) { + Attribute attribute = attributeIterator.next(); + if ("http://www.w3.org/XML/1998/namespace".equals(attribute.getName().getNamespaceURI())) + continue; + addAttribute(attribute, complexType, currentNS); + } } } else { - LocalComplexType complexType = schemaElement.getComplexType(); + ComplexType complexType = schemaElement.getComplexType(); attributeIterator = parseElement.getAttributes(); - Map currentAttributes = new HashMap<>(); - Iterator currentAttributeIterator = complexType.getAttributeOrAttributeGroup().iterator(); - while (currentAttributeIterator.hasNext()) { - Annotated annotated = currentAttributeIterator.next(); - if (annotated instanceof org.w3._2001.xmlschema.Attribute) { - org.w3._2001.xmlschema.Attribute schemaAttribute = (org.w3._2001.xmlschema.Attribute)annotated; - String n = schemaAttribute.getName(); - if (n != null) - currentAttributes.put(n, schemaAttribute); + if (complexType != null || attributeIterator.hasNext()) { + complexType = getOrCreateComplexType(schemaElement); + Map currentAttributes = new HashMap<>(); + Iterator currentAttributeIterator = complexType.getAttributeOrAttributeGroup().iterator(); + while (currentAttributeIterator.hasNext()) { + Annotated annotated = currentAttributeIterator.next(); + if (annotated instanceof org.w3._2001.xmlschema.Attribute) { + org.w3._2001.xmlschema.Attribute localAttribute = (org.w3._2001.xmlschema.Attribute)annotated; + String n = localAttribute.getName(); + if (n != null) + currentAttributes.put(n, localAttribute); + else if (localAttribute.getRef() != null) { + currentAttributes.put(localAttribute.getRef().getLocalPart(), localAttribute); + } + } } - } - while (attributeIterator.hasNext()) { - Attribute attribute = attributeIterator.next(); - if 
("http://www.w3.org/XML/1998/namespace".equals(attribute.getName().getNamespaceURI())) - continue; - org.w3._2001.xmlschema.Attribute schemaAttribute = currentAttributes.get(attribute.getName().getLocalPart()); - if (schemaAttribute == null) { - addAttribute(attribute, complexType, currentNS); - } else { - QName newType = getType(attribute.getValue()); - updateAttributeType(schemaAttribute, newType); + while (attributeIterator.hasNext()) { + Attribute attribute = attributeIterator.next(); + if ("http://www.w3.org/XML/1998/namespace".equals(attribute.getName().getNamespaceURI())) + continue; + org.w3._2001.xmlschema.Attribute localAttribute = currentAttributes.get(attribute.getName().getLocalPart()); + if (localAttribute == null) { + addAttribute(attribute, complexType, currentNS); + } else { + QName newType = getType(attribute.getValue()); + org.w3._2001.xmlschema.Attribute schemaAttribute = updateAttributeType(localAttribute, newType); + + String attrNs = getNS(attribute, currentNS); + if (!ignoreAttributeNS && attribute.getName().getNamespaceURI().length() > 0) { + // Attribute has explicit ns definition. + if (localAttribute.getRef() != null) { + // current local attribute is reference, check that the namespaces match + if (!localAttribute.getRef().getNamespaceURI().equals(attrNs)) + throw new RuntimeException("Conflicting namespaces for attribute " + attribute.getName().getLocalPart() + " " + attrNs + " " + localAttribute.getRef().getNamespaceURI()); + } else if (!attrNs.equals(currentNS)){ + // move the attribute to explicit namespace. 
+ complexType.getAttributeOrAttributeGroup().remove(localAttribute); + org.w3._2001.xmlschema.Attribute scAttribute = addAttribute(attribute, complexType, currentNS); + scAttribute.setType(schemaAttribute.getType()); + } + + } + } + } - } } @@ -279,29 +312,59 @@ public class XmlDataConverter { } else if (event.isCharacters()) { Characters characters = event.asCharacters(); -// if (!characters.isWhiteSpace()) -// System.out.println(characters.getData()); + Element element = elementStack.peek(); + if (element != null) { + String text = characters.getData().trim(); + if (text.length() > 0) { + setElementCharactersData(element, text, characters); + } + } } else if (event.isNamespace()) { } } } - private void updateAttributeType(org.w3._2001.xmlschema.Attribute schemaAttribute, QName newType) { + private void setElementCharactersData(Element element, String text, Characters characters) { - QName currentType = schemaAttribute.getType(); - if (!newType.getLocalPart().equals(currentType.getLocalPart())) { - - - if (currentType.getLocalPart().equals("integer") && newType.getLocalPart().equals("double")) { - // change integer to double - schemaAttribute.setType(newType); - } else if (currentType.getLocalPart().equals("double") && newType.getLocalPart().equals("integer")) { - // nothing to do, integer can be parsed as double - } else if (!currentType.getLocalPart().equals("string")){ - schemaAttribute.setType(new QName(SchemaConversionBase.SCHEMA_NS, "string")); + //System.out.println(element.getName() + " " + characters.getData()); + if (element.getComplexType() != null) + element.getComplexType().setMixed(true); + else { + SimpleType simpleType = getOrCreateSimpleType(element); + QName type = getType(text); + Restriction restriction = simpleType.getRestriction(); + if (restriction == null) { + restriction = new Restriction(); + restriction.setBase(type); + simpleType.setRestriction(restriction); + } else { + restriction.setBase(mergePrimitiveType(restriction.getBase(), 
type)); + } + } + } + + private ComplexType getOrCreateComplexType(Element element) { + LocalComplexType complexType = element.getComplexType(); + if (complexType == null) { + complexType = new LocalComplexType(); + element.setComplexType(complexType); + if (element.getSimpleType() != null) { + // Convert SimpleType to ComplexType + element.setSimpleType(null); + complexType.setMixed(true); } } + return complexType; + } + + private SimpleType getOrCreateSimpleType(Element element) { + LocalSimpleType simpleType = element.getSimpleType(); + if (simpleType == null) { + simpleType = new LocalSimpleType(); + element.setSimpleType(simpleType); + } + return simpleType; } private void addElement(ExplicitGroup choice, QName type, LocalElement localElement) { @@ -334,33 +397,51 @@ public class XmlDataConverter { parentSchema.getIncludeOrImportOrRedefine().add(import1); } - private void addAttribute(Attribute attribute, ComplexType complexType, String currentNS) { - if (attribute.getName().getNamespaceURI().length() == 0 || attribute.getName().getNamespaceURI().equals(currentNS)) { + private String getNS(Attribute attribute, String currentNS) { + if (ignoreAttributeNS) + return currentNS; + String attrNs = attribute.getName().getNamespaceURI(); + if (attrNs.length() == 0) { + if (useElementNSforAttributes) + attrNs = currentNS; + else + attrNs = defaultNS; + } + return attrNs; + } + + + private org.w3._2001.xmlschema.Attribute addAttribute(Attribute attribute, ComplexType complexType, String currentNS) { + String attrNs = getNS(attribute, currentNS); + String attrName = attribute.getName().getLocalPart(); + + if (attrNs.equals(currentNS)) { org.w3._2001.xmlschema.Attribute schemaAttribute = new org.w3._2001.xmlschema.Attribute(); - schemaAttribute.setName(attribute.getName().getLocalPart()); + schemaAttribute.setName(attrName); schemaAttribute.setType(getType(attribute.getValue())); addAttribute(complexType, schemaAttribute); + return schemaAttribute; } else { - { - Schema 
schema = getOrCreateSchema(currentNS); - Schema attrSchema = getOrCreateSchema(attribute.getName().getNamespaceURI()); - - org.w3._2001.xmlschema.Attribute schemaAttribute = attributeMap.get(attrSchema).get(attribute.getName().getLocalPart()); - if (schemaAttribute == null) { - schemaAttribute = new org.w3._2001.xmlschema.TopLevelAttribute(); - schemaAttribute.setName(attribute.getName().getLocalPart()); - schemaAttribute.setType(getType(attribute.getValue())); - attrSchema.getSimpleTypeOrComplexTypeOrGroup().add(schemaAttribute); - attributeMap.get(attrSchema).put(attribute.getName().getLocalPart(), schemaAttribute); - } - addSchemaDependency(schema, attrSchema); - - } - { - org.w3._2001.xmlschema.Attribute schemaAttribute = new org.w3._2001.xmlschema.Attribute(); - schemaAttribute.setRef(new QName(attribute.getName().getNamespaceURI(),attribute.getName().getLocalPart())); - addAttribute(complexType, schemaAttribute); + + Schema schema = getOrCreateSchema(currentNS); + Schema attrSchema = getOrCreateSchema(attrNs); + + org.w3._2001.xmlschema.Attribute schemaAttribute = attributeMap.get(attrSchema).get(attrName); + if (schemaAttribute == null) { + schemaAttribute = new org.w3._2001.xmlschema.TopLevelAttribute(); + schemaAttribute.setName(attrName); + schemaAttribute.setType(getType(attribute.getValue())); + attrSchema.getSimpleTypeOrComplexTypeOrGroup().add(schemaAttribute); + attributeMap.get(attrSchema).put(attribute.getName().getLocalPart(), schemaAttribute); } + addSchemaDependency(schema, attrSchema); + + + org.w3._2001.xmlschema.Attribute localAttribute = new org.w3._2001.xmlschema.Attribute(); + localAttribute.setRef(new QName(attrNs,attrName)); + addAttribute(complexType, localAttribute); + return schemaAttribute; + } } @@ -372,6 +453,7 @@ public class XmlDataConverter { org.w3._2001.xmlschema.Attribute attr = (org.w3._2001.xmlschema.Attribute)annotated; if (schemaAttribute.getName().equals(attr.getName())) { updateAttributeType(attr, 
schemaAttribute.getType()); + return; } } } @@ -392,6 +474,8 @@ public class XmlDataConverter { private QName getType(String value) { + if ("true".equals(value) || "false".equals(value)) // || "1".equals(value) || "0".equals(value)) + return new QName(SchemaConversionBase.SCHEMA_NS, "boolean"); try { Integer.parseInt(value); return new QName(SchemaConversionBase.SCHEMA_NS, "integer"); @@ -405,12 +489,54 @@ public class XmlDataConverter { } catch (NumberFormatException e) { } - if ("True".equals(value) || "False".equals(value)) - return new QName(SchemaConversionBase.SCHEMA_NS, "boolean"); + return new QName(SchemaConversionBase.SCHEMA_NS, "string"); } + private org.w3._2001.xmlschema.Attribute updateAttributeType(org.w3._2001.xmlschema.Attribute schemaAttribute, QName newType) { + + QName currentType = schemaAttribute.getType(); + if (currentType == null && schemaAttribute.getRef() != null) { + Schema schema = schemaMap.get(schemaAttribute.getRef().getNamespaceURI()); + + schemaAttribute = attributeMap.get(schema).get(schemaAttribute.getRef().getLocalPart()); + currentType = schemaAttribute.getType(); + } + if (currentType == null) + throw new RuntimeException("Could not resolve attribute"); + + + schemaAttribute.setType(mergePrimitiveType(currentType, newType)); + return schemaAttribute; + + } + + private QName mergePrimitiveType(QName currentType, QName newType) { + if (!newType.getLocalPart().equals(currentType.getLocalPart())) { + + + if (currentType.getLocalPart().equals("integer") && newType.getLocalPart().equals("double")) { + // change integer to double + return newType; + } else if (currentType.getLocalPart().equals("double") && newType.getLocalPart().equals("integer")) { + // nothing to do, integer can be parsed as double + return currentType; + } else if (currentType.getLocalPart().equals("boolean") && newType.getLocalPart().equals("integer")) { + // change boolean to int + return newType; + } else if (currentType.getLocalPart().equals("integer") && 
newType.getLocalPart().equals("boolean")) { + // nothing to do, boolean (0 & 1) can be parsed as integer. + // FIXME : what about true / false? Now type detection system does not accept 0 or 1 as boolean values, while XML supports it. See method: QName getType(String value) + // FIXME : also, if we support 0 & 1 as booleans, we need to consider possible double values as well. + return currentType; + } else if (!currentType.getLocalPart().equals("string")){ + return new QName(SchemaConversionBase.SCHEMA_NS, "string"); + } + } + return currentType; + } + private Schema getOrCreateSchema(StartElement parseElement) { return getOrCreateSchema(parseElement.getName().getNamespaceURI()); } -- 2.45.2