package org.simantics.xml.data; import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.util.ArrayDeque; import java.util.Date; import java.util.Deque; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Set; import java.util.regex.Matcher; import java.util.regex.Pattern; import javax.xml.bind.JAXBContext; import javax.xml.bind.JAXBElement; import javax.xml.bind.JAXBException; import javax.xml.bind.Marshaller; import javax.xml.namespace.QName; import javax.xml.stream.XMLEventReader; import javax.xml.stream.XMLInputFactory; import javax.xml.stream.XMLStreamException; import javax.xml.stream.events.Attribute; import javax.xml.stream.events.Characters; import javax.xml.stream.events.EndElement; import javax.xml.stream.events.StartElement; import javax.xml.stream.events.XMLEvent; import org.simantics.xml.sax.SchemaConversionBase; import org.w3._2001.xmlschema.Annotated; import org.w3._2001.xmlschema.ComplexType; import org.w3._2001.xmlschema.Element; import org.w3._2001.xmlschema.ExplicitGroup; import org.w3._2001.xmlschema.Import; import org.w3._2001.xmlschema.LocalComplexType; import org.w3._2001.xmlschema.LocalElement; import org.w3._2001.xmlschema.LocalSimpleType; import org.w3._2001.xmlschema.OpenAttrs; import org.w3._2001.xmlschema.Restriction; import org.w3._2001.xmlschema.Schema; import org.w3._2001.xmlschema.SimpleType; import org.w3._2001.xmlschema.TopLevelElement; /** * This class generates XML-file parsers based on bunch of XML data files. It is recommended to use schema based parser (org.simantics.xml.sax.SchemaConverter) if possible. * Parser generated by this class is not reliable... * * @author luukkainen * */ public class XmlDataConverter { File outputPlugin; File conversionFile; List inputFiles; String pluginName; private boolean useElementNSforAttributes = true; // If true, attributes with undefined namespace are written to the same name space as the element. If false. the attributes are written to the root namespace. private boolean ignoreAttributeNS = false; // Completely ignore attribute namespaces. When true, all attributes are written to the elements. private String ignorePattern = "(\\w)*"; // Ignore attribute namespaces. When pattern exists, and local name of a attribute matches the regexp, the interpreted namespace is ignored. private boolean nonStandardBooleans = false; // Accept non standard boolean values (True, False). private String[] header; public XmlDataConverter(List inputFiles, File conversionFile, File outputPlugin) { if (inputFiles.size() == 0) throw new IllegalArgumentException("At least one input file must be given."); this.outputPlugin = outputPlugin; this.conversionFile = conversionFile; this.inputFiles = inputFiles; pluginName = outputPlugin.getName(); } public void convert() throws IOException, XMLStreamException, JAXBException { init(); doConvert(); Map fileMap = new HashMap<>(); JAXBContext jc = JAXBContext.newInstance("org.w3._2001.xmlschema"); Marshaller m = jc.createMarshaller(); m.setProperty("jaxb.formatted.output", true); Set filenames = new HashSet<>(); for (Schema s : schemaMap.values()) { String name = s.getTargetNamespace(); // Special case for XAML if (name.startsWith("clr-namespace:")) { name = name.substring("clr-namespace:".length()); int i = name.indexOf(";assembly"); if (i > 0) name = name.substring(0, i); } name = name.replaceAll("\\.", "_"); name = name.replaceAll("/", "_"); name = name.replaceAll(":", "_"); name = name.replaceAll(";", "_"); if (filenames.contains(name)) { int i = 2; while (filenames.contains(name+i)) { i++; } name = name+i; } filenames.add(name); File file = new File(outputPlugin.getAbsolutePath() + File.separator + name +".xsd"); fileMap.put(s, file); } for (Schema s : schemaMap.values()) { for (OpenAttrs openAttrs : s.getIncludeOrImportOrRedefine()) { if (openAttrs instanceof Import) { Import import1 = (Import)openAttrs; Schema dep = schemaMap.get(import1.getNamespace()); import1.setSchemaLocation(fileMap.get(dep).getName()); } } } for (Schema s : schemaMap.values()) { File file = fileMap.get(s); m.marshal(s, file); } Schema rootSchema = schemaMap.values().iterator().next(); DataSchemaConverter schemaConverter = new DataSchemaConverter(rootSchema,fileMap.get(rootSchema),conversionFile,outputPlugin); schemaConverter.setFileMap(fileMap); schemaConverter.setSchemaMap(schemaMap); schemaConverter.convert(); header = null; schemaMap = null; elementMap = null; } protected void init() throws IOException { header = new String[4]; header[0] = "Generated with org.simantics.xml.sax XML data file converter"; header[1] = ""; header[2] = "File " + inputFiles.get(0).getAbsolutePath().replaceAll(Matcher.quoteReplacement("\\"), "/") + " , total file count: " + (inputFiles.size()) + ""; header[3] = "Date " + new Date().toString(); schemaMap = new HashMap<>(); elementMap = new HashMap<>(); attributeMap = new HashMap<>(); elementNsMap = new HashMap<>(); } Map schemaMap = new LinkedHashMap<>(); Map> elementMap = new HashMap<>(); Map elementNsMap = new HashMap<>(); Map> attributeMap = new HashMap<>(); String defaultNS; protected void doConvert() throws IOException, XMLStreamException, JAXBException { XMLInputFactory input = XMLInputFactory.newInstance(); for (File inputFile : inputFiles) { XMLEventReader reader = input.createXMLEventReader(new FileInputStream(inputFile)); convertFile(reader); } } private void convertFile(XMLEventReader reader) throws XMLStreamException { Deque elementStack = new ArrayDeque<>(); while (reader.hasNext()) { XMLEvent event = reader.nextEvent(); if (event.isStartElement()) { StartElement parseElement = event.asStartElement(); Element schemaElement = null; String currentNS = parseElement.getName().getNamespaceURI(); Schema s = schemaMap.get(currentNS); String elementName = parseElement.getName().getLocalPart(); if (s == null) { s = getOrCreateSchema(parseElement); } else { schemaElement = elementMap.get(s).get(elementName); } Element parentElement = elementStack.peek(); boolean newElement = false; boolean sameNameSpace = true; if (parentElement != null) { String parentNs = elementNsMap.get(parentElement); sameNameSpace =currentNS.equals(parentNs); if (!sameNameSpace) { Schema ps = getOrCreateSchema(parentNs); addSchemaDependency(ps, s); } } else { defaultNS = currentNS; } if (schemaElement == null) { if (elementStack.isEmpty()) { schemaElement = new TopLevelElement(); s.getSimpleTypeOrComplexTypeOrGroup().add(schemaElement); } else { schemaElement = new TopLevelElement(); s.getSimpleTypeOrComplexTypeOrGroup().add(schemaElement); } schemaElement.setName(elementName); elementNsMap.put(schemaElement, currentNS); elementMap.get(s).put(elementName, schemaElement); newElement = true; } if (parentElement != null) { ComplexType complexType = getOrCreateComplexType(parentElement); ExplicitGroup choice = complexType.getChoice(); if (choice == null) { choice = new ExplicitGroup(); complexType.setChoice(choice); choice.setMaxOccurs("unbounded"); } LocalElement localElement = new LocalElement(); localElement.setRef(new QName(parseElement.getName().getNamespaceURI(), elementName)); addElement(choice, new QName(SchemaConversionBase.SCHEMA_NS,"element"), localElement); } elementStack.push(schemaElement); Iterator attributeIterator = parseElement.getAttributes(); // while (attributeIterator.hasNext()) { // Attribute attribute = attributeIterator.next(); // System.out.println("Attribute " + attribute.getName() + " " + attribute.getValue()); // } if (newElement) { attributeIterator = parseElement.getAttributes(); if (attributeIterator.hasNext()) { ComplexType complexType = getOrCreateComplexType(schemaElement); while (attributeIterator.hasNext()) { Attribute attribute = attributeIterator.next(); if ("http://www.w3.org/XML/1998/namespace".equals(attribute.getName().getNamespaceURI())) continue; addAttribute(attribute, complexType, currentNS); } } } else { ComplexType complexType = schemaElement.getComplexType(); attributeIterator = parseElement.getAttributes(); if (complexType != null || attributeIterator.hasNext()) { complexType = getOrCreateComplexType(schemaElement); Map currentAttributes = new HashMap<>(); Iterator currentAttributeIterator = complexType.getAttributeOrAttributeGroup().iterator(); while (currentAttributeIterator.hasNext()) { Annotated annotated = currentAttributeIterator.next(); if (annotated instanceof org.w3._2001.xmlschema.Attribute) { org.w3._2001.xmlschema.Attribute localAttribute = (org.w3._2001.xmlschema.Attribute)annotated; String n = localAttribute.getName(); if (n != null) currentAttributes.put(n, localAttribute); else if (localAttribute.getRef() != null) { currentAttributes.put(localAttribute.getRef().getLocalPart(), localAttribute); } } } while (attributeIterator.hasNext()) { Attribute attribute = attributeIterator.next(); if ("http://www.w3.org/XML/1998/namespace".equals(attribute.getName().getNamespaceURI())) continue; org.w3._2001.xmlschema.Attribute localAttribute = currentAttributes.get(attribute.getName().getLocalPart()); if (localAttribute == null) { addAttribute(attribute, complexType, currentNS); } else { QName newType = getType(attribute.getValue()); org.w3._2001.xmlschema.Attribute schemaAttribute = updateAttributeType(localAttribute, newType); String attrNs = getNS(attribute, currentNS); if (!ignoreAttributeNs(attribute) && attribute.getName().getNamespaceURI().length() > 0) { // Attribute has explicit ns definition. if (localAttribute.getRef() != null) { // current local attribute is reference, check that the namespaces match if (!localAttribute.getRef().getNamespaceURI().equals(attrNs)) throw new RuntimeException("Conflicting namespaces for attribute " + attribute.getName().getLocalPart() + " " + attrNs + " " + localAttribute.getRef().getNamespaceURI()); } else if (!attrNs.equals(currentNS)){ // move the attribute to explicit namespace. complexType.getAttributeOrAttributeGroup().remove(localAttribute); org.w3._2001.xmlschema.Attribute scAttribute = addAttribute(attribute, complexType, currentNS); scAttribute.setType(schemaAttribute.getType()); } } } } } } } else if (event.isEndElement()) { EndElement element = event.asEndElement(); // System.out.println("End " + element.getName()); elementStack.pop(); } else if (event.isAttribute()) { } else if (event.isStartDocument()) { } else if (event.isEndDocument()) { } else if (event.isEntityReference()) { } else if (event.isCharacters()) { Characters characters = event.asCharacters(); Element element = elementStack.peek(); if (element != null) { String text = characters.getData().trim(); if (text.length() > 0) { setElementCharactersData(element, text, characters); } } } else if (event.isNamespace()) { } } } private void setElementCharactersData(Element element, String text, Characters characters) { //System.out.println(element.getName() + " " + characters.getData()); if (element.getComplexType() != null) element.getComplexType().setMixed(true); else { SimpleType simpleType = getOrCreateSimpleType(element); QName type = getType(text); Restriction restriction = simpleType.getRestriction(); if (restriction == null) { restriction = new Restriction(); restriction.setBase(type); simpleType.setRestriction(restriction); } else { restriction.setBase(mergePrimitiveType(restriction.getBase(), type)); } } } private ComplexType getOrCreateComplexType(Element element) { LocalComplexType complexType = element.getComplexType(); if (complexType == null) { complexType = new LocalComplexType(); element.setComplexType(complexType); if (element.getSimpleType() != null) { // Convert SimpleType to ComplexType element.setSimpleType(null); complexType.setMixed(true); } } return complexType; } private SimpleType getOrCreateSimpleType(Element element) { LocalSimpleType simpleType = element.getSimpleType(); if (simpleType == null) { simpleType = new LocalSimpleType(); element.setSimpleType(simpleType); } return simpleType; } private void addElement(ExplicitGroup choice, QName type, LocalElement localElement) { for (Object o : choice.getParticle()) { JAXBElement el = (JAXBElement)o; if (el.getName().equals(type)) { QName ref = el.getValue().getRef(); QName ref2 = localElement.getRef(); if (ref != null) { if (ref.equals(ref2)) return; } else if (el.getValue().getType().equals(localElement.getType())) return; } } choice.getParticle().add(new JAXBElement(type, LocalElement.class, null, localElement)); } private void addSchemaDependency(Schema parentSchema, Schema schema) { for (OpenAttrs openAttrs : parentSchema.getIncludeOrImportOrRedefine()) { if (openAttrs instanceof Import) { Import import1 = (Import)openAttrs; if (import1.getNamespace().equals(schema.getTargetNamespace())) return; } } Import import1 = new Import(); import1.setNamespace(schema.getTargetNamespace()); parentSchema.getIncludeOrImportOrRedefine().add(import1); } private String getNS(Attribute attribute, String currentNS) { if (ignoreAttributeNs(attribute)) return currentNS; String attrNs = attribute.getName().getNamespaceURI(); if (attrNs.length() == 0) { if (useElementNSforAttributes) attrNs = currentNS; else attrNs = defaultNS; } return attrNs; } private boolean ignoreAttributeNs(Attribute attribute) { if (ignoreAttributeNS) return true; if (ignorePattern != null) { return attribute.getName().getLocalPart().matches(ignorePattern); } return false; } private org.w3._2001.xmlschema.Attribute addAttribute(Attribute attribute, ComplexType complexType, String currentNS) { String attrNs = getNS(attribute, currentNS); String attrName = attribute.getName().getLocalPart(); if (attrNs.equals(currentNS)) { org.w3._2001.xmlschema.Attribute schemaAttribute = new org.w3._2001.xmlschema.Attribute(); schemaAttribute.setName(attrName); schemaAttribute.setType(getType(attribute.getValue())); addAttribute(complexType, schemaAttribute); return schemaAttribute; } else { Schema schema = getOrCreateSchema(currentNS); Schema attrSchema = getOrCreateSchema(attrNs); org.w3._2001.xmlschema.Attribute schemaAttribute = attributeMap.get(attrSchema).get(attrName); if (schemaAttribute == null) { schemaAttribute = new org.w3._2001.xmlschema.TopLevelAttribute(); schemaAttribute.setName(attrName); schemaAttribute.setType(getType(attribute.getValue())); attrSchema.getSimpleTypeOrComplexTypeOrGroup().add(schemaAttribute); attributeMap.get(attrSchema).put(attribute.getName().getLocalPart(), schemaAttribute); } addSchemaDependency(schema, attrSchema); org.w3._2001.xmlschema.Attribute localAttribute = new org.w3._2001.xmlschema.Attribute(); localAttribute.setRef(new QName(attrNs,attrName)); addAttribute(complexType, localAttribute); return schemaAttribute; } } private void addAttribute(ComplexType complexType, org.w3._2001.xmlschema.Attribute schemaAttribute) { if (schemaAttribute.getName() != null) { for (Annotated annotated : complexType.getAttributeOrAttributeGroup()) { if (annotated instanceof org.w3._2001.xmlschema.Attribute) { org.w3._2001.xmlschema.Attribute attr = (org.w3._2001.xmlschema.Attribute)annotated; if (schemaAttribute.getName().equals(attr.getName())) { updateAttributeType(attr, schemaAttribute.getType()); return; } } } } else { for (Annotated annotated : complexType.getAttributeOrAttributeGroup()) { if (annotated instanceof org.w3._2001.xmlschema.Attribute) { org.w3._2001.xmlschema.Attribute attr = (org.w3._2001.xmlschema.Attribute)annotated; if (attr.getName() != null) continue; if (schemaAttribute.getRef().equals(attr.getRef())) { return; } } } } complexType.getAttributeOrAttributeGroup().add(schemaAttribute); } private QName getType(String value) { if (!nonStandardBooleans) { if ("true".equals(value) || "false".equals(value)) // || "1".equals(value) || "0".equals(value)) return new QName(SchemaConversionBase.SCHEMA_NS, "boolean"); } else { if ("true".equalsIgnoreCase(value) || "false".equalsIgnoreCase(value)) { return new QName(SchemaConversionBase.SCHEMA_NS, "boolean"); } } try { Integer.parseInt(value); return new QName(SchemaConversionBase.SCHEMA_NS, "integer"); } catch (NumberFormatException e) { } try { Double.parseDouble(value); return new QName(SchemaConversionBase.SCHEMA_NS, "double"); } catch (NumberFormatException e) { } return new QName(SchemaConversionBase.SCHEMA_NS, "string"); } private org.w3._2001.xmlschema.Attribute updateAttributeType(org.w3._2001.xmlschema.Attribute schemaAttribute, QName newType) { QName currentType = schemaAttribute.getType(); if (currentType == null && schemaAttribute.getRef() != null) { Schema schema = schemaMap.get(schemaAttribute.getRef().getNamespaceURI()); schemaAttribute = attributeMap.get(schema).get(schemaAttribute.getRef().getLocalPart()); currentType = schemaAttribute.getType(); } if (currentType == null) throw new RuntimeException("Could not resolve attribute"); schemaAttribute.setType(mergePrimitiveType(currentType, newType)); return schemaAttribute; } private QName mergePrimitiveType(QName currentType, QName newType) { if (!newType.getLocalPart().equals(currentType.getLocalPart())) { if (currentType.getLocalPart().equals("integer") && newType.getLocalPart().equals("double")) { // change integer to double return newType; } else if (currentType.getLocalPart().equals("double") && newType.getLocalPart().equals("integer")) { // nothing to do, integer can be parsed as double return currentType; } else if (currentType.getLocalPart().equals("boolean") && newType.getLocalPart().equals("integer")) { // change boolean to int return newType; } else if (currentType.getLocalPart().equals("integer") && newType.getLocalPart().equals("boolean")) { // nothing to do, boolean (0 & 1) can be parsed as integer. // FIXME : what about true / false? Now type detection system does not accept 0 or 1 as boolean values, while XML supports it. See method: QName getType(String value) // FIXME : also, if we support 0 & 1 as booleans, we need to consider possible double values as well. return currentType; } else if (!currentType.getLocalPart().equals("string")){ return new QName(SchemaConversionBase.SCHEMA_NS, "string"); } } return currentType; } private Schema getOrCreateSchema(StartElement parseElement) { return getOrCreateSchema(parseElement.getName().getNamespaceURI()); } private Schema getOrCreateSchema(String ns) { if (ns == null) throw new IllegalArgumentException("Schema NS cannot be null."); Schema s = schemaMap.get(ns); if (s == null) { s = new Schema(); s.setTargetNamespace(ns); schemaMap.put(ns, s); elementMap.put(s, new HashMap()); attributeMap.put(s, new HashMap()); } return s; } }