package org.simantics.xml.data; import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.util.ArrayDeque; import java.util.Date; import java.util.Deque; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Set; import java.util.regex.Matcher; import javax.xml.bind.JAXBContext; import javax.xml.bind.JAXBElement; import javax.xml.bind.JAXBException; import javax.xml.bind.Marshaller; import javax.xml.namespace.QName; import javax.xml.stream.XMLEventReader; import javax.xml.stream.XMLInputFactory; import javax.xml.stream.XMLStreamException; import javax.xml.stream.events.Attribute; import javax.xml.stream.events.Characters; import javax.xml.stream.events.EndElement; import javax.xml.stream.events.StartElement; import javax.xml.stream.events.XMLEvent; import org.simantics.xml.sax.SchemaConversionBase; import org.w3._2001.xmlschema.Annotated; import org.w3._2001.xmlschema.ComplexType; import org.w3._2001.xmlschema.Element; import org.w3._2001.xmlschema.ExplicitGroup; import org.w3._2001.xmlschema.Import; import org.w3._2001.xmlschema.LocalComplexType; import org.w3._2001.xmlschema.LocalElement; import org.w3._2001.xmlschema.OpenAttrs; import org.w3._2001.xmlschema.Schema; import org.w3._2001.xmlschema.TopLevelElement; /** * This class generates XML-file parsers based on bunch of XML data files. It is recommended to use schema based parser (org.simantics.xml.sax.SchemaConverter) if possible. * Parser generated by this class is not reliable... * * @author luukkainen * */ public class XmlDataConverter { File outputPlugin; File conversionFile; List inputFiles; String pluginName; private String[] header; public XmlDataConverter(List inputFiles, File conversionFile, File outputPlugin) { if (inputFiles.size() == 0) throw new IllegalArgumentException("At least one input file must be given."); this.outputPlugin = outputPlugin; this.conversionFile = conversionFile; this.inputFiles = inputFiles; pluginName = outputPlugin.getName(); } public void convert() throws IOException, XMLStreamException, JAXBException { init(); doConvert(); Map fileMap = new HashMap<>(); JAXBContext jc = JAXBContext.newInstance("org.w3._2001.xmlschema"); Marshaller m = jc.createMarshaller(); m.setProperty("jaxb.formatted.output", true); Set filenames = new HashSet<>(); for (Schema s : schemaMap.values()) { String name = s.getTargetNamespace(); // Special case for XAML if (name.startsWith("clr-namespace:")) { name = name.substring("clr-namespace:".length()); int i = name.indexOf(";assembly"); if (i > 0) name = name.substring(0, i); } name = name.replaceAll("\\.", "_"); name = name.replaceAll("/", "_"); name = name.replaceAll(":", "_"); name = name.replaceAll(";", "_"); if (filenames.contains(name)) { int i = 2; while (filenames.contains(name+i)) { i++; } name = name+i; } filenames.add(name); File file = new File(outputPlugin.getAbsolutePath() + File.separator + name +".xsd"); fileMap.put(s, file); } for (Schema s : schemaMap.values()) { for (OpenAttrs openAttrs : s.getIncludeOrImportOrRedefine()) { if (openAttrs instanceof Import) { Import import1 = (Import)openAttrs; Schema dep = schemaMap.get(import1.getNamespace()); import1.setSchemaLocation(fileMap.get(dep).getName()); } } } for (Schema s : schemaMap.values()) { File file = fileMap.get(s); m.marshal(s, file); } Schema rootSchema = schemaMap.values().iterator().next(); DataSchemaConverter schemaConverter = new DataSchemaConverter(rootSchema,fileMap.get(rootSchema),conversionFile,outputPlugin); schemaConverter.setFileMap(fileMap); schemaConverter.setSchemaMap(schemaMap); schemaConverter.convert(); header = null; schemaMap = null; elementMap = null; } protected void init() throws IOException { header = new String[4]; header[0] = "Generated with org.simantics.xml.sax XML data file converter"; header[1] = ""; header[2] = "File " + inputFiles.get(0).getAbsolutePath().replaceAll(Matcher.quoteReplacement("\\"), "/") + " , total file count: " + (inputFiles.size()) + ""; header[3] = "Date " + new Date().toString(); schemaMap = new HashMap<>(); elementMap = new HashMap<>(); attributeMap = new HashMap<>(); elementNsMap = new HashMap<>(); } Map schemaMap = new LinkedHashMap<>(); Map> elementMap = new HashMap<>(); Map elementNsMap = new HashMap<>(); Map> attributeMap = new HashMap<>(); protected void doConvert() throws IOException, XMLStreamException, JAXBException { XMLInputFactory input = XMLInputFactory.newInstance(); for (File inputFile : inputFiles) { XMLEventReader reader = input.createXMLEventReader(new FileInputStream(inputFile)); convertFile(reader); } } private void convertFile(XMLEventReader reader) throws XMLStreamException { Deque elementStack = new ArrayDeque<>(); while (reader.hasNext()) { XMLEvent event = reader.nextEvent(); if (event.isStartElement()) { StartElement parseElement = event.asStartElement(); // System.out.println("Start " + parseElement.getName()); Element schemaElement = null; String currentNS = parseElement.getName().getNamespaceURI(); Schema s = schemaMap.get(currentNS); String elementName = parseElement.getName().getLocalPart(); if (s == null) { s = getOrCreateSchema(parseElement); } else { schemaElement = elementMap.get(s).get(elementName); } Element parentElement = elementStack.peek(); boolean newElement = false; boolean sameNameSpace = true; if (parentElement != null) { String parentNs = elementNsMap.get(parentElement); sameNameSpace =currentNS.equals(parentNs); if (!sameNameSpace) { Schema ps = getOrCreateSchema(parentNs); addSchemaDependency(ps, s); } } if (schemaElement == null) { if (elementStack.isEmpty()) { schemaElement = new TopLevelElement(); s.getSimpleTypeOrComplexTypeOrGroup().add(schemaElement); } else { schemaElement = new TopLevelElement(); s.getSimpleTypeOrComplexTypeOrGroup().add(schemaElement); } schemaElement.setName(elementName); elementNsMap.put(schemaElement, currentNS); elementMap.get(s).put(elementName, schemaElement); newElement = true; } if (parentElement != null) { ComplexType complexType = parentElement.getComplexType(); ExplicitGroup choice = complexType.getChoice(); if (choice == null) { choice = new ExplicitGroup(); complexType.setChoice(choice); choice.setMaxOccurs("unbounded"); } LocalElement localElement = new LocalElement(); localElement.setRef(new QName(parseElement.getName().getNamespaceURI(), elementName)); addElement(choice, new QName(SchemaConversionBase.SCHEMA_NS,"element"), localElement); } elementStack.push(schemaElement); Iterator attributeIterator = parseElement.getAttributes(); // while (attributeIterator.hasNext()) { // Attribute attribute = attributeIterator.next(); // System.out.println("Attribute " + attribute.getName() + " " + attribute.getValue()); // } if (newElement) { LocalComplexType complexType = new LocalComplexType(); schemaElement.setComplexType(complexType); attributeIterator = parseElement.getAttributes(); while (attributeIterator.hasNext()) { Attribute attribute = attributeIterator.next(); if ("http://www.w3.org/XML/1998/namespace".equals(attribute.getName().getNamespaceURI())) continue; addAttribute(attribute, complexType, currentNS); } } else { LocalComplexType complexType = schemaElement.getComplexType(); attributeIterator = parseElement.getAttributes(); Map currentAttributes = new HashMap<>(); Iterator currentAttributeIterator = complexType.getAttributeOrAttributeGroup().iterator(); while (currentAttributeIterator.hasNext()) { Annotated annotated = currentAttributeIterator.next(); if (annotated instanceof org.w3._2001.xmlschema.Attribute) { org.w3._2001.xmlschema.Attribute schemaAttribute = (org.w3._2001.xmlschema.Attribute)annotated; String n = schemaAttribute.getName(); if (n != null) currentAttributes.put(n, schemaAttribute); } } while (attributeIterator.hasNext()) { Attribute attribute = attributeIterator.next(); if ("http://www.w3.org/XML/1998/namespace".equals(attribute.getName().getNamespaceURI())) continue; org.w3._2001.xmlschema.Attribute schemaAttribute = currentAttributes.get(attribute.getName().getLocalPart()); if (schemaAttribute == null) { addAttribute(attribute, complexType, currentNS); } else { QName newType = getType(attribute.getValue()); updateAttributeType(schemaAttribute, newType); } } } } else if (event.isEndElement()) { EndElement element = event.asEndElement(); // System.out.println("End " + element.getName()); elementStack.pop(); } else if (event.isAttribute()) { } else if (event.isStartDocument()) { } else if (event.isEndDocument()) { } else if (event.isEntityReference()) { } else if (event.isCharacters()) { Characters characters = event.asCharacters(); // if (!characters.isWhiteSpace()) // System.out.println(characters.getData()); } else if (event.isNamespace()) { } } } private void updateAttributeType(org.w3._2001.xmlschema.Attribute schemaAttribute, QName newType) { QName currentType = schemaAttribute.getType(); if (!newType.getLocalPart().equals(currentType.getLocalPart())) { if (currentType.getLocalPart().equals("integer") && newType.getLocalPart().equals("double")) { // change integer to double schemaAttribute.setType(newType); } else if (currentType.getLocalPart().equals("double") && newType.getLocalPart().equals("integer")) { // nothing to do, integer can be parsed as double } else if (!currentType.getLocalPart().equals("string")){ schemaAttribute.setType(new QName(SchemaConversionBase.SCHEMA_NS, "string")); } } } private void addElement(ExplicitGroup choice, QName type, LocalElement localElement) { for (Object o : choice.getParticle()) { JAXBElement el = (JAXBElement)o; if (el.getName().equals(type)) { QName ref = el.getValue().getRef(); QName ref2 = localElement.getRef(); if (ref != null) { if (ref.equals(ref2)) return; } else if (el.getValue().getType().equals(localElement.getType())) return; } } choice.getParticle().add(new JAXBElement(type, LocalElement.class, null, localElement)); } private void addSchemaDependency(Schema parentSchema, Schema schema) { for (OpenAttrs openAttrs : parentSchema.getIncludeOrImportOrRedefine()) { if (openAttrs instanceof Import) { Import import1 = (Import)openAttrs; if (import1.getNamespace().equals(schema.getTargetNamespace())) return; } } Import import1 = new Import(); import1.setNamespace(schema.getTargetNamespace()); parentSchema.getIncludeOrImportOrRedefine().add(import1); } private void addAttribute(Attribute attribute, ComplexType complexType, String currentNS) { if (attribute.getName().getNamespaceURI().length() == 0 || attribute.getName().getNamespaceURI().equals(currentNS)) { org.w3._2001.xmlschema.Attribute schemaAttribute = new org.w3._2001.xmlschema.Attribute(); schemaAttribute.setName(attribute.getName().getLocalPart()); schemaAttribute.setType(getType(attribute.getValue())); addAttribute(complexType, schemaAttribute); } else { { Schema schema = getOrCreateSchema(currentNS); Schema attrSchema = getOrCreateSchema(attribute.getName().getNamespaceURI()); org.w3._2001.xmlschema.Attribute schemaAttribute = attributeMap.get(attrSchema).get(attribute.getName().getLocalPart()); if (schemaAttribute == null) { schemaAttribute = new org.w3._2001.xmlschema.TopLevelAttribute(); schemaAttribute.setName(attribute.getName().getLocalPart()); schemaAttribute.setType(getType(attribute.getValue())); attrSchema.getSimpleTypeOrComplexTypeOrGroup().add(schemaAttribute); attributeMap.get(attrSchema).put(attribute.getName().getLocalPart(), schemaAttribute); } addSchemaDependency(schema, attrSchema); } { org.w3._2001.xmlschema.Attribute schemaAttribute = new org.w3._2001.xmlschema.Attribute(); schemaAttribute.setRef(new QName(attribute.getName().getNamespaceURI(),attribute.getName().getLocalPart())); addAttribute(complexType, schemaAttribute); } } } private void addAttribute(ComplexType complexType, org.w3._2001.xmlschema.Attribute schemaAttribute) { if (schemaAttribute.getName() != null) { for (Annotated annotated : complexType.getAttributeOrAttributeGroup()) { if (annotated instanceof org.w3._2001.xmlschema.Attribute) { org.w3._2001.xmlschema.Attribute attr = (org.w3._2001.xmlschema.Attribute)annotated; if (schemaAttribute.getName().equals(attr.getName())) { updateAttributeType(attr, schemaAttribute.getType()); } } } } else { for (Annotated annotated : complexType.getAttributeOrAttributeGroup()) { if (annotated instanceof org.w3._2001.xmlschema.Attribute) { org.w3._2001.xmlschema.Attribute attr = (org.w3._2001.xmlschema.Attribute)annotated; if (attr.getName() != null) continue; if (schemaAttribute.getRef().equals(attr.getRef())) { return; } } } } complexType.getAttributeOrAttributeGroup().add(schemaAttribute); } private QName getType(String value) { try { Integer.parseInt(value); return new QName(SchemaConversionBase.SCHEMA_NS, "integer"); } catch (NumberFormatException e) { } try { Double.parseDouble(value); return new QName(SchemaConversionBase.SCHEMA_NS, "double"); } catch (NumberFormatException e) { } if ("True".equals(value) || "False".equals(value)) return new QName(SchemaConversionBase.SCHEMA_NS, "boolean"); return new QName(SchemaConversionBase.SCHEMA_NS, "string"); } private Schema getOrCreateSchema(StartElement parseElement) { return getOrCreateSchema(parseElement.getName().getNamespaceURI()); } private Schema getOrCreateSchema(String ns) { if (ns == null) throw new IllegalArgumentException("Schema NS cannot be null."); Schema s = schemaMap.get(ns); if (s == null) { s = new Schema(); s.setTargetNamespace(ns); schemaMap.put(ns, s); elementMap.put(s, new HashMap()); attributeMap.put(s, new HashMap()); } return s; } }