]> gerrit.simantics Code Review - simantics/interop.git/blobdiff - org.simantics.xml.sax/src/org/simantics/xml/data/XmlDataConverter.java
XML data based schema and ontology generation
[simantics/interop.git] / org.simantics.xml.sax / src / org / simantics / xml / data / XmlDataConverter.java
diff --git a/org.simantics.xml.sax/src/org/simantics/xml/data/XmlDataConverter.java b/org.simantics.xml.sax/src/org/simantics/xml/data/XmlDataConverter.java
new file mode 100644 (file)
index 0000000..b4e7c21
--- /dev/null
@@ -0,0 +1,442 @@
+package org.simantics.xml.data;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.util.ArrayDeque;
+import java.util.Date;
+import java.util.Deque;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.regex.Matcher;
+
+import javax.xml.bind.JAXBContext;
+import javax.xml.bind.JAXBElement;
+import javax.xml.bind.JAXBException;
+import javax.xml.bind.Marshaller;
+import javax.xml.namespace.QName;
+import javax.xml.stream.XMLEventReader;
+import javax.xml.stream.XMLInputFactory;
+import javax.xml.stream.XMLStreamException;
+import javax.xml.stream.events.Attribute;
+import javax.xml.stream.events.Characters;
+import javax.xml.stream.events.EndElement;
+import javax.xml.stream.events.StartElement;
+import javax.xml.stream.events.XMLEvent;
+
+import org.simantics.xml.sax.SchemaConversionBase;
+import org.w3._2001.xmlschema.Annotated;
+import org.w3._2001.xmlschema.ComplexType;
+import org.w3._2001.xmlschema.Element;
+import org.w3._2001.xmlschema.ExplicitGroup;
+import org.w3._2001.xmlschema.Import;
+import org.w3._2001.xmlschema.LocalComplexType;
+import org.w3._2001.xmlschema.LocalElement;
+import org.w3._2001.xmlschema.OpenAttrs;
+import org.w3._2001.xmlschema.Schema;
+import org.w3._2001.xmlschema.TopLevelElement;
+
+/**
+ * This class generates XML file parsers based on a set of XML data files. It is recommended to use the schema-based parser generator (org.simantics.xml.sax.SchemaConverter) whenever possible.
+ * The parser generated by this class is not reliable, because its schema is only inferred from the given sample data.
+ * 
+ * @author luukkainen
+ *
+ */
+public class XmlDataConverter {
+       
+       // Output location: the generated .xsd files are written directly under this path,
+       // and it is also handed to DataSchemaConverter.
+       File outputPlugin;
+       // Passed through unchanged to DataSchemaConverter; not interpreted by this class.
+       File conversionFile;
+       // XML data files the schemas are inferred from; the constructor rejects an empty list.
+       List<File> inputFiles;
+       
+       // Last path segment of outputPlugin (outputPlugin.getName()).
+       String pluginName;
+       
+       // Four descriptive header lines built by init(); reset to null when convert() finishes.
+       private String[] header;
+       
+       /**
+        * Creates a converter for the given XML data files.
+        *
+        * @param inputFiles     XML data files to analyse; must contain at least one file
+        * @param conversionFile file that is forwarded to the schema converter
+        * @param outputPlugin   location the generated files are written to; its name is used as the plugin name
+        * @throws IllegalArgumentException if {@code inputFiles} is empty
+        */
+       public XmlDataConverter(List<File> inputFiles, File conversionFile, File outputPlugin) {
+               if (inputFiles.isEmpty()) {
+                       throw new IllegalArgumentException("At least one input file must be given.");
+               }
+               this.inputFiles = inputFiles;
+               this.conversionFile = conversionFile;
+               this.outputPlugin = outputPlugin;
+               this.pluginName = outputPlugin.getName();
+       }
+       
+       public void convert() throws IOException, XMLStreamException, JAXBException {
+               
+               init();
+               doConvert();
+               
+               Map<Schema, File> fileMap = new HashMap<>();
+               JAXBContext jc = JAXBContext.newInstance("org.w3._2001.xmlschema");
+               Marshaller m = jc.createMarshaller();
+               m.setProperty("jaxb.formatted.output", true);
+               Set<String> filenames = new HashSet<>(); 
+               for (Schema s : schemaMap.values()) {
+                       String name = s.getTargetNamespace();
+                       // Special case for XAML
+                       if (name.startsWith("clr-namespace:")) {
+                               name = name.substring("clr-namespace:".length());
+                               int i = name.indexOf(";assembly");
+                               if (i > 0)
+                                       name = name.substring(0, i);
+                       }
+                       name = name.replaceAll("\\.", "_");
+                       name = name.replaceAll("/", "_");
+                       name = name.replaceAll(":", "_");
+                       name = name.replaceAll(";", "_");
+                       if (filenames.contains(name)) {
+                               int i = 2;
+                               while (filenames.contains(name+i)) {
+                                       i++;
+                               }
+                               name = name+i;
+                       }
+                       filenames.add(name);
+                       File file = new File(outputPlugin.getAbsolutePath() + File.separator + name +".xsd");
+                       fileMap.put(s, file);
+               }
+               for (Schema s : schemaMap.values()) {
+                       for (OpenAttrs openAttrs : s.getIncludeOrImportOrRedefine()) {
+                               if (openAttrs instanceof Import) {
+                                       Import import1 = (Import)openAttrs;
+                                       Schema dep = schemaMap.get(import1.getNamespace());
+                                       import1.setSchemaLocation(fileMap.get(dep).getName());
+                               }
+                       }
+               }
+               for (Schema s : schemaMap.values()) {
+                       File file = fileMap.get(s);
+                       m.marshal(s, file);
+               }
+               Schema rootSchema = schemaMap.values().iterator().next();
+               DataSchemaConverter schemaConverter = new DataSchemaConverter(rootSchema,fileMap.get(rootSchema),conversionFile,outputPlugin);
+               schemaConverter.setFileMap(fileMap);
+               schemaConverter.setSchemaMap(schemaMap);
+               schemaConverter.convert();
+               
+               
+               header = null;
+               schemaMap = null;
+               elementMap = null;
+       }
+       
+       /**
+        * Resets the working state before a conversion run: builds the descriptive header
+        * lines and creates empty schema, element and attribute maps.
+        *
+        * @throws IOException declared for subclasses; not thrown by this implementation
+        */
+       protected void init()  throws IOException {
+               
+               String firstFile = inputFiles.get(0).getAbsolutePath().replace('\\', '/');
+               
+               header = new String[4];
+               header[0] = "Generated with org.simantics.xml.sax XML data file converter";
+               header[1] = "";
+               header[2] = "File " + firstFile + " , total file count: " + (inputFiles.size()) + "";
+               header[3] = "Date " + new Date().toString();
+               
+               // Insertion order matters: convert() takes the first schema in iteration order as the
+               // root schema, so a plain HashMap would make that choice depend on hash order.
+               schemaMap = new LinkedHashMap<>();
+               elementMap = new HashMap<>();
+               attributeMap = new HashMap<>();
+               elementNsMap = new HashMap<>();
+       }
+       
+       // Working state of a conversion run. All four maps are re-created by init() at the
+       // start of every convert() call.
+       
+       // Target namespace URI -> schema generated for that namespace.
+       Map<String, Schema> schemaMap = new LinkedHashMap<>();
+       // Per schema: element local name -> element declaration registered in that schema.
+       Map<Schema,Map<String,Element>> elementMap = new HashMap<>();
+       // Element declaration -> namespace URI of the XML element it was created from.
+       Map<Element,String> elementNsMap = new HashMap<>();
+       // Per schema: attribute local name -> top-level attribute declaration in that schema.
+       Map<Schema,Map<String,org.w3._2001.xmlschema.Attribute>> attributeMap = new HashMap<>();
+       
+       /**
+        * Reads every input file as a stream of XML events and accumulates the inferred
+        * schema model (elements, their child references and attributes) into the
+        * schema/element/attribute maps of this converter.
+        *
+        * @throws IOException        if an input file cannot be opened or closed
+        * @throws XMLStreamException if an input file is not well-formed XML
+        * @throws JAXBException      declared for subclasses; not thrown by this implementation
+        */
+       protected void doConvert() throws IOException, XMLStreamException, JAXBException {
+               XMLInputFactory input = XMLInputFactory.newInstance();
+               // Declarations of the currently open XML elements, innermost on top.
+               Deque<Element> elementStack = new ArrayDeque<>();
+               
+               for (File inputFile : inputFiles) {
+                       // XMLEventReader.close() does not close the underlying stream, so the stream is closed here.
+                       try (FileInputStream stream = new FileInputStream(inputFile)) {
+                               XMLEventReader reader = input.createXMLEventReader(stream);
+                               try {
+                                       while (reader.hasNext()) {
+                                               XMLEvent event = reader.nextEvent();
+                                               if (event.isStartElement()) {
+                                                       handleStartElement(event.asStartElement(), elementStack);
+                                               } else if (event.isEndElement()) {
+                                                       elementStack.pop();
+                                               }
+                                               // Character data, document start/end and the remaining event types are ignored.
+                                       }
+                               } finally {
+                                       reader.close();
+                               }
+                       }
+               }
+               
+       }
+       
+       /**
+        * Processes one start tag: finds or creates the element declaration, records the
+        * schema dependency and child reference towards the enclosing element, pushes the
+        * declaration on the stack and merges the tag's attributes into the declaration.
+        */
+       private void handleStartElement(StartElement parseElement, Deque<Element> elementStack) {
+               String currentNS = parseElement.getName().getNamespaceURI();
+               String elementName = parseElement.getName().getLocalPart();
+               Schema s = getOrCreateSchema(currentNS);
+               Element schemaElement = elementMap.get(s).get(elementName);
+               Element parentElement = elementStack.peek();
+               
+               if (parentElement != null) {
+                       String parentNs = elementNsMap.get(parentElement);
+                       if (!currentNS.equals(parentNs)) {
+                               // The parent's schema references an element of another namespace and must import it.
+                               addSchemaDependency(getOrCreateSchema(parentNs), s);
+                       }
+               }
+               
+               boolean newElement = schemaElement == null;
+               if (newElement) {
+                       // Every element is declared at the top level of its schema; parents refer to it by name.
+                       schemaElement = new TopLevelElement();
+                       schemaElement.setName(elementName);
+                       s.getSimpleTypeOrComplexTypeOrGroup().add(schemaElement);
+                       elementNsMap.put(schemaElement, currentNS);
+                       elementMap.get(s).put(elementName, schemaElement);
+               }
+               if (parentElement != null) {
+                       // Done for known elements as well: an element first seen under one parent may
+                       // later appear under another one, which then needs its own reference.
+                       addChildReference(parentElement, new QName(currentNS, elementName));
+               }
+               elementStack.push(schemaElement);
+               
+               if (newElement) {
+                       LocalComplexType complexType = new LocalComplexType();
+                       schemaElement.setComplexType(complexType);
+                       Iterator<Attribute> attributeIterator = parseElement.getAttributes();
+                       while (attributeIterator.hasNext()) {
+                               addAttribute(attributeIterator.next(), complexType, currentNS);
+                       }
+               } else {
+                       mergeAttributes(parseElement, schemaElement.getComplexType(), currentNS);
+               }
+       }
+       
+       /**
+        * Adds a reference to the element {@code childName} into the unbounded choice group
+        * of {@code parentElement}, creating the group on first use. Existing references are kept as is.
+        */
+       private void addChildReference(Element parentElement, QName childName) {
+               ComplexType complexType = parentElement.getComplexType();
+               ExplicitGroup choice = complexType.getChoice();
+               if (choice == null) {
+                       choice = new ExplicitGroup();
+                       complexType.setChoice(choice);
+                       choice.setMaxOccurs("unbounded");
+               }
+               LocalElement localElement = new LocalElement();
+               localElement.setRef(childName);
+               addElement(choice, new QName(SchemaConversionBase.SCHEMA_NS,"element"), localElement);
+       }
+       
+       /**
+        * Merges the attributes of a start tag into the complex type of an already known
+        * element: unknown attributes are added, known local attributes get their type widened.
+        */
+       private void mergeAttributes(StartElement parseElement, ComplexType complexType, String currentNS) {
+               // Locally declared (named) attributes of the element; attribute references have no name.
+               Map<String,org.w3._2001.xmlschema.Attribute> currentAttributes = new HashMap<>();
+               for (Annotated annotated : complexType.getAttributeOrAttributeGroup()) {
+                       if (annotated instanceof org.w3._2001.xmlschema.Attribute) {
+                               org.w3._2001.xmlschema.Attribute schemaAttribute = (org.w3._2001.xmlschema.Attribute)annotated;
+                               String n = schemaAttribute.getName();
+                               if (n != null)
+                                       currentAttributes.put(n, schemaAttribute);
+                       }
+               }
+               Iterator<Attribute> attributeIterator = parseElement.getAttributes();
+               while (attributeIterator.hasNext()) {
+                       Attribute attribute = attributeIterator.next();
+                       String attributeNs = attribute.getName().getNamespaceURI();
+                       // Only attributes of the element's own namespace may match a local declaration;
+                       // a foreign attribute with the same local name is a different attribute.
+                       boolean local = attributeNs.length() == 0 || attributeNs.equals(currentNS);
+                       org.w3._2001.xmlschema.Attribute schemaAttribute = local ? currentAttributes.get(attribute.getName().getLocalPart()) : null;
+                       if (schemaAttribute == null) {
+                               addAttribute(attribute, complexType, currentNS);
+                       } else {
+                               updateAttributeType(schemaAttribute, getType(attribute.getValue()));
+                       }
+               }
+       }
+       
+       /**
+        * Widens the declared type of {@code schemaAttribute} so that it also accepts a value
+        * of type {@code newType}: integer widens to double, double absorbs integer, and
+        * every other mismatch falls back to string.
+        */
+       private void updateAttributeType(org.w3._2001.xmlschema.Attribute schemaAttribute, QName newType) {
+               QName currentType = schemaAttribute.getType();
+               String candidate = newType.getLocalPart();
+               String current = currentType.getLocalPart();
+               if (candidate.equals(current)) {
+                       return;
+               }
+               
+               boolean integerToDouble = current.equals("integer") && candidate.equals("double");
+               boolean doubleSeesInteger = current.equals("double") && candidate.equals("integer");
+               if (integerToDouble) {
+                       // change integer to double
+                       schemaAttribute.setType(newType);
+                       return;
+               }
+               if (doubleSeesInteger) {
+                       // nothing to do, integer can be parsed as double
+                       return;
+               }
+               if (!current.equals("string")) {
+                       // incompatible types: only string can represent both values
+                       schemaAttribute.setType(new QName(SchemaConversionBase.SCHEMA_NS, "string"));
+               }
+       }
+       
+       private void addElement(ExplicitGroup choice, QName type, LocalElement localElement) {
+               for (Object o  : choice.getParticle()) {
+                       JAXBElement<LocalElement> el = (JAXBElement<LocalElement>)o;
+                       if (el.getName().equals(type)) {
+                               QName ref = el.getValue().getRef();
+                               QName ref2 = localElement.getRef();
+                               if (ref != null) {
+                                       if (ref.equals(ref2))
+                                               return;
+                               } else if (el.getValue().getType().equals(localElement.getType()))
+                                               return; 
+                       }
+                               
+               }
+               choice.getParticle().add(new JAXBElement<LocalElement>(type, LocalElement.class, null, localElement));
+       }
+       
+       /**
+        * Makes sure {@code parentSchema} imports the target namespace of {@code schema};
+        * an import is added only if none exists for that namespace yet.
+        */
+       private void addSchemaDependency(Schema parentSchema, Schema schema) {
+               for (OpenAttrs entry : parentSchema.getIncludeOrImportOrRedefine()) {
+                       if (!(entry instanceof Import)) {
+                               continue;
+                       }
+                       Import existing = (Import)entry;
+                       if (existing.getNamespace().equals(schema.getTargetNamespace())) {
+                               // already imported
+                               return;
+                       }
+               }
+               Import added = new Import();
+               added.setNamespace(schema.getTargetNamespace());
+               parentSchema.getIncludeOrImportOrRedefine().add(added);
+       }
+       
+       /**
+        * Declares a parsed XML attribute on {@code complexType}.
+        * <p>
+        * An attribute without a namespace, or in the element's own namespace
+        * {@code currentNS}, is declared locally with a type guessed from its value. An
+        * attribute of another namespace is declared once as a top-level attribute of that
+        * namespace's schema, imported into the element's schema, and referenced from
+        * {@code complexType}.
+        *
+        * @param attribute   attribute read from the XML data
+        * @param complexType complex type of the element that carries the attribute
+        * @param currentNS   namespace URI of the element that carries the attribute
+        */
+       private void addAttribute(Attribute attribute, ComplexType complexType, String currentNS) {
+               String attributeNs = attribute.getName().getNamespaceURI();
+               String attributeName = attribute.getName().getLocalPart();
+               QName valueType = getType(attribute.getValue());
+               
+               if (attributeNs.length() == 0 || attributeNs.equals(currentNS)) {
+                       org.w3._2001.xmlschema.Attribute schemaAttribute = new org.w3._2001.xmlschema.Attribute();
+                       schemaAttribute.setName(attributeName);
+                       schemaAttribute.setType(valueType);
+                       addAttribute(complexType, schemaAttribute);
+                       return;
+               }
+               
+               Schema schema = getOrCreateSchema(currentNS);
+               Schema attrSchema = getOrCreateSchema(attributeNs);
+               
+               Map<String,org.w3._2001.xmlschema.Attribute> topLevelAttributes = attributeMap.get(attrSchema);
+               org.w3._2001.xmlschema.Attribute topLevelAttribute = topLevelAttributes.get(attributeName);
+               if (topLevelAttribute == null) {
+                       topLevelAttribute = new org.w3._2001.xmlschema.TopLevelAttribute();
+                       topLevelAttribute.setName(attributeName);
+                       topLevelAttribute.setType(valueType);
+                       attrSchema.getSimpleTypeOrComplexTypeOrGroup().add(topLevelAttribute);
+                       topLevelAttributes.put(attributeName, topLevelAttribute);
+               } else {
+                       // The shared declaration must accept every value seen so far, not only the first one.
+                       updateAttributeType(topLevelAttribute, valueType);
+               }
+               addSchemaDependency(schema, attrSchema);
+               
+               // The element itself only refers to the top-level declaration.
+               org.w3._2001.xmlschema.Attribute reference = new org.w3._2001.xmlschema.Attribute();
+               reference.setRef(new QName(attributeNs, attributeName));
+               addAttribute(complexType, reference);
+       }
+       
+       /**
+        * Adds {@code schemaAttribute} to {@code complexType} without creating duplicates.
+        * <p>
+        * For a named attribute, an existing declaration with the same name only has its
+        * type widened. For an attribute reference (no name), an existing reference to the
+        * same target makes the call a no-op.
+        */
+       private void addAttribute(ComplexType complexType, org.w3._2001.xmlschema.Attribute schemaAttribute) {
+               boolean named = schemaAttribute.getName() != null;
+               for (Annotated annotated : complexType.getAttributeOrAttributeGroup()) {
+                       if (!(annotated instanceof org.w3._2001.xmlschema.Attribute)) {
+                               continue;
+                       }
+                       org.w3._2001.xmlschema.Attribute attr = (org.w3._2001.xmlschema.Attribute)annotated;
+                       if (named) {
+                               if (schemaAttribute.getName().equals(attr.getName())) {
+                                       updateAttributeType(attr, schemaAttribute.getType());
+                                       // Already declared: stop here instead of appending a second declaration.
+                                       return;
+                               }
+                       } else {
+                               if (attr.getName() != null)
+                                       continue;
+                               if (schemaAttribute.getRef().equals(attr.getRef())) {
+                                       return;
+                               }
+                       }
+               }
+               complexType.getAttributeOrAttributeGroup().add(schemaAttribute);
+       }
+       
+       
+       /**
+        * Guesses the XML schema type of an attribute value: "integer" if it parses as an
+        * int, otherwise "double" if it parses as a double, otherwise "boolean" for the
+        * literals "True" / "False", and "string" for everything else.
+        */
+       private QName getType(String value) {
+               String typeName;
+               try {
+                       Integer.parseInt(value);
+                       typeName = "integer";
+               } catch (NumberFormatException notAnInteger) {
+                       try {
+                               Double.parseDouble(value);
+                               typeName = "double";
+                       } catch (NumberFormatException notADouble) {
+                               boolean booleanLiteral = "True".equals(value) || "False".equals(value);
+                               typeName = booleanLiteral ? "boolean" : "string";
+                       }
+               }
+               return new QName(SchemaConversionBase.SCHEMA_NS, typeName);
+       }
+       
+       /**
+        * Returns the schema registered for the namespace of the given start tag, creating it if needed.
+        */
+       private Schema getOrCreateSchema(StartElement parseElement) {
+               String namespaceUri = parseElement.getName().getNamespaceURI();
+               return getOrCreateSchema(namespaceUri);
+       }
+       
+       /**
+        * Returns the schema registered for namespace {@code ns}. On first use of a namespace
+        * a new schema with that target namespace is created and registered together with
+        * empty element and attribute lookup maps.
+        */
+       private Schema getOrCreateSchema(String ns) {
+               Schema existing = schemaMap.get(ns);
+               if (existing != null) {
+                       return existing;
+               }
+               Schema created = new Schema();
+               created.setTargetNamespace(ns);
+               schemaMap.put(ns, created);
+               elementMap.put(created, new HashMap<String,Element>());
+               attributeMap.put(created, new HashMap<String, org.w3._2001.xmlschema.Attribute>());
+               return created;
+       }
+
+}