]> gerrit.simantics Code Review - simantics/interop.git/blob - org.simantics.xml.sax/src/org/simantics/xml/data/XmlDataConverter.java
XML data based schema and ontology generation
[simantics/interop.git] / org.simantics.xml.sax / src / org / simantics / xml / data / XmlDataConverter.java
1 package org.simantics.xml.data;
2
3 import java.io.File;
4 import java.io.FileInputStream;
5 import java.io.IOException;
6 import java.util.ArrayDeque;
7 import java.util.Date;
8 import java.util.Deque;
9 import java.util.HashMap;
10 import java.util.HashSet;
11 import java.util.Iterator;
12 import java.util.LinkedHashMap;
13 import java.util.List;
14 import java.util.Map;
15 import java.util.Set;
16 import java.util.regex.Matcher;
17
18 import javax.xml.bind.JAXBContext;
19 import javax.xml.bind.JAXBElement;
20 import javax.xml.bind.JAXBException;
21 import javax.xml.bind.Marshaller;
22 import javax.xml.namespace.QName;
23 import javax.xml.stream.XMLEventReader;
24 import javax.xml.stream.XMLInputFactory;
25 import javax.xml.stream.XMLStreamException;
26 import javax.xml.stream.events.Attribute;
27 import javax.xml.stream.events.Characters;
28 import javax.xml.stream.events.EndElement;
29 import javax.xml.stream.events.StartElement;
30 import javax.xml.stream.events.XMLEvent;
31
32 import org.simantics.xml.sax.SchemaConversionBase;
33 import org.w3._2001.xmlschema.Annotated;
34 import org.w3._2001.xmlschema.ComplexType;
35 import org.w3._2001.xmlschema.Element;
36 import org.w3._2001.xmlschema.ExplicitGroup;
37 import org.w3._2001.xmlschema.Import;
38 import org.w3._2001.xmlschema.LocalComplexType;
39 import org.w3._2001.xmlschema.LocalElement;
40 import org.w3._2001.xmlschema.OpenAttrs;
41 import org.w3._2001.xmlschema.Schema;
42 import org.w3._2001.xmlschema.TopLevelElement;
43
44 /**
45  * This class generates XML-file parsers based on bunch of XML data files. It is recommended to use schema based parser (org.simantics.xml.sax.SchemaConverter) if possible.
46  * Parser generated by this class is not reliable...
47  * 
48  * @author luukkainen
49  *
50  */
51 public class XmlDataConverter {
52         
53         File outputPlugin;
54         File conversionFile;
55         List<File> inputFiles;
56         
57         String pluginName;
58         
59         private String[] header;
60         
61         public XmlDataConverter(List<File> inputFiles, File conversionFile, File outputPlugin) {
62                 if (inputFiles.size() == 0)
63                         throw new IllegalArgumentException("At least one input file must be given.");
64                 this.outputPlugin = outputPlugin;
65                 this.conversionFile = conversionFile;
66                 this.inputFiles = inputFiles;
67                 
68                 pluginName = outputPlugin.getName();
69                 
70         }
71         
72         public void convert() throws IOException, XMLStreamException, JAXBException {
73                 
74                 init();
75                 doConvert();
76                 
77                 Map<Schema, File> fileMap = new HashMap<>();
78                 JAXBContext jc = JAXBContext.newInstance("org.w3._2001.xmlschema");
79                 Marshaller m = jc.createMarshaller();
80                 m.setProperty("jaxb.formatted.output", true);
81                 Set<String> filenames = new HashSet<>(); 
82                 for (Schema s : schemaMap.values()) {
83                         String name = s.getTargetNamespace();
84                         // Special case for XAML
85                         if (name.startsWith("clr-namespace:")) {
86                                 name = name.substring("clr-namespace:".length());
87                                 int i = name.indexOf(";assembly");
88                                 if (i > 0)
89                                         name = name.substring(0, i);
90                         }
91                         name = name.replaceAll("\\.", "_");
92                         name = name.replaceAll("/", "_");
93                         name = name.replaceAll(":", "_");
94                         name = name.replaceAll(";", "_");
95                         if (filenames.contains(name)) {
96                                 int i = 2;
97                                 while (filenames.contains(name+i)) {
98                                         i++;
99                                 }
100                                 name = name+i;
101                         }
102                         filenames.add(name);
103                         File file = new File(outputPlugin.getAbsolutePath() + File.separator + name +".xsd");
104                         fileMap.put(s, file);
105                 }
106                 for (Schema s : schemaMap.values()) {
107                         for (OpenAttrs openAttrs : s.getIncludeOrImportOrRedefine()) {
108                                 if (openAttrs instanceof Import) {
109                                         Import import1 = (Import)openAttrs;
110                                         Schema dep = schemaMap.get(import1.getNamespace());
111                                         import1.setSchemaLocation(fileMap.get(dep).getName());
112                                 }
113                         }
114                 }
115                 for (Schema s : schemaMap.values()) {
116                         File file = fileMap.get(s);
117                         m.marshal(s, file);
118                 }
119                 Schema rootSchema = schemaMap.values().iterator().next();
120                 DataSchemaConverter schemaConverter = new DataSchemaConverter(rootSchema,fileMap.get(rootSchema),conversionFile,outputPlugin);
121                 schemaConverter.setFileMap(fileMap);
122                 schemaConverter.setSchemaMap(schemaMap);
123                 schemaConverter.convert();
124                 
125                 
126                 header = null;
127                 schemaMap = null;
128                 elementMap = null;
129         }
130         
131         protected void init()  throws IOException {
132                 
133                 header = new String[4];
134                 header[0] = "Generated with org.simantics.xml.sax XML data file converter";
135                 header[1] = "";
136                 header[2] = "File " + inputFiles.get(0).getAbsolutePath().replaceAll(Matcher.quoteReplacement("\\"), "/") + " , total file count: " + (inputFiles.size()) + "";
137                 header[3] = "Date " + new Date().toString();
138                 
139                 schemaMap = new HashMap<>();
140                 elementMap = new HashMap<>();
141                 attributeMap = new HashMap<>();
142                 elementNsMap = new HashMap<>();
143         }
144         
145         Map<String, Schema> schemaMap = new LinkedHashMap<>();
146         Map<Schema,Map<String,Element>> elementMap = new HashMap<>();
147         Map<Element,String> elementNsMap = new HashMap<>();
148         Map<Schema,Map<String,org.w3._2001.xmlschema.Attribute>> attributeMap = new HashMap<>();
149         
150         protected void doConvert() throws IOException, XMLStreamException, JAXBException {
151                 XMLInputFactory input = XMLInputFactory.newInstance();
152                 Deque<Element> elementStack = new ArrayDeque<>();
153                 
154                 for (File inputFile : inputFiles) {
155                         XMLEventReader reader = input.createXMLEventReader(new FileInputStream(inputFile));
156                         while (reader.hasNext()) {
157                                 XMLEvent event = reader.nextEvent();
158                                 if (event.isStartElement()) {
159                                         StartElement parseElement = event.asStartElement();
160 //                                      System.out.println("Start " + parseElement.getName());
161                                         Element schemaElement = null;
162                                         String currentNS = parseElement.getName().getNamespaceURI();
163                                         Schema s = schemaMap.get(currentNS);
164                                         String elementName = parseElement.getName().getLocalPart();
165                                         if ("GroupComponent".equals(elementName))
166                                                 System.out.println();
167                                         if (s == null) {
168                                                 s = getOrCreateSchema(parseElement);
169                                         } else {
170                                                 schemaElement = elementMap.get(s).get(elementName);
171                                         }
172                                         Element parentElement = elementStack.peek();
173                                         
174                                         boolean newElement = false;
175                                         boolean sameNameSpace = true;
176                                         
177                                         if (parentElement != null) {
178                                                 //QName parentType = parentElement.getType();
179                                                 String parentNs = elementNsMap.get(parentElement);
180                                                 sameNameSpace =currentNS.equals(parentNs);
181                                                 if (!sameNameSpace) {
182                                                         Schema ps = getOrCreateSchema(parentNs);
183                                                         addSchemaDependency(ps, s);
184                                                 }
185                                                         
186                                         }
187                                         if (schemaElement == null) {
188                                                 LocalElement localElement = null;
189                                                 //QName type = null;
190                                                 if (elementStack.isEmpty()) {
191                                                         schemaElement = new TopLevelElement();
192                                                         s.getSimpleTypeOrComplexTypeOrGroup().add(schemaElement);
193                                                 } else {
194                                                         
195                                                         
196 //                                                      if (sameNameSpace) {
197 //                                                              localElement = new LocalElement();
198 //                                                              schemaElement = localElement;
199 //                                                              //type = new QName(elementName);
200 //                                                              
201 //                                                      } else {
202                                                                 schemaElement = new TopLevelElement();
203                                                                 s.getSimpleTypeOrComplexTypeOrGroup().add(schemaElement);
204                                                                 //type = new QName(SchemaConversionBase.SCHEMA_NS,"element");
205                                                                 localElement = new LocalElement();
206                                                                 localElement.setRef(new QName(parseElement.getName().getNamespaceURI(), elementName));
207                                                                         
208 //                                                      }
209                                                 }
210                                                 schemaElement.setName(elementName);
211                                                 elementNsMap.put(schemaElement, currentNS);
212 //                                              if (sameNameSpace) {
213 //                                                      schemaElement.setType(new QName(parseElement.getName().getNamespaceURI(),elementName));
214 //                                              } else {
215 //                                                      schemaElement.setType(new QName(parseElement.getName().getNamespaceURI(), elementName));
216 //                                              }
217                                                 if (!elementStack.isEmpty()) {
218                                                         ComplexType complexType = parentElement.getComplexType(); 
219                                                         ExplicitGroup choice = complexType.getChoice();
220                                                         if (choice == null) {
221                                                                 choice = new ExplicitGroup();
222                                                                 complexType.setChoice(choice);
223                                                                 choice.setMaxOccurs("unbounded");
224                                                         }
225                                                         addElement(choice, new QName(SchemaConversionBase.SCHEMA_NS,"element"), localElement);
226                                                 }
227                                                 
228                                                 
229                                                 elementMap.get(s).put(elementName, schemaElement);
230                                                 newElement = true;
231                                         }
232                                         elementStack.push(schemaElement);
233                                         
234                                         Iterator<Attribute> attributeIterator = parseElement.getAttributes();
235                                         
236 //                                      while (attributeIterator.hasNext()) {
237 //                                              Attribute attribute = attributeIterator.next();
238 //                                              System.out.println("Attribute " + attribute.getName() + " " + attribute.getValue());
239 //                                      }
240                                         if (newElement) {
241                                                 LocalComplexType complexType = new LocalComplexType();
242                                                 schemaElement.setComplexType(complexType);
243                                                 attributeIterator = parseElement.getAttributes();
244                                                 while (attributeIterator.hasNext()) {
245                                                         Attribute attribute = attributeIterator.next();
246                                                         addAttribute(attribute, complexType, currentNS);
247                                                 }
248                                                 
249                                         } else {
250                                                 LocalComplexType complexType = schemaElement.getComplexType();
251                                                 attributeIterator = parseElement.getAttributes();
252                                                 Map<String,org.w3._2001.xmlschema.Attribute> currentAttributes = new HashMap<>();
253                                                 Iterator<Annotated> currentAttributeIterator = complexType.getAttributeOrAttributeGroup().iterator();
254                                                 while (currentAttributeIterator.hasNext()) {
255                                                         Annotated annotated = currentAttributeIterator.next();
256                                                         if (annotated instanceof org.w3._2001.xmlschema.Attribute) {
257                                                                 org.w3._2001.xmlschema.Attribute schemaAttribute = (org.w3._2001.xmlschema.Attribute)annotated;
258                                                                 String n = schemaAttribute.getName();
259                                                                 if (n != null)
260                                                                         currentAttributes.put(n, schemaAttribute);
261                                                         }
262                                                 }
263                                                 while (attributeIterator.hasNext()) {
264                                                         Attribute attribute = attributeIterator.next();
265                                                         org.w3._2001.xmlschema.Attribute schemaAttribute = currentAttributes.get(attribute.getName().getLocalPart());
266                                                         if (schemaAttribute == null) {
267                                                                 addAttribute(attribute, complexType, currentNS);
268                                                         } else {
269                                                                 QName newType = getType(attribute.getValue());
270                                                                 updateAttributeType(schemaAttribute, newType);
271                                                         }
272                                                         
273                                                 }
274                                         }
275                                         
276                                 } else if (event.isEndElement()) {
277                                         EndElement element = event.asEndElement();
278 //                                      System.out.println("End " + element.getName());
279                                         elementStack.pop();
280                                 } else if (event.isAttribute()) {
281                                         
282                                 } else if (event.isStartDocument()) {
283                                 
284                                 } else if (event.isEndDocument()) {
285                                         
286                                 } else if (event.isEntityReference()) {
287                                 
288                                 } else if (event.isCharacters()) {
289                                         Characters characters = event.asCharacters();
290 //                                      if (!characters.isWhiteSpace())
291 //                                              System.out.println(characters.getData());
292                             } else if (event.isNamespace()) {
293                                 
294                             }
295                         }
296                 }
297                 
298         }
299         
300         private void updateAttributeType(org.w3._2001.xmlschema.Attribute schemaAttribute, QName newType) {
301                 
302                 QName currentType = schemaAttribute.getType();
303                 if (!newType.getLocalPart().equals(currentType.getLocalPart())) {
304                                 
305                         
306                         if (currentType.getLocalPart().equals("integer") && newType.getLocalPart().equals("double")) {
307                                 // change integer to double
308                                 schemaAttribute.setType(newType);
309                         } else if (currentType.getLocalPart().equals("double") && newType.getLocalPart().equals("integer")) {
310                                 // nothing to do, integer can be parsed as double
311                         } else if (!currentType.getLocalPart().equals("string")){
312                                 schemaAttribute.setType(new QName(SchemaConversionBase.SCHEMA_NS, "string"));
313                         }
314                 }
315         }
316         
317         private void addElement(ExplicitGroup choice, QName type, LocalElement localElement) {
318                 for (Object o  : choice.getParticle()) {
319                         JAXBElement<LocalElement> el = (JAXBElement<LocalElement>)o;
320                         if (el.getName().equals(type)) {
321                                 QName ref = el.getValue().getRef();
322                                 QName ref2 = localElement.getRef();
323                                 if (ref != null) {
324                                         if (ref.equals(ref2))
325                                                 return;
326                                 } else if (el.getValue().getType().equals(localElement.getType()))
327                                                 return; 
328                         }
329                                 
330                 }
331                 choice.getParticle().add(new JAXBElement<LocalElement>(type, LocalElement.class, null, localElement));
332         }
333         
334         private void addSchemaDependency(Schema parentSchema, Schema schema) {
335                 for (OpenAttrs openAttrs : parentSchema.getIncludeOrImportOrRedefine()) {
336                         if (openAttrs instanceof Import) {
337                                 Import import1 = (Import)openAttrs;
338                                 if (import1.getNamespace().equals(schema.getTargetNamespace()))
339                                         return;
340                         }
341                 }
342                 Import import1 = new Import();
343                 import1.setNamespace(schema.getTargetNamespace());
344                 parentSchema.getIncludeOrImportOrRedefine().add(import1);
345         }
346         
347         private void addAttribute(Attribute attribute, ComplexType complexType, String currentNS) {
348                 if (attribute.getName().getLocalPart().equals("GridOptions.GridVisibility"))
349                         System.out.println();
350                 if (attribute.getName().getNamespaceURI().length() == 0 || attribute.getName().getNamespaceURI().equals(currentNS)) {
351                         org.w3._2001.xmlschema.Attribute schemaAttribute = new org.w3._2001.xmlschema.Attribute();
352                         schemaAttribute.setName(attribute.getName().getLocalPart());
353                         schemaAttribute.setType(getType(attribute.getValue()));
354                         addAttribute(complexType, schemaAttribute);
355                 } else {
356                         {
357                                 Schema schema = getOrCreateSchema(currentNS);
358                                 Schema attrSchema = getOrCreateSchema(attribute.getName().getNamespaceURI());
359                                 
360                                 org.w3._2001.xmlschema.Attribute schemaAttribute = attributeMap.get(attrSchema).get(attribute.getName().getLocalPart());
361                                 if (schemaAttribute == null) {
362                                         schemaAttribute = new org.w3._2001.xmlschema.TopLevelAttribute();
363                                         schemaAttribute.setName(attribute.getName().getLocalPart());
364                                         schemaAttribute.setType(getType(attribute.getValue()));
365                                         attrSchema.getSimpleTypeOrComplexTypeOrGroup().add(schemaAttribute);
366                                         attributeMap.get(attrSchema).put(attribute.getName().getLocalPart(), schemaAttribute);
367                                 }
368                                 addSchemaDependency(schema, attrSchema);
369                                 
370                         }
371                         {
372                                 org.w3._2001.xmlschema.Attribute schemaAttribute = new org.w3._2001.xmlschema.Attribute();
373                                 schemaAttribute.setRef(new QName(attribute.getName().getNamespaceURI(),attribute.getName().getLocalPart()));
374                                 addAttribute(complexType, schemaAttribute);
375                         }
376                         
377                 }
378         }
379         
380         private void addAttribute(ComplexType complexType, org.w3._2001.xmlschema.Attribute schemaAttribute) {
381                 if (schemaAttribute.getName() != null) {
382                         for (Annotated annotated : complexType.getAttributeOrAttributeGroup()) {
383                                 if (annotated instanceof org.w3._2001.xmlschema.Attribute) {
384                                         org.w3._2001.xmlschema.Attribute attr = (org.w3._2001.xmlschema.Attribute)annotated;
385                                         if (schemaAttribute.getName().equals(attr.getName())) {
386                                                 updateAttributeType(attr, schemaAttribute.getType());
387                                         }
388                                 }
389                         }
390                 } else {
391                         for (Annotated annotated : complexType.getAttributeOrAttributeGroup()) {
392                                 if (annotated instanceof org.w3._2001.xmlschema.Attribute) {
393                                         org.w3._2001.xmlschema.Attribute attr = (org.w3._2001.xmlschema.Attribute)annotated;
394                                         if (attr.getName() != null)
395                                                 continue;
396                                         if (schemaAttribute.getRef().equals(attr.getRef())) {
397                                                 return;
398                                         }
399                                 }
400                         }
401                 }
402                 complexType.getAttributeOrAttributeGroup().add(schemaAttribute);
403         }
404         
405         
406         private QName getType(String value) {
407                 try {
408                         Integer.parseInt(value);
409                         return new QName(SchemaConversionBase.SCHEMA_NS, "integer");
410                 } catch (NumberFormatException e) {
411                         
412                 }
413                 
414                 try {
415                         Double.parseDouble(value);
416                         return new QName(SchemaConversionBase.SCHEMA_NS, "double");
417                 } catch (NumberFormatException e) {
418                         
419                 }
420                 if ("True".equals(value) || "False".equals(value))
421                         return new QName(SchemaConversionBase.SCHEMA_NS, "boolean");
422                 return new QName(SchemaConversionBase.SCHEMA_NS, "string");
423                 
424         }
425         
426         private Schema getOrCreateSchema(StartElement parseElement) {
427                 return getOrCreateSchema(parseElement.getName().getNamespaceURI());
428         }
429         
430         private Schema getOrCreateSchema(String ns) {
431                 Schema s = schemaMap.get(ns);
432                 if (s == null) {
433                         s = new Schema();
434                         s.setTargetNamespace(ns);
435                         schemaMap.put(ns, s);
436                         elementMap.put(s, new HashMap<String,Element>());
437                         attributeMap.put(s, new HashMap<String, org.w3._2001.xmlschema.Attribute>());
438                 }
439                 return s;
440         }
441
442 }