]> gerrit.simantics Code Review - simantics/interop.git/blob - org.simantics.xml.sax/src/org/simantics/xml/data/XmlDataConverter.java
Improved attribute namespace handling + element characters data parsing
[simantics/interop.git] / org.simantics.xml.sax / src / org / simantics / xml / data / XmlDataConverter.java
1 package org.simantics.xml.data;
2
3 import java.io.File;
4 import java.io.FileInputStream;
5 import java.io.IOException;
6 import java.util.ArrayDeque;
7 import java.util.Date;
8 import java.util.Deque;
9 import java.util.HashMap;
10 import java.util.HashSet;
11 import java.util.Iterator;
12 import java.util.LinkedHashMap;
13 import java.util.List;
14 import java.util.Map;
15 import java.util.Set;
16 import java.util.regex.Matcher;
17
18 import javax.xml.bind.JAXBContext;
19 import javax.xml.bind.JAXBElement;
20 import javax.xml.bind.JAXBException;
21 import javax.xml.bind.Marshaller;
22 import javax.xml.namespace.QName;
23 import javax.xml.stream.XMLEventReader;
24 import javax.xml.stream.XMLInputFactory;
25 import javax.xml.stream.XMLStreamException;
26 import javax.xml.stream.events.Attribute;
27 import javax.xml.stream.events.Characters;
28 import javax.xml.stream.events.EndElement;
29 import javax.xml.stream.events.StartElement;
30 import javax.xml.stream.events.XMLEvent;
31
32 import org.simantics.xml.sax.SchemaConversionBase;
33 import org.w3._2001.xmlschema.Annotated;
34 import org.w3._2001.xmlschema.ComplexType;
35 import org.w3._2001.xmlschema.Element;
36 import org.w3._2001.xmlschema.ExplicitGroup;
37 import org.w3._2001.xmlschema.Import;
38 import org.w3._2001.xmlschema.LocalComplexType;
39 import org.w3._2001.xmlschema.LocalElement;
40 import org.w3._2001.xmlschema.LocalSimpleType;
41 import org.w3._2001.xmlschema.OpenAttrs;
42 import org.w3._2001.xmlschema.Restriction;
43 import org.w3._2001.xmlschema.Schema;
44 import org.w3._2001.xmlschema.SimpleType;
45 import org.w3._2001.xmlschema.TopLevelElement;
46
/**
 * This class generates XML file parsers based on a set of XML data files. It is recommended to use the schema-based parser (org.simantics.xml.sax.SchemaConverter) if possible.
 * The parser generated by this class is not reliable...
 * 
 * @author luukkainen
 *
 */
54 public class XmlDataConverter {
55         
56         File outputPlugin;
57         File conversionFile;
58         List<File> inputFiles;
59         
60         String pluginName;
61         
62         private boolean useElementNSforAttributes = true; // If true, attributes with undefined namespace are written to the same name space as teh element. If false. the attributes are written to the root namespace.
63         private boolean ignoreAttributeNS = true; // Completely ignore attribute namespaces. When true, all attributes are written to the elements. 
64         
65         private String[] header;
66         
67         public XmlDataConverter(List<File> inputFiles, File conversionFile, File outputPlugin) {
68                 if (inputFiles.size() == 0)
69                         throw new IllegalArgumentException("At least one input file must be given.");
70                 this.outputPlugin = outputPlugin;
71                 this.conversionFile = conversionFile;
72                 this.inputFiles = inputFiles;
73                 
74                 pluginName = outputPlugin.getName();
75                 
76         }
77         
78         public void convert() throws IOException, XMLStreamException, JAXBException {
79                 
80                 init();
81                 doConvert();
82                 
83                 Map<Schema, File> fileMap = new HashMap<>();
84                 JAXBContext jc = JAXBContext.newInstance("org.w3._2001.xmlschema");
85                 Marshaller m = jc.createMarshaller();
86                 m.setProperty("jaxb.formatted.output", true);
87                 Set<String> filenames = new HashSet<>(); 
88                 for (Schema s : schemaMap.values()) {
89                         String name = s.getTargetNamespace();
90                         // Special case for XAML
91                         if (name.startsWith("clr-namespace:")) {
92                                 name = name.substring("clr-namespace:".length());
93                                 int i = name.indexOf(";assembly");
94                                 if (i > 0)
95                                         name = name.substring(0, i);
96                         }
97                         name = name.replaceAll("\\.", "_");
98                         name = name.replaceAll("/", "_");
99                         name = name.replaceAll(":", "_");
100                         name = name.replaceAll(";", "_");
101                         if (filenames.contains(name)) {
102                                 int i = 2;
103                                 while (filenames.contains(name+i)) {
104                                         i++;
105                                 }
106                                 name = name+i;
107                         }
108                         filenames.add(name);
109                         File file = new File(outputPlugin.getAbsolutePath() + File.separator + name +".xsd");
110                         fileMap.put(s, file);
111                 }
112                 for (Schema s : schemaMap.values()) {
113                         for (OpenAttrs openAttrs : s.getIncludeOrImportOrRedefine()) {
114                                 if (openAttrs instanceof Import) {
115                                         Import import1 = (Import)openAttrs;
116                                         Schema dep = schemaMap.get(import1.getNamespace());
117                                         import1.setSchemaLocation(fileMap.get(dep).getName());
118                                 }
119                         }
120                 }
121                 for (Schema s : schemaMap.values()) {
122                         File file = fileMap.get(s);
123                         m.marshal(s, file);
124                 }
125                 Schema rootSchema = schemaMap.values().iterator().next();
126                 DataSchemaConverter schemaConverter = new DataSchemaConverter(rootSchema,fileMap.get(rootSchema),conversionFile,outputPlugin);
127                 schemaConverter.setFileMap(fileMap);
128                 schemaConverter.setSchemaMap(schemaMap);
129                 schemaConverter.convert();
130                 
131                 
132                 header = null;
133                 schemaMap = null;
134                 elementMap = null;
135         }
136         
137         protected void init()  throws IOException {
138                 
139                 header = new String[4];
140                 header[0] = "Generated with org.simantics.xml.sax XML data file converter";
141                 header[1] = "";
142                 header[2] = "File " + inputFiles.get(0).getAbsolutePath().replaceAll(Matcher.quoteReplacement("\\"), "/") + " , total file count: " + (inputFiles.size()) + "";
143                 header[3] = "Date " + new Date().toString();
144                 
145                 schemaMap = new HashMap<>();
146                 elementMap = new HashMap<>();
147                 attributeMap = new HashMap<>();
148                 elementNsMap = new HashMap<>();
149         }
150         
151         Map<String, Schema> schemaMap = new LinkedHashMap<>();
152         Map<Schema,Map<String,Element>> elementMap = new HashMap<>();
153         Map<Element,String> elementNsMap = new HashMap<>();
154         Map<Schema,Map<String,org.w3._2001.xmlschema.Attribute>> attributeMap = new HashMap<>();
155         String defaultNS;
156         
157         protected void doConvert() throws IOException, XMLStreamException, JAXBException {
158                 XMLInputFactory input = XMLInputFactory.newInstance();
159                 
160                 
161                 for (File inputFile : inputFiles) {
162                         XMLEventReader reader = input.createXMLEventReader(new FileInputStream(inputFile));
163                         convertFile(reader);
164                 }
165                 
166         }
167         
168         private void convertFile(XMLEventReader reader) throws XMLStreamException {
169                 Deque<Element> elementStack = new ArrayDeque<>();
170                 while (reader.hasNext()) {
171                         XMLEvent event = reader.nextEvent();
172                         if (event.isStartElement()) {
173                                 StartElement parseElement = event.asStartElement();
174
175                                 Element schemaElement = null;
176                                 String currentNS = parseElement.getName().getNamespaceURI();
177                                 Schema s = schemaMap.get(currentNS);
178                                 String elementName = parseElement.getName().getLocalPart();
179                                 if (s == null) {
180                                         s = getOrCreateSchema(parseElement);
181                                 } else {
182                                         schemaElement = elementMap.get(s).get(elementName);
183                                 }
184
185                                 Element parentElement = elementStack.peek();
186                                 
187                                 boolean newElement = false;
188                                 boolean sameNameSpace = true;
189                                 
190                                 if (parentElement != null) {
191                                         String parentNs = elementNsMap.get(parentElement);
192                                         sameNameSpace =currentNS.equals(parentNs);
193                                         if (!sameNameSpace) {
194                                                 Schema ps = getOrCreateSchema(parentNs);
195                                                 addSchemaDependency(ps, s);
196                                         }
197                                                 
198                                 } else {
199                                         defaultNS = currentNS;
200                                 }
201                                 if (schemaElement == null) {
202                                         if (elementStack.isEmpty()) {
203                                                 schemaElement = new TopLevelElement();
204                                                 s.getSimpleTypeOrComplexTypeOrGroup().add(schemaElement);
205                                         } else {
206                                                 schemaElement = new TopLevelElement();
207                                                 s.getSimpleTypeOrComplexTypeOrGroup().add(schemaElement);
208                                         }
209                                         schemaElement.setName(elementName);
210                                         elementNsMap.put(schemaElement, currentNS);                             
211                                         
212                                         
213                                         elementMap.get(s).put(elementName, schemaElement);
214                                         newElement = true;
215                                 }
216                                 if (parentElement != null) {
217                                         ComplexType complexType = getOrCreateComplexType(parentElement); 
218                                         ExplicitGroup choice = complexType.getChoice();
219                                         if (choice == null) {
220                                                 choice = new ExplicitGroup();
221                                                 complexType.setChoice(choice);
222                                                 choice.setMaxOccurs("unbounded");
223                                         }
224                                         LocalElement localElement = new LocalElement();
225                                         localElement.setRef(new QName(parseElement.getName().getNamespaceURI(), elementName));
226                                         
227                                         addElement(choice, new QName(SchemaConversionBase.SCHEMA_NS,"element"), localElement);
228                                 }
229                                 
230                                 elementStack.push(schemaElement);
231                                 
232                                 Iterator<Attribute> attributeIterator = parseElement.getAttributes();
233                                 
234 //                              while (attributeIterator.hasNext()) {
235 //                                      Attribute attribute = attributeIterator.next();
236 //                                      System.out.println("Attribute " + attribute.getName() + " " + attribute.getValue());
237 //                              }
238                                 if (newElement) {
239                                         attributeIterator = parseElement.getAttributes();
240                                         if (attributeIterator.hasNext()) {
241                                                 ComplexType complexType =  getOrCreateComplexType(schemaElement);
242                                                 while (attributeIterator.hasNext()) {
243                                                         Attribute attribute = attributeIterator.next();
244                                                         if ("http://www.w3.org/XML/1998/namespace".equals(attribute.getName().getNamespaceURI()))
245                                                                 continue;
246                                                         addAttribute(attribute, complexType, currentNS);
247                                                 }
248                                         }
249                                         
250                                 } else {
251                                         ComplexType complexType = schemaElement.getComplexType();
252                                         attributeIterator = parseElement.getAttributes();
253                                         if (complexType != null || attributeIterator.hasNext()) {
254                                                 complexType = getOrCreateComplexType(schemaElement);
255                                                 Map<String,org.w3._2001.xmlschema.Attribute> currentAttributes = new HashMap<>();
256                                                 Iterator<Annotated> currentAttributeIterator = complexType.getAttributeOrAttributeGroup().iterator();
257                                                 while (currentAttributeIterator.hasNext()) {
258                                                         Annotated annotated = currentAttributeIterator.next();
259                                                         if (annotated instanceof org.w3._2001.xmlschema.Attribute) {
260                                                                 org.w3._2001.xmlschema.Attribute localAttribute = (org.w3._2001.xmlschema.Attribute)annotated;
261                                                                 String n = localAttribute.getName();
262                                                                 if (n != null)
263                                                                         currentAttributes.put(n, localAttribute);
264                                                                 else if (localAttribute.getRef() != null) {
265                                                                         currentAttributes.put(localAttribute.getRef().getLocalPart(), localAttribute);
266                                                                 }
267                                                         }
268                                                 }
269                                                 while (attributeIterator.hasNext()) {
270                                                         Attribute attribute = attributeIterator.next();
271                                                         if ("http://www.w3.org/XML/1998/namespace".equals(attribute.getName().getNamespaceURI()))
272                                                                 continue;
273                                                         org.w3._2001.xmlschema.Attribute localAttribute = currentAttributes.get(attribute.getName().getLocalPart());
274                                                         if (localAttribute == null) {
275                                                                 addAttribute(attribute, complexType, currentNS);
276                                                         } else {
277                                                                 QName newType = getType(attribute.getValue());
278                                                                 org.w3._2001.xmlschema.Attribute schemaAttribute = updateAttributeType(localAttribute, newType);
279                                                                 
280                                                                 String attrNs = getNS(attribute, currentNS);
281                                                                 if (!ignoreAttributeNS && attribute.getName().getNamespaceURI().length() > 0) {
282                                                                         // Attribute has explicit ns definition.
283                                                                         if (localAttribute.getRef() != null) {
284                                                                                 // current local attribute is reference, check that the namespaces match
285                                                                                 if (!localAttribute.getRef().getNamespaceURI().equals(attrNs))
286                                                                                         throw new RuntimeException("Conflicting namespaces for attribute " +  attribute.getName().getLocalPart() + " " + attrNs + " " + localAttribute.getRef().getNamespaceURI());
287                                                                         } else if (!attrNs.equals(currentNS)){
288                                                                                 // move the attribute to explicit namespace.
289                                                                                 complexType.getAttributeOrAttributeGroup().remove(localAttribute);
290                                                                                 org.w3._2001.xmlschema.Attribute  scAttribute = addAttribute(attribute, complexType, currentNS);
291                                                                                 scAttribute.setType(schemaAttribute.getType());
292                                                                         }
293                                                                         
294                                                                 }
295                                                         }
296                                                         
297                                                 }
298                                         }
299                                 }
300                                 
301                         } else if (event.isEndElement()) {
302                                 EndElement element = event.asEndElement();
303 //                              System.out.println("End " + element.getName());
304                                 elementStack.pop();
305                         } else if (event.isAttribute()) {
306                         
307                         } else if (event.isStartDocument()) {
308
309                         } else if (event.isEndDocument()) {
310                                 
311                         } else if (event.isEntityReference()) {
312                         
313                         } else if (event.isCharacters()) {
314                                 Characters characters = event.asCharacters();
315                                 Element element = elementStack.peek();
316                                 if (element != null) {
317                                         String text = characters.getData().trim();
318                                         if (text.length() > 0) {
319                                                 setElementCharactersData(element, text, characters);
320                                         }
321                                 }
322                     } else if (event.isNamespace()) {
323                     
324                     }
325                 }
326         }
327         
328         private void setElementCharactersData(Element element, String text, Characters characters) {
329                 
330                 //System.out.println(element.getName() + " " + characters.getData());
331                 if (element.getComplexType() != null)
332                         element.getComplexType().setMixed(true);
333                 else {
334                         SimpleType simpleType = getOrCreateSimpleType(element);
335                         QName type = getType(text);
336                         Restriction restriction = simpleType.getRestriction();
337                         if (restriction == null) {
338                                 restriction = new Restriction();
339                                 restriction.setBase(type);
340                                 simpleType.setRestriction(restriction);
341                         } else {
342                                 restriction.setBase(mergePrimitiveType(restriction.getBase(), type));
343                         }
344                 }
345         }
346         
347         private ComplexType getOrCreateComplexType(Element element) {
348                 LocalComplexType complexType = element.getComplexType();
349                 if (complexType == null) {
350                         complexType = new LocalComplexType();
351                         element.setComplexType(complexType);
352                         if (element.getSimpleType() != null) {
353                                 // Convert SimpleType to ComplexType
354                                 element.setSimpleType(null);
355                                 complexType.setMixed(true);
356                         }
357                 }
358                 return complexType;
359         }
360         
361         private SimpleType getOrCreateSimpleType(Element element) {
362                 LocalSimpleType simpleType = element.getSimpleType();
363                 if (simpleType == null) {
364                         simpleType = new LocalSimpleType();
365                         element.setSimpleType(simpleType);
366                 }
367                 return simpleType;
368         }
369         
370         private void addElement(ExplicitGroup choice, QName type, LocalElement localElement) {
371                 for (Object o  : choice.getParticle()) {
372                         JAXBElement<LocalElement> el = (JAXBElement<LocalElement>)o;
373                         if (el.getName().equals(type)) {
374                                 QName ref = el.getValue().getRef();
375                                 QName ref2 = localElement.getRef();
376                                 if (ref != null) {
377                                         if (ref.equals(ref2))
378                                                 return;
379                                 } else if (el.getValue().getType().equals(localElement.getType()))
380                                                 return; 
381                         }
382                                 
383                 }
384                 choice.getParticle().add(new JAXBElement<LocalElement>(type, LocalElement.class, null, localElement));
385         }
386         
387         private void addSchemaDependency(Schema parentSchema, Schema schema) {
388                 for (OpenAttrs openAttrs : parentSchema.getIncludeOrImportOrRedefine()) {
389                         if (openAttrs instanceof Import) {
390                                 Import import1 = (Import)openAttrs;
391                                 if (import1.getNamespace().equals(schema.getTargetNamespace()))
392                                         return;
393                         }
394                 }
395                 Import import1 = new Import();
396                 import1.setNamespace(schema.getTargetNamespace());
397                 parentSchema.getIncludeOrImportOrRedefine().add(import1);
398         }
399         
400         private String getNS(Attribute attribute, String currentNS) {
401                 if (ignoreAttributeNS)
402                         return currentNS;
403                 String attrNs = attribute.getName().getNamespaceURI();
404                 if (attrNs.length() == 0) {
405                         if (useElementNSforAttributes)
406                                 attrNs = currentNS;
407                         else
408                                 attrNs = defaultNS;
409                 } 
410                 return attrNs;
411         }
412         
413         
414         private org.w3._2001.xmlschema.Attribute addAttribute(Attribute attribute, ComplexType complexType, String currentNS) {
415                 String attrNs = getNS(attribute, currentNS);
416                 String attrName = attribute.getName().getLocalPart();
417                 
418                 if (attrNs.equals(currentNS)) {
419                         org.w3._2001.xmlschema.Attribute schemaAttribute = new org.w3._2001.xmlschema.Attribute();
420                         schemaAttribute.setName(attrName);
421                         schemaAttribute.setType(getType(attribute.getValue()));
422                         addAttribute(complexType, schemaAttribute);
423                         return schemaAttribute;
424                 } else {
425                         
426                         Schema schema = getOrCreateSchema(currentNS);
427                         Schema attrSchema = getOrCreateSchema(attrNs);
428                         
429                         org.w3._2001.xmlschema.Attribute schemaAttribute = attributeMap.get(attrSchema).get(attrName);
430                         if (schemaAttribute == null) {
431                                 schemaAttribute = new org.w3._2001.xmlschema.TopLevelAttribute();
432                                 schemaAttribute.setName(attrName);
433                                 schemaAttribute.setType(getType(attribute.getValue()));
434                                 attrSchema.getSimpleTypeOrComplexTypeOrGroup().add(schemaAttribute);
435                                 attributeMap.get(attrSchema).put(attribute.getName().getLocalPart(), schemaAttribute);
436                         }
437                         addSchemaDependency(schema, attrSchema);
438                         
439                 
440                         org.w3._2001.xmlschema.Attribute localAttribute = new org.w3._2001.xmlschema.Attribute();
441                         localAttribute.setRef(new QName(attrNs,attrName));
442                         addAttribute(complexType, localAttribute);
443                         return schemaAttribute;
444                         
445                         
446                 }
447         }
448         
449         private void addAttribute(ComplexType complexType, org.w3._2001.xmlschema.Attribute schemaAttribute) {
450                 if (schemaAttribute.getName() != null) {
451                         for (Annotated annotated : complexType.getAttributeOrAttributeGroup()) {
452                                 if (annotated instanceof org.w3._2001.xmlschema.Attribute) {
453                                         org.w3._2001.xmlschema.Attribute attr = (org.w3._2001.xmlschema.Attribute)annotated;
454                                         if (schemaAttribute.getName().equals(attr.getName())) {
455                                                 updateAttributeType(attr, schemaAttribute.getType());
456                                                 return;
457                                         }
458                                 }
459                         }
460                 } else {
461                         for (Annotated annotated : complexType.getAttributeOrAttributeGroup()) {
462                                 if (annotated instanceof org.w3._2001.xmlschema.Attribute) {
463                                         org.w3._2001.xmlschema.Attribute attr = (org.w3._2001.xmlschema.Attribute)annotated;
464                                         if (attr.getName() != null)
465                                                 continue;
466                                         if (schemaAttribute.getRef().equals(attr.getRef())) {
467                                                 return;
468                                         }
469                                 }
470                         }
471                 }
472                 complexType.getAttributeOrAttributeGroup().add(schemaAttribute);
473         }
474         
475         
476         private QName getType(String value) {
477                 if ("true".equals(value) || "false".equals(value)) // || "1".equals(value) || "0".equals(value))
478                         return new QName(SchemaConversionBase.SCHEMA_NS, "boolean");
479                 try {
480                         Integer.parseInt(value);
481                         return new QName(SchemaConversionBase.SCHEMA_NS, "integer");
482                 } catch (NumberFormatException e) {
483                         
484                 }
485                 
486                 try {
487                         Double.parseDouble(value);
488                         return new QName(SchemaConversionBase.SCHEMA_NS, "double");
489                 } catch (NumberFormatException e) {
490                         
491                 }
492                 
493                 return new QName(SchemaConversionBase.SCHEMA_NS, "string");
494                 
495         }
496         
497         private org.w3._2001.xmlschema.Attribute updateAttributeType(org.w3._2001.xmlschema.Attribute schemaAttribute, QName newType) {
498                 
499                 QName currentType = schemaAttribute.getType();
500                 if (currentType == null && schemaAttribute.getRef() != null) {
501                         Schema schema = schemaMap.get(schemaAttribute.getRef().getNamespaceURI());
502                         
503                         schemaAttribute = attributeMap.get(schema).get(schemaAttribute.getRef().getLocalPart());
504                         currentType = schemaAttribute.getType();
505                 }
506                 if (currentType == null)
507                         throw new RuntimeException("Could not resolve attribute");
508                 
509                 
510                 schemaAttribute.setType(mergePrimitiveType(currentType, newType));
511                 return schemaAttribute;
512                 
513         }
514         
515         private QName mergePrimitiveType(QName currentType, QName newType) {
516                 if (!newType.getLocalPart().equals(currentType.getLocalPart())) {
517                                 
518                         
519                         if (currentType.getLocalPart().equals("integer") && newType.getLocalPart().equals("double")) {
520                                 // change integer to double
521                                 return newType;
522                         } else if (currentType.getLocalPart().equals("double") && newType.getLocalPart().equals("integer")) {
523                                 // nothing to do, integer can be parsed as double
524                                 return currentType;
525                         } else if (currentType.getLocalPart().equals("boolean") && newType.getLocalPart().equals("integer")) {
526                                 // change boolean to int
527                                 return newType;
528                         } else if (currentType.getLocalPart().equals("integer") && newType.getLocalPart().equals("boolean")) {
529                                 //  nothing to do, boolean (0 & 1) can be parsed as integer.
530                                 // FIXME : what about true / false? Now type detection system does not accept 0 or 1 as boolean values, while XML supports it. See method: QName getType(String value)
531                                 // FIXME : also, if we support 0 & 1 as booleans, we need to consider possible double values as well.
532                                 return currentType;
533                         } else if (!currentType.getLocalPart().equals("string")){
534                                 return new QName(SchemaConversionBase.SCHEMA_NS, "string");
535                         }
536                 }
537                 return currentType;
538         }
539         
540         private Schema getOrCreateSchema(StartElement parseElement) {
541                 return getOrCreateSchema(parseElement.getName().getNamespaceURI());
542         }
543         
544         private Schema getOrCreateSchema(String ns) {
545                 if (ns == null)
546                         throw new IllegalArgumentException("Schema NS cannot be null.");
547                 Schema s = schemaMap.get(ns);
548                 if (s == null) {
549                         s = new Schema();
550                         s.setTargetNamespace(ns);
551                         schemaMap.put(ns, s);
552                         elementMap.put(s, new HashMap<String,Element>());
553                         attributeMap.put(s, new HashMap<String, org.w3._2001.xmlschema.Attribute>());
554                 }
555                 return s;
556         }
557
558 }