// XmlInstanceInspector.java
/*
* Copyright (C) 2017 Red Hat, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.atlasmap.xml.inspect;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import javax.xml.XMLConstants;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import io.atlasmap.v2.CollectionType;
import io.atlasmap.v2.FieldStatus;
import io.atlasmap.v2.FieldType;
import io.atlasmap.v2.Fields;
import io.atlasmap.xml.core.XmlComplexTypeFactory;
import io.atlasmap.xml.core.XmlPath;
import io.atlasmap.xml.v2.AtlasXmlModelFactory;
import io.atlasmap.xml.v2.XmlComplexType;
import io.atlasmap.xml.v2.XmlDocument;
import io.atlasmap.xml.v2.XmlField;
import io.atlasmap.xml.v2.XmlFields;
import io.atlasmap.xml.v2.XmlNamespace;
import io.atlasmap.xml.v2.XmlNamespaces;
/**
 * Derives an {@link XmlDocument} field model from a parsed XML instance document.
 *
 * <p>The inspector owns a single {@link XmlDocument} that every call to
 * {@link #inspect(Document)} repopulates. Elements that contain child elements become
 * {@link XmlComplexType} entries; elements without child elements and attributes become
 * {@link XmlField} entries of type {@link FieldType#STRING}. An element name that occurs
 * more than once under the same parent element is flagged as {@link CollectionType#LIST}.
 * Namespaces met along the way are collected on the document.
 */
public class XmlInstanceInspector {

    private static final Logger LOG = LoggerFactory.getLogger(XmlInstanceInspector.class);

    /** Local name of the {@code xsi:type} attribute, the only xsi attribute carrying a type name. */
    private static final String XSI_TYPE_LOCAL_NAME = "type";

    private final XmlDocument xmlDocument = AtlasXmlModelFactory.createXmlDocument();

    /**
     * Inspects the given DOM document and stores the resulting fields and namespaces in the
     * document returned by {@link #getXmlDocument()}.
     *
     * <p>Fields are reset on every call. A namespace container left over from a previous
     * call is replaced with an empty one so that earlier namespaces do not leak into this
     * inspection.
     *
     * @param document the parsed XML instance; a document without a document element
     *                 yields an empty field list
     * @throws NullPointerException if {@code document} is {@code null}
     */
    public void inspect(Document document) {
        xmlDocument.setFields(new Fields());
        if (xmlDocument.getXmlNamespaces() != null) {
            // Same treatment as the fields: start over instead of accumulating across calls.
            xmlDocument.setXmlNamespaces(new XmlNamespaces());
        }
        parseDocument(document.getDocumentElement());
    }

    /**
     * Returns the document model populated by {@link #inspect(Document)}.
     *
     * @return the inspector's {@link XmlDocument}; the same instance on every call
     */
    public XmlDocument getXmlDocument() {
        return xmlDocument;
    }

    /**
     * Maps the document element and everything beneath it.
     *
     * @param rootNode the document element, or {@code null} for an empty document
     */
    private void parseDocument(Node rootNode) {
        if (rootNode == null) {
            // A Document without a document element has nothing to inspect.
            return;
        }
        Node owner = rootNode.getParentNode();
        if (owner == null || owner.getNodeType() != Node.DOCUMENT_NODE) {
            return;
        }
        XmlComplexType rootComplexType = createXmlComplexType(rootNode, null);
        xmlDocument.getFields().getField().add(rootComplexType);
        mapAttributes(rootNode, rootComplexType);
        if (rootNode.hasChildNodes()) {
            mapChildNodes(rootNode.getChildNodes(), rootComplexType);
        }
    }

    /**
     * Maps the element children of the document element onto the root complex type.
     *
     * <p>Children that contain child elements are delegated to
     * {@link #mapParentNode(Node, XmlComplexType)}, which attaches their attributes to their
     * own complex type. Only leaf children have their attributes mapped onto the root type,
     * mirroring how {@code mapParentNode} treats leaf elements on nested levels.
     *
     * @param nodes           child nodes of the document element
     * @param rootComplexType complex type created for the document element
     */
    private void mapChildNodes(NodeList nodes, XmlComplexType rootComplexType) {
        for (int i = 0; i < nodes.getLength(); i++) {
            Node childNode = nodes.item(i);
            if (childNode.getNodeType() != Node.ELEMENT_NODE) {
                continue;
            }
            if (hasChildElements(childNode)) {
                mapParentNode(childNode, rootComplexType);
            } else {
                mapAttributes(childNode, rootComplexType);
                mapNodeToXmlField(childNode, rootComplexType);
            }
        }
    }

    /**
     * Tells whether the node has at least one direct child element.
     *
     * <p>Equivalent to checking for any descendant element, but stops at the first hit
     * instead of walking the whole subtree.
     */
    private boolean hasChildElements(Node node) {
        NodeList children = node.getChildNodes();
        for (int i = 0; i < children.getLength(); i++) {
            if (children.item(i).getNodeType() == Node.ELEMENT_NODE) {
                return true;
            }
        }
        return false;
    }

    /**
     * Records the namespace of the node on the document unless it is already known.
     *
     * <p>A recorded namespace without alias matches a node without prefix and the same URI;
     * a recorded namespace with alias matches a node with the same prefix. Callers only pass
     * nodes whose namespace URI is non-null.
     *
     * @param node element or attribute node with a non-null namespace URI
     */
    private void mapNamespace(Node node) {
        if (xmlDocument.getXmlNamespaces() == null) {
            xmlDocument.setXmlNamespaces(new XmlNamespaces());
        }
        List<XmlNamespace> namespaces = xmlDocument.getXmlNamespaces().getXmlNamespace();
        String prefix = node.getPrefix();
        String uri = node.getNamespaceURI();
        boolean alreadyKnown = namespaces.stream().anyMatch(ns -> ns.getAlias() == null
                ? prefix == null && ns.getUri().equals(uri)
                : ns.getAlias().equals(prefix));
        if (alreadyKnown) {
            return;
        }
        XmlNamespace namespace = new XmlNamespace();
        namespace.setAlias(prefix);
        namespace.setUri(uri);
        namespaces.add(namespace);
    }

    /**
     * Maps an element that contains child elements as a complex type under {@code parent},
     * then maps its attributes and element children recursively.
     *
     * <p>If {@code parent} already holds a complex field with the same name, that field is
     * reused and its collection type is re-evaluated; otherwise a new complex type is added.
     *
     * @param node   element to map; ignored when it has no child nodes
     * @param parent complex type of the enclosing element
     */
    private void mapParentNode(Node node, XmlComplexType parent) {
        if (!node.hasChildNodes()) {
            return;
        }
        XmlField[] existing = parent.getXmlFields().getXmlField().stream()
                .filter(f -> f.getName().equals(node.getNodeName()) && f.getFieldType() == FieldType.COMPLEX)
                .toArray(XmlField[]::new);
        XmlComplexType complexType;
        if (existing.length > 0) {
            complexType = (XmlComplexType) existing[0];
            if (existing.length > 1) {
                LOG.warn("Ignoring duplicate complex field '{}'", complexType.getPath());
            }
            updateCollectionType(node.getParentNode(), complexType);
        } else {
            complexType = createXmlComplexType(node, parent);
            parent.getXmlFields().getXmlField().add(complexType);
        }
        if (node.hasAttributes()) {
            mapAttributes(node, complexType);
        }
        NodeList childNodes = node.getChildNodes();
        for (int i = 0; i < childNodes.getLength(); i++) {
            Node child = childNodes.item(i);
            if (child.getNodeType() != Node.ELEMENT_NODE) {
                continue;
            }
            if (hasChildElements(child)) {
                mapParentNode(child, complexType);
            } else {
                mapNodeToXmlField(child, complexType);
                // Leaf elements have no complex type of their own, so their attributes
                // are attached to the enclosing complex type.
                if (child.hasAttributes()) {
                    mapAttributes(child, complexType);
                }
            }
        }
        if (node.getNamespaceURI() != null) {
            mapNamespace(node);
        }
    }

    /**
     * Maps the attributes of {@code node} as attribute fields of {@code xmlComplexType}.
     *
     * <p>Namespace declarations ({@code xmlns} / {@code xmlns:*}) are skipped. Attributes in
     * the XML Schema instance namespace are never mapped as fields; their namespace is
     * recorded, and only {@code xsi:type} sets the type name of the complex type (other xsi
     * attributes such as {@code xsi:nil} or {@code xsi:schemaLocation} do not carry a type).
     *
     * @param node           node whose attributes are mapped; nodes without an attribute
     *                       map are ignored
     * @param xmlComplexType complex type receiving the attribute fields
     */
    private void mapAttributes(Node node, XmlComplexType xmlComplexType) {
        NamedNodeMap attrs = node.getAttributes();
        if (attrs == null) {
            return;
        }
        for (int i = 0; i < attrs.getLength(); i++) {
            Node attrNode = attrs.item(i);
            String namespaceUri = attrNode.getNamespaceURI();
            if (XMLConstants.XMLNS_ATTRIBUTE_NS_URI.equals(namespaceUri)) {
                // Namespace declarations are not data.
                continue;
            }
            if (XMLConstants.W3C_XML_SCHEMA_INSTANCE_NS_URI.equals(namespaceUri)) {
                mapNamespace(attrNode);
                if (XSI_TYPE_LOCAL_NAME.equals(attrNode.getLocalName())) {
                    xmlComplexType.setTypeName(attrNode.getTextContent());
                }
                continue;
            }
            if (namespaceUri != null) {
                mapNamespace(attrNode);
            }
            mapNodeToXmlField(attrNode, xmlComplexType);
        }
    }

    /**
     * Maps a leaf element or an attribute as a string field of {@code parentComplexType}.
     *
     * <p>If the parent already holds a field of the same name and kind (attribute vs.
     * element), that field is reused instead of adding a second one. For elements the
     * collection type is re-evaluated against the element's siblings. The node's namespace,
     * if any, is recorded on the document.
     *
     * @param node              element or attribute node to map
     * @param parentComplexType complex type that owns the resulting field
     */
    private void mapNodeToXmlField(Node node, XmlComplexType parentComplexType) {
        boolean attribute = node.getNodeType() == Node.ATTRIBUTE_NODE;
        XmlField xmlField = findExistingField(parentComplexType, node.getNodeName(), attribute);
        if (xmlField == null) {
            xmlField = AtlasXmlModelFactory.createXmlField();
            xmlField.setValue(node.getTextContent());
            xmlField.setFieldType(FieldType.STRING);
            xmlField.setName(node.getNodeName());
            xmlField.setStatus(FieldStatus.SUPPORTED);
            xmlField.setAttribute(attribute);
            parentComplexType.getXmlFields().getXmlField().add(xmlField);
            StringBuilder fieldPath = new StringBuilder();
            if (attribute) {
                fieldPath.append(XmlPath.PATH_ATTRIBUTE_PREFIX);
            }
            fieldPath.append(node.getNodeName());
            XmlPath path = new XmlPath(parentComplexType.getPath());
            path.appendField(fieldPath.toString());
            xmlField.setPath(path.toString());
        }
        if (node.getNodeType() == Node.ELEMENT_NODE) {
            updateCollectionType(node.getParentNode(), xmlField);
        }
        if (node.getNamespaceURI() != null) {
            mapNamespace(node);
        }
    }

    /**
     * Looks up an already mapped field with the given name and kind under {@code parent}.
     *
     * @param parent    complex type whose fields are searched
     * @param name      node name to match against the field name
     * @param attribute {@code true} to match attribute fields, {@code false} for element fields
     * @return the first match, or {@code null} if there is none; additional matches are
     *         logged and ignored
     */
    private XmlField findExistingField(XmlComplexType parent, String name, boolean attribute) {
        XmlField[] existing = parent.getXmlFields().getXmlField().stream()
                .filter(f -> f.getName().equals(name) && f.isAttribute() == attribute)
                .toArray(XmlField[]::new);
        if (existing.length == 0) {
            return null;
        }
        if (existing.length > 1) {
            if (attribute) {
                LOG.warn("Ignoring duplicated attribute '{}'", existing[0].getPath());
            } else {
                LOG.warn("Ignoring duplicated element '{}'", existing[0].getPath());
            }
        }
        return existing[0];
    }

    /**
     * Creates a complex type for an element and computes its path.
     *
     * <p>Without a parent field the path is the separator followed by the node name and no
     * collection type is set. With a parent field the path extends the parent's path, and
     * the collection type is {@link CollectionType#LIST} (with list markers appended to the
     * path segment) when the element name repeats among its siblings, otherwise
     * {@link CollectionType#NONE}.
     *
     * @param childNode   element to represent
     * @param parentField complex type of the enclosing element, or {@code null} for the root
     * @return the new complex type; it is not yet added to any parent
     */
    private XmlComplexType createXmlComplexType(Node childNode, XmlComplexType parentField) {
        XmlComplexType childComplexType = XmlComplexTypeFactory.createXmlComlexField();
        childComplexType.setXmlFields(new XmlFields());
        childComplexType.setName(childNode.getNodeName());
        XmlPath path;
        if (parentField == null) {
            path = new XmlPath(XmlPath.PATH_SEPARATOR + childNode.getNodeName());
        } else {
            path = new XmlPath(parentField.getPath());
            Element parentElement = (Element) childNode.getParentNode();
            if (isCollection(parentElement, childNode.getNodeName())) {
                childComplexType.setCollectionType(CollectionType.LIST);
                path.appendField(childNode.getNodeName() + XmlPath.PATH_LIST_START + XmlPath.PATH_LIST_END);
            } else {
                childComplexType.setCollectionType(CollectionType.NONE);
                path.appendField(childNode.getNodeName());
            }
        }
        childComplexType.setPath(path.toString());
        return childComplexType;
    }

    /**
     * Promotes a field to {@link CollectionType#LIST} when its name repeats under
     * {@code parentNode}, appending list markers to its path and rebuilding the paths of
     * all fields nested below it.
     *
     * <p>Does nothing when the field is already a list or the parent is not an element.
     *
     * @param parentNode DOM parent of the node the field was mapped from
     * @param field      field to re-evaluate
     */
    private void updateCollectionType(Node parentNode, XmlField field) {
        if (field.getCollectionType() == CollectionType.LIST
                || parentNode.getNodeType() != Node.ELEMENT_NODE) {
            return;
        }
        if (!isCollection((Element) parentNode, field.getName())) {
            return;
        }
        field.setCollectionType(CollectionType.LIST);
        field.setPath(field.getPath() + XmlPath.PATH_LIST_START + XmlPath.PATH_LIST_END);
        // Descendant paths were built from the old parent path and must be rebuilt.
        if (field instanceof XmlComplexType) {
            XmlComplexType complex = (XmlComplexType) field;
            for (XmlField child : complex.getXmlFields().getXmlField()) {
                updateFieldPathFromParent(child, complex);
            }
        }
    }

    /**
     * Tells whether more than one direct child element of {@code parent} has the given
     * node name.
     */
    private boolean isCollection(Element parent, String name) {
        NodeList siblings = parent.getChildNodes();
        int occurrences = 0;
        for (int i = 0; i < siblings.getLength(); i++) {
            Node sibling = siblings.item(i);
            if (sibling.getNodeType() == Node.ELEMENT_NODE && sibling.getNodeName().equals(name)) {
                occurrences++;
                if (occurrences > 1) {
                    return true;
                }
            }
        }
        return false;
    }

    /**
     * Rebuilds the path of {@code child} from the current path of {@code parent} plus the
     * child's own last path segment, then does the same for everything nested below it.
     */
    private void updateFieldPathFromParent(XmlField child, XmlComplexType parent) {
        XmlPath oldPath = new XmlPath(child.getPath());
        XmlPath newPath = new XmlPath(parent.getPath());
        newPath.appendField(oldPath.getLastSegment().getExpression());
        child.setPath(newPath.toString());
        if (child instanceof XmlComplexType) {
            XmlComplexType complexChild = (XmlComplexType) child;
            for (XmlField grandChild : complexChild.getXmlFields().getXmlField()) {
                updateFieldPathFromParent(grandChild, complexChild);
            }
        }
    }
}