CsvDfdlSchemaGenerator.java
/*
* Copyright (C) 2017 Red Hat, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.atlasmap.dfdl.core.schema;
import java.io.InputStream;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.regex.Pattern;

import javax.xml.namespace.NamespaceContext;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathFactory;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;

import io.atlasmap.api.AtlasException;
import io.atlasmap.dfdl.core.DfdlConstants;
import io.atlasmap.dfdl.core.DfdlSchemaGenerator;
import io.atlasmap.xml.core.XmlIOHelper;
/**
 * An implementation of {@code DfdlSchemaGenerator} for CSV documents.
 *
 * <p>This class uses "csv-template.dfdl.xsd" as a template. Field names are taken from the
 * header option or, when no header is given, from the first line of the example data. One
 * {@code xs:element} per field name is appended to the record sequence of the template.
 */
public class CsvDfdlSchemaGenerator implements DfdlSchemaGenerator {

    /** Name returned by {@link #getName()}. */
    public static final String NAME = "csv";

    /** Delimiter used when the {@link Options#DELIMITER} option is not supplied. */
    public static final String DEFAULT_DELIMITER = ",";

    private static final Logger LOG = LoggerFactory.getLogger(CsvDfdlSchemaGenerator.class);
    private static final String TEMPLATE_FILE = "csv-template.dfdl.xsd";
    private static final String NS_XS = "http://www.w3.org/2001/XMLSchema";
    private static final String NS_DFDL = "http://www.ogf.org/dfdl/dfdl-1.0/";
    private static final String NS_ATLAS = "http://atlasmap.io/dfdl/csv";

    /** Only used to serialize the generated schema for debug logging. */
    private final XmlIOHelper helper = new XmlIOHelper(CsvDfdlSchemaGenerator.class.getClassLoader());

    /** Keys of the options read by {@link #generate(ClassLoader, Map)}. */
    public enum Options {
        /** CSV header line; its first line supplies the field names. */
        HEADER(DfdlConstants.OPTION_PREFIX + ".csv.header"),
        /** Example CSV data; its first line is used as the header when no header is given. */
        EXAMPLE(DfdlConstants.OPTION_EXAMPLE_DATA),
        /** Field delimiter; {@link #DEFAULT_DELIMITER} is used when this option is absent. */
        DELIMITER(DfdlConstants.OPTION_PREFIX + ".csv.delimiter");

        private final String value;

        Options(String value) {
            this.value = value;
        }

        /**
         * Returns the option key.
         *
         * @return the key under which this option is looked up in the options map
         */
        public String value() {
            return this.value;
        }
    }

    /**
     * Returns the name of this generator.
     *
     * @return {@link #NAME}
     */
    @Override
    public String getName() {
        return NAME;
    }

    /**
     * Returns the keys of all options read by this generator.
     *
     * @return the {@link Options#value()} of every {@link Options} constant
     */
    @Override
    public String[] getOptions() {
        return EnumSet.allOf(Options.class).stream().map(Options::value).toArray(String[]::new);
    }

    /**
     * Generates a CSV DFDL schema by filling the template with one string element per field.
     *
     * @param classLoader class loader used to load the template resource
     * @param options option map keyed by {@link Options#value()}; at least one of
     *        {@link Options#HEADER} or {@link Options#EXAMPLE} must be non-empty
     * @return the template document with the separator set and the field elements appended
     * @throws AtlasException if neither header nor example is specified, the header line is
     *         empty, the template cannot be found, or the template lacks the expected structure
     * @throws Exception if the template cannot be parsed or the XPath lookup fails
     */
    @Override
    public Document generate(ClassLoader classLoader, Map<String, String> options) throws Exception {
        String header = options.get(Options.HEADER.value());
        String example = options.get(Options.EXAMPLE.value());
        String delimiter = options.get(Options.DELIMITER.value());

        boolean hasHeader = header != null && !header.isEmpty();
        boolean hasExample = example != null && !example.isEmpty();
        if (!hasHeader && !hasExample) {
            throw new AtlasException(String.format("'%s' or '%s' must be specified to generate CSV DFDL schema",
                    Options.HEADER.value(), Options.EXAMPLE.value()));
        }

        // A limit of 2 keeps leading/trailing empty strings, so index 0 always exists even when
        // the input consists solely of line breaks.
        String headerLine = (hasHeader ? header : example).split("\\R", 2)[0];
        if (headerLine.isEmpty()) {
            throw new AtlasException(String.format(
                    "CSV header line is empty: the first line of '%s' must contain the field names",
                    hasHeader ? Options.HEADER.value() : Options.EXAMPLE.value()));
        }

        if (delimiter == null) {
            delimiter = DEFAULT_DELIMITER;
        }
        // The delimiter is literal text, not a regular expression: quote it so that delimiters
        // such as "|" or "." split the header as intended.
        String[] fieldNames = headerLine.split(Pattern.quote(delimiter));

        Document xsd;
        try (InputStream is = classLoader.getResourceAsStream(TEMPLATE_FILE)) {
            if (is == null) {
                throw new AtlasException(String.format(
                        "DFDL template for CSV format could not be found: '%s'", TEMPLATE_FILE));
            }
            DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
            factory.setNamespaceAware(true);
            xsd = factory.newDocumentBuilder().parse(is);
        }

        XPath xpath = XPathFactory.newInstance().newXPath();
        xpath.setNamespaceContext(new NamespaceResolver());
        String targetPath = "//xs:schema/xs:element[@name='file']/xs:complexType/xs:sequence"
                + "/xs:element[@name='record']/xs:complexType/xs:sequence";
        Node parentNode = (Node) xpath.compile(targetPath).evaluate(xsd, XPathConstants.NODE);
        if (parentNode == null) {
            throw new AtlasException(String.format("Invalid DFDL template for CSV format: path '%s' could not be found", targetPath));
        }

        Node separator = parentNode.getAttributes().getNamedItemNS(NS_DFDL, "separator");
        if (separator == null) {
            throw new AtlasException(String.format(
                    "Invalid DFDL template for CSV format: separator attribute could not be found on path '%s'",
                    targetPath));
        }
        separator.setNodeValue(delimiter);

        for (String fieldName : fieldNames) {
            Element e = xsd.createElementNS(NS_XS, "element");
            e.setAttribute("name", fieldName);
            e.setAttribute("type", "xs:string");
            e.setAttribute("minOccurs", "1");
            e.setAttribute("maxOccurs", "1");
            parentNode.appendChild(e);
        }

        if (LOG.isDebugEnabled()) {
            LOG.debug("Generated CSV DFDL Schema:");
            LOG.debug(helper.writeDocumentToString(false, xsd));
        }
        return xsd;
    }

    /**
     * Namespace context binding the {@code xs}, {@code dfdl} and {@code atlas} prefixes used in
     * the XPath expression evaluated against the template.
     */
    static class NamespaceResolver implements NamespaceContext {
        private final Map<String, String> nsmap = new HashMap<>();

        NamespaceResolver() {
            nsmap.put("xs", NS_XS);
            nsmap.put("dfdl", NS_DFDL);
            nsmap.put("atlas", NS_ATLAS);
        }

        /**
         * Looks up the namespace URI bound to a prefix.
         *
         * @param prefix the prefix to resolve
         * @return the bound namespace URI, or {@code null} if the prefix is not registered
         */
        @Override
        public String getNamespaceURI(String prefix) {
            return nsmap.get(prefix);
        }

        /**
         * Looks up a prefix bound to a namespace URI.
         *
         * @param namespaceURI the namespace URI to look up
         * @return a registered prefix for the URI, or {@code null} if none is registered
         */
        @Override
        public String getPrefix(String namespaceURI) {
            for (Entry<String, String> set : nsmap.entrySet()) {
                if (set.getValue().equals(namespaceURI)) {
                    return set.getKey();
                }
            }
            return null;
        }

        /**
         * Looks up every prefix bound to a namespace URI.
         *
         * @param namespaceURI the namespace URI to look up
         * @return an iterator over all registered prefixes for the URI; empty if there are none
         */
        @Override
        public Iterator<String> getPrefixes(String namespaceURI) {
            List<String> prefixes = new LinkedList<>();
            for (Entry<String, String> set : nsmap.entrySet()) {
                if (set.getValue().equals(namespaceURI)) {
                    prefixes.add(set.getKey());
                }
            }
            return prefixes.iterator();
        }
    }
}