/* * Copyright 2005-2010 the original author or authors. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.wamblee.xml; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.util.ArrayList; import java.util.List; import java.util.Map; import java.util.TreeMap; import java.util.logging.Level; import java.util.logging.Logger; import javax.xml.XMLConstants; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; import javax.xml.transform.Transformer; import javax.xml.transform.TransformerException; import javax.xml.transform.TransformerFactory; import javax.xml.transform.dom.DOMSource; import javax.xml.transform.stream.StreamResult; import javax.xml.transform.stream.StreamSource; import javax.xml.validation.Schema; import javax.xml.validation.SchemaFactory; import javax.xml.validation.Validator; import org.w3c.dom.Attr; import org.w3c.dom.Document; import org.w3c.dom.Element; import org.w3c.dom.NamedNodeMap; import org.w3c.dom.Node; import org.w3c.dom.NodeList; import org.w3c.dom.bootstrap.DOMImplementationRegistry; import org.w3c.dom.ls.DOMImplementationLS; import org.w3c.dom.ls.LSException; import org.w3c.dom.ls.LSInput; import org.w3c.dom.ls.LSParser; import org.xml.sax.SAXException; /** * Some basic XML utilities for common reoccuring tasks for DOM documents. * * @author Erik Brakkee */ public final class DomUtils { private static final Logger LOG = Logger .getLogger(DomUtils.class.getName()); /** * Disabled default constructor. * */ private DomUtils() { // Empty. } /** * Parses an XML document from a string. * * @param aDocument * document. * * @return * */ public static Document read(String aDocument) throws XMLException { ByteArrayInputStream is = new ByteArrayInputStream(aDocument.getBytes()); return read(is); } /** * Parses an XML document from a stream. * * @param aIs * Input stream. * * @return * */ public static Document read(InputStream aIs) throws XMLException { try { DOMImplementationRegistry registry = DOMImplementationRegistry .newInstance(); DOMImplementationLS impl = (DOMImplementationLS) registry .getDOMImplementation("LS"); LSParser builder = impl.createLSParser( DOMImplementationLS.MODE_SYNCHRONOUS, null); LSInput input = impl.createLSInput(); input.setByteStream(aIs); return builder.parse(input); } catch (IllegalAccessException e) { throw new XMLException(e.getMessage(), e); } catch (InstantiationException e) { throw new XMLException(e.getMessage(), e); } catch (ClassNotFoundException e) { throw new XMLException(e.getMessage(), e); } catch (LSException e) { throw new XMLException(e.getMessage(), e); } finally { try { aIs.close(); } catch (Exception e) { LOG.log(Level.WARNING, "Error closing XML file", e); } } } /** * Reads and validates a document against a schema. * * @param aIs * Input stream. * @param aSchema * Schema. * * @return Parsed and validated document. * */ public static Document readAndValidate(InputStream aIs, InputStream aSchema) throws XMLException { try { Document doc = read(aIs); final Schema schema = SchemaFactory.newInstance( XMLConstants.W3C_XML_SCHEMA_NS_URI).newSchema( new StreamSource(aSchema)); Validator validator = schema.newValidator(); validator.validate(new DOMSource(doc)); return doc; } catch (SAXException e) { throw new XMLException(e.getMessage(), e); } catch (IOException e) { throw new XMLException(e.getMessage(), e); } finally { try { aSchema.close(); } catch (Exception e) { LOG.log(Level.WARNING, "Error closing schema", e); } try { aIs.close(); } catch (Exception e) { LOG.log(Level.WARNING, "Error closing XML file", e); } } } /** * Serializes an XML document to a stream. * * @param aDocument * Document to serialize. * @param aOs * Output stream. * */ public static void serialize(Document aDocument, OutputStream aOs) throws IOException { try { TransformerFactory factory = TransformerFactory.newInstance(); Transformer identityTransform = factory.newTransformer(); DOMSource source = new DOMSource(aDocument); StreamResult result = new StreamResult(aOs); identityTransform.transform(source, result); } catch (TransformerException e) { throw new IOException(e.getMessage(), e); } } /** * Serializes an XML document. * * @param aDocument * Document to serialize. * * @return Serialized document. * */ public static String serialize(Document aDocument) throws IOException { ByteArrayOutputStream os = new ByteArrayOutputStream(); serialize(aDocument, os); return os.toString(); } /** * Removes duplicate attributes from a DOM tree.This is useful for * postprocessing the output of JTidy as a workaround for a bug in JTidy. * * @param aNode * Node to remove duplicate attributes from (recursively). * Attributes of the node itself are not dealt with. Only the * child nodes are dealt with. */ public static void removeDuplicateAttributes(Node aNode) { NodeList list = aNode.getChildNodes(); for (int i = 0; i < list.getLength(); i++) { Node node = list.item(i); if (node instanceof Element) { removeDuplicateAttributes((Element) node); removeDuplicateAttributes(node); } } } /** * Removes duplicate attributes from an element. * * @param aElement * Element. */ private static void removeDuplicateAttributes(Element aElement) { NamedNodeMap attributes = aElement.getAttributes(); Map uniqueAttributes = new TreeMap(); List attlist = new ArrayList(); for (int i = 0; i < attributes.getLength(); i++) { Attr attribute = (Attr) attributes.item(i); if (uniqueAttributes.containsKey(attribute.getNodeName())) { LOG.info("Detected duplicate attribute (will be removed)'" + attribute.getNodeName() + "'"); } uniqueAttributes.put(attribute.getNodeName(), attribute); attlist.add(attribute); } // Remove all attributes from the element. for (Attr att : attlist) { aElement.removeAttributeNode(att); } // Add the unique attributes back to the element. for (Attr att : uniqueAttributes.values()) { aElement.setAttributeNode(att); } } }