/*
 * Copyright 2005-2010 the original author or authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.wamblee.xml;

import org.apache.xml.serialize.OutputFormat;
import org.apache.xml.serialize.XMLSerializer;

import org.dom4j.DocumentException;
import org.dom4j.io.DOMReader;
import org.dom4j.io.DOMWriter;

import org.w3c.dom.Attr;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

import org.xml.sax.ErrorHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.StringReader;

import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import java.util.logging.Level;
import java.util.logging.Logger;

import javax.xml.XMLConstants;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.stream.StreamSource;
import javax.xml.validation.Schema;
import javax.xml.validation.SchemaFactory;

/**
 * Some basic XML utilities for common recurring tasks for DOM documents.
 *
 * @author Erik Brakkee
 */
public final class DomUtils {
    private static final Logger LOG = Logger.getLogger(DomUtils.class
        .getName());

    /**
     * Encoding used when serializing a document to bytes, and when turning
     * those bytes back into a string in {@link #serialize(Document)}.
     */
    private static final String ENCODING = "UTF-8";

    /**
     * Disabled default constructor.
     */
    private DomUtils() {
        // Empty.
    }

    /**
     * Parses an XML document from a string.
     *
     * The string is handed to the parser as a character stream, so the result
     * does not depend on the platform default charset or on the encoding named
     * in the XML declaration.
     *
     * @param aDocument
     *            Document text.
     *
     * @return Parsed document.
     *
     * @throws XMLException
     *             In case the document cannot be parsed.
     */
    public static Document read(String aDocument) throws XMLException {
        try {
            DocumentBuilder builder = DocumentBuilderFactory.newInstance()
                .newDocumentBuilder();

            return builder
                .parse(new InputSource(new StringReader(aDocument)));
        } catch (SAXException e) {
            throw new XMLException(e.getMessage(), e);
        } catch (IOException e) {
            throw new XMLException(e.getMessage(), e);
        } catch (ParserConfigurationException e) {
            throw new XMLException(e.getMessage(), e);
        }
    }

    /**
     * Parses an XML document from a stream. The stream is always closed
     * afterwards; a failure to close it is logged and not propagated.
     *
     * @param aIs
     *            Input stream.
     *
     * @return Parsed document.
     *
     * @throws XMLException
     *             In case the document cannot be read or parsed.
     */
    public static Document read(InputStream aIs) throws XMLException {
        try {
            // NOTE(review): the parser is used with its default configuration;
            // if untrusted input can reach this method, consider restricting
            // DTD / external entity processing -- confirm against callers.
            DocumentBuilder builder = DocumentBuilderFactory.newInstance()
                .newDocumentBuilder();

            return builder.parse(aIs);
        } catch (SAXException e) {
            throw new XMLException(e.getMessage(), e);
        } catch (IOException e) {
            throw new XMLException(e.getMessage(), e);
        } catch (ParserConfigurationException e) {
            throw new XMLException(e.getMessage(), e);
        } finally {
            try {
                aIs.close();
            } catch (Exception e) {
                LOG.log(Level.WARNING, "Error closing XML file", e);
            }
        }
    }

    /**
     * Reads and validates a document against a W3C XML schema. Both streams
     * are always closed afterwards; failures to close them are logged and not
     * propagated.
     *
     * @param aIs
     *            Input stream of the document.
     * @param aSchema
     *            Input stream of the schema.
     *
     * @return Parsed and validated document.
     *
     * @throws XMLException
     *             In case the schema or document cannot be read or parsed, or
     *             the document does not conform to the schema.
     */
    public static Document readAndValidate(InputStream aIs,
        InputStream aSchema) throws XMLException {
        try {
            final Schema schema = SchemaFactory.newInstance(
                XMLConstants.W3C_XML_SCHEMA_NS_URI).newSchema(
                new StreamSource(aSchema));

            final DocumentBuilderFactory factory = DocumentBuilderFactory
                .newInstance();
            // The "validating" flag controls DTD validation only. Schema
            // validation is enabled through setSchema(), so the flag is left
            // off to avoid spurious DTD errors for documents without DOCTYPE.
            factory.setValidating(false);
            factory.setNamespaceAware(true);
            factory.setSchema(schema);

            final DocumentBuilder builder = factory.newDocumentBuilder();
            // Schema violations are reported as recoverable errors; without a
            // handler that rethrows them, parse() would still return a
            // document that is not valid.
            builder.setErrorHandler(new StrictErrorHandler());

            return builder.parse(aIs);
        } catch (SAXException e) {
            throw new XMLException(e.getMessage(), e);
        } catch (IOException e) {
            throw new XMLException(e.getMessage(), e);
        } catch (ParserConfigurationException e) {
            throw new XMLException(e.getMessage(), e);
        } finally {
            try {
                aSchema.close();
            } catch (Exception e) {
                LOG.log(Level.WARNING, "Error closing schema", e);
            }

            try {
                aIs.close();
            } catch (Exception e) {
                LOG.log(Level.WARNING, "Error closing XML file", e);
            }
        }
    }

    /**
     * Serializes an XML document to a stream. The stream is not closed by
     * this method.
     *
     * @param aDocument
     *            Document to serialize.
     * @param aOs
     *            Output stream.
     *
     * @throws IOException
     *             In case of problems writing to the stream.
     */
    public static void serialize(Document aDocument, OutputStream aOs)
        throws IOException {
        OutputFormat format = new OutputFormat();
        // Made explicit so that serialize(Document) can decode the bytes with
        // the very same encoding.
        format.setEncoding(ENCODING);

        XMLSerializer serializer = new XMLSerializer(aOs, format);
        serializer.serialize(aDocument);
    }

    /**
     * Serializes an XML document to a string.
     *
     * @param aDocument
     *            Document to serialize.
     *
     * @return Serialized document.
     *
     * @throws IOException
     *             In case of problems serializing the document.
     */
    public static String serialize(Document aDocument) throws IOException {
        ByteArrayOutputStream os = new ByteArrayOutputStream();
        serialize(aDocument, os);

        // Decode with the encoding the serializer wrote, not with the
        // platform default charset.
        return os.toString(ENCODING);
    }

    /**
     * Converts a dom4j document into a w3c DOM document.
     *
     * @param aDocument
     *            Document to convert.
     *
     * @return W3C DOM document.
     *
     * @throws DocumentException
     *             In case the conversion fails.
     */
    public static Document convert(org.dom4j.Document aDocument)
        throws DocumentException {
        return new DOMWriter().write(aDocument);
    }

    /**
     * Converts a W3C DOM document into a dom4j document.
     *
     * @param aDocument
     *            Document to convert.
     *
     * @return Dom4j document.
     */
    public static org.dom4j.Document convert(Document aDocument) {
        return new DOMReader().read(aDocument);
    }

    /**
     * Removes duplicate attributes from a DOM tree. This is useful for
     * postprocessing the output of JTidy as a workaround for a bug in JTidy.
     *
     * @param aNode
     *            Node to remove duplicate attributes from (recursively).
     *            Attributes of the node itself are not dealt with. Only the
     *            child nodes are dealt with.
     */
    public static void removeDuplicateAttributes(Node aNode) {
        NodeList list = aNode.getChildNodes();

        for (int i = 0; i < list.getLength(); i++) {
            Node node = list.item(i);

            if (node instanceof Element) {
                // First clean the element itself, then descend into it.
                removeDuplicateAttributes((Element) node);
                removeDuplicateAttributes(node);
            }
        }
    }

    /**
     * Removes duplicate attributes from an element. When several attributes
     * share a node name, the one encountered last is kept.
     *
     * @param aElement
     *            Element.
     */
    private static void removeDuplicateAttributes(Element aElement) {
        NamedNodeMap attributes = aElement.getAttributes();
        Map<String, Attr> uniqueAttributes = new TreeMap<String, Attr>();
        List<Attr> attlist = new ArrayList<Attr>();

        // Take a snapshot first: the attribute map must not be modified while
        // it is being indexed.
        for (int i = 0; i < attributes.getLength(); i++) {
            Attr attribute = (Attr) attributes.item(i);

            if (uniqueAttributes.containsKey(attribute.getNodeName())) {
                LOG.info("Detected duplicate attribute (will be removed)'" +
                    attribute.getNodeName() + "'");
            }

            uniqueAttributes.put(attribute.getNodeName(), attribute);
            attlist.add(attribute);
        }

        // Remove all attributes from the element.
        for (Attr att : attlist) {
            aElement.removeAttributeNode(att);
        }

        // Add the unique attributes back to the element.
        for (Attr att : uniqueAttributes.values()) {
            aElement.setAttributeNode(att);
        }
    }

    /**
     * Error handler that turns every parse or validation error into an
     * exception, so that an invalid document is never returned to the caller.
     * Warnings are only logged.
     */
    private static final class StrictErrorHandler implements ErrorHandler {
        public void warning(SAXParseException aException)
            throws SAXException {
            LOG.log(Level.WARNING, "XML parser warning", aException);
        }

        public void error(SAXParseException aException) throws SAXException {
            throw aException;
        }

        public void fatalError(SAXParseException aException)
            throws SAXException {
            throw aException;
        }
    }
}