/*
 * Copyright 2005 the original author or authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16 package org.wamblee.xml;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import org.apache.xml.serialize.OutputFormat;
import org.apache.xml.serialize.XMLSerializer;

import org.dom4j.DocumentException;

import org.dom4j.io.DOMReader;
import org.dom4j.io.DOMWriter;

import org.w3c.dom.Attr;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

import org.xml.sax.ErrorHandler;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;

import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;

import javax.xml.XMLConstants;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.stream.StreamSource;
import javax.xml.validation.Schema;
import javax.xml.validation.SchemaFactory;
/**
 * Some basic XML utilities for common recurring tasks for DOM documents.
 *
 * @author Erik Brakkee
 */
63 public final class DomUtils {
/**
 * Logger of this class.
 */
private static final Log LOG = LogFactory.getLog(DomUtils.class);

/**
 * Disabled default constructor. This class only offers static utility
 * methods, so creating instances of it is prevented.
 */
private DomUtils() {
    // Empty.
}
78 * Parses an XML document from a string.
80 * @param aDocument document.
84 * @throws XMLException DOCUMENT ME!
86 public static Document read(String aDocument) throws XMLException {
87 ByteArrayInputStream is = new ByteArrayInputStream(aDocument.getBytes());
93 * Parses an XML document from a stream.
95 * @param aIs Input stream.
99 * @throws XMLException DOCUMENT ME!
101 public static Document read(InputStream aIs) throws XMLException {
103 DocumentBuilder builder = DocumentBuilderFactory.newInstance()
104 .newDocumentBuilder();
106 return builder.parse(aIs);
107 } catch (SAXException e) {
108 throw new XMLException(e.getMessage(), e);
109 } catch (IOException e) {
110 throw new XMLException(e.getMessage(), e);
111 } catch (ParserConfigurationException e) {
112 throw new XMLException(e.getMessage(), e);
116 } catch (Exception e) {
117 LOG.warn("Error closing XML file", e);
123 * Reads and validates a document against a schema.
125 * @param aIs Input stream.
126 * @param aSchema Schema.
128 * @return Parsed and validated document.
130 * @throws XMLException DOCUMENT ME!
132 public static Document readAndValidate(InputStream aIs, InputStream aSchema)
133 throws XMLException {
135 final Schema schema = SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI)
136 .newSchema(new StreamSource(aSchema));
138 final DocumentBuilderFactory factory = DocumentBuilderFactory
140 factory.setValidating(true);
141 factory.setNamespaceAware(true);
142 factory.setSchema(schema);
144 return factory.newDocumentBuilder().parse(aIs);
145 } catch (SAXException e) {
146 throw new XMLException(e.getMessage(), e);
147 } catch (IOException e) {
148 throw new XMLException(e.getMessage(), e);
149 } catch (ParserConfigurationException e) {
150 throw new XMLException(e.getMessage(), e);
154 } catch (Exception e) {
155 LOG.warn("Error closing schema", e);
160 } catch (Exception e) {
161 LOG.warn("Error closing XML file", e);
167 * Serializes an XML document to a stream.
169 * @param aDocument Document to serialize.
170 * @param aOs Output stream.
172 * @throws IOException DOCUMENT ME!
174 public static void serialize(Document aDocument, OutputStream aOs)
176 XMLSerializer serializer = new XMLSerializer(aOs, new OutputFormat());
177 serializer.serialize(aDocument);
181 * Serializes an XML document.
183 * @param aDocument Document to serialize.
185 * @return Serialized document.
187 * @throws IOException DOCUMENT ME!
189 public static String serialize(Document aDocument)
191 ByteArrayOutputStream os = new ByteArrayOutputStream();
192 serialize(aDocument, os);
194 return os.toString();
198 * Converts a dom4j document into a w3c DOM document.
200 * @param aDocument Document to convert.
202 * @return W3C DOM document.
204 * @throws DocumentException DOCUMENT ME!
206 public static Document convert(org.dom4j.Document aDocument)
207 throws DocumentException {
208 return new DOMWriter().write(aDocument);
212 * Converts a W3C DOM document into a dom4j document.
214 * @param aDocument Document to convert.
216 * @return Dom4j document.
218 public static org.dom4j.Document convert(Document aDocument) {
219 return new DOMReader().read(aDocument);
223 * Removes duplicate attributes from a DOM tree.This is useful for
224 * postprocessing the output of JTidy as a workaround for a bug in JTidy.
226 * @param aNode Node to remove duplicate attributes from (recursively).
227 * Attributes of the node itself are not dealt with. Only the child
228 * nodes are dealt with.
230 public static void removeDuplicateAttributes(Node aNode) {
231 NodeList list = aNode.getChildNodes();
233 for (int i = 0; i < list.getLength(); i++) {
234 Node node = list.item(i);
236 if (node instanceof Element) {
237 removeDuplicateAttributes((Element) node);
238 removeDuplicateAttributes(node);
244 * Removes duplicate attributes from an element.
246 * @param aElement Element.
248 private static void removeDuplicateAttributes(Element aElement) {
249 NamedNodeMap attributes = aElement.getAttributes();
250 Map<String, Attr> uniqueAttributes = new TreeMap<String, Attr>();
251 List<Attr> attlist = new ArrayList<Attr>();
253 for (int i = 0; i < attributes.getLength(); i++) {
254 Attr attribute = (Attr) attributes.item(i);
256 if (uniqueAttributes.containsKey(attribute.getNodeName())) {
257 LOG.info("Detected duplicate attribute (will be removed)'"
258 + attribute.getNodeName() + "'");
261 uniqueAttributes.put(attribute.getNodeName(), attribute);
262 attlist.add(attribute);
265 // Remove all attributes from the element.
266 for (Attr att : attlist) {
267 aElement.removeAttributeNode(att);
270 // Add the unique attributes back to the element.
271 for (Attr att : uniqueAttributes.values()) {
272 aElement.setAttributeNode(att);