2 * Copyright 2005 the original author or authors.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 package org.wamblee.xml;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;

import javax.xml.XMLConstants;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.stream.StreamSource;
import javax.xml.validation.Schema;
import javax.xml.validation.SchemaFactory;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.xml.serialize.OutputFormat;
import org.apache.xml.serialize.XMLSerializer;
import org.dom4j.DocumentException;
import org.dom4j.io.DOMReader;
import org.dom4j.io.DOMWriter;
import org.w3c.dom.Attr;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.ErrorHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;

import com.sun.org.apache.xerces.internal.jaxp.DocumentBuilderFactoryImpl;
import com.sun.org.apache.xerces.internal.jaxp.validation.xs.SchemaFactoryImpl;
/** Some basic XML utilities for common recurring tasks on DOM documents. */
58 public final class DomUtils {
/** Logger shared by the static helpers of this class. */
private static final Log LOG = LogFactory.getLog(DomUtils.class);

/**
 * Disabled default constructor.
 */
private DomUtils() {
    // Only static helpers live here; declaring the constructor private
    // keeps the compiler from generating a public one.
}
71 * Parses an XML document from a string.
77 public static Document read(String aDocument) throws XMLException {
78 ByteArrayInputStream is = new ByteArrayInputStream(aDocument.getBytes());
83 * Parses an XML document from a stream.
89 public static Document read(InputStream aIs) throws XMLException {
91 DocumentBuilder builder = DocumentBuilderFactory.newInstance()
92 .newDocumentBuilder();
93 return builder.parse(aIs);
94 } catch (SAXException e) {
95 throw new XMLException(e.getMessage(), e);
96 } catch (IOException e) {
97 throw new XMLException(e.getMessage(), e);
98 } catch (ParserConfigurationException e) {
99 throw new XMLException(e.getMessage(), e);
103 } catch (Exception e) {
104 LOG.warn("Error closing XML file", e);
110 * Reads and validates a document against a schema.
116 * @return Parsed and validated document.
118 public static Document readAndValidate(InputStream aIs, InputStream aSchema)
119 throws XMLException {
122 final Schema schema = SchemaFactory.newInstance(
123 XMLConstants.W3C_XML_SCHEMA_NS_URI).newSchema(
124 new StreamSource(aSchema));
126 final DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
127 factory.setValidating(true);
128 factory.setNamespaceAware(true);
129 factory.setSchema(schema);
131 return factory.newDocumentBuilder().parse(aIs);
132 } catch (SAXException e) {
133 throw new XMLException(e.getMessage(), e);
134 } catch (IOException e) {
135 throw new XMLException(e.getMessage(), e);
136 } catch (ParserConfigurationException e) {
137 throw new XMLException(e.getMessage(), e);
141 } catch (Exception e) {
142 LOG.warn("Error closing schema", e);
146 } catch (Exception e) {
147 LOG.warn("Error closing XML file", e);
154 * Serializes an XML document to a stream.
157 * Document to serialize.
161 public static void serialize(Document aDocument, OutputStream aOs)
163 XMLSerializer serializer = new XMLSerializer(aOs, new OutputFormat());
164 serializer.serialize(aDocument);
168 * Serializes an XML document.
171 * Document to serialize.
172 * @return Serialized document.
174 public static String serialize(Document aDocument) throws IOException {
175 ByteArrayOutputStream os = new ByteArrayOutputStream();
176 serialize(aDocument, os);
177 return os.toString();
181 * Converts a dom4j document into a w3c DOM document.
184 * Document to convert.
185 * @return W3C DOM document.
187 public static Document convert(org.dom4j.Document aDocument)
188 throws DocumentException {
189 return new DOMWriter().write(aDocument);
193 * Converts a W3C DOM document into a dom4j document.
196 * Document to convert.
197 * @return Dom4j document.
199 public static org.dom4j.Document convert(Document aDocument) {
200 return new DOMReader().read(aDocument);
204 * Removes duplicate attributes from a DOM tree.This is useful for
205 * postprocessing the output of JTidy as a workaround for a bug in JTidy.
208 * Node to remove duplicate attributes from (recursively).
209 * Attributes of the node itself are not dealt with. Only the
210 * child nodes are dealt with.
212 public static void removeDuplicateAttributes(Node aNode) {
213 NodeList list = aNode.getChildNodes();
214 for (int i = 0; i < list.getLength(); i++) {
215 Node node = list.item(i);
216 if (node instanceof Element) {
217 removeDuplicateAttributes((Element) node);
218 removeDuplicateAttributes(node);
224 * Removes duplicate attributes from an element.
229 private static void removeDuplicateAttributes(Element aElement) {
230 NamedNodeMap attributes = aElement.getAttributes();
231 Map<String, Attr> uniqueAttributes = new TreeMap<String, Attr>();
232 List<Attr> attlist = new ArrayList<Attr>();
233 for (int i = 0; i < attributes.getLength(); i++) {
234 Attr attribute = (Attr) attributes.item(i);
235 if (uniqueAttributes.containsKey(attribute.getNodeName())) {
236 LOG.info("Detected duplicate attribute (will be removed)'"
237 + attribute.getNodeName() + "'");
239 uniqueAttributes.put(attribute.getNodeName(), attribute);
240 attlist.add(attribute);
242 // Remove all attributes from the element.
243 for (Attr att : attlist) {
244 aElement.removeAttributeNode(att);
246 // Add the unique attributes back to the element.
247 for (Attr att : uniqueAttributes.values()) {
248 aElement.setAttributeNode(att);