2 * Copyright 2005-2010 the original author or authors.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
16 package org.wamblee.xml;
18 import java.io.ByteArrayInputStream;
19 import java.io.ByteArrayOutputStream;
20 import java.io.IOException;
21 import java.io.InputStream;
22 import java.io.OutputStream;
23 import java.util.ArrayList;
24 import java.util.List;
26 import java.util.TreeMap;
27 import java.util.logging.Level;
28 import java.util.logging.Logger;
30 import javax.xml.XMLConstants;
31 import javax.xml.parsers.DocumentBuilder;
32 import javax.xml.parsers.DocumentBuilderFactory;
33 import javax.xml.parsers.ParserConfigurationException;
34 import javax.xml.transform.Transformer;
35 import javax.xml.transform.TransformerException;
36 import javax.xml.transform.TransformerFactory;
37 import javax.xml.transform.dom.DOMSource;
38 import javax.xml.transform.stream.StreamResult;
39 import javax.xml.transform.stream.StreamSource;
40 import javax.xml.validation.Schema;
41 import javax.xml.validation.SchemaFactory;
42 import javax.xml.validation.Validator;
44 import org.w3c.dom.Attr;
45 import org.w3c.dom.Document;
46 import org.w3c.dom.Element;
47 import org.w3c.dom.NamedNodeMap;
48 import org.w3c.dom.Node;
49 import org.w3c.dom.NodeList;
50 import org.w3c.dom.bootstrap.DOMImplementationRegistry;
51 import org.w3c.dom.ls.DOMImplementationLS;
52 import org.w3c.dom.ls.LSException;
53 import org.w3c.dom.ls.LSInput;
54 import org.w3c.dom.ls.LSParser;
55 import org.xml.sax.SAXException;
58 * Some basic XML utilities for common recurring tasks for DOM documents.
60 * @author Erik Brakkee
62 public final class DomUtils {
63 private static final Logger LOG = Logger
64 .getLogger(DomUtils.class.getName());
67 * Disabled default constructor.
75 * Parses an XML document from a string.
83 public static Document read(String aDocument) throws XMLException {
84 ByteArrayInputStream is = new ByteArrayInputStream(aDocument.getBytes());
90 * Parses an XML document from a stream.
98 public static Document read(InputStream aIs) throws XMLException {
100 DOMImplementationLS impl = getDomImplementationLS();
102 LSParser builder = impl.createLSParser(
103 DOMImplementationLS.MODE_SYNCHRONOUS, null);
104 LSInput input = impl.createLSInput();
105 input.setByteStream(aIs);
106 return builder.parse(input);
107 } catch (LSException e) {
108 throw new XMLException(e.getMessage(), e);
112 } catch (Exception e) {
113 LOG.log(Level.WARNING, "Error closing XML file", e);
119 * Gets a dom level 3 implementation.
120 * @return Dom implementation.
121 * @throws ClassNotFoundException
122 * @throws InstantiationException
123 * @throws IllegalAccessException
125 public static DOMImplementationLS getDomImplementationLS() {
126 final String message = "Could not get Dom level 3 implementation";
128 DOMImplementationRegistry registry = DOMImplementationRegistry
131 DOMImplementationLS impl = (DOMImplementationLS) registry
132 .getDOMImplementation("LS");
134 } catch (ClassCastException e) {
135 throw new RuntimeException(message, e);
136 } catch (ClassNotFoundException e) {
137 throw new RuntimeException(message, e);
138 } catch (InstantiationException e) {
139 throw new RuntimeException(message, e);
140 } catch (IllegalAccessException e) {
141 throw new RuntimeException(message, e);
146 * Reads and validates a document against a schema.
153 * @return Parsed and validated document.
156 public static Document readAndValidate(InputStream aIs, InputStream aSchema)
157 throws XMLException {
159 Document doc = read(aIs);
160 final Schema schema = SchemaFactory.newInstance(
161 XMLConstants.W3C_XML_SCHEMA_NS_URI).newSchema(
162 new StreamSource(aSchema));
163 Validator validator = schema.newValidator();
164 validator.validate(new DOMSource(doc));
167 } catch (SAXException e) {
168 throw new XMLException(e.getMessage(), e);
169 } catch (IOException e) {
170 throw new XMLException(e.getMessage(), e);
174 } catch (Exception e) {
175 LOG.log(Level.WARNING, "Error closing schema", e);
180 } catch (Exception e) {
181 LOG.log(Level.WARNING, "Error closing XML file", e);
187 * Serializes an XML document to a stream.
190 * Document to serialize.
195 public static void serialize(Document aDocument, OutputStream aOs)
198 TransformerFactory factory = TransformerFactory.newInstance();
199 Transformer identityTransform = factory.newTransformer();
200 DOMSource source = new DOMSource(aDocument);
201 StreamResult result = new StreamResult(aOs);
202 identityTransform.transform(source, result);
203 } catch (TransformerException e) {
204 throw new IOException(e.getMessage(), e);
209 * Serializes an XML document.
212 * Document to serialize.
214 * @return Serialized document.
217 public static String serialize(Document aDocument) throws IOException {
218 ByteArrayOutputStream os = new ByteArrayOutputStream();
219 serialize(aDocument, os);
221 return os.toString();
225 * Removes duplicate attributes from a DOM tree.This is useful for
226 * postprocessing the output of JTidy as a workaround for a bug in JTidy.
229 * Node to remove duplicate attributes from (recursively).
230 * Attributes of the node itself are not dealt with. Only the
231 * child nodes are dealt with.
233 public static void removeDuplicateAttributes(Node aNode) {
234 NodeList list = aNode.getChildNodes();
236 for (int i = 0; i < list.getLength(); i++) {
237 Node node = list.item(i);
239 if (node instanceof Element) {
240 removeDuplicateAttributes((Element) node);
241 removeDuplicateAttributes(node);
247 * Removes duplicate attributes from an element.
252 private static void removeDuplicateAttributes(Element aElement) {
253 NamedNodeMap attributes = aElement.getAttributes();
254 Map<String, Attr> uniqueAttributes = new TreeMap<String, Attr>();
255 List<Attr> attlist = new ArrayList<Attr>();
257 for (int i = 0; i < attributes.getLength(); i++) {
258 Attr attribute = (Attr) attributes.item(i);
260 if (uniqueAttributes.containsKey(attribute.getNodeName())) {
261 LOG.info("Detected duplicate attribute (will be removed)'" +
262 attribute.getNodeName() + "'");
265 uniqueAttributes.put(attribute.getNodeName(), attribute);
266 attlist.add(attribute);
269 // Remove all attributes from the element.
270 for (Attr att : attlist) {
271 aElement.removeAttributeNode(att);
274 // Add the unique attributes back to the element.
275 for (Attr att : uniqueAttributes.values()) {
276 aElement.setAttributeNode(att);