package org.wamblee.xml;
+import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.TreeMap;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
import org.apache.xml.serialize.OutputFormat;
import org.apache.xml.serialize.XMLSerializer;
import org.dom4j.DocumentException;
import org.dom4j.io.DOMReader;
import org.dom4j.io.DOMWriter;
+import org.w3c.dom.Attr;
import org.w3c.dom.Document;
+import org.w3c.dom.Element;
+import org.w3c.dom.NamedNodeMap;
+import org.w3c.dom.Node;
+import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
/**
*/
public final class DomUtils {
+ private static final Log LOG = LogFactory.getLog(DomUtils.class);
+
/**
* Disabled default constructor.
*
// Empty.
}
+ /**
+  * Parses an XML document held in a string.
+  *
+  * The text is encoded as UTF-8 and handed to the stream-based
+  * {@code read} overload. UTF-8 is chosen explicitly (instead of the
+  * platform default charset) so that the result does not depend on the
+  * machine the code runs on; UTF-8 is also what an XML parser assumes for a
+  * byte stream without an encoding declaration.
+  * NOTE(review): a document whose XML declaration names a different,
+  * non-ASCII-compatible encoding would presumably still be mis-decoded
+  * by the stream-based parser; confirm if such input is expected.
+  *
+  * @param aDocument
+  *            XML document text. Must not be null.
+  * @return The document produced by the stream-based {@code read}.
+  * @throws SAXException
+  *             Propagated from the stream-based {@code read}.
+  * @throws ParserConfigurationException
+  *             Propagated from the stream-based {@code read}.
+  * @throws IOException
+  *             Propagated from the stream-based {@code read}, or if the
+  *             UTF-8 charset is unavailable.
+  */
+ public static Document read(String aDocument) throws SAXException, ParserConfigurationException, IOException {
+     // Explicit charset: getBytes() without arguments would use the platform
+     // default and corrupt non-ASCII content on non-UTF-8 systems.
+     ByteArrayInputStream is = new ByteArrayInputStream(aDocument.getBytes("UTF-8"));
+     return read(is);
+ }
/**
* Parses an XML document from a stream.
public static org.dom4j.Document convert(Document aDocument) {
    // Hand the W3C DOM document to dom4j's DOMReader and return what it builds.
    final DOMReader domReader = new DOMReader();
    final org.dom4j.Document converted = domReader.read(aDocument);
    return converted;
}
+
+ /**
+  * Strips duplicate attributes from the elements below a node. Intended as a
+  * post-processing step on JTidy output, as a workaround for a JTidy bug.
+  *
+  * @param aNode
+  *            Root of the subtree to clean up. The attributes of this node
+  *            itself are left alone; only its child elements are processed
+  *            (recursively).
+  */
+ public static void removeDuplicateAttributes(Node aNode) {
+     NodeList children = aNode.getChildNodes();
+     for (int index = 0; index < children.getLength(); index++) {
+         Node child = children.item(index);
+         if (!(child instanceof Element)) {
+             // Only elements carry attributes and are descended into.
+             continue;
+         }
+         Element element = (Element) child;
+         // Element overload: clean up this element's own attributes.
+         removeDuplicateAttributes(element);
+         // Node overload: recurse into the element's children.
+         removeDuplicateAttributes(child);
+     }
+ }
+
+ /**
+  * Collapses duplicate attributes on a single element. When an attribute
+  * name occurs more than once, the last occurrence wins. All attributes are
+  * detached from the element and the surviving ones are attached again,
+  * in the sorted order of their names.
+  *
+  * @param aElement
+  *            Element whose attributes are de-duplicated in place.
+  */
+ private static void removeDuplicateAttributes(Element aElement) {
+     NamedNodeMap attributeMap = aElement.getAttributes();
+     List<Attr> allAttributes = new ArrayList<Attr>();
+     Map<String, Attr> byName = new TreeMap<String, Attr>();
+
+     // First pass: snapshot every attribute and remember one per name.
+     for (int index = 0; index < attributeMap.getLength(); index++) {
+         Attr current = (Attr) attributeMap.item(index);
+         String name = current.getNodeName();
+         if (byName.containsKey(name)) {
+             LOG.info("Detected duplicate attribute (will be removed)'"
+                     + name + "'");
+         }
+         // A later occurrence replaces an earlier one with the same name.
+         byName.put(name, current);
+         allAttributes.add(current);
+     }
+
+     // Second pass: detach everything, working from the snapshot rather than
+     // from the attribute map that is being modified.
+     for (Attr detached : allAttributes) {
+         aElement.removeAttributeNode(detached);
+     }
+
+     // Third pass: re-attach exactly one attribute per name.
+     for (Attr survivor : byName.values()) {
+         aElement.setAttributeNode(survivor);
+     }
+ }
}