X-Git-Url: http://wamblee.org/gitweb/?a=blobdiff_plain;f=support%2Fsrc%2Forg%2Fwamblee%2Fxml%2FDomUtils.java;h=05eae128640fa9b70a829a7e7c33814e379ca891;hb=9a7549ba39d26974b00d99569819c1239f080049;hp=4fdb52e752b6517440ed68538c36c1a86b2a9be9;hpb=336db50f7988a7a10533703c1fdcd758e1ed01d3;p=utils

diff --git a/support/src/org/wamblee/xml/DomUtils.java b/support/src/org/wamblee/xml/DomUtils.java
index 4fdb52e7..05eae128 100644
--- a/support/src/org/wamblee/xml/DomUtils.java
+++ b/support/src/org/wamblee/xml/DomUtils.java
@@ -16,21 +16,33 @@
 
 package org.wamblee.xml;
 
+import java.io.ByteArrayInputStream;
 import java.io.ByteArrayOutputStream;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.OutputStream;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.TreeMap;
 
 import javax.xml.parsers.DocumentBuilder;
 import javax.xml.parsers.DocumentBuilderFactory;
 import javax.xml.parsers.ParserConfigurationException;
 
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
 import org.apache.xml.serialize.OutputFormat;
 import org.apache.xml.serialize.XMLSerializer;
 import org.dom4j.DocumentException;
 import org.dom4j.io.DOMReader;
 import org.dom4j.io.DOMWriter;
+import org.w3c.dom.Attr;
 import org.w3c.dom.Document;
+import org.w3c.dom.Element;
+import org.w3c.dom.NamedNodeMap;
+import org.w3c.dom.Node;
+import org.w3c.dom.NodeList;
 import org.xml.sax.SAXException;
 
 /**
@@ -39,6 +51,8 @@ import org.xml.sax.SAXException;
  */
 public final class DomUtils {
 
+    private static final Log LOG = LogFactory.getLog(DomUtils.class);
+
     /**
      * Disabled default constructor.
      *
@@ -47,6 +61,15 @@ public final class DomUtils {
         // Empty.
     }
 
+    /**
+     * Parses an XML document from a string (the text is encoded as UTF-8, the XML default, first).
+     * @param aDocument XML document text; must not be null.
+     * @return Parsed DOM document, as produced by the stream-based read method.
+     */
+    public static Document read(String aDocument) throws SAXException, ParserConfigurationException, IOException {
+        ByteArrayInputStream is = new ByteArrayInputStream(aDocument.getBytes("UTF-8"));
+        return read(is);
+    }
 
     /**
      * Parses an XML document from a stream.
@@ -96,4 +119,53 @@ public final class DomUtils {
     public static org.dom4j.Document convert(Document aDocument) {
         return new DOMReader().read(aDocument);
     }
+
+    /**
+     * Removes duplicate attributes from a DOM tree. This is useful for postprocessing the
+     * output of JTidy as a workaround for a bug in JTidy.
+     *
+     * @param aNode
+     *            Node to remove duplicate attributes from (recursively).
+     *            Attributes of the node itself are not dealt with. Only the
+     *            child nodes are dealt with.
+     */
+    public static void removeDuplicateAttributes(Node aNode) {
+        NodeList list = aNode.getChildNodes();
+        for (int i = 0; i < list.getLength(); i++) {
+            Node node = list.item(i);
+            if (node instanceof Element) {
+                removeDuplicateAttributes((Element) node);
+                removeDuplicateAttributes(node);
+            }
+        }
+    }
+
+    /**
+     * Removes duplicate attributes from an element; per name, the last attribute seen is kept.
+     *
+     * @param aElement
+     *            Element whose attributes are removed and re-added without duplicates.
+     */
+    private static void removeDuplicateAttributes(Element aElement) {
+        NamedNodeMap attributes = aElement.getAttributes();
+        Map<String, Attr> uniqueAttributes = new TreeMap<String, Attr>();
+        List<Attr> attlist = new ArrayList<Attr>();
+        for (int i = 0; i < attributes.getLength(); i++) {
+            Attr attribute = (Attr) attributes.item(i);
+            if (uniqueAttributes.containsKey(attribute.getNodeName())) {
+                LOG.info("Detected duplicate attribute (will be removed)'"
+                        + attribute.getNodeName() + "'");
+            }
+            uniqueAttributes.put(attribute.getNodeName(), attribute);
+            attlist.add(attribute);
+        }
+        // Remove all attributes from the element (from the snapshot list, not the live map).
+        for (Attr att : attlist) {
+            aElement.removeAttributeNode(att);
+        }
+        // Add the unique attributes back to the element (TreeMap order: sorted by name).
+        for (Attr att : uniqueAttributes.values()) {
+            aElement.setAttributeNode(att);
+        }
+    }
 }