From 7346b8dd3cbff04da8efb31670ddaf0f7fbfda42 Mon Sep 17 00:00:00 2001 From: erik Date: Mon, 27 Mar 2006 22:27:32 +0000 Subject: [PATCH] --- .../wamblee/crawler/AbstractPageRequest.java | 4 +- support/src/org/wamblee/xml/DOMUtility.java | 75 ----------- support/src/org/wamblee/xml/DomUtils.java | 58 +++++++++ support/test/org/wamblee/xml/XmlUtils.java | 122 ++++++++++++------ .../org/wamblee/xml/XslTransformerTest.java | 1 + 5 files changed, 142 insertions(+), 118 deletions(-) delete mode 100644 support/src/org/wamblee/xml/DOMUtility.java diff --git a/crawler/basic/src/org/wamblee/crawler/AbstractPageRequest.java b/crawler/basic/src/org/wamblee/crawler/AbstractPageRequest.java index 5cb4fae6..c0284d69 100644 --- a/crawler/basic/src/org/wamblee/crawler/AbstractPageRequest.java +++ b/crawler/basic/src/org/wamblee/crawler/AbstractPageRequest.java @@ -39,7 +39,7 @@ import org.apache.xml.serialize.OutputFormat; import org.apache.xml.serialize.XMLSerializer; import org.w3c.dom.Document; import org.w3c.tidy.Tidy; -import org.wamblee.xml.DOMUtility; +import org.wamblee.xml.DomUtils; import org.wamblee.xml.XslTransformer; /** @@ -197,7 +197,7 @@ public abstract class AbstractPageRequest implements PageRequest { // in a system wide way. ByteArrayOutputStream os = new ByteArrayOutputStream(); Document w3cDoc = tidy.parseDOM(aMethod.getResponseBodyAsStream(), os); - DOMUtility.removeDuplicateAttributes(w3cDoc); + DomUtils.removeDuplicateAttributes(w3cDoc); LOG.debug("Content of response is \n" + os.toString()); ByteArrayOutputStream xhtml = new ByteArrayOutputStream(); diff --git a/support/src/org/wamblee/xml/DOMUtility.java b/support/src/org/wamblee/xml/DOMUtility.java deleted file mode 100644 index 620d6811..00000000 --- a/support/src/org/wamblee/xml/DOMUtility.java +++ /dev/null @@ -1,75 +0,0 @@ -package org.wamblee.xml; - -import java.util.ArrayList; -import java.util.List; -import java.util.Map; -import java.util.TreeMap; - -import org.w3c.dom.Attr; -import org.w3c.dom.Element; -import org.w3c.dom.NamedNodeMap; -import org.w3c.dom.Node; -import org.w3c.dom.NodeList; - -/** - * Utility class for performing various operations on DOM trees. - */ -public final class DOMUtility { - - /** - * Disabled constructor. - * - */ - private DOMUtility() { - // Empty - } - - /** - * Removes duplicate attributes from a DOM tree.This is useful for postprocessing the - * output of JTidy as a workaround for a bug in JTidy. - * - * @param aNode - * Node to remove duplicate attributes from (recursively). - * Attributes of the node itself are not dealt with. Only the - * child nodes are dealt with. - */ - public static void removeDuplicateAttributes(Node aNode) { - NodeList list = aNode.getChildNodes(); - for (int i = 0; i < list.getLength(); i++) { - Node node = list.item(i); - if (node instanceof Element) { - removeDuplicateAttributes((Element) node); - removeDuplicateAttributes(node); - } - } - } - - /** - * Removes duplicate attributes from an element. - * - * @param aElement - * Element. - */ - private static void removeDuplicateAttributes(Element aElement) { - NamedNodeMap attributes = aElement.getAttributes(); - Map uniqueAttributes = new TreeMap(); - List attlist = new ArrayList(); - for (int i = 0; i < attributes.getLength(); i++) { - Attr attribute = (Attr) attributes.item(i); - if (uniqueAttributes.containsKey(attribute.getNodeName())) { - System.out.println("Detected duplicate attribute '" - + attribute.getNodeName() + "'"); - } - uniqueAttributes.put(attribute.getNodeName(), attribute); - attlist.add(attribute); - } - // Remove all attributes from the element. - for (Attr att : attlist) { - aElement.removeAttributeNode(att); - } - // Add the unique attributes back to the element. - for (Attr att : uniqueAttributes.values()) { - aElement.setAttributeNode(att); - } - } -} diff --git a/support/src/org/wamblee/xml/DomUtils.java b/support/src/org/wamblee/xml/DomUtils.java index 4fdb52e7..75ba7f6d 100644 --- a/support/src/org/wamblee/xml/DomUtils.java +++ b/support/src/org/wamblee/xml/DomUtils.java @@ -20,6 +20,10 @@ import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.TreeMap; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; @@ -30,7 +34,12 @@ import org.apache.xml.serialize.XMLSerializer; import org.dom4j.DocumentException; import org.dom4j.io.DOMReader; import org.dom4j.io.DOMWriter; +import org.w3c.dom.Attr; import org.w3c.dom.Document; +import org.w3c.dom.Element; +import org.w3c.dom.NamedNodeMap; +import org.w3c.dom.Node; +import org.w3c.dom.NodeList; import org.xml.sax.SAXException; /** @@ -96,4 +105,53 @@ public final class DomUtils { public static org.dom4j.Document convert(Document aDocument) { return new DOMReader().read(aDocument); } + + /** + * Removes duplicate attributes from a DOM tree.This is useful for postprocessing the + * output of JTidy as a workaround for a bug in JTidy. + * + * @param aNode + * Node to remove duplicate attributes from (recursively). + * Attributes of the node itself are not dealt with. Only the + * child nodes are dealt with. + */ + public static void removeDuplicateAttributes(Node aNode) { + NodeList list = aNode.getChildNodes(); + for (int i = 0; i < list.getLength(); i++) { + Node node = list.item(i); + if (node instanceof Element) { + removeDuplicateAttributes((Element) node); + removeDuplicateAttributes(node); + } + } + } + + /** + * Removes duplicate attributes from an element. + * + * @param aElement + * Element. + */ + private static void removeDuplicateAttributes(Element aElement) { + NamedNodeMap attributes = aElement.getAttributes(); + Map uniqueAttributes = new TreeMap(); + List attlist = new ArrayList(); + for (int i = 0; i < attributes.getLength(); i++) { + Attr attribute = (Attr) attributes.item(i); + if (uniqueAttributes.containsKey(attribute.getNodeName())) { + System.out.println("Detected duplicate attribute '" + + attribute.getNodeName() + "'"); + } + uniqueAttributes.put(attribute.getNodeName(), attribute); + attlist.add(attribute); + } + // Remove all attributes from the element. + for (Attr att : attlist) { + aElement.removeAttributeNode(att); + } + // Add the unique attributes back to the element. + for (Attr att : uniqueAttributes.values()) { + aElement.setAttributeNode(att); + } + } } diff --git a/support/test/org/wamblee/xml/XmlUtils.java b/support/test/org/wamblee/xml/XmlUtils.java index c83bdae8..12292469 100644 --- a/support/test/org/wamblee/xml/XmlUtils.java +++ b/support/test/org/wamblee/xml/XmlUtils.java @@ -12,13 +12,15 @@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. - */ + */ package org.wamblee.xml; import java.util.Collections; import java.util.Comparator; import java.util.List; +import java.util.Map; +import java.util.TreeMap; import junit.framework.TestCase; @@ -26,86 +28,124 @@ import org.dom4j.Attribute; import org.dom4j.Document; import org.dom4j.Element; - - /** - * XML test support utilities. + * XML test support utilities. */ public final class XmlUtils { /** - * Disabled constructor. - * + * Disabled constructor. + * */ - private XmlUtils() { + private XmlUtils() { // Empty } - + + /** + * Checks equality of two XML documents excluding comment and processing + * nodes and trimming the text of the elements. + * + * @param aMsg + * @param aExpected + * @param aActual + */ + public static void assertEquals(String aMsg, + org.w3c.dom.Document aExpected, org.w3c.dom.Document aActual) { + assertEquals(aMsg, DomUtils.convert(aExpected), DomUtils + .convert(aActual)); + } + /** - * Checks equality of two XML documents excluding comment and processing nodes and - * trimming the text of the elements. + * Checks equality of two XML documents excluding comment and processing + * nodes and trimming the text of the elements. + * * @param aMsg * @param aExpected * @param aActual */ - public static void assertEquals(String aMsg, Document aExpected, Document aActual) { + public static void assertEquals(String aMsg, Document aExpected, + Document aActual) { assertEquals(aMsg, aExpected.getRootElement(), aActual.getRootElement()); } - + /** - * Checks equality of two XML elements excluding comment and processing nodes and trimming - * the text of the elements. + * Checks equality of two XML elements excluding comment and processing + * nodes and trimming the text of the elements. + * * @param aMsg * @param aExpected * @param aActual */ - public static void assertEquals(String aMsg, Element aExpected, Element aActual) { - + public static void assertEquals(String aMsg, Element aExpected, + Element aActual) { + // Name. - TestCase.assertEquals(aMsg + "/name()", aExpected.getName(), aActual.getName()); - + TestCase.assertEquals(aMsg + "/name()", aExpected.getName(), aActual + .getName()); + // Text - TestCase.assertEquals(aMsg + "/text()", aExpected.getTextTrim(), aActual.getTextTrim()); - + TestCase.assertEquals(aMsg + "/text()", aExpected.getTextTrim(), + aActual.getTextTrim()); + // Attributes - List expectedAttrs = aExpected.attributes(); + List expectedAttrs = aExpected.attributes(); Collections.sort(expectedAttrs, new AttributeComparator()); - List actualAttrs = aActual.attributes(); + List actualAttrs = aActual.attributes(); Collections.sort(actualAttrs, new AttributeComparator()); - - TestCase.assertEquals(aMsg + "/#attributes", expectedAttrs.size(), actualAttrs.size()); - for (int i = 0; i < expectedAttrs.size(); i++) { - String msg = aMsg + "/attribute(" + i + ")"; + + TestCase.assertEquals(aMsg + "/#attributes", expectedAttrs.size(), + actualAttrs.size()); + for (int i = 0; i < expectedAttrs.size(); i++) { + String msg = aMsg + "/@" + expectedAttrs.get(i).getName(); assertEquals(msg, expectedAttrs.get(i), actualAttrs.get(i)); } - - // Nested elements. + + // Nested elements. List expectedElems = aExpected.elements(); - List actualElems = aActual.elements(); - TestCase.assertEquals(aMsg + "/#elements", expectedElems.size(), actualElems.size()); - for (int i = 0; i < expectedElems.size(); i++) { - String msg = aMsg + "/element(" + i + ")"; + List actualElems = aActual.elements(); + TestCase.assertEquals(aMsg + "/#elements", expectedElems.size(), + actualElems.size()); + // determine the how-manyth element of the given name we are at. + Map elementIndex = new TreeMap(); + for (int i = 0; i < expectedElems.size(); i++) { + String elemName = expectedElems.get(i).getName(); + Integer index = elementIndex.get(elemName); + if (index == null) { + index = 1; + } else { + index++; + } + elementIndex.put(elemName, index); + String msg = aMsg + "/" + expectedElems.get(i).getName() + "(" + + index + ")"; + assertEquals(msg, expectedElems.get(i), actualElems.get(i)); } } - + /** * Checks equality of two attributes. + * * @param aMsg * @param aExpected * @param aActual */ - public static void assertEquals(String aMsg, Attribute aExpected, Attribute aActual) { - TestCase.assertEquals(aMsg + "@", aExpected.getName() + ":name", aActual.getName()); - TestCase.assertEquals(aMsg + "@" + aExpected.getName() + ":value", - aExpected.getValue(), aActual.getValue()); + public static void assertEquals(String aMsg, Attribute aExpected, + Attribute aActual) { + TestCase.assertEquals(aMsg + ":name", aExpected.getName(), aActual + .getName()); + TestCase.assertEquals(aMsg + ":value", aExpected.getValue(), aActual + .getValue()); } - + /** - * Comparator which compares attributes by name. + * Comparator which compares attributes by name. */ - private static final class AttributeComparator implements Comparator { - /* (non-Javadoc) + private static final class AttributeComparator implements + Comparator { + /* + * (non-Javadoc) + * * @see java.util.Comparator#compare(T, T) */ public int compare(Attribute aAttribute1, Attribute aAttribute2) { diff --git a/support/test/org/wamblee/xml/XslTransformerTest.java b/support/test/org/wamblee/xml/XslTransformerTest.java index 8352cb0d..cede6bb0 100644 --- a/support/test/org/wamblee/xml/XslTransformerTest.java +++ b/support/test/org/wamblee/xml/XslTransformerTest.java @@ -60,6 +60,7 @@ public class XslTransformerTest extends TestCase { Document output1 = transformer.transform(documentData, xslt); Document output2 = transformer.transform(document, xslt); + XmlUtils.assertEquals("output", output1, output2); ByteArrayOutputStream os = new ByteArrayOutputStream(); Result output = new StreamResult(os); transformer.transform(documentSource, output, xslt); -- 2.31.1