From 2415e335184c5d6f58f261d26b95f6c22f55ae0d Mon Sep 17 00:00:00 2001 From: Erik Brakkee Date: Fri, 17 Mar 2006 22:57:21 +0000 Subject: [PATCH] checkstyle --- .../wamblee/crawler/AbstractPageRequest.java | 146 ++++++++++-------- support/src/org/wamblee/xml/DOMUtility.java | 107 +++++++------ 2 files changed, 140 insertions(+), 113 deletions(-) diff --git a/crawler/basic/src/org/wamblee/crawler/AbstractPageRequest.java b/crawler/basic/src/org/wamblee/crawler/AbstractPageRequest.java index b37834bb..144abe78 100644 --- a/crawler/basic/src/org/wamblee/crawler/AbstractPageRequest.java +++ b/crawler/basic/src/org/wamblee/crawler/AbstractPageRequest.java @@ -66,12 +66,18 @@ public abstract class AbstractPageRequest implements PageRequest { private PrintStream _os; /** - * Constructs the request. - * @param aMaxTries Maximum retries to perform. - * @param aMaxDelay Maximum delay before executing a request. - * @param aParams Request parameters to use. - * @param aXslt XSLT used to convert the response. - * @param aOs Output stream for logging (if null then no logging is done). + * Constructs the request. + * + * @param aMaxTries + * Maximum retries to perform. + * @param aMaxDelay + * Maximum delay before executing a request. + * @param aParams + * Request parameters to use. + * @param aXslt + * XSLT used to convert the response. + * @param aOs + * Output stream for logging (if null then no logging is done). */ protected AbstractPageRequest(int aMaxTries, int aMaxDelay, NameValuePair[] aParams, String aXslt, PrintStream aOs) { @@ -98,21 +104,27 @@ public abstract class AbstractPageRequest implements PageRequest { } /** - * Gets the parameters for the request. - * @return Request parameters. + * Gets the parameters for the request. + * + * @return Request parameters. */ protected NameValuePair[] getParameters() { return _params; } /** - * Executes the request with a random delay and with a maximum number of - * retries. - * @param aClient HTTP client to use. - * @param aMethod Method representing the request. + * Executes the request with a random delay and with a maximum number of + * retries. + * + * @param aClient + * HTTP client to use. + * @param aMethod + * Method representing the request. * @return XML document describing the response. - * @throws IOException In case of IO problems. - * @throws TransformerException In case transformation of the HTML to XML fails. + * @throws IOException + * In case of IO problems. + * @throws TransformerException + * In case transformation of the HTML to XML fails. */ protected Document executeMethod(HttpClient aClient, HttpMethod aMethod) throws IOException, TransformerException { @@ -131,20 +143,25 @@ public abstract class AbstractPageRequest implements PageRequest { } /** - * Executes the request without doing any retries in case XSLT transformation - * fails. - * @param aClient HTTP client to use. - * @param aMethod Method to execute. - * @return XML document containing the result. - * @throws IOException In case of IO problems. - * @throws TransformerException In case transformation of the result to XML fails. + * Executes the request without doing any retries in case XSLT + * transformation fails. + * + * @param aClient + * HTTP client to use. + * @param aMethod + * Method to execute. + * @return XML document containing the result. + * @throws IOException + * In case of IO problems. + * @throws TransformerException + * In case transformation of the result to XML fails. */ protected Document executeMethodWithoutRetries(HttpClient aClient, HttpMethod aMethod) throws IOException, TransformerException { try { aMethod = executeWithRedirects(aClient, aMethod); byte[] xhtmlData = getXhtml(aMethod); - + Document transformed = new XSLT().transform(xhtmlData, new FileResource(new File(_xslt))); _os.println("Transformed result is: "); @@ -165,43 +182,44 @@ public abstract class AbstractPageRequest implements PageRequest { } /** - * Gets the result of the HTTP method as an XHTML document. - * @param aMethod Method to invoke. - * @return XHTML as a byte array. - * @throws URIException In case of poblems with the URI - * @throws IOException In case of problems obtaining the XHTML. + * Gets the result of the HTTP method as an XHTML document. + * + * @param aMethod + * Method to invoke. + * @return XHTML as a byte array. + * @throws IOException + * In case of problems obtaining the XHTML. */ - private byte[] getXhtml(HttpMethod aMethod) throws URIException, IOException { - // Transform the HTML into wellformed XML. - Tidy tidy = new Tidy(); - tidy.setXHTML(true); - tidy.setQuiet(true); - tidy.setShowWarnings(false); - if (_os != null) { - _os.println("Content of '" + aMethod.getURI() + "'"); - _os.println(); - } - // We write the jtidy output to XML since the DOM tree it produces is - // not namespace aware and namespace awareness is required by XSLT. - // An alternative is to configure namespace awareness of the XML parser - // in a system wide way. - Document w3cDoc = tidy.parseDOM(aMethod.getResponseBodyAsStream(), - _os); - DOMUtility.removeDuplicateAttributes(w3cDoc); - - ByteArrayOutputStream xhtml = new ByteArrayOutputStream(); - XMLSerializer serializer = new XMLSerializer(xhtml, new OutputFormat()); - serializer.serialize(w3cDoc); - xhtml.flush(); - if (_os != null) { - _os.println(); + private byte[] getXhtml(HttpMethod aMethod) throws IOException { + // Transform the HTML into wellformed XML. + Tidy tidy = new Tidy(); + tidy.setXHTML(true); + tidy.setQuiet(true); + tidy.setShowWarnings(false); + if (_os != null) { + _os.println("Content of '" + aMethod.getURI() + "'"); + _os.println(); } - return xhtml.toByteArray(); - } + // We write the jtidy output to XML since the DOM tree it produces is + // not namespace aware and namespace awareness is required by XSLT. + // An alternative is to configure namespace awareness of the XML parser + // in a system wide way. + Document w3cDoc = tidy.parseDOM(aMethod.getResponseBodyAsStream(), _os); + DOMUtility.removeDuplicateAttributes(w3cDoc); + + ByteArrayOutputStream xhtml = new ByteArrayOutputStream(); + XMLSerializer serializer = new XMLSerializer(xhtml, new OutputFormat()); + serializer.serialize(w3cDoc); + xhtml.flush(); + if (_os != null) { + _os.println(); + } + return xhtml.toByteArray(); + } /** * Sleeps for a random time but no more than the maximum delay. - * + * */ private void delay() { try { @@ -212,12 +230,16 @@ public abstract class AbstractPageRequest implements PageRequest { } /** - * Executes the request and follows redirects if needed. - * @param aClient HTTP client to use. - * @param aMethod Method to use. - * @return Final HTTP method used (differs from the parameter passed in in case - * of redirection). - * @throws IOException In case of network problems. + * Executes the request and follows redirects if needed. + * + * @param aClient + * HTTP client to use. + * @param aMethod + * Method to use. + * @return Final HTTP method used (differs from the parameter passed in in + * case of redirection). + * @throws IOException + * In case of network problems. */ private HttpMethod executeWithRedirects(HttpClient aClient, HttpMethod aMethod) throws IOException { @@ -235,8 +257,8 @@ public abstract class AbstractPageRequest implements PageRequest { Header header = aMethod.getResponseHeader(REDIRECT_HEADER); aMethod = new GetMethod(header.getValue()); return executeWithRedirects(aClient, aMethod); // TODO protect - // against infinite - // recursion. + // against infinite + // recursion. } default: { throw new RuntimeException("Method failed: " diff --git a/support/src/org/wamblee/xml/DOMUtility.java b/support/src/org/wamblee/xml/DOMUtility.java index 6a7a5af7..58170444 100644 --- a/support/src/org/wamblee/xml/DOMUtility.java +++ b/support/src/org/wamblee/xml/DOMUtility.java @@ -12,58 +12,63 @@ import org.w3c.dom.Node; import org.w3c.dom.NodeList; /** - * Utility class for performing various operations on DOM trees. + * Utility class for performing various operations on DOM trees. */ public final class DOMUtility { - - /** - * Disabled constructor. - * - */ - private DOMUtility() { - // Empty - } - - /** - * Removes duplicate attributes from a DOM tree. - * @param aNode Node to remove duplicate attributes from (recursively). - * Attributes of the node itself are not dealt with. Only the child - * nodes are dealt with. - */ - public static void removeDuplicateAttributes(Node aNode) { - NodeList list = aNode.getChildNodes(); - for (int i = 0; i < list.getLength(); i++) { - Node node = list.item(i); - if ( node instanceof Element ) { - removeDuplicateAttributes((Element)node); + + /** + * Disabled constructor. + * + */ + private DOMUtility() { + // Empty + } + + /** + * Removes duplicate attributes from a DOM tree. + * + * @param aNode + * Node to remove duplicate attributes from (recursively). + * Attributes of the node itself are not dealt with. Only the + * child nodes are dealt with. + */ + public static void removeDuplicateAttributes(Node aNode) { + NodeList list = aNode.getChildNodes(); + for (int i = 0; i < list.getLength(); i++) { + Node node = list.item(i); + if (node instanceof Element) { + removeDuplicateAttributes((Element) node); removeDuplicateAttributes(node); - } - } - } - - /** - * Removes duplicate attributes from an element. - * @param aElement Element. - */ - private static void removeDuplicateAttributes(Element aElement) { - NamedNodeMap attributes = aElement.getAttributes(); - Map uniqueAttributes = new TreeMap(); - List attlist = new ArrayList(); - for (int i = 0; i < attributes.getLength(); i++) { - Attr attribute = (Attr)attributes.item(i); - if ( uniqueAttributes.containsKey(attribute.getNodeName())) { - System.out.println("Detected duplicate attribute '" + attribute.getNodeName() + "'"); - } - uniqueAttributes.put(attribute.getNodeName(), attribute); - attlist.add(attribute); - } - // Remove all attributes from the element. - for (Attr att: attlist) { - aElement.removeAttributeNode(att); - } - // Add the unique attributes back to the element. - for (Attr att: uniqueAttributes.values()) { - aElement.setAttributeNode(att); - } - } + } + } + } + + /** + * Removes duplicate attributes from an element. + * + * @param aElement + * Element. + */ + private static void removeDuplicateAttributes(Element aElement) { + NamedNodeMap attributes = aElement.getAttributes(); + Map uniqueAttributes = new TreeMap(); + List attlist = new ArrayList(); + for (int i = 0; i < attributes.getLength(); i++) { + Attr attribute = (Attr) attributes.item(i); + if (uniqueAttributes.containsKey(attribute.getNodeName())) { + System.out.println("Detected duplicate attribute '" + + attribute.getNodeName() + "'"); + } + uniqueAttributes.put(attribute.getNodeName(), attribute); + attlist.add(attribute); + } + // Remove all attributes from the element. + for (Attr att : attlist) { + aElement.removeAttributeNode(att); + } + // Add the unique attributes back to the element. + for (Attr att : uniqueAttributes.values()) { + aElement.setAttributeNode(att); + } + } } -- 2.31.1