X-Git-Url: http://wamblee.org/gitweb/?a=blobdiff_plain;f=crawler%2Fbasic%2Fsrc%2Forg%2Fwamblee%2Fcrawler%2FAbstractPageRequest.java;h=432ebb4ac4f72010f30bd5371a15cce2d48e4f42;hb=3d3d3345af94775f62b60933bd9b2ba1583f5842;hp=7a3755febaa6fbe8d86259500317079358567b63;hpb=4ca88c7dec30b0fae2338844b44f43d4592a42c6;p=utils diff --git a/crawler/basic/src/org/wamblee/crawler/AbstractPageRequest.java b/crawler/basic/src/org/wamblee/crawler/AbstractPageRequest.java index 7a3755fe..432ebb4a 100644 --- a/crawler/basic/src/org/wamblee/crawler/AbstractPageRequest.java +++ b/crawler/basic/src/org/wamblee/crawler/AbstractPageRequest.java @@ -39,8 +39,7 @@ import org.apache.xml.serialize.OutputFormat; import org.apache.xml.serialize.XMLSerializer; import org.w3c.dom.Document; import org.w3c.tidy.Tidy; -import org.wamblee.xml.ClasspathUriResolver; -import org.wamblee.xml.DOMUtility; +import org.wamblee.xml.DomUtils; import org.wamblee.xml.XslTransformer; /** @@ -59,6 +58,8 @@ public abstract class AbstractPageRequest implements PageRequest { private NameValuePair[] _params; private String _xslt; + + private XslTransformer _transformer; /** * Constructs the request. @@ -73,7 +74,7 @@ public abstract class AbstractPageRequest implements PageRequest { * XSLT used to convert the response. */ protected AbstractPageRequest(int aMaxTries, int aMaxDelay, - NameValuePair[] aParams, String aXslt) { + NameValuePair[] aParams, String aXslt, XslTransformer aTransformer) { if (aParams == null) { throw new IllegalArgumentException("aParams is null"); } @@ -84,6 +85,7 @@ public abstract class AbstractPageRequest implements PageRequest { _maxDelay = aMaxDelay; _params = aParams; _xslt = aXslt; + _transformer = aTransformer; } /* @@ -154,9 +156,8 @@ public abstract class AbstractPageRequest implements PageRequest { aMethod = executeWithRedirects(aClient, aMethod); byte[] xhtmlData = getXhtml(aMethod); - XslTransformer xsltProcessor = new XslTransformer(new ClasspathUriResolver()); - Document transformed = xsltProcessor.transform(xhtmlData, - xsltProcessor.resolve(_xslt)); + Document transformed = _transformer.transform(xhtmlData, + _transformer.resolve(_xslt)); ByteArrayOutputStream os = new ByteArrayOutputStream(); Transformer transformer = TransformerFactory.newInstance() .newTransformer(); @@ -196,7 +197,7 @@ public abstract class AbstractPageRequest implements PageRequest { // in a system wide way. ByteArrayOutputStream os = new ByteArrayOutputStream(); Document w3cDoc = tidy.parseDOM(aMethod.getResponseBodyAsStream(), os); - DOMUtility.removeDuplicateAttributes(w3cDoc); + DomUtils.removeDuplicateAttributes(w3cDoc); LOG.debug("Content of response is \n" + os.toString()); ByteArrayOutputStream xhtml = new ByteArrayOutputStream(); @@ -251,7 +252,7 @@ public abstract class AbstractPageRequest implements PageRequest { // recursion. } default: { - throw new RuntimeException("Method failed: " + throw new IOException("Method failed: " + aMethod.getStatusLine()); } }