X-Git-Url: http://wamblee.org/gitweb/?a=blobdiff_plain;f=crawler%2Fbasic%2Fsrc%2Forg%2Fwamblee%2Fcrawler%2FAbstractPageRequest.java;h=432ebb4ac4f72010f30bd5371a15cce2d48e4f42;hb=3d3d3345af94775f62b60933bd9b2ba1583f5842;hp=66627ca523faa4c56a112811e7ccdf9381107825;hpb=951167e3811e07ba8c8c02226fe08a8bca6acc3f;p=utils diff --git a/crawler/basic/src/org/wamblee/crawler/AbstractPageRequest.java b/crawler/basic/src/org/wamblee/crawler/AbstractPageRequest.java index 66627ca5..432ebb4a 100644 --- a/crawler/basic/src/org/wamblee/crawler/AbstractPageRequest.java +++ b/crawler/basic/src/org/wamblee/crawler/AbstractPageRequest.java @@ -39,9 +39,8 @@ import org.apache.xml.serialize.OutputFormat; import org.apache.xml.serialize.XMLSerializer; import org.w3c.dom.Document; import org.w3c.tidy.Tidy; -import org.wamblee.xml.ClasspathUriResolver; -import org.wamblee.xml.DOMUtility; -import org.wamblee.xml.XSLT; +import org.wamblee.xml.DomUtils; +import org.wamblee.xml.XslTransformer; /** * General support claas for all kinds of requests. @@ -59,6 +58,8 @@ public abstract class AbstractPageRequest implements PageRequest { private NameValuePair[] _params; private String _xslt; + + private XslTransformer _transformer; /** * Constructs the request. @@ -73,7 +74,7 @@ public abstract class AbstractPageRequest implements PageRequest { * XSLT used to convert the response. */ protected AbstractPageRequest(int aMaxTries, int aMaxDelay, - NameValuePair[] aParams, String aXslt) { + NameValuePair[] aParams, String aXslt, XslTransformer aTransformer) { if (aParams == null) { throw new IllegalArgumentException("aParams is null"); } @@ -84,6 +85,7 @@ public abstract class AbstractPageRequest implements PageRequest { _maxDelay = aMaxDelay; _params = aParams; _xslt = aXslt; + _transformer = aTransformer; } /* @@ -154,9 +156,8 @@ public abstract class AbstractPageRequest implements PageRequest { aMethod = executeWithRedirects(aClient, aMethod); byte[] xhtmlData = getXhtml(aMethod); - XSLT xsltProcessor = new XSLT(new ClasspathUriResolver()); - Document transformed = xsltProcessor.transform(xhtmlData, - xsltProcessor.resolve(_xslt)); + Document transformed = _transformer.transform(xhtmlData, + _transformer.resolve(_xslt)); ByteArrayOutputStream os = new ByteArrayOutputStream(); Transformer transformer = TransformerFactory.newInstance() .newTransformer(); @@ -196,7 +197,7 @@ public abstract class AbstractPageRequest implements PageRequest { // in a system wide way. ByteArrayOutputStream os = new ByteArrayOutputStream(); Document w3cDoc = tidy.parseDOM(aMethod.getResponseBodyAsStream(), os); - DOMUtility.removeDuplicateAttributes(w3cDoc); + DomUtils.removeDuplicateAttributes(w3cDoc); LOG.debug("Content of response is \n" + os.toString()); ByteArrayOutputStream xhtml = new ByteArrayOutputStream(); @@ -251,7 +252,7 @@ public abstract class AbstractPageRequest implements PageRequest { // recursion. } default: { - throw new RuntimeException("Method failed: " + throw new IOException("Method failed: " + aMethod.getStatusLine()); } }