X-Git-Url: http://wamblee.org/gitweb/?a=blobdiff_plain;f=crawler%2Fbasic%2Fsrc%2Forg%2Fwamblee%2Fcrawler%2FGetPageRequest.java;h=b737723df5dde04f6d61775d1915e3b12b66f404;hb=5685a836b9208ff8babfe5ac5b30c5f86d27cf96;hp=2ce267ee828e72757efd2d5d22e0c7ea83dda963;hpb=c8a926fa53de4bd99e0a05be7934a0c9d74c173f;p=utils diff --git a/crawler/basic/src/org/wamblee/crawler/GetPageRequest.java b/crawler/basic/src/org/wamblee/crawler/GetPageRequest.java index 2ce267ee..b737723d 100644 --- a/crawler/basic/src/org/wamblee/crawler/GetPageRequest.java +++ b/crawler/basic/src/org/wamblee/crawler/GetPageRequest.java @@ -17,7 +17,6 @@ package org.wamblee.crawler; import java.io.IOException; -import java.io.PrintStream; import javax.xml.transform.TransformerException; @@ -26,6 +25,7 @@ import org.apache.commons.httpclient.HttpMethod; import org.apache.commons.httpclient.NameValuePair; import org.apache.commons.httpclient.methods.GetMethod; import org.w3c.dom.Document; +import org.wamblee.xml.XslTransformer; /** * Gets a page by issueing a get request. @@ -37,35 +37,26 @@ public class GetPageRequest extends AbstractPageRequest { * @param aMaxTries Maximum number of retries. * @param aMaxDelay Maximum delay before executing the request. * @param aParams Request parameters to use. + * @param aHeaders Request headers to use. * @param aXslt XSLT to use. */ - public GetPageRequest(int aMaxTries, int aMaxDelay, NameValuePair[] aParams, String aXslt) { - super(aMaxTries, aMaxDelay, aParams, aXslt, null); + public GetPageRequest(int aMaxTries, int aMaxDelay, NameValuePair[] aParams, + NameValuePair[] aHeaders, String aXslt, XslTransformer aTransformer) { + super(aMaxTries, aMaxDelay, aParams, aHeaders, aXslt, aTransformer); } - - /** - * Constructs the request. - * @param aMaxTries Maximum number of retries. - * @param aMaxDelay Maximum delay before executing the request. - * @param aParams Request parameters to use. - * @param aXslt XSLT to use. - * @param aOs Logging output stream to use. - */ - public GetPageRequest(int aMaxTries, int aMaxDelay, NameValuePair[] aParams, String aXslt, PrintStream aOs) { - super(aMaxTries, aMaxDelay, aParams, aXslt, aOs); - } - + /* * (non-Javadoc) * * @see org.wamblee.crawler.PageRequest#getPage(org.apache.commons.httpclient.HttpClient) */ - public Document execute(String aUrl, HttpClient aClient) + public Document execute(String aUrl, NameValuePair[] aParams, HttpClient aClient) throws PageException { HttpMethod method = new GetMethod(aUrl); - if (getParameters().length > 0) { + NameValuePair[] params = getParameters(aParams); + if (params.length > 0) { String oldQueryString = method.getQueryString(); - method.setQueryString(getParameters()); + method.setQueryString(params); String queryString = method.getQueryString(); if (oldQueryString.length() > 0) { queryString = queryString + '&' + oldQueryString; @@ -75,9 +66,9 @@ public class GetPageRequest extends AbstractPageRequest { try { return executeMethod(aClient, method); } catch (TransformerException e) { - throw new PageException(e.getMessage(), e); + throw new PageException("Transformation problem for url " + aUrl, e); } catch (IOException e) { - throw new PageException(e.getMessage(), e); + throw new PageException("Problem getting " + aUrl, e); } }