X-Git-Url: http://wamblee.org/gitweb/?a=blobdiff_plain;f=crawler%2Fbasic%2Fsrc%2Forg%2Fwamblee%2Fcrawler%2FGetPageRequest.java;h=b737723df5dde04f6d61775d1915e3b12b66f404;hb=071ee3b92d229ef0725928e19820fcd1084d11a0;hp=9a9d02e4ed2536f8619bc7f23dc13fac67255754;hpb=a5a9deb2dedb2efc96972acedaa44909a3b0fd79;p=utils diff --git a/crawler/basic/src/org/wamblee/crawler/GetPageRequest.java b/crawler/basic/src/org/wamblee/crawler/GetPageRequest.java index 9a9d02e4..b737723d 100644 --- a/crawler/basic/src/org/wamblee/crawler/GetPageRequest.java +++ b/crawler/basic/src/org/wamblee/crawler/GetPageRequest.java @@ -16,7 +16,7 @@ package org.wamblee.crawler; -import java.io.PrintStream; +import java.io.IOException; import javax.xml.transform.TransformerException; @@ -25,31 +25,38 @@ import org.apache.commons.httpclient.HttpMethod; import org.apache.commons.httpclient.NameValuePair; import org.apache.commons.httpclient.methods.GetMethod; import org.w3c.dom.Document; +import org.wamblee.xml.XslTransformer; /** * Gets a page by issueing a get request. */ public class GetPageRequest extends AbstractPageRequest { - public GetPageRequest(int aMaxTries, int aMaxDelay, NameValuePair[] aParams, String aXslt) { - super(aMaxTries, aMaxDelay, aParams, aXslt, null); - } - - public GetPageRequest(int aMaxTries, int aMaxDelay, NameValuePair[] aParams, String aXslt, PrintStream aOs) { - super(aMaxTries, aMaxDelay, aParams, aXslt, aOs); + /** + * Constructs the request. + * @param aMaxTries Maximum number of retries. + * @param aMaxDelay Maximum delay before executing the request. + * @param aParams Request parameters to use. + * @param aHeaders Request headers to use. + * @param aXslt XSLT to use. + */ + public GetPageRequest(int aMaxTries, int aMaxDelay, NameValuePair[] aParams, + NameValuePair[] aHeaders, String aXslt, XslTransformer aTransformer) { + super(aMaxTries, aMaxDelay, aParams, aHeaders, aXslt, aTransformer); } - + /* * (non-Javadoc) * * @see org.wamblee.crawler.PageRequest#getPage(org.apache.commons.httpclient.HttpClient) */ - public Document execute(String aUrl, HttpClient aClient) + public Document execute(String aUrl, NameValuePair[] aParams, HttpClient aClient) throws PageException { HttpMethod method = new GetMethod(aUrl); - if (getParameters().length > 0) { + NameValuePair[] params = getParameters(aParams); + if (params.length > 0) { String oldQueryString = method.getQueryString(); - method.setQueryString(getParameters()); + method.setQueryString(params); String queryString = method.getQueryString(); if (oldQueryString.length() > 0) { queryString = queryString + '&' + oldQueryString; @@ -59,7 +66,9 @@ public class GetPageRequest extends AbstractPageRequest { try { return executeMethod(aClient, method); } catch (TransformerException e) { - throw new PageException(e.getMessage(), e); + throw new PageException("Transformation problem for url " + aUrl, e); + } catch (IOException e) { + throw new PageException("Problem getting " + aUrl, e); } }