X-Git-Url: http://wamblee.org/gitweb/?a=blobdiff_plain;f=crawler%2Fbasic%2Fsrc%2Forg%2Fwamblee%2Fcrawler%2FGetPageRequest.java;h=b737723df5dde04f6d61775d1915e3b12b66f404;hb=5685a836b9208ff8babfe5ac5b30c5f86d27cf96;hp=3da77b83899eac83a9cc2851300f4fb6b1cc9d61;hpb=917321038aac9668051a64278525a2cc7bc5c2e2;p=utils diff --git a/crawler/basic/src/org/wamblee/crawler/GetPageRequest.java b/crawler/basic/src/org/wamblee/crawler/GetPageRequest.java index 3da77b83..b737723d 100644 --- a/crawler/basic/src/org/wamblee/crawler/GetPageRequest.java +++ b/crawler/basic/src/org/wamblee/crawler/GetPageRequest.java @@ -16,7 +16,7 @@ package org.wamblee.crawler; -import java.io.PrintStream; +import java.io.IOException; import javax.xml.transform.TransformerException; @@ -25,6 +25,7 @@ import org.apache.commons.httpclient.HttpMethod; import org.apache.commons.httpclient.NameValuePair; import org.apache.commons.httpclient.methods.GetMethod; import org.w3c.dom.Document; +import org.wamblee.xml.XslTransformer; /** * Gets a page by issueing a get request. @@ -36,35 +37,26 @@ public class GetPageRequest extends AbstractPageRequest { * @param aMaxTries Maximum number of retries. * @param aMaxDelay Maximum delay before executing the request. * @param aParams Request parameters to use. + * @param aHeaders Request headers to use. * @param aXslt XSLT to use. */ - public GetPageRequest(int aMaxTries, int aMaxDelay, NameValuePair[] aParams, String aXslt) { - super(aMaxTries, aMaxDelay, aParams, aXslt, null); + public GetPageRequest(int aMaxTries, int aMaxDelay, NameValuePair[] aParams, + NameValuePair[] aHeaders, String aXslt, XslTransformer aTransformer) { + super(aMaxTries, aMaxDelay, aParams, aHeaders, aXslt, aTransformer); } - - /** - * Constructs the request. - * @param aMaxTries Maximum number of retries. - * @param aMaxDelay Maximum delay before executing the request. - * @param aParams Request parameters to use. - * @param aXslt XSLT to use. - * @param aOs Logging output stream to use. - */ - public GetPageRequest(int aMaxTries, int aMaxDelay, NameValuePair[] aParams, String aXslt, PrintStream aOs) { - super(aMaxTries, aMaxDelay, aParams, aXslt, aOs); - } - + /* * (non-Javadoc) * * @see org.wamblee.crawler.PageRequest#getPage(org.apache.commons.httpclient.HttpClient) */ - public Document execute(String aUrl, HttpClient aClient) + public Document execute(String aUrl, NameValuePair[] aParams, HttpClient aClient) throws PageException { HttpMethod method = new GetMethod(aUrl); - if (getParameters().length > 0) { + NameValuePair[] params = getParameters(aParams); + if (params.length > 0) { String oldQueryString = method.getQueryString(); - method.setQueryString(getParameters()); + method.setQueryString(params); String queryString = method.getQueryString(); if (oldQueryString.length() > 0) { queryString = queryString + '&' + oldQueryString; @@ -74,7 +66,9 @@ public class GetPageRequest extends AbstractPageRequest { try { return executeMethod(aClient, method); } catch (TransformerException e) { - throw new PageException(e.getMessage(), e); + throw new PageException("Transformation problem for url " + aUrl, e); + } catch (IOException e) { + throw new PageException("Problem getting " + aUrl, e); } }