X-Git-Url: http://wamblee.org/gitweb/?a=blobdiff_plain;f=crawler%2Fbasic%2Fsrc%2Forg%2Fwamblee%2Fcrawler%2FGetPageRequest.java;h=b737723df5dde04f6d61775d1915e3b12b66f404;hb=5685a836b9208ff8babfe5ac5b30c5f86d27cf96;hp=7d99c1e814241c137026bdb88cdec01c65dc91e5;hpb=81bc61121a8f17f754fc99eb66603a59df242ddc;p=utils diff --git a/crawler/basic/src/org/wamblee/crawler/GetPageRequest.java b/crawler/basic/src/org/wamblee/crawler/GetPageRequest.java index 7d99c1e8..b737723d 100644 --- a/crawler/basic/src/org/wamblee/crawler/GetPageRequest.java +++ b/crawler/basic/src/org/wamblee/crawler/GetPageRequest.java @@ -12,48 +12,64 @@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. - */ + */ package org.wamblee.crawler; -import java.io.PrintStream; +import java.io.IOException; + +import javax.xml.transform.TransformerException; import org.apache.commons.httpclient.HttpClient; import org.apache.commons.httpclient.HttpMethod; import org.apache.commons.httpclient.NameValuePair; import org.apache.commons.httpclient.methods.GetMethod; import org.w3c.dom.Document; +import org.wamblee.xml.XslTransformer; /** - * Gets a page by issueing a get request. + * Gets a page by issueing a get request. */ public class GetPageRequest extends AbstractPageRequest { - - public GetPageRequest(NameValuePair[] aParams, String aXslt) { - super(aParams, aXslt, null); - } - - public GetPageRequest(NameValuePair[] aParams, String aXslt, PrintStream aOs) { - super(aParams, aXslt, aOs); + + /** + * Constructs the request. + * @param aMaxTries Maximum number of retries. + * @param aMaxDelay Maximum delay before executing the request. + * @param aParams Request parameters to use. + * @param aHeaders Request headers to use. + * @param aXslt XSLT to use. + */ + public GetPageRequest(int aMaxTries, int aMaxDelay, NameValuePair[] aParams, + NameValuePair[] aHeaders, String aXslt, XslTransformer aTransformer) { + super(aMaxTries, aMaxDelay, aParams, aHeaders, aXslt, aTransformer); } - - /* (non-Javadoc) + /* + * (non-Javadoc) + * * @see org.wamblee.crawler.PageRequest#getPage(org.apache.commons.httpclient.HttpClient) */ - public Document execute(String aUrl, HttpClient aClient) { + public Document execute(String aUrl, NameValuePair[] aParams, HttpClient aClient) + throws PageException { HttpMethod method = new GetMethod(aUrl); - if ( getParameters().length > 0 ) { + NameValuePair[] params = getParameters(aParams); + if (params.length > 0) { String oldQueryString = method.getQueryString(); - method.setQueryString(getParameters()); + method.setQueryString(params); String queryString = method.getQueryString(); - if ( oldQueryString.length() > 0 ) { + if (oldQueryString.length() > 0) { queryString = queryString + '&' + oldQueryString; method.setQueryString(queryString); } } - - return executeMethod(aClient, method); + try { + return executeMethod(aClient, method); + } catch (TransformerException e) { + throw new PageException("Transformation problem for url " + aUrl, e); + } catch (IOException e) { + throw new PageException("Problem getting " + aUrl, e); + } } }