X-Git-Url: http://wamblee.org/gitweb/?a=blobdiff_plain;f=crawler%2Fbasic%2Fsrc%2Forg%2Fwamblee%2Fcrawler%2Fimpl%2FCrawlerImpl.java;h=098ed91f42ee30d072b2ce5d5339e37d3e8a2726;hb=f53c06ddca33e21e772c479179b7f858a3a8b8d4;hp=0188ad31c94d2a8eb8a2d2e040fb7af3fd90f122;hpb=5685a836b9208ff8babfe5ac5b30c5f86d27cf96;p=utils diff --git a/crawler/basic/src/org/wamblee/crawler/impl/CrawlerImpl.java b/crawler/basic/src/org/wamblee/crawler/impl/CrawlerImpl.java index 0188ad31..098ed91f 100644 --- a/crawler/basic/src/org/wamblee/crawler/impl/CrawlerImpl.java +++ b/crawler/basic/src/org/wamblee/crawler/impl/CrawlerImpl.java @@ -17,6 +17,7 @@ package org.wamblee.crawler.impl; import org.apache.commons.httpclient.HttpClient; +import org.apache.commons.httpclient.NameValuePair; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.dom4j.Element; @@ -58,11 +59,11 @@ public class CrawlerImpl implements Crawler { * * @see org.wamblee.crawler.Crawler#getPage(java.lang.String) */ - public Page getPage(String aUrl) throws PageException { + public Page getPage(String aUrl, NameValuePair[] aParams) throws PageException { LOG.debug("Getting page: url = '" + aUrl + "'"); PageRequest request = _config.getRequest(aUrl); - Document content = request.execute(aUrl, _client); - return transformToDom4jDoc(content); + Document content = request.execute(aUrl, aParams, _client); + return transformToDom4jDoc(aUrl, content); } /* @@ -71,11 +72,11 @@ public class CrawlerImpl implements Crawler { * @see org.wamblee.crawler.Crawler#getPage(java.lang.String, * java.lang.String) */ - public Page getPage(String aUrl, PageType aType) throws PageException { + public Page getPage(String aUrl, NameValuePair[] aParams, PageType aType) throws PageException { LOG.debug("Getting page: url = '" + aUrl + "', type = '" + aType + "'"); PageRequest request = _config.getRequest(aType); - Document content = request.execute(aUrl, _client); - return transformToDom4jDoc(content); + Document content = request.execute(aUrl, aParams, _client); + return transformToDom4jDoc(aUrl, content); } /** @@ -83,13 +84,13 @@ public class CrawlerImpl implements Crawler { * @param content DOM document. * @return */ - private Page transformToDom4jDoc(Document content) { + private Page transformToDom4jDoc(String aUrl, Document content) { DOMReader reader = new DOMReader(); org.dom4j.Document dom4jDoc = reader.read(content); Element root = dom4jDoc.getRootElement(); dom4jDoc.remove(root); - return new PageImpl(this, replaceReferencesWithContent(root)); + return new PageImpl(aUrl, this, replaceReferencesWithContent(root)); } /**