X-Git-Url: http://wamblee.org/gitweb/?a=blobdiff_plain;ds=sidebyside;f=crawler%2Fbasic%2Fsrc%2Forg%2Fwamblee%2Fcrawler%2Fimpl%2FCrawlerImpl.java;h=098ed91f42ee30d072b2ce5d5339e37d3e8a2726;hb=f53c06ddca33e21e772c479179b7f858a3a8b8d4;hp=5633078cabaa71cfd2a5970cd7b1e9743270c79f;hpb=0c7e22e06b8aa3e5e0e516f2f3c46eee6215bd85;p=utils diff --git a/crawler/basic/src/org/wamblee/crawler/impl/CrawlerImpl.java b/crawler/basic/src/org/wamblee/crawler/impl/CrawlerImpl.java index 5633078c..098ed91f 100644 --- a/crawler/basic/src/org/wamblee/crawler/impl/CrawlerImpl.java +++ b/crawler/basic/src/org/wamblee/crawler/impl/CrawlerImpl.java @@ -17,6 +17,7 @@ package org.wamblee.crawler.impl; import org.apache.commons.httpclient.HttpClient; +import org.apache.commons.httpclient.NameValuePair; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.dom4j.Element; @@ -36,8 +37,6 @@ public class CrawlerImpl implements Crawler { private static final Log LOG = LogFactory.getLog(CrawlerImpl.class); - private static final int MAX_DELAY = 5000; - private HttpClient _client; private Configuration _config; @@ -60,11 +59,11 @@ public class CrawlerImpl implements Crawler { * * @see org.wamblee.crawler.Crawler#getPage(java.lang.String) */ - public Page getPage(String aUrl) throws PageException { - LOG.info("Getting page: url = '" + aUrl + "'"); + public Page getPage(String aUrl, NameValuePair[] aParams) throws PageException { + LOG.debug("Getting page: url = '" + aUrl + "'"); PageRequest request = _config.getRequest(aUrl); - Document content = request.execute(aUrl, _client); - return transformToDom4jDoc(content); + Document content = request.execute(aUrl, aParams, _client); + return transformToDom4jDoc(aUrl, content); } /* @@ -73,11 +72,11 @@ public class CrawlerImpl implements Crawler { * @see org.wamblee.crawler.Crawler#getPage(java.lang.String, * java.lang.String) */ - public Page getPage(String aUrl, PageType aType) throws PageException { - LOG.info("Getting page: url = '" + aUrl + "', type = '" + aType + "'"); + public Page getPage(String aUrl, NameValuePair[] aParams, PageType aType) throws PageException { + LOG.debug("Getting page: url = '" + aUrl + "', type = '" + aType + "'"); PageRequest request = _config.getRequest(aType); - Document content = request.execute(aUrl, _client); - return transformToDom4jDoc(content); + Document content = request.execute(aUrl, aParams, _client); + return transformToDom4jDoc(aUrl, content); } /** @@ -85,13 +84,13 @@ public class CrawlerImpl implements Crawler { * @param content DOM document. * @return */ - private Page transformToDom4jDoc(Document content) { + private Page transformToDom4jDoc(String aUrl, Document content) { DOMReader reader = new DOMReader(); org.dom4j.Document dom4jDoc = reader.read(content); Element root = dom4jDoc.getRootElement(); dom4jDoc.remove(root); - return new PageImpl(this, replaceReferencesWithContent(root)); + return new PageImpl(aUrl, this, replaceReferencesWithContent(root)); } /**