git://wamblee.org
/
utils
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
Timeout is now 5 seconds max.
[utils]
/
crawler
/
basic
/
src
/
org
/
wamblee
/
crawler
/
impl
/
CrawlerImpl.java
diff --git
a/crawler/basic/src/org/wamblee/crawler/impl/CrawlerImpl.java
b/crawler/basic/src/org/wamblee/crawler/impl/CrawlerImpl.java
index 0188ad31c94d2a8eb8a2d2e040fb7af3fd90f122..098ed91f42ee30d072b2ce5d5339e37d3e8a2726 100644
(file)
--- a/
crawler/basic/src/org/wamblee/crawler/impl/CrawlerImpl.java
+++ b/
crawler/basic/src/org/wamblee/crawler/impl/CrawlerImpl.java
@@
-17,6
+17,7
@@
package org.wamblee.crawler.impl;
import org.apache.commons.httpclient.HttpClient;
package org.wamblee.crawler.impl;
import org.apache.commons.httpclient.HttpClient;
+import org.apache.commons.httpclient.NameValuePair;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.dom4j.Element;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.dom4j.Element;
@@
-58,11
+59,11
@@
public class CrawlerImpl implements Crawler {
*
* @see org.wamblee.crawler.Crawler#getPage(java.lang.String)
*/
*
* @see org.wamblee.crawler.Crawler#getPage(java.lang.String)
*/
- public Page getPage(String aUrl) throws PageException {
+ public Page getPage(String aUrl
, NameValuePair[] aParams
) throws PageException {
LOG.debug("Getting page: url = '" + aUrl + "'");
PageRequest request = _config.getRequest(aUrl);
LOG.debug("Getting page: url = '" + aUrl + "'");
PageRequest request = _config.getRequest(aUrl);
- Document content = request.execute(aUrl, _client);
- return transformToDom4jDoc(content);
+ Document content = request.execute(aUrl,
aParams,
_client);
+ return transformToDom4jDoc(
aUrl,
content);
}
/*
}
/*
@@
-71,11
+72,11
@@
public class CrawlerImpl implements Crawler {
* @see org.wamblee.crawler.Crawler#getPage(java.lang.String,
* java.lang.String)
*/
* @see org.wamblee.crawler.Crawler#getPage(java.lang.String,
* java.lang.String)
*/
- public Page getPage(String aUrl, PageType aType) throws PageException {
+ public Page getPage(String aUrl,
NameValuePair[] aParams,
PageType aType) throws PageException {
LOG.debug("Getting page: url = '" + aUrl + "', type = '" + aType + "'");
PageRequest request = _config.getRequest(aType);
LOG.debug("Getting page: url = '" + aUrl + "', type = '" + aType + "'");
PageRequest request = _config.getRequest(aType);
- Document content = request.execute(aUrl, _client);
- return transformToDom4jDoc(content);
+ Document content = request.execute(aUrl,
aParams,
_client);
+ return transformToDom4jDoc(
aUrl,
content);
}
/**
}
/**
@@
-83,13
+84,13
@@
public class CrawlerImpl implements Crawler {
* @param content DOM document.
* @return
*/
* @param content DOM document.
* @return
*/
- private Page transformToDom4jDoc(Document content) {
+ private Page transformToDom4jDoc(
String aUrl,
Document content) {
DOMReader reader = new DOMReader();
org.dom4j.Document dom4jDoc = reader.read(content);
Element root = dom4jDoc.getRootElement();
dom4jDoc.remove(root);
DOMReader reader = new DOMReader();
org.dom4j.Document dom4jDoc = reader.read(content);
Element root = dom4jDoc.getRootElement();
dom4jDoc.remove(root);
- return new PageImpl(this, replaceReferencesWithContent(root));
+ return new PageImpl(
aUrl,
this, replaceReferencesWithContent(root));
}
/**
}
/**