git://wamblee.org
/
utils
/ commitdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
| commitdiff |
tree
raw
|
patch
|
inline
| side by side (parent:
6b1d610
)
support for parameters on actions.
author
erik
<erik@77661180-640e-0410-b3a8-9f9b13e6d0e0>
Sat, 19 Aug 2006 23:52:53 +0000
(23:52 +0000)
committer
erik
<erik@77661180-640e-0410-b3a8-9f9b13e6d0e0>
Sat, 19 Aug 2006 23:52:53 +0000
(23:52 +0000)
trunk/crawler/basic/src/org/wamblee/crawler/AbstractPageRequest.java
patch
|
blob
|
history
trunk/crawler/basic/src/org/wamblee/crawler/Action.java
patch
|
blob
|
history
trunk/crawler/basic/src/org/wamblee/crawler/Crawler.java
patch
|
blob
|
history
trunk/crawler/basic/src/org/wamblee/crawler/GetPageRequest.java
patch
|
blob
|
history
trunk/crawler/basic/src/org/wamblee/crawler/PageRequest.java
patch
|
blob
|
history
trunk/crawler/basic/src/org/wamblee/crawler/PostPageRequest.java
patch
|
blob
|
history
diff --git
a/trunk/crawler/basic/src/org/wamblee/crawler/AbstractPageRequest.java
b/trunk/crawler/basic/src/org/wamblee/crawler/AbstractPageRequest.java
index 28482d7fbd096fb8641db821cfcbaab339841726..2e598005ff3a453150a50884cccfd649889ee3c6 100644
(file)
--- a/
trunk/crawler/basic/src/org/wamblee/crawler/AbstractPageRequest.java
+++ b/
trunk/crawler/basic/src/org/wamblee/crawler/AbstractPageRequest.java
@@
-18,6
+18,9
@@
package org.wamblee.crawler;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
@@
-108,10
+111,15
@@
public abstract class AbstractPageRequest implements PageRequest {
/**
* Gets the parameters for the request.
*
/**
* Gets the parameters for the request.
*
+ * @param aParams Additional parameters to use, obtained from another page, most likely as
+ * hidden form fields.
* @return Request parameters.
*/
* @return Request parameters.
*/
- protected NameValuePair[] getParameters() {
- return _params;
+ protected NameValuePair[] getParameters(NameValuePair[] aParams) {
+ List<NameValuePair> params = new ArrayList<NameValuePair>();
+ params.addAll(Arrays.asList(_params));
+ params.addAll(Arrays.asList(aParams));
+ return params.toArray(new NameValuePair[0]);
}
/**
}
/**
@@
-176,6
+184,7
@@
public abstract class AbstractPageRequest implements PageRequest {
try {
aMethod = executeWithRedirects(aClient, aMethod);
byte[] xhtmlData = getXhtml(aMethod);
try {
aMethod = executeWithRedirects(aClient, aMethod);
byte[] xhtmlData = getXhtml(aMethod);
+
Document transformed = _transformer.transform(xhtmlData,
_transformer.resolve(_xslt));
Document transformed = _transformer.transform(xhtmlData,
_transformer.resolve(_xslt));
diff --git
a/trunk/crawler/basic/src/org/wamblee/crawler/Action.java
b/trunk/crawler/basic/src/org/wamblee/crawler/Action.java
index cd9b4e2a74a83062757133aa697259d133522050..f24cacd0ea4547c11a9afec2af5201d49c325e0a 100644
(file)
--- a/
trunk/crawler/basic/src/org/wamblee/crawler/Action.java
+++ b/
trunk/crawler/basic/src/org/wamblee/crawler/Action.java
@@
-16,6
+16,7
@@
package org.wamblee.crawler;
package org.wamblee.crawler;
+import org.apache.commons.httpclient.NameValuePair;
import org.dom4j.Element;
/**
import org.dom4j.Element;
/**
diff --git
a/trunk/crawler/basic/src/org/wamblee/crawler/Crawler.java
b/trunk/crawler/basic/src/org/wamblee/crawler/Crawler.java
index 00d1283aeee1324705acf6661c07bd4ed30ddb80..3615d9bc3a1e612e620725140a383fd17ac8e41b 100644
(file)
--- a/
trunk/crawler/basic/src/org/wamblee/crawler/Crawler.java
+++ b/
trunk/crawler/basic/src/org/wamblee/crawler/Crawler.java
@@
-16,6
+16,8
@@
package org.wamblee.crawler;
package org.wamblee.crawler;
+import org.apache.commons.httpclient.NameValuePair;
+
/**
* The object that actually obtains pages based on URL.
/**
* The object that actually obtains pages based on URL.
@@
-25,17
+27,19
@@
public interface Crawler {
/**
* Gets the content for a specific page.
* @param aUrl Url of page.
/**
* Gets the content for a specific page.
* @param aUrl Url of page.
+ * @param aParameters Parameters to supply.
* @return Page to retrieve.
* @throws PageException In case of problems retrieving the page.
*/
* @return Page to retrieve.
* @throws PageException In case of problems retrieving the page.
*/
- Page getPage(String aUrl) throws PageException;
+ Page getPage(String aUrl
, NameValuePair[] aParameters
) throws PageException;
/**
* Gets the content for a specific page.
/**
* Gets the content for a specific page.
- * @param aUrl Url of page.
+ * @param aUrl Url of page.
+ * @param aParameters Parameters to supply.
* @param aType Type of page.
* @return Page.
* @throws PageException In case of problems retrieving the page.
*/
* @param aType Type of page.
* @return Page.
* @throws PageException In case of problems retrieving the page.
*/
- Page getPage(String aUrl, PageType aType) throws PageException;
+ Page getPage(String aUrl,
NameValuePair[] aParameters,
PageType aType) throws PageException;
}
}
diff --git
a/trunk/crawler/basic/src/org/wamblee/crawler/GetPageRequest.java
b/trunk/crawler/basic/src/org/wamblee/crawler/GetPageRequest.java
index 40a3421167fafb0f0a3f08e19fbb994912177bc2..b737723df5dde04f6d61775d1915e3b12b66f404 100644
(file)
--- a/
trunk/crawler/basic/src/org/wamblee/crawler/GetPageRequest.java
+++ b/
trunk/crawler/basic/src/org/wamblee/crawler/GetPageRequest.java
@@
-50,12
+50,13
@@
public class GetPageRequest extends AbstractPageRequest {
*
* @see org.wamblee.crawler.PageRequest#getPage(org.apache.commons.httpclient.HttpClient)
*/
*
* @see org.wamblee.crawler.PageRequest#getPage(org.apache.commons.httpclient.HttpClient)
*/
- public Document execute(String aUrl, HttpClient aClient)
+ public Document execute(String aUrl,
NameValuePair[] aParams,
HttpClient aClient)
throws PageException {
HttpMethod method = new GetMethod(aUrl);
throws PageException {
HttpMethod method = new GetMethod(aUrl);
- if (getParameters().length > 0) {
+ NameValuePair[] params = getParameters(aParams);
+ if (params.length > 0) {
String oldQueryString = method.getQueryString();
String oldQueryString = method.getQueryString();
- method.setQueryString(
getParameters()
);
+ method.setQueryString(
params
);
String queryString = method.getQueryString();
if (oldQueryString.length() > 0) {
queryString = queryString + '&' + oldQueryString;
String queryString = method.getQueryString();
if (oldQueryString.length() > 0) {
queryString = queryString + '&' + oldQueryString;
diff --git
a/trunk/crawler/basic/src/org/wamblee/crawler/PageRequest.java
b/trunk/crawler/basic/src/org/wamblee/crawler/PageRequest.java
index 192f74e4587ecc714c174a92f45bbcf326439533..be729069864b1f5e581de8d49def2a3771e60a40 100644
(file)
--- a/
trunk/crawler/basic/src/org/wamblee/crawler/PageRequest.java
+++ b/
trunk/crawler/basic/src/org/wamblee/crawler/PageRequest.java
@@
-17,6
+17,7
@@
package org.wamblee.crawler;
import org.apache.commons.httpclient.HttpClient;
package org.wamblee.crawler;
import org.apache.commons.httpclient.HttpClient;
+import org.apache.commons.httpclient.NameValuePair;
import org.w3c.dom.Document;
/**
import org.w3c.dom.Document;
/**
@@
-27,11
+28,12
@@
public interface PageRequest {
/**
* Gets a page as an XML document.
* @param aUrl Url of the page.
/**
* Gets a page as an XML document.
* @param aUrl Url of the page.
+ * @param aParams Additional parameters to supply.
* @param aClient Http client to use.
* @return Client.
* @throws PageException In case of problems retrieving the page.
*/
* @param aClient Http client to use.
* @return Client.
* @throws PageException In case of problems retrieving the page.
*/
- Document execute(String aUrl, HttpClient aClient) throws PageException;
+ Document execute(String aUrl,
NameValuePair[] aParams,
HttpClient aClient) throws PageException;
/**
* Overrides the Xslt to use. This is used when the transformed page specifies
/**
* Overrides the Xslt to use. This is used when the transformed page specifies
diff --git
a/trunk/crawler/basic/src/org/wamblee/crawler/PostPageRequest.java
b/trunk/crawler/basic/src/org/wamblee/crawler/PostPageRequest.java
index af160f60cfa581b180a5d9b2cc576da897705218..4a6a073c490eaa6d209ca924a3c5744a8defc678 100644
(file)
--- a/
trunk/crawler/basic/src/org/wamblee/crawler/PostPageRequest.java
+++ b/
trunk/crawler/basic/src/org/wamblee/crawler/PostPageRequest.java
@@
-52,10
+52,10
@@
public class PostPageRequest extends AbstractPageRequest {
* @see org.wamblee.crawler.PageRequest#execute(java.lang.String,
* org.apache.commons.httpclient.HttpClient)
*/
* @see org.wamblee.crawler.PageRequest#execute(java.lang.String,
* org.apache.commons.httpclient.HttpClient)
*/
- public Document execute(String aUrl, HttpClient aClient)
+ public Document execute(String aUrl,
NameValuePair[] aParams,
HttpClient aClient)
throws PageException {
PostMethod method = new PostMethod(aUrl);
throws PageException {
PostMethod method = new PostMethod(aUrl);
- method.addParameters(getParameters());
+ method.addParameters(getParameters(
aParams
));
try {
return executeMethod(aClient, method);
} catch (TransformerException e) {
try {
return executeMethod(aClient, method);
} catch (TransformerException e) {