git://wamblee.org
/
utils
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
more robustness, now a detailed report is always sent, also if crawling
[utils]
/
crawler
/
basic
/
src
/
org
/
wamblee
/
crawler
/
AbstractPageRequest.java
diff --git
a/crawler/basic/src/org/wamblee/crawler/AbstractPageRequest.java
b/crawler/basic/src/org/wamblee/crawler/AbstractPageRequest.java
index baf9510b8c2ee9d560d1e888fbb58b8bd5b7b170..432ebb4ac4f72010f30bd5371a15cce2d48e4f42 100644
(file)
--- a/
crawler/basic/src/org/wamblee/crawler/AbstractPageRequest.java
+++ b/
crawler/basic/src/org/wamblee/crawler/AbstractPageRequest.java
@@ -17,7 +17,6 @@
package org.wamblee.crawler;
import java.io.ByteArrayOutputStream;
package org.wamblee.crawler;
import java.io.ByteArrayOutputStream;
-import java.io.File;
import java.io.IOException;
import javax.xml.transform.OutputKeys;
import java.io.IOException;
import javax.xml.transform.OutputKeys;
@@ -40,9 +39,8 @@
import org.apache.xml.serialize.OutputFormat;
import org.apache.xml.serialize.XMLSerializer;
import org.w3c.dom.Document;
import org.w3c.tidy.Tidy;
import org.apache.xml.serialize.XMLSerializer;
import org.w3c.dom.Document;
import org.w3c.tidy.Tidy;
-import org.wamblee.io.FileResource;
-import org.wamblee.xml.DOMUtility;
-import org.wamblee.xml.XSLT;
+import org.wamblee.xml.DomUtils;
+import org.wamblee.xml.XslTransformer;
/**
* General support claas for all kinds of requests.
/**
* General support claas for all kinds of requests.
@@ -60,6 +58,8 @@
public abstract class AbstractPageRequest implements PageRequest {
private NameValuePair[] _params;
private String _xslt;
private NameValuePair[] _params;
private String _xslt;
+
+ private XslTransformer _transformer;
/**
* Constructs the request.
/**
* Constructs the request.
@@ -74,7 +74,7 @@
public abstract class AbstractPageRequest implements PageRequest {
* XSLT used to convert the response.
*/
protected AbstractPageRequest(int aMaxTries, int aMaxDelay,
* XSLT used to convert the response.
*/
protected AbstractPageRequest(int aMaxTries, int aMaxDelay,
- NameValuePair[] aParams, String aXslt) {
+ NameValuePair[] aParams, String aXslt, XslTransformer aTransformer) {
if (aParams == null) {
throw new IllegalArgumentException("aParams is null");
}
if (aParams == null) {
throw new IllegalArgumentException("aParams is null");
}
@@ -85,6 +85,7 @@
public abstract class AbstractPageRequest implements PageRequest {
_maxDelay = aMaxDelay;
_params = aParams;
_xslt = aXslt;
_maxDelay = aMaxDelay;
_params = aParams;
_xslt = aXslt;
+ _transformer = aTransformer;
}
/*
}
/*
@@ -155,8 +156,8 @@
public abstract class AbstractPageRequest implements PageRequest {
aMethod = executeWithRedirects(aClient, aMethod);
byte[] xhtmlData = getXhtml(aMethod);
aMethod = executeWithRedirects(aClient, aMethod);
byte[] xhtmlData = getXhtml(aMethod);
- Document transformed = new XSLT().transform(xhtmlData,
- new FileResource(new File(_xslt)));
+ Document transformed = _transformer.transform(xhtmlData,
+ _transformer.resolve(_xslt));
ByteArrayOutputStream os = new ByteArrayOutputStream();
Transformer transformer = TransformerFactory.newInstance()
.newTransformer();
ByteArrayOutputStream os = new ByteArrayOutputStream();
Transformer transformer = TransformerFactory.newInstance()
.newTransformer();
@@ -196,7 +197,7 @@
public abstract class AbstractPageRequest implements PageRequest {
// in a system wide way.
ByteArrayOutputStream os = new ByteArrayOutputStream();
Document w3cDoc = tidy.parseDOM(aMethod.getResponseBodyAsStream(), os);
// in a system wide way.
ByteArrayOutputStream os = new ByteArrayOutputStream();
Document w3cDoc = tidy.parseDOM(aMethod.getResponseBodyAsStream(), os);
- DOMUtility.removeDuplicateAttributes(w3cDoc);
+ DomUtils.removeDuplicateAttributes(w3cDoc);
LOG.debug("Content of response is \n" + os.toString());
ByteArrayOutputStream xhtml = new ByteArrayOutputStream();
LOG.debug("Content of response is \n" + os.toString());
ByteArrayOutputStream xhtml = new ByteArrayOutputStream();
@@ -251,7 +252,7 @@
public abstract class AbstractPageRequest implements PageRequest {
// recursion.
}
default: {
// recursion.
}
default: {
- throw new RuntimeException("Method failed: "
+ throw new IOException("Method failed: "
+ aMethod.getStatusLine());
}
}
+ aMethod.getStatusLine());
}
}