git://wamblee.org
/
utils
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
more robustness, now a detailed report is always sent, also if crawling
[utils]
/
crawler
/
basic
/
src
/
org
/
wamblee
/
crawler
/
AbstractPageRequest.java
diff --git a/crawler/basic/src/org/wamblee/crawler/AbstractPageRequest.java b/crawler/basic/src/org/wamblee/crawler/AbstractPageRequest.java
index 66627ca523faa4c56a112811e7ccdf9381107825..432ebb4ac4f72010f30bd5371a15cce2d48e4f42 100644
(file)
--- a/crawler/basic/src/org/wamblee/crawler/AbstractPageRequest.java
+++ b/crawler/basic/src/org/wamblee/crawler/AbstractPageRequest.java
@@ -39,9 +39,8 @@
import org.apache.xml.serialize.OutputFormat;
import org.apache.xml.serialize.XMLSerializer;
import org.w3c.dom.Document;
import org.w3c.tidy.Tidy;
import org.apache.xml.serialize.XMLSerializer;
import org.w3c.dom.Document;
import org.w3c.tidy.Tidy;
-import org.wamblee.xml.ClasspathUriResolver;
-import org.wamblee.xml.DOMUtility;
-import org.wamblee.xml.XSLT;
+import org.wamblee.xml.DomUtils;
+import org.wamblee.xml.XslTransformer;
/**
* General support class for all kinds of requests.
/**
* General support class for all kinds of requests.
@@ -59,6 +58,8 @@
public abstract class AbstractPageRequest implements PageRequest {
private NameValuePair[] _params;
private String _xslt;
private NameValuePair[] _params;
private String _xslt;
+
+ private XslTransformer _transformer;
/**
* Constructs the request.
/**
* Constructs the request.
@@ -73,7 +74,7 @@
public abstract class AbstractPageRequest implements PageRequest {
* XSLT used to convert the response.
*/
protected AbstractPageRequest(int aMaxTries, int aMaxDelay,
* XSLT used to convert the response.
*/
protected AbstractPageRequest(int aMaxTries, int aMaxDelay,
- NameValuePair[] aParams, String aXslt) {
+ NameValuePair[] aParams, String aXslt, XslTransformer aTransformer) {
if (aParams == null) {
throw new IllegalArgumentException("aParams is null");
}
if (aParams == null) {
throw new IllegalArgumentException("aParams is null");
}
@@ -84,6 +85,7 @@
public abstract class AbstractPageRequest implements PageRequest {
_maxDelay = aMaxDelay;
_params = aParams;
_xslt = aXslt;
_maxDelay = aMaxDelay;
_params = aParams;
_xslt = aXslt;
+ _transformer = aTransformer;
}
/*
}
/*
@@ -154,9 +156,8 @@
public abstract class AbstractPageRequest implements PageRequest {
aMethod = executeWithRedirects(aClient, aMethod);
byte[] xhtmlData = getXhtml(aMethod);
aMethod = executeWithRedirects(aClient, aMethod);
byte[] xhtmlData = getXhtml(aMethod);
- XSLT xsltProcessor = new XSLT(new ClasspathUriResolver());
- Document transformed = xsltProcessor.transform(xhtmlData,
- xsltProcessor.resolve(_xslt));
+ Document transformed = _transformer.transform(xhtmlData,
+ _transformer.resolve(_xslt));
ByteArrayOutputStream os = new ByteArrayOutputStream();
Transformer transformer = TransformerFactory.newInstance()
.newTransformer();
ByteArrayOutputStream os = new ByteArrayOutputStream();
Transformer transformer = TransformerFactory.newInstance()
.newTransformer();
@@ -196,7 +197,7 @@
public abstract class AbstractPageRequest implements PageRequest {
// in a system wide way.
ByteArrayOutputStream os = new ByteArrayOutputStream();
Document w3cDoc = tidy.parseDOM(aMethod.getResponseBodyAsStream(), os);
// in a system wide way.
ByteArrayOutputStream os = new ByteArrayOutputStream();
Document w3cDoc = tidy.parseDOM(aMethod.getResponseBodyAsStream(), os);
- DOMUtility.removeDuplicateAttributes(w3cDoc);
+ DomUtils.removeDuplicateAttributes(w3cDoc);
LOG.debug("Content of response is \n" + os.toString());
ByteArrayOutputStream xhtml = new ByteArrayOutputStream();
LOG.debug("Content of response is \n" + os.toString());
ByteArrayOutputStream xhtml = new ByteArrayOutputStream();
@@ -251,7 +252,7 @@
public abstract class AbstractPageRequest implements PageRequest {
// recursion.
}
default: {
// recursion.
}
default: {
- throw new RuntimeException("Method failed: "
+ throw new IOException("Method failed: "
+ aMethod.getStatusLine());
}
}
+ aMethod.getStatusLine());
}
}