X-Git-Url: http://wamblee.org/gitweb/?a=blobdiff_plain;f=crawler%2Fbasic%2Fsrc%2Forg%2Fwamblee%2Fcrawler%2Fimpl%2FConfigurationParser.java;h=da3fd2be6bca5511e4eecee6bde3bec562b8e77e;hb=eb6463aa65160fb4c5bbb275adcff0df33c11e64;hp=6795bf1c6c1fa1830d14e2902147f5d3816d31b4;hpb=c78140d07025961d92a4635b6e30f4bf66725746;p=utils diff --git a/crawler/basic/src/org/wamblee/crawler/impl/ConfigurationParser.java b/crawler/basic/src/org/wamblee/crawler/impl/ConfigurationParser.java index 6795bf1c..da3fd2be 100644 --- a/crawler/basic/src/org/wamblee/crawler/impl/ConfigurationParser.java +++ b/crawler/basic/src/org/wamblee/crawler/impl/ConfigurationParser.java @@ -17,7 +17,6 @@ package org.wamblee.crawler.impl; import java.io.InputStream; -import java.io.PrintStream; import java.util.ArrayList; import java.util.Iterator; import java.util.List; @@ -31,6 +30,7 @@ import org.wamblee.crawler.Configuration; import org.wamblee.crawler.GetPageRequest; import org.wamblee.crawler.PageRequest; import org.wamblee.crawler.PostPageRequest; +import org.wamblee.xml.XslTransformer; /** * Parsing of the configuration from an XML file. @@ -48,6 +48,8 @@ public class ConfigurationParser { private static final String ELEM_XSLT = "xslt"; private static final String ELEM_PARAM = "param"; + + private static final String ELEM_HEADER = "header"; private static final String AT_NAME = "name"; @@ -59,17 +61,15 @@ public class ConfigurationParser { private static final int MAX_TRIES = 3; - private static final int MAX_DELAY = 100; - - private PrintStream _os; + private static final int MAX_DELAY = 5000; + private XslTransformer _transformer; + /** * Constructs the configuration parser. - * @param aOs The stream for logging requests. - * TODO plain java logging should be used instead of this awkward mechanism. */ - public ConfigurationParser(PrintStream aOs) { - _os = aOs; + public ConfigurationParser(XslTransformer aTransformer) { + _transformer = aTransformer; } /** @@ -151,21 +151,18 @@ public class ConfigurationParser { private PageRequest parseRequestConfig(Element aElem) { String method = aElem.elementText(ELEM_METHOD); String xslt = aElem.elementText(ELEM_XSLT); - List params = new ArrayList(); - for (Iterator i = aElem.elementIterator(ELEM_PARAM); i.hasNext();) { - Element paramElem = (Element) i.next(); - NameValuePair param = parseParameter(paramElem); - params.add(param); - } - + List params = parseNameValuePairs(aElem, ELEM_PARAM); + List headers = parseNameValuePairs(aElem, ELEM_HEADER); + NameValuePair[] paramsArray = params.toArray(new NameValuePair[0]); + NameValuePair[] headersArray = headers.toArray(new NameValuePair[0]); PageRequest request; if (METHOD_POST.equals(method)) { - request = new PostPageRequest(MAX_TRIES, MAX_DELAY, paramsArray, - xslt, _os); + request = new PostPageRequest(MAX_TRIES, MAX_DELAY, paramsArray, headersArray, + xslt, _transformer); } else if (METHOD_GET.equals(method) || method == null) { - request = new GetPageRequest(MAX_TRIES, MAX_DELAY, paramsArray, - xslt, _os); + request = new GetPageRequest(MAX_TRIES, MAX_DELAY, paramsArray, headersArray, + xslt, _transformer); } else { throw new RuntimeException("Unknown request method '" + method + "'. Only " + METHOD_GET + " and " + METHOD_POST @@ -174,6 +171,20 @@ public class ConfigurationParser { return request; } + /** + * @param aElem + * @return + */ + private List parseNameValuePairs(Element aElem, String aElemName) { + List headers = new ArrayList(); + for (Iterator i = aElem.elementIterator(aElemName); i.hasNext();) { + Element paramElem = (Element) i.next(); + NameValuePair header = parseParameter(paramElem); + headers.add(header); + } + return headers; + } + /** * Parses a parameter definition. * @param aParam Parameter.