X-Git-Url: http://wamblee.org/gitweb/?a=blobdiff_plain;ds=sidebyside;f=crawler%2Fbasic%2Fsrc%2Forg%2Fwamblee%2Fcrawler%2Fimpl%2FConfigurationParser.java;fp=crawler%2Fbasic%2Fsrc%2Forg%2Fwamblee%2Fcrawler%2Fimpl%2FConfigurationParser.java;h=2f2e5f5bf95cdf9cc866c182551da757de820769;hb=9d2a49c65872cd9330670a3cace19faf493df04d;hp=dafbc832b5f12583a96f119f6e86953496661c7d;hpb=76827fb7e4bf7e1ecc0b2ef01a75c5751a915964;p=utils diff --git a/crawler/basic/src/org/wamblee/crawler/impl/ConfigurationParser.java b/crawler/basic/src/org/wamblee/crawler/impl/ConfigurationParser.java index dafbc832..2f2e5f5b 100644 --- a/crawler/basic/src/org/wamblee/crawler/impl/ConfigurationParser.java +++ b/crawler/basic/src/org/wamblee/crawler/impl/ConfigurationParser.java @@ -36,24 +36,33 @@ import org.wamblee.crawler.PostPageRequest; * Parsing of the configuration from an XML file. */ public class ConfigurationParser { - - private static final String ELEM_URL = "url"; + + private static final String ELEM_URL = "url"; + private static final String ELEM_TYPE = "type"; - private static final String ELEM_PATTERN = "pattern"; - private static final String ELEM_METHOD= "method"; - private static final String ELEM_XSLT = "xslt"; - private static final String ELEM_PARAM = "param"; + + private static final String ELEM_PATTERN = "pattern"; + + private static final String ELEM_METHOD = "method"; + + private static final String ELEM_XSLT = "xslt"; + + private static final String ELEM_PARAM = "param"; + private static final String AT_NAME = "name"; + private static final String AT_VALUE = "value"; - + private static final String METHOD_POST = "post"; + private static final String METHOD_GET = "get"; - - private static final int MAX_TRIES = 3; + + private static final int MAX_TRIES = 3; + private static final int MAX_DELAY = 5000; - - private PrintStream _os; - + + private PrintStream _os; + public ConfigurationParser(PrintStream aOs) { _os = aOs; } @@ -62,10 +71,10 @@ public class ConfigurationParser { try { SAXReader reader = new SAXReader(); Document document = reader.read(aStream); - - Element root = document.getRootElement(); + + Element root = document.getRootElement(); List urlConfigs = parseUrlConfigs(root); - List pageTypeConfigs = parsePageTypeConfigs(root); + List pageTypeConfigs = parsePageTypeConfigs(root); return new ConfigurationImpl(urlConfigs, pageTypeConfigs); } catch (DocumentException e) { throw new RuntimeException("Problem parsing config file", e); @@ -73,36 +82,36 @@ public class ConfigurationParser { } /** - * @param root + * @param aRoot * @return */ - private List parseUrlConfigs(Element root) { + private List parseUrlConfigs(Element aRoot) { List configs = new ArrayList(); - for (Iterator i = root.elementIterator(ELEM_URL); i.hasNext(); ) { - Element url = (Element)i.next(); + for (Iterator i = aRoot.elementIterator(ELEM_URL); i.hasNext();) { + Element url = (Element) i.next(); UrlConfig config = parseUrlConfig(url); configs.add(config); } return configs; } - - private List parsePageTypeConfigs(Element root) { + + private List parsePageTypeConfigs(Element aRoot) { List configs = new ArrayList(); - for (Iterator i = root.elementIterator(ELEM_TYPE); i.hasNext(); ) { - Element url = (Element)i.next(); + for (Iterator i = aRoot.elementIterator(ELEM_TYPE); i.hasNext();) { + Element url = (Element) i.next(); PageTypeConfig config = parsePageTypeConfig(url); configs.add(config); } return configs; } - - private UrlConfig parseUrlConfig(Element aUrlElem) { + + private UrlConfig parseUrlConfig(Element aUrlElem) { String pattern = aUrlElem.elementText(ELEM_PATTERN); PageRequest request = parseRequestConfig(aUrlElem); return new UrlConfig(pattern, request); } - - private PageTypeConfig parsePageTypeConfig(Element aTypeElem) { + + private PageTypeConfig parsePageTypeConfig(Element aTypeElem) { String pattern = aTypeElem.elementText(ELEM_PATTERN); PageRequest request = parseRequestConfig(aTypeElem); return new PageTypeConfig(pattern, request); @@ -113,30 +122,32 @@ public class ConfigurationParser { * @return */ private PageRequest parseRequestConfig(Element aUrlElem) { - String method = aUrlElem.elementText(ELEM_METHOD); + String method = aUrlElem.elementText(ELEM_METHOD); String xslt = aUrlElem.elementText(ELEM_XSLT); List params = new ArrayList(); - for (Iterator i = aUrlElem.elementIterator(ELEM_PARAM); i.hasNext(); ) { - Element paramElem = (Element)i.next(); + for (Iterator i = aUrlElem.elementIterator(ELEM_PARAM); i.hasNext();) { + Element paramElem = (Element) i.next(); NameValuePair param = parseParameter(paramElem); params.add(param); } - + NameValuePair[] paramsArray = params.toArray(new NameValuePair[0]); - PageRequest request; - if ( METHOD_POST.equals(method)) { - request = new PostPageRequest(MAX_TRIES, MAX_DELAY, paramsArray, xslt, _os); - } - else if ( METHOD_GET.equals(method) || method == null ){ - request = new GetPageRequest(MAX_TRIES, MAX_DELAY, paramsArray, xslt, _os); - } else { - throw new RuntimeException("Unknown request method '" + method + "'. Only " + - METHOD_GET + " and " + METHOD_POST + " are supported"); + PageRequest request; + if (METHOD_POST.equals(method)) { + request = new PostPageRequest(MAX_TRIES, MAX_DELAY, paramsArray, + xslt, _os); + } else if (METHOD_GET.equals(method) || method == null) { + request = new GetPageRequest(MAX_TRIES, MAX_DELAY, paramsArray, + xslt, _os); + } else { + throw new RuntimeException("Unknown request method '" + method + + "'. Only " + METHOD_GET + " and " + METHOD_POST + + " are supported"); } return request; } - - private NameValuePair parseParameter(Element aParam) { + + private NameValuePair parseParameter(Element aParam) { String name = aParam.attributeValue(AT_NAME); String value = aParam.attributeValue(AT_VALUE); return new NameValuePair(name, value);