X-Git-Url: http://wamblee.org/gitweb/?a=blobdiff_plain;f=crawler%2Fkiss%2Fsrc%2Forg%2Fwamblee%2Fcrawler%2Fkiss%2Fmain%2FKissCrawler.java;h=969c5b23749e8e500f052308af0ed48a44f09308;hb=3c68ffbb4f2b908015301accc3f4473acc4619e2;hp=0c01a9bcc3e9a712df3a3553a54c265b1e4b7cc0;hpb=2160337764463b1cd9217671f0f9c2f6ab89dcb0;p=utils diff --git a/crawler/kiss/src/org/wamblee/crawler/kiss/main/KissCrawler.java b/crawler/kiss/src/org/wamblee/crawler/kiss/main/KissCrawler.java index 0c01a9bc..969c5b23 100644 --- a/crawler/kiss/src/org/wamblee/crawler/kiss/main/KissCrawler.java +++ b/crawler/kiss/src/org/wamblee/crawler/kiss/main/KissCrawler.java @@ -61,16 +61,11 @@ public class KissCrawler { * Start URL of the electronic programme guide. */ private static final String START_URL = "http://epg.kml.kiss-technology.com/login_core.php"; - + /** - * Crawler configuration file. + * Default socket timeout to use. */ - private static final String CRAWLER_CONFIG = "config.xml"; - - /** - * Configuration file describing interesting programs. - */ - private static final String PROGRAM_CONFIG = "programs.xml"; + private static final int SOCKET_TIMEOUT = 10000; /** * Regular expression for matching time interval strings in the retrieved @@ -92,9 +87,31 @@ public class KissCrawler { * In case of problems. */ public static void main(String[] aArgs) throws Exception { - new KissCrawler(START_URL, CRAWLER_CONFIG, PROGRAM_CONFIG); + String crawlerConfig = new File(aArgs[0]).getCanonicalPath(); + String programConfig = new File(aArgs[1]).getCanonicalPath(); + new KissCrawler(START_URL, SOCKET_TIMEOUT, crawlerConfig, programConfig); + } + + /** + * Constructs the crawler. This retrieves the TV guide by crawling the KiSS + * EPG guide, filters the guide for interesting programs, tries to record + * them, and sends a summary mail to the user. + * + * @param aCrawlerConfig + * Configuration file for the crawler. + * @param aProgramConfig + * Configuration file describing interesting shows. + * @throws IOException + * In case of problems reading files. + * @throws MessagingException + * In case of problems sending a mail notification. + */ + public KissCrawler(String aCrawlerConfig, + String aProgramConfig) throws IOException, NotificationException, PageException { + this(START_URL, SOCKET_TIMEOUT, aCrawlerConfig, aProgramConfig); } + /** * Constructs the crawler. This retrieves the TV guide by crawling the KiSS * EPG guide, filters the guide for interesting programs, tries to record @@ -102,6 +119,7 @@ public class KissCrawler { * * @param aStartUrl * Start URL of the electronic programme guide. + * @param aSocketTimeout Socket timeout to use. * @param aCrawlerConfig * Configuration file for the crawler. * @param aProgramConfig @@ -111,14 +129,15 @@ public class KissCrawler { * @throws MessagingException * In case of problems sending a mail notification. */ - public KissCrawler(String aStartUrl, String aCrawlerConfig, - String aProgramConfig) throws IOException, NotificationException { + public KissCrawler(String aStartUrl, int aSocketTimeout, String aCrawlerConfig, + String aProgramConfig) throws IOException, NotificationException, PageException { _pattern = Pattern.compile(TIME_REGEX); try { HttpClient client = new HttpClient(); // client.getHostConfiguration().setProxy("127.0.0.1", 3128); + client.getParams().setParameter("http.socket.timeout", SOCKET_TIMEOUT); XslTransformer transformer = new XslTransformer( new ClasspathUriResolver()); @@ -143,6 +162,7 @@ public class KissCrawler { } catch (PageException e) { report.addMessage("Problem getting TV guide", e); LOG.info("Problem getting TV guide", e); + throw e; } parser.getNotifier().send(report.asXml()); } finally { @@ -220,7 +240,7 @@ public class KissCrawler { } return favorites.execute(); } catch (PageException e) { - String msg = "Could not login to electronic programme guide."; + String msg = "Could not complete login to electronic programme guide."; throw new PageException(msg, e); } }