X-Git-Url: http://wamblee.org/gitweb/?a=blobdiff_plain;f=crawler%2Fkiss%2Fsrc%2Forg%2Fwamblee%2Fcrawler%2Fkiss%2Fmain%2FKissCrawler.java;h=c522131d8bb8b1ddb49f9040bdc4b6463dfe1d9d;hb=0119c69858055c84592338a202e9d1b18510a29b;hp=5d7e85ca94868eb0a283b6e9739462bf3f06772a;hpb=01c0abdc7d3339b36216665ff70c99ba3e50c180;p=utils diff --git a/crawler/kiss/src/org/wamblee/crawler/kiss/main/KissCrawler.java b/crawler/kiss/src/org/wamblee/crawler/kiss/main/KissCrawler.java index 5d7e85ca..c522131d 100644 --- a/crawler/kiss/src/org/wamblee/crawler/kiss/main/KissCrawler.java +++ b/crawler/kiss/src/org/wamblee/crawler/kiss/main/KissCrawler.java @@ -19,10 +19,8 @@ package org.wamblee.crawler.kiss.main; import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; -import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; -import java.io.PrintStream; import java.util.ArrayList; import java.util.List; import java.util.regex.Matcher; @@ -57,11 +55,6 @@ public class KissCrawler { private static final Log LOG = LogFactory.getLog(KissCrawler.class); - /** - * Log file name for the crawler. - */ - private static final String LOG_FILE = "kiss.log"; - /** * Start URL of the electronic programme guide. */ @@ -121,29 +114,24 @@ public class KissCrawler { _pattern = Pattern.compile(TIME_REGEX); - FileOutputStream fos = new FileOutputStream(new File(LOG_FILE)); - PrintStream os = new PrintStream(fos); - try { HttpClient client = new HttpClient(); - //client.getHostConfiguration().setProxy("127.0.0.1", 3128); + // client.getHostConfiguration().setProxy("127.0.0.1", 3128); - Crawler crawler = createCrawler(aCrawlerConfig, os, client); + Crawler crawler = createCrawler(aCrawlerConfig, client); InputStream programConfigFile = new FileInputStream(new File( aProgramConfig)); ProgramConfigurationParser parser = new ProgramConfigurationParser(); parser.parse(programConfigFile); - List programFilters = parser.getFilters(); - + List programFilters = parser.getFilters(); + Page page = getStartPage(aStartUrl, crawler); TVGuide guide = createGuide(page); PrintVisitor printer = new PrintVisitor(System.out); guide.accept(printer); processResults(programFilters, guide, parser.getNotifier()); } finally { - os.flush(); - os.close(); - System.out.println("Output written on '" + LOG_FILE + "'"); + System.out.println("Crawler finished"); } } @@ -170,7 +158,7 @@ public class KissCrawler { executor.commit(); try { aNotifier.send(executor.getReport()); - } catch (NotificationException e) { + } catch (NotificationException e) { throw new RuntimeException(e); } } @@ -188,9 +176,9 @@ public class KissCrawler { * @throws FileNotFoundException * In case configuration files cannot be found. */ - private Crawler createCrawler(String aCrawlerConfig, PrintStream aOs, + private Crawler createCrawler(String aCrawlerConfig, HttpClient aClient) throws FileNotFoundException { - ConfigurationParser parser = new ConfigurationParser(aOs); + ConfigurationParser parser = new ConfigurationParser(); InputStream crawlerConfigFile = new FileInputStream(new File( aCrawlerConfig)); Configuration config = parser.parse(crawlerConfigFile);