From 915f1b7c5af2b85372dc84d008e9a5c49428f0e9 Mon Sep 17 00:00:00 2001 From: erik Date: Tue, 21 Mar 2006 22:30:56 +0000 Subject: [PATCH] --- .../src/org/wamblee/crawler/impl/App.java | 80 ++++++++----------- .../crawler/impl/ConfigurationParser.java | 8 +- .../wamblee/crawler/kiss/guide/Program.java | 2 - .../crawler/kiss/main/KissCrawler.java | 20 +---- .../kiss/main/ProgramActionExecutor.java | 2 +- .../org/wamblee/test/HibernateUpdater.java | 2 +- 6 files changed, 42 insertions(+), 72 deletions(-) diff --git a/crawler/basic/src/org/wamblee/crawler/impl/App.java b/crawler/basic/src/org/wamblee/crawler/impl/App.java index 6246453a..90e66dfc 100644 --- a/crawler/basic/src/org/wamblee/crawler/impl/App.java +++ b/crawler/basic/src/org/wamblee/crawler/impl/App.java @@ -2,9 +2,7 @@ package org.wamblee.crawler.impl; import java.io.File; import java.io.FileInputStream; -import java.io.FileOutputStream; import java.io.InputStream; -import java.io.PrintStream; import org.apache.commons.httpclient.HttpClient; import org.apache.commons.logging.Log; @@ -36,60 +34,49 @@ import org.wamblee.crawler.PageException; * Entry point for the crawler. */ public final class App { - + /** * Disabled constructor. - * + * */ - private App() { + private App() { // Empty } - - private static final Log LOG = LogFactory.getLog(App.class); - - private static final String LOG_FILE = "crawler.log"; - + /** - * Runs a test program. - * @param aArgs Arguments. First argument is the crawler config file name and second argument is - * the start url. - * @throws Exception In case of problems. + * Runs a test program. + * + * @param aArgs + * Arguments. First argument is the crawler config file name and + * second argument is the start url. + * @throws Exception + * In case of problems. */ public static void main(String[] aArgs) throws Exception { String configFileName = aArgs[0]; String starturl = aArgs[1]; - FileOutputStream fos = new FileOutputStream(new File(LOG_FILE)); - PrintStream os = new PrintStream(fos); - - try { - ConfigurationParser parser = new ConfigurationParser(os); - InputStream configFile = new FileInputStream(new File( - configFileName)); - Configuration config = parser.parse(configFile); - - HttpClient client = new HttpClient(); - // client.getHostConfiguration().setProxy("localhost", 3128); - - Crawler crawler = new CrawlerImpl(client, config); - - System.out.println("Retrieving: " + starturl); - Page page = crawler.getPage(starturl); - showPage(page); - page = page.getAction("channels-favorites").execute(); - recordInterestingShows(page); - showPage(page); - page = page.getAction("Nederland 1").execute(); - showPage(page); - page = page.getAction("right-now").execute(); - showPage(page); - page = page.getAction("Het elfde uur").execute(); - showPage(page); - } finally { - os.flush(); - os.close(); - System.out.println("Output written on '" + LOG_FILE + "'"); - } + ConfigurationParser parser = new ConfigurationParser(); + InputStream configFile = new FileInputStream(new File(configFileName)); + Configuration config = parser.parse(configFile); + + HttpClient client = new HttpClient(); + // client.getHostConfiguration().setProxy("localhost", 3128); + + Crawler crawler = new CrawlerImpl(client, config); + + System.out.println("Retrieving: " + starturl); + Page page = crawler.getPage(starturl); + showPage(page); + page = page.getAction("channels-favorites").execute(); + recordInterestingShows(page); + showPage(page); + page = page.getAction("Nederland 1").execute(); + showPage(page); + page = page.getAction("right-now").execute(); + showPage(page); + page = page.getAction("Het elfde uur").execute(); + showPage(page); } /** @@ -113,7 +100,8 @@ public final class App { } } - private static void examineChannel(String aChannel, Page aPage) throws PageException { + private static void examineChannel(String aChannel, Page aPage) + throws PageException { Action[] programs = aPage.getActions(); for (Action program : programs) { System.out.println(aChannel + " - " + program.getName()); diff --git a/crawler/basic/src/org/wamblee/crawler/impl/ConfigurationParser.java b/crawler/basic/src/org/wamblee/crawler/impl/ConfigurationParser.java index b45053a2..792d0d34 100644 --- a/crawler/basic/src/org/wamblee/crawler/impl/ConfigurationParser.java +++ b/crawler/basic/src/org/wamblee/crawler/impl/ConfigurationParser.java @@ -17,7 +17,6 @@ package org.wamblee.crawler.impl; import java.io.InputStream; -import java.io.PrintStream; import java.util.ArrayList; import java.util.Iterator; import java.util.List; @@ -61,14 +60,11 @@ public class ConfigurationParser { private static final int MAX_DELAY = 100; - private PrintStream _os; - /** * Constructs the configuration parser. - * @param aOs The stream for logging requests */ - public ConfigurationParser(PrintStream aOs) { - _os = aOs; + public ConfigurationParser() { + // Empty } /** diff --git a/crawler/kiss/src/org/wamblee/crawler/kiss/guide/Program.java b/crawler/kiss/src/org/wamblee/crawler/kiss/guide/Program.java index 5f1bb67e..28cbec5b 100644 --- a/crawler/kiss/src/org/wamblee/crawler/kiss/guide/Program.java +++ b/crawler/kiss/src/org/wamblee/crawler/kiss/guide/Program.java @@ -69,8 +69,6 @@ public class Program { */ private static final String RECORD_ACTION = "record"; - private static final String RESULT_ELEM = "result"; - /** * Result of recording a program. * diff --git a/crawler/kiss/src/org/wamblee/crawler/kiss/main/KissCrawler.java b/crawler/kiss/src/org/wamblee/crawler/kiss/main/KissCrawler.java index b2f6064a..c522131d 100644 --- a/crawler/kiss/src/org/wamblee/crawler/kiss/main/KissCrawler.java +++ b/crawler/kiss/src/org/wamblee/crawler/kiss/main/KissCrawler.java @@ -19,10 +19,8 @@ package org.wamblee.crawler.kiss.main; import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; -import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; -import java.io.PrintStream; import java.util.ArrayList; import java.util.List; import java.util.regex.Matcher; @@ -57,11 +55,6 @@ public class KissCrawler { private static final Log LOG = LogFactory.getLog(KissCrawler.class); - /** - * Log file name for the crawler. - */ - private static final String LOG_FILE = "kiss.log"; - /** * Start URL of the electronic programme guide. */ @@ -121,14 +114,11 @@ public class KissCrawler { _pattern = Pattern.compile(TIME_REGEX); - FileOutputStream fos = new FileOutputStream(new File(LOG_FILE)); - PrintStream os = new PrintStream(fos); - try { HttpClient client = new HttpClient(); // client.getHostConfiguration().setProxy("127.0.0.1", 3128); - Crawler crawler = createCrawler(aCrawlerConfig, os, client); + Crawler crawler = createCrawler(aCrawlerConfig, client); InputStream programConfigFile = new FileInputStream(new File( aProgramConfig)); ProgramConfigurationParser parser = new ProgramConfigurationParser(); @@ -141,9 +131,7 @@ public class KissCrawler { guide.accept(printer); processResults(programFilters, guide, parser.getNotifier()); } finally { - os.flush(); - os.close(); - System.out.println("Output written on '" + LOG_FILE + "'"); + System.out.println("Crawler finished"); } } @@ -188,9 +176,9 @@ public class KissCrawler { * @throws FileNotFoundException * In case configuration files cannot be found. */ - private Crawler createCrawler(String aCrawlerConfig, PrintStream aOs, + private Crawler createCrawler(String aCrawlerConfig, HttpClient aClient) throws FileNotFoundException { - ConfigurationParser parser = new ConfigurationParser(aOs); + ConfigurationParser parser = new ConfigurationParser(); InputStream crawlerConfigFile = new FileInputStream(new File( aCrawlerConfig)); Configuration config = parser.parse(crawlerConfigFile); diff --git a/crawler/kiss/src/org/wamblee/crawler/kiss/main/ProgramActionExecutor.java b/crawler/kiss/src/org/wamblee/crawler/kiss/main/ProgramActionExecutor.java index d47868f5..b9c103b7 100644 --- a/crawler/kiss/src/org/wamblee/crawler/kiss/main/ProgramActionExecutor.java +++ b/crawler/kiss/src/org/wamblee/crawler/kiss/main/ProgramActionExecutor.java @@ -29,7 +29,7 @@ import org.wamblee.crawler.kiss.guide.Program.RecordingResult; /** * Provides execution of actions for programs. Actions use this class to tell - * the executor what to do. The executor then decide on exactly what to do and + * the executor what to do. The executor then decides on exactly what to do and * in what order and makes decisions in case of conflicts. */ public class ProgramActionExecutor { diff --git a/support/test/org/wamblee/test/HibernateUpdater.java b/support/test/org/wamblee/test/HibernateUpdater.java index 3882ebfd..576c1819 100644 --- a/support/test/org/wamblee/test/HibernateUpdater.java +++ b/support/test/org/wamblee/test/HibernateUpdater.java @@ -37,7 +37,7 @@ public final class HibernateUpdater { public static void main(String[] aArgs) throws IOException { String file = aArgs[0]; - File dir = new File(aArgs[0]); + File dir = new File(file); Configuration conf = HibernateUtils.getConfiguration(dir); -- 2.31.1