X-Git-Url: http://wamblee.org/gitweb/?a=blobdiff_plain;f=crawler%2Fbasic%2Fsrc%2Forg%2Fwamblee%2Fcrawler%2Fimpl%2FApp.java;h=b0339f4b07ad7d37b765a7e0c79beda0ef5a0cae;hb=f53c06ddca33e21e772c479179b7f858a3a8b8d4;hp=15e740a628df1b1a2d8d363755cc170235b493de;hpb=a5a9deb2dedb2efc96972acedaa44909a3b0fd79;p=utils diff --git a/crawler/basic/src/org/wamblee/crawler/impl/App.java b/crawler/basic/src/org/wamblee/crawler/impl/App.java index 15e740a6..b0339f4b 100644 --- a/crawler/basic/src/org/wamblee/crawler/impl/App.java +++ b/crawler/basic/src/org/wamblee/crawler/impl/App.java @@ -2,19 +2,17 @@ package org.wamblee.crawler.impl; import java.io.File; import java.io.FileInputStream; -import java.io.FileOutputStream; import java.io.InputStream; -import java.io.PrintStream; import org.apache.commons.httpclient.HttpClient; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.apache.commons.httpclient.NameValuePair; import org.dom4j.Element; import org.wamblee.crawler.Action; import org.wamblee.crawler.Configuration; import org.wamblee.crawler.Crawler; import org.wamblee.crawler.Page; import org.wamblee.crawler.PageException; +import org.wamblee.xml.XslTransformer; /* * Copyright 2005 the original author or authors. @@ -33,49 +31,52 @@ import org.wamblee.crawler.PageException; */ /** - * Entry point for the crawler. + * Test application which uses the crawler. */ -public class App { +public final class App { - private static final Log LOG = LogFactory.getLog(App.class); - - private static final String LOG_FILE = "crawler.log"; - - public static void main(String[] args) throws Exception { - String configFileName = args[0]; - String starturl = args[1]; - - FileOutputStream fos = new FileOutputStream(new File(LOG_FILE)); - PrintStream os = new PrintStream(fos); - - try { - ConfigurationParser parser = new ConfigurationParser(os); - InputStream configFile = new FileInputStream(new File( - configFileName)); - Configuration config = parser.parse(configFile); - - HttpClient client = new HttpClient(); - // client.getHostConfiguration().setProxy("localhost", 3128); - - Crawler crawler = new CrawlerImpl(client, config); - - System.out.println("Retrieving: " + starturl); - Page page = crawler.getPage(starturl); - showPage(page); - page = page.getAction("channels-favorites").execute(); - recordInterestingShows(page); - showPage(page); - page = page.getAction("Nederland 1").execute(); - showPage(page); - page = page.getAction("right-now").execute(); - showPage(page); - page = page.getAction("Het elfde uur").execute(); - showPage(page); - } finally { - os.flush(); - os.close(); - System.out.println("Output written on '" + LOG_FILE + "'"); - } + /** + * Disabled constructor. + * + */ + private App() { + // Empty + } + + /** + * Runs a test program. + * + * @param aArgs + * Arguments. First argument is the crawler config file name and + * second argument is the start url. + * @throws Exception + * In case of problems. + */ + public static void main(String[] aArgs) throws Exception { + String configFileName = aArgs[0]; + String starturl = aArgs[1]; + + ConfigurationParser parser = new ConfigurationParser(new XslTransformer()); + InputStream configFile = new FileInputStream(new File(configFileName)); + Configuration config = parser.parse(configFile); + + HttpClient client = new HttpClient(); + // client.getHostConfiguration().setProxy("localhost", 3128); + + Crawler crawler = new CrawlerImpl(client, config); + + System.out.println("Retrieving: " + starturl); + Page page = crawler.getPage(starturl, new NameValuePair[0]); + showPage(page); + page = page.getAction("channels-favorites").execute(); + recordInterestingShows(page); + showPage(page); + page = page.getAction("Nederland 1").execute(); + showPage(page); + page = page.getAction("right-now").execute(); + showPage(page); + page = page.getAction("Het elfde uur").execute(); + showPage(page); } /** @@ -99,7 +100,8 @@ public class App { } } - private static void examineChannel(String aChannel, Page aPage) throws PageException { + private static void examineChannel(String aChannel, Page aPage) + throws PageException { Action[] programs = aPage.getActions(); for (Action program : programs) { System.out.println(aChannel + " - " + program.getName());