X-Git-Url: http://wamblee.org/gitweb/?a=blobdiff_plain;f=crawler%2Fbasic%2Fsrc%2Forg%2Fwamblee%2Fcrawler%2Fimpl%2FApp.java;h=d4ca4709c78a19665736f93cdd4d543582d6942f;hb=59528dfd9877eca88ab0426bac6c26b2d6fe886d;hp=75fd3b09fa675e973cd64dc6c8f147246882e197;hpb=81bc61121a8f17f754fc99eb66603a59df242ddc;p=utils diff --git a/crawler/basic/src/org/wamblee/crawler/impl/App.java b/crawler/basic/src/org/wamblee/crawler/impl/App.java index 75fd3b09..d4ca4709 100644 --- a/crawler/basic/src/org/wamblee/crawler/impl/App.java +++ b/crawler/basic/src/org/wamblee/crawler/impl/App.java @@ -2,9 +2,7 @@ package org.wamblee.crawler.impl; import java.io.File; import java.io.FileInputStream; -import java.io.FileOutputStream; import java.io.InputStream; -import java.io.PrintStream; import org.apache.commons.httpclient.HttpClient; import org.dom4j.Element; @@ -12,6 +10,7 @@ import org.wamblee.crawler.Action; import org.wamblee.crawler.Configuration; import org.wamblee.crawler.Crawler; import org.wamblee.crawler.Page; +import org.wamblee.crawler.PageException; /* * Copyright 2005 the original author or authors. @@ -30,47 +29,52 @@ import org.wamblee.crawler.Page; */ /** - * Entry point for the crawler. + * Test application which uses the crawler. */ -public class App { +public final class App { - private static final String LOG_FILE = "crawler.log"; + /** + * Disabled constructor. + * + */ + private App() { + // Empty + } + + /** + * Runs a test program. + * + * @param aArgs + * Arguments. First argument is the crawler config file name and + * second argument is the start url. + * @throws Exception + * In case of problems. + */ + public static void main(String[] aArgs) throws Exception { + String configFileName = aArgs[0]; + String starturl = aArgs[1]; - public static void main(String[] args) throws Exception { - String configFileName = args[0]; - String starturl = args[1]; - - FileOutputStream fos = new FileOutputStream(new File(LOG_FILE)); - PrintStream os = new PrintStream(fos); + ConfigurationParser parser = new ConfigurationParser(); + InputStream configFile = new FileInputStream(new File(configFileName)); + Configuration config = parser.parse(configFile); - try { - ConfigurationParser parser = new ConfigurationParser(os); - InputStream configFile = new FileInputStream(new File( - configFileName)); - Configuration config = parser.parse(configFile); + HttpClient client = new HttpClient(); + // client.getHostConfiguration().setProxy("localhost", 3128); - HttpClient client = new HttpClient(); - // client.getHostConfiguration().setProxy("localhost", 3128); - - Crawler crawler = new CrawlerImpl(client, config); + Crawler crawler = new CrawlerImpl(client, config); - System.out.println("Retrieving: " + starturl); - Page page = crawler.getPage(starturl); - showPage(page); - page = page.getAction("channels-favorites").execute(); - recordInterestingShows(page); - showPage(page); - page = page.getAction("Nederland 1").execute(); - showPage(page); - page = page.getAction("right-now").execute(); - showPage(page); - page = page.getAction("Het elfde uur").execute(); - showPage(page); - } finally { - os.flush(); - os.close(); - System.out.println("Output written on '" + LOG_FILE + "'"); - } + System.out.println("Retrieving: " + starturl); + Page page = crawler.getPage(starturl); + showPage(page); + page = page.getAction("channels-favorites").execute(); + recordInterestingShows(page); + showPage(page); + page = page.getAction("Nederland 1").execute(); + showPage(page); + page = page.getAction("right-now").execute(); + showPage(page); + page = page.getAction("Het elfde uur").execute(); + showPage(page); } /** @@ -79,27 +83,29 @@ public class App { */ private static void showPage(Page aPage) { Action[] links = aPage.getActions(); - for (Action link: links) { + for (Action link : links) { System.out.println("Link found '" + link.getName() + "'"); } - Element element = aPage.getContent(); + Element element = aPage.getContent(); System.out.println("Retrieved content: " + element.asXML()); } - - private static void recordInterestingShows(Page page) { + + private static void recordInterestingShows(Page page) throws PageException { Action[] channels = page.getActions(); - for (Action channel: channels) { - examineChannel(channel.getName(), channel.execute().getAction("right-now").execute()); + for (Action channel : channels) { + examineChannel(channel.getName(), channel.execute().getAction( + "right-now").execute()); } } - - private static void examineChannel(String aChannel, Page aPage) { - Action[] programs = aPage.getActions(); - for (Action program: programs) { + + private static void examineChannel(String aChannel, Page aPage) + throws PageException { + Action[] programs = aPage.getActions(); + for (Action program : programs) { System.out.println(aChannel + " - " + program.getName()); - if ( program.getName().toLowerCase().matches(".*babe.*")) { + if (program.getName().toLowerCase().matches(".*babe.*")) { Page programPage = program.execute(); - Action record = programPage.getAction("record"); + Action record = programPage.getAction("record"); System.out.println("Recording possible: " + record != null); } }