X-Git-Url: http://wamblee.org/gitweb/?a=blobdiff_plain;f=crawler%2Fbasic%2Fsrc%2Forg%2Fwamblee%2Fcrawler%2Fimpl%2FApp.java;h=6246453abba66ce5e4cf570cca4aa6b995dc1933;hb=8bdf7301b21a7824933fac2b75caf410b7dd5923;hp=75fd3b09fa675e973cd64dc6c8f147246882e197;hpb=30671b398473b876e5c42d063f0c8e169ad3163c;p=utils diff --git a/crawler/basic/src/org/wamblee/crawler/impl/App.java b/crawler/basic/src/org/wamblee/crawler/impl/App.java index 75fd3b09..6246453a 100644 --- a/crawler/basic/src/org/wamblee/crawler/impl/App.java +++ b/crawler/basic/src/org/wamblee/crawler/impl/App.java @@ -7,11 +7,14 @@ import java.io.InputStream; import java.io.PrintStream; import org.apache.commons.httpclient.HttpClient; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.dom4j.Element; import org.wamblee.crawler.Action; import org.wamblee.crawler.Configuration; import org.wamblee.crawler.Crawler; import org.wamblee.crawler.Page; +import org.wamblee.crawler.PageException; /* * Copyright 2005 the original author or authors. @@ -32,14 +35,30 @@ import org.wamblee.crawler.Page; /** * Entry point for the crawler. */ -public class App { +public final class App { + + /** + * Disabled constructor. + * + */ + private App() { + // Empty + } + + private static final Log LOG = LogFactory.getLog(App.class); private static final String LOG_FILE = "crawler.log"; - public static void main(String[] args) throws Exception { - String configFileName = args[0]; - String starturl = args[1]; - + /** + * Runs a test program. + * @param aArgs Arguments. First argument is the crawler config file name and second argument is + * the start url. + * @throws Exception In case of problems. + */ + public static void main(String[] aArgs) throws Exception { + String configFileName = aArgs[0]; + String starturl = aArgs[1]; + FileOutputStream fos = new FileOutputStream(new File(LOG_FILE)); PrintStream os = new PrintStream(fos); @@ -51,7 +70,7 @@ public class App { HttpClient client = new HttpClient(); // client.getHostConfiguration().setProxy("localhost", 3128); - + Crawler crawler = new CrawlerImpl(client, config); System.out.println("Retrieving: " + starturl); @@ -79,27 +98,28 @@ public class App { */ private static void showPage(Page aPage) { Action[] links = aPage.getActions(); - for (Action link: links) { + for (Action link : links) { System.out.println("Link found '" + link.getName() + "'"); } - Element element = aPage.getContent(); + Element element = aPage.getContent(); System.out.println("Retrieved content: " + element.asXML()); } - - private static void recordInterestingShows(Page page) { + + private static void recordInterestingShows(Page page) throws PageException { Action[] channels = page.getActions(); - for (Action channel: channels) { - examineChannel(channel.getName(), channel.execute().getAction("right-now").execute()); + for (Action channel : channels) { + examineChannel(channel.getName(), channel.execute().getAction( + "right-now").execute()); } } - - private static void examineChannel(String aChannel, Page aPage) { - Action[] programs = aPage.getActions(); - for (Action program: programs) { + + private static void examineChannel(String aChannel, Page aPage) throws PageException { + Action[] programs = aPage.getActions(); + for (Action program : programs) { System.out.println(aChannel + " - " + program.getName()); - if ( program.getName().toLowerCase().matches(".*babe.*")) { + if (program.getName().toLowerCase().matches(".*babe.*")) { Page programPage = program.execute(); - Action record = programPage.getAction("record"); + Action record = programPage.getAction("record"); System.out.println("Recording possible: " + record != null); } }