import java.io.PrintStream;
import org.apache.commons.httpclient.HttpClient;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
import org.dom4j.Element;
import org.wamblee.crawler.Action;
import org.wamblee.crawler.Configuration;
import org.wamblee.crawler.Crawler;
import org.wamblee.crawler.Page;
+import org.wamblee.crawler.PageException;
/*
* Copyright 2005 the original author or authors.
*/
public class App {
+ private static final Log LOG = LogFactory.getLog(App.class);
+
private static final String LOG_FILE = "crawler.log";
public static void main(String[] args) throws Exception {
String configFileName = args[0];
String starturl = args[1];
-
+
FileOutputStream fos = new FileOutputStream(new File(LOG_FILE));
PrintStream os = new PrintStream(fos);
HttpClient client = new HttpClient();
// client.getHostConfiguration().setProxy("localhost", 3128);
-
+
Crawler crawler = new CrawlerImpl(client, config);
System.out.println("Retrieving: " + starturl);
*/
private static void showPage(Page aPage) {
// Print the name of every action returned by the page, one line per action.
Action[] links = aPage.getActions();
- for (Action link: links) {
+ for (Action link : links) {
System.out.println("Link found '" + link.getName() + "'");
}
// Print the page content element serialized as XML text.
- Element element = aPage.getContent();
+ Element element = aPage.getContent();
System.out.println("Retrieved content: " + element.asXML());
}
-
- private static void recordInterestingShows(Page page) {
+
+ private static void recordInterestingShows(Page page) throws PageException {
// Every action on the given page is treated as one channel.
Action[] channels = page.getActions();
// For each channel: execute it, execute the "right-now" action of the
// resulting page, and pass that final page to examineChannel together
// with the channel's name.
- for (Action channel: channels) {
- examineChannel(channel.getName(), channel.execute().getAction("right-now").execute());
+ for (Action channel : channels) {
+ examineChannel(channel.getName(), channel.execute().getAction(
+ "right-now").execute());
}
}
-
- private static void examineChannel(String aChannel, Page aPage) {
- Action[] programs = aPage.getActions();
- for (Action program: programs) {
+
+ private static void examineChannel(String aChannel, Page aPage) throws PageException {
+ Action[] programs = aPage.getActions();
+ for (Action program : programs) {
System.out.println(aChannel + " - " + program.getName());
- if ( program.getName().toLowerCase().matches(".*babe.*")) {
+ if (program.getName().toLowerCase().matches(".*babe.*")) {
Page programPage = program.execute();
- Action record = programPage.getAction("record");
+ Action record = programPage.getAction("record");
System.out.println("Recording possible: " + record != null);
}
}