package org.wamblee.crawler.impl;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.InputStream;
import java.io.PrintStream;

import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.dom4j.Element;
import org.wamblee.crawler.Action;
import org.wamblee.crawler.Configuration;
import org.wamblee.crawler.Crawler;
import org.wamblee.crawler.Page;
import org.wamblee.crawler.PageException;
/*
 * Copyright 2005 the original author or authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * Entry point for the crawler.
 */
38 public final class App {
41 * Disabled constructor.
48 private static final Log LOG = LogFactory.getLog(App.class);
50 private static final String LOG_FILE = "crawler.log";
53 * Runs a test program.
54 * @param aArgs Arguments. First argument is the crawler config file name and second argument is
56 * @throws Exception In case of problems.
58 public static void main(String[] aArgs) throws Exception {
59 String configFileName = aArgs[0];
60 String starturl = aArgs[1];
62 FileOutputStream fos = new FileOutputStream(new File(LOG_FILE));
63 PrintStream os = new PrintStream(fos);
66 ConfigurationParser parser = new ConfigurationParser(os);
67 InputStream configFile = new FileInputStream(new File(
69 Configuration config = parser.parse(configFile);
71 HttpClient client = new HttpClient();
72 // client.getHostConfiguration().setProxy("localhost", 3128);
74 Crawler crawler = new CrawlerImpl(client, config);
76 System.out.println("Retrieving: " + starturl);
77 Page page = crawler.getPage(starturl);
79 page = page.getAction("channels-favorites").execute();
80 recordInterestingShows(page);
82 page = page.getAction("Nederland 1").execute();
84 page = page.getAction("right-now").execute();
86 page = page.getAction("Het elfde uur").execute();
91 System.out.println("Output written on '" + LOG_FILE + "'");
99 private static void showPage(Page aPage) {
100 Action[] links = aPage.getActions();
101 for (Action link : links) {
102 System.out.println("Link found '" + link.getName() + "'");
104 Element element = aPage.getContent();
105 System.out.println("Retrieved content: " + element.asXML());
108 private static void recordInterestingShows(Page page) throws PageException {
109 Action[] channels = page.getActions();
110 for (Action channel : channels) {
111 examineChannel(channel.getName(), channel.execute().getAction(
112 "right-now").execute());
/**
 * Examines the programs of a channel: prints every program name, and for
 * programs whose lowercased name matches ".*babe.*" checks whether a
 * "record" action is available on the program's page.
 *
 * @param aChannel Channel name, used only for the printed output.
 * @param aPage Channel page whose actions represent programs.
 * @throws PageException In case of problems navigating the pages.
 */
116 private static void examineChannel(String aChannel, Page aPage) throws PageException {
117 Action[] programs = aPage.getActions();
118 for (Action program : programs) {
119 System.out.println(aChannel + " - " + program.getName());
120 if (program.getName().toLowerCase().matches(".*babe.*")) {
121 Page programPage = program.execute();
122 Action record = programPage.getAction("record");
// NOTE(review): operator-precedence bug — '+' binds tighter than '!=', so
// this evaluates ("Recording possible: " + record) != null, which is always
// true, and prints "true" regardless of whether the record action exists.
// Intended: "Recording possible: " + (record != null).
123 System.out.println("Recording possible: " + record != null);