1 package org.wamblee.crawler.impl;
4 import java.io.FileInputStream;
5 import java.io.InputStream;
7 import org.apache.commons.httpclient.HttpClient;
8 import org.apache.commons.httpclient.NameValuePair;
9 import org.dom4j.Element;
10 import org.wamblee.crawler.Action;
11 import org.wamblee.crawler.Configuration;
12 import org.wamblee.crawler.Crawler;
13 import org.wamblee.crawler.Page;
14 import org.wamblee.crawler.PageException;
15 import org.wamblee.xml.XslTransformer;
18 * Copyright 2005 the original author or authors.
20 * Licensed under the Apache License, Version 2.0 (the "License");
21 * you may not use this file except in compliance with the License.
22 * You may obtain a copy of the License at
24 * http://www.apache.org/licenses/LICENSE-2.0
26 * Unless required by applicable law or agreed to in writing, software
27 * distributed under the License is distributed on an "AS IS" BASIS,
28 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
29 * See the License for the specific language governing permissions and
30 * limitations under the License.
34 * Test application which uses the crawler.
36 public final class App {
39 * Disabled constructor.
47 * Runs a test program.
50 * Arguments. First argument is the crawler config file name and
51 * second argument is the start url.
53 * In case of problems.
55 public static void main(String[] aArgs) throws Exception {
56 String configFileName = aArgs[0];
57 String starturl = aArgs[1];
59 ConfigurationParser parser = new ConfigurationParser(new XslTransformer());
60 InputStream configFile = new FileInputStream(new File(configFileName));
61 Configuration config = parser.parse(configFile);
63 HttpClient client = new HttpClient();
64 // client.getHostConfiguration().setProxy("localhost", 3128);
66 Crawler crawler = new CrawlerImpl(client, config);
68 System.out.println("Retrieving: " + starturl);
69 Page page = crawler.getPage(starturl, new NameValuePair[0]);
71 page = page.getAction("channels-favorites").execute();
72 recordInterestingShows(page);
74 page = page.getAction("Nederland 1").execute();
76 page = page.getAction("right-now").execute();
78 page = page.getAction("Het elfde uur").execute();
86 private static void showPage(Page aPage) {
87 Action[] links = aPage.getActions();
88 for (Action link : links) {
89 System.out.println("Link found '" + link.getName() + "'");
91 Element element = aPage.getContent();
92 System.out.println("Retrieved content: " + element.asXML());
95 private static void recordInterestingShows(Page page) throws PageException {
96 Action[] channels = page.getActions();
97 for (Action channel : channels) {
98 examineChannel(channel.getName(), channel.execute().getAction(
99 "right-now").execute());
103 private static void examineChannel(String aChannel, Page aPage)
104 throws PageException {
105 Action[] programs = aPage.getActions();
106 for (Action program : programs) {
107 System.out.println(aChannel + " - " + program.getName());
108 if (program.getName().toLowerCase().matches(".*babe.*")) {
109 Page programPage = program.execute();
110 Action record = programPage.getAction("record");
111 System.out.println("Recording possible: " + record != null);