package org.wamblee.crawler.impl; import java.io.File; import java.io.FileInputStream; import java.io.InputStream; import org.apache.commons.httpclient.HttpClient; import org.dom4j.Element; import org.wamblee.crawler.Action; import org.wamblee.crawler.Configuration; import org.wamblee.crawler.Crawler; import org.wamblee.crawler.Page; import org.wamblee.crawler.PageException; import org.wamblee.xml.XslTransformer; /* * Copyright 2005 the original author or authors. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /** * Test application which uses the crawler. */ public final class App { /** * Disabled constructor. * */ private App() { // Empty } /** * Runs a test program. * * @param aArgs * Arguments. First argument is the crawler config file name and * second argument is the start url. * @throws Exception * In case of problems. */ public static void main(String[] aArgs) throws Exception { String configFileName = aArgs[0]; String starturl = aArgs[1]; ConfigurationParser parser = new ConfigurationParser(new XslTransformer()); InputStream configFile = new FileInputStream(new File(configFileName)); Configuration config = parser.parse(configFile); HttpClient client = new HttpClient(); // client.getHostConfiguration().setProxy("localhost", 3128); Crawler crawler = new CrawlerImpl(client, config); System.out.println("Retrieving: " + starturl); Page page = crawler.getPage(starturl); showPage(page); page = page.getAction("channels-favorites").execute(); recordInterestingShows(page); showPage(page); page = page.getAction("Nederland 1").execute(); showPage(page); page = page.getAction("right-now").execute(); showPage(page); page = page.getAction("Het elfde uur").execute(); showPage(page); } /** * @param starturl * @param crawler */ private static void showPage(Page aPage) { Action[] links = aPage.getActions(); for (Action link : links) { System.out.println("Link found '" + link.getName() + "'"); } Element element = aPage.getContent(); System.out.println("Retrieved content: " + element.asXML()); } private static void recordInterestingShows(Page page) throws PageException { Action[] channels = page.getActions(); for (Action channel : channels) { examineChannel(channel.getName(), channel.execute().getAction( "right-now").execute()); } } private static void examineChannel(String aChannel, Page aPage) throws PageException { Action[] programs = aPage.getActions(); for (Action program : programs) { System.out.println(aChannel + " - " + program.getName()); if (program.getName().toLowerCase().matches(".*babe.*")) { Page programPage = program.execute(); Action record = programPage.getAction("record"); System.out.println("Recording possible: " + record != null); } } } }