package org.wamblee.crawler.impl; import java.io.File; import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.InputStream; import java.io.PrintStream; import org.apache.commons.httpclient.HttpClient; import org.dom4j.Element; import org.wamblee.crawler.Action; import org.wamblee.crawler.Configuration; import org.wamblee.crawler.Crawler; import org.wamblee.crawler.Page; /* * Copyright 2005 the original author or authors. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /** * Entry point for the crawler. */ public class App { private static final String LOG_FILE = "crawler.log"; public static void main(String[] args) throws Exception { String configFileName = args[0]; String starturl = args[1]; FileOutputStream fos = new FileOutputStream(new File(LOG_FILE)); PrintStream os = new PrintStream(fos); try { ConfigurationParser parser = new ConfigurationParser(os); InputStream configFile = new FileInputStream(new File( configFileName)); Configuration config = parser.parse(configFile); HttpClient client = new HttpClient(); // client.getHostConfiguration().setProxy("localhost", 3128); Crawler crawler = new CrawlerImpl(client, config); System.out.println("Retrieving: " + starturl); Page page = crawler.getPage(starturl); showPage(page); page = page.getAction("channels-favorites").execute(); recordInterestingShows(page); showPage(page); page = page.getAction("Nederland 1").execute(); showPage(page); page = page.getAction("right-now").execute(); showPage(page); page = page.getAction("Het elfde uur").execute(); showPage(page); } finally { os.flush(); os.close(); System.out.println("Output written on '" + LOG_FILE + "'"); } } /** * @param starturl * @param crawler */ private static void showPage(Page aPage) { Action[] links = aPage.getActions(); for (Action link: links) { System.out.println("Link found '" + link.getName() + "'"); } Element element = aPage.getContent(); System.out.println("Retrieved content: " + element.asXML()); } private static void recordInterestingShows(Page page) { Action[] channels = page.getActions(); for (Action channel: channels) { examineChannel(channel.getName(), channel.execute().getAction("right-now").execute()); } } private static void examineChannel(String aChannel, Page aPage) { Action[] programs = aPage.getActions(); for (Action program: programs) { System.out.println(aChannel + " - " + program.getName()); if ( program.getName().toLowerCase().matches(".*babe.*")) { Page programPage = program.execute(); Action record = programPage.getAction("record"); System.out.println("Recording possible: " + record != null); } } } }