X-Git-Url: http://wamblee.org/gitweb/?a=blobdiff_plain;f=crawler%2Fbasic%2Fsrc%2Fmain%2Fjava%2Forg%2Fwamblee%2Fcrawler%2Fimpl%2FApp.java;fp=crawler%2Fbasic%2Fsrc%2Fmain%2Fjava%2Forg%2Fwamblee%2Fcrawler%2Fimpl%2FApp.java;h=b0339f4b07ad7d37b765a7e0c79beda0ef5a0cae;hb=62f165891f08ae532b5a794af11d7338a93f9a43;hp=0000000000000000000000000000000000000000;hpb=07cedd3f0730646ea35a7f668b3e1e872a4605d9;p=utils diff --git a/crawler/basic/src/main/java/org/wamblee/crawler/impl/App.java b/crawler/basic/src/main/java/org/wamblee/crawler/impl/App.java new file mode 100644 index 00000000..b0339f4b --- /dev/null +++ b/crawler/basic/src/main/java/org/wamblee/crawler/impl/App.java @@ -0,0 +1,116 @@ +package org.wamblee.crawler.impl; + +import java.io.File; +import java.io.FileInputStream; +import java.io.InputStream; + +import org.apache.commons.httpclient.HttpClient; +import org.apache.commons.httpclient.NameValuePair; +import org.dom4j.Element; +import org.wamblee.crawler.Action; +import org.wamblee.crawler.Configuration; +import org.wamblee.crawler.Crawler; +import org.wamblee.crawler.Page; +import org.wamblee.crawler.PageException; +import org.wamblee.xml.XslTransformer; + +/* + * Copyright 2005 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Test application which uses the crawler. + */ +public final class App { + + /** + * Disabled constructor. + * + */ + private App() { + // Empty + } + + /** + * Runs a test program. + * + * @param aArgs + * Arguments. First argument is the crawler config file name and + * second argument is the start url. + * @throws Exception + * In case of problems. + */ + public static void main(String[] aArgs) throws Exception { + String configFileName = aArgs[0]; + String starturl = aArgs[1]; + + ConfigurationParser parser = new ConfigurationParser(new XslTransformer()); + InputStream configFile = new FileInputStream(new File(configFileName)); + Configuration config = parser.parse(configFile); + + HttpClient client = new HttpClient(); + // client.getHostConfiguration().setProxy("localhost", 3128); + + Crawler crawler = new CrawlerImpl(client, config); + + System.out.println("Retrieving: " + starturl); + Page page = crawler.getPage(starturl, new NameValuePair[0]); + showPage(page); + page = page.getAction("channels-favorites").execute(); + recordInterestingShows(page); + showPage(page); + page = page.getAction("Nederland 1").execute(); + showPage(page); + page = page.getAction("right-now").execute(); + showPage(page); + page = page.getAction("Het elfde uur").execute(); + showPage(page); + } + + /** + * @param starturl + * @param crawler + */ + private static void showPage(Page aPage) { + Action[] links = aPage.getActions(); + for (Action link : links) { + System.out.println("Link found '" + link.getName() + "'"); + } + Element element = aPage.getContent(); + System.out.println("Retrieved content: " + element.asXML()); + } + + private static void recordInterestingShows(Page page) throws PageException { + Action[] channels = page.getActions(); + for (Action channel : channels) { + examineChannel(channel.getName(), channel.execute().getAction( + "right-now").execute()); + } + } + + private static void examineChannel(String aChannel, Page aPage) + throws PageException { + Action[] programs = aPage.getActions(); + for (Action program : programs) { + System.out.println(aChannel + " - " + program.getName()); + if (program.getName().toLowerCase().matches(".*babe.*")) { + Page programPage = program.execute(); + Action record = programPage.getAction("record"); + System.out.println("Recording possible: " + record != null); + } + } + } + +}