Migration to Maven almost complete. At least everything builds and works.
[utils] / crawler / basic / src / main / java / org / wamblee / crawler / impl / App.java
diff --git a/crawler/basic/src/main/java/org/wamblee/crawler/impl/App.java b/crawler/basic/src/main/java/org/wamblee/crawler/impl/App.java
new file mode 100644 (file)
index 0000000..b0339f4
--- /dev/null
@@ -0,0 +1,116 @@
+package org.wamblee.crawler.impl;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.InputStream;
+
+import org.apache.commons.httpclient.HttpClient;
+import org.apache.commons.httpclient.NameValuePair;
+import org.dom4j.Element;
+import org.wamblee.crawler.Action;
+import org.wamblee.crawler.Configuration;
+import org.wamblee.crawler.Crawler;
+import org.wamblee.crawler.Page;
+import org.wamblee.crawler.PageException;
+import org.wamblee.xml.XslTransformer;
+
+/*
+ * Copyright 2005 the original author or authors.
+ * 
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * 
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Test application which uses the crawler. 
+ */
+public final class App {
+
+    /**
+     * Disabled constructor.
+     * 
+     */
+    private App() {
+        // Empty
+    }
+    
+    /**
+     * Runs a test program.
+     * 
+     * @param aArgs
+     *            Arguments. First argument is the crawler config file name and
+     *            second argument is the start url.
+     * @throws Exception
+     *             In case of problems.
+     */
+    public static void main(String[] aArgs) throws Exception {
+        String configFileName = aArgs[0];
+        String starturl = aArgs[1];
+
+        ConfigurationParser parser = new ConfigurationParser(new XslTransformer());
+        InputStream configFile = new FileInputStream(new File(configFileName));
+        Configuration config = parser.parse(configFile);
+
+        HttpClient client = new HttpClient();
+        // client.getHostConfiguration().setProxy("localhost", 3128);
+
+        Crawler crawler = new CrawlerImpl(client, config);
+
+        System.out.println("Retrieving: " + starturl);
+        Page page = crawler.getPage(starturl, new NameValuePair[0]);
+        showPage(page);
+        page = page.getAction("channels-favorites").execute();
+        recordInterestingShows(page);
+        showPage(page);
+        page = page.getAction("Nederland 1").execute();
+        showPage(page);
+        page = page.getAction("right-now").execute();
+        showPage(page);
+        page = page.getAction("Het elfde uur").execute();
+        showPage(page);
+    }
+
+    /**
+     * @param starturl
+     * @param crawler
+     */
+    private static void showPage(Page aPage) {
+        Action[] links = aPage.getActions();
+        for (Action link : links) {
+            System.out.println("Link found '" + link.getName() + "'");
+        }
+        Element element = aPage.getContent();
+        System.out.println("Retrieved content: " + element.asXML());
+    }
+
+    private static void recordInterestingShows(Page page) throws PageException {
+        Action[] channels = page.getActions();
+        for (Action channel : channels) {
+            examineChannel(channel.getName(), channel.execute().getAction(
+                    "right-now").execute());
+        }
+    }
+
+    private static void examineChannel(String aChannel, Page aPage)
+            throws PageException {
+        Action[] programs = aPage.getActions();
+        for (Action program : programs) {
+            System.out.println(aChannel + " - " + program.getName());
+            if (program.getName().toLowerCase().matches(".*babe.*")) {
+                Page programPage = program.execute();
+                Action record = programPage.getAction("record");
+                System.out.println("Recording possible: " + record != null);
+            }
+        }
+    }
+
+}