(no commit message)
[utils] / crawler / kiss / src / org / wamblee / crawler / kiss / KissCrawler.java
diff --git a/crawler/kiss/src/org/wamblee/crawler/kiss/KissCrawler.java b/crawler/kiss/src/org/wamblee/crawler/kiss/KissCrawler.java
new file mode 100644 (file)
index 0000000..dd9ba78
--- /dev/null
@@ -0,0 +1,154 @@
+/*
+ * Copyright 2005 the original author or authors.
+ * 
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * 
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.wamblee.crawler.kiss;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.InputStream;
+import java.io.PrintStream;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.apache.commons.httpclient.HttpClient;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.dom4j.Element;
+import org.wamblee.conditions.Condition;
+import org.wamblee.conditions.OrCondition;
+import org.wamblee.crawler.Action;
+import org.wamblee.crawler.Configuration;
+import org.wamblee.crawler.Crawler;
+import org.wamblee.crawler.Page;
+import org.wamblee.crawler.impl.ConfigurationParser;
+import org.wamblee.crawler.impl.CrawlerImpl;
+
+/**
+ * 
+ */
+public class KissCrawler {
+    
+    private static final Log LOG = LogFactory.getLog(KissCrawler.class);
+
+    private static final String LOG_FILE = "kiss.log";
+
+    private static final String START_URL = "http://epg.kml.kiss-technology.com/login_core.php";
+
+    private static final String CRAWLER_CONFIG = "config.xml";
+    
+    private static final String PROGRAM_CONFIG = "programs.xml";
+
+    private static final String TIME_REGEX = "([0-9]{2}):([0-9]{2})[^0-9]*([0-9]{2}):([0-9]{2}).*";
+
+    private Pattern _pattern;
+
+    public KissCrawler(String aStartUrl, String aCrawlerConfig, String aProgramConfig) throws Exception {
+
+        _pattern = Pattern.compile(TIME_REGEX);
+
+        FileOutputStream fos = new FileOutputStream(new File(LOG_FILE));
+        PrintStream os = new PrintStream(fos);
+
+        try {
+            ConfigurationParser parser = new ConfigurationParser(os);
+            InputStream crawlerConfigFile = new FileInputStream(new File(aCrawlerConfig));
+            Configuration config = parser.parse(crawlerConfigFile);
+            
+            InputStream programConfigFile = new FileInputStream(new File(aProgramConfig)); 
+            Condition<Program> programCondition = new ProgramConfigurationParser().parse(programConfigFile); 
+           
+
+            HttpClient client = new HttpClient();
+            // client.getHostConfiguration().setProxy("localhost", 3128);
+
+            Crawler crawler = new CrawlerImpl(client, config);
+
+            Page page = crawler.getPage(aStartUrl);
+            showPage(page);
+            page = page.getAction("channels-favorites").execute();
+            TVGuide guide = createGuide(page);
+            PrintVisitor printer = new PrintVisitor(System.out);
+            guide.accept(printer);
+            
+            MatchVisitor matcher = new MatchVisitor(programCondition);
+            guide.accept(matcher);
+            List<Program> programs = matcher.getMatches(); 
+            for (Program program: programs) { 
+                System.out.println("Found: " + program + " record: " + program.record() );
+            }
+            
+        } finally {
+            os.flush();
+            os.close();
+            System.out.println("Output written on '" + LOG_FILE + "'");
+        }
+    }
+
+    public static void main(String[] args) throws Exception {
+        new KissCrawler(START_URL, CRAWLER_CONFIG, PROGRAM_CONFIG);
+    }
+
+    private void showPage(Page aPage) {
+        Action[] links = aPage.getActions();
+        for (Action link : links) {
+            System.out.println("Link found '" + link.getName() + "'");
+        }
+        Element element = aPage.getContent();
+        System.out.println("Retrieved content: " + element.asXML());
+    }
+
+    private TVGuide createGuide(Page page) {
+        LOG.info("Obtaining full TV guide");
+        Action[] actions = page.getActions();
+        List<Channel> channels = new ArrayList<Channel>();
+        for (Action action : actions) {
+            Channel channel = createChannel(action.getName(), action.execute()
+                    .getAction("right-now").execute());
+            channels.add(channel);
+        }
+        return new TVGuide(channels);
+    }
+
+    private Channel createChannel(String aChannel, Page aPage) {
+        LOG.info("Obtaining program for " + aChannel);
+        Action[] programActions = aPage.getActions();
+        List<Program> programs = new ArrayList<Program>();
+        for (Action action : programActions) {
+            String time = action.getContent().element("time").getText().trim();
+            Matcher matcher = _pattern.matcher(time);
+            if (matcher.matches()) {
+                Time begin = new Time(Integer.parseInt(matcher.group(1)), 
+                                      Integer.parseInt(matcher.group(2)));
+                Time end = new Time(Integer.parseInt(matcher.group(3)), 
+                        Integer.parseInt(matcher.group(4)));
+                TimeInterval interval = new TimeInterval(begin, end);
+                //Page programInfo = action.execute();
+                //String description = programInfo.getContent().element("description").getText().trim();
+                //String keywords = programInfo.getContent().element("keywords").getText().trim();
+                String description = "";
+                String keywords = "";
+                Program program = new Program(aChannel, action.getName(), description, keywords, interval, action);
+                
+                LOG.debug("Got program " + program);
+                programs.add(program);
+            }
+        }
+        return new Channel(aChannel, programs);
+    }
+}