Now crawling in desktop mode should work. It is much more efficient.
[utils] / crawler / kiss / src / org / wamblee / crawler / kiss / main / KissCrawler.java
index 3191ff24e627cffdd5d0eab3321e106c090a3ba9..3300e1299e71b6c4268d9ba9447b65d6df17fe8f 100644 (file)
@@ -32,6 +32,7 @@ import org.apache.commons.httpclient.HttpClient;
 import org.apache.commons.httpclient.NameValuePair;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
+import org.dom4j.Element;
 import org.wamblee.crawler.Action;
 import org.wamblee.crawler.Configuration;
 import org.wamblee.crawler.Crawler;
@@ -63,11 +64,11 @@ public class KissCrawler {
      * Start URL of the electronic programme guide.
      */
     private static final String START_URL = "http://epg.kml.kiss-technology.com/login.php";
-    
+
     /**
-     * Default socket timeout to use. 
+     * Default socket timeout to use.
      */
-    private static final int SOCKET_TIMEOUT = 10000; 
+    private static final int SOCKET_TIMEOUT = 10000;
 
     /**
      * Regular expression for matching time interval strings in the retrieved
@@ -89,14 +90,15 @@ public class KissCrawler {
      *             In case of problems.
      */
     public static void main(String[] aArgs) throws Exception {
-        String crawlerConfig = new File(aArgs[0]).getCanonicalPath(); 
-        String programConfig = new File(aArgs[1]).getCanonicalPath(); 
+        String crawlerConfig = new File(aArgs[0]).getCanonicalPath();
+        String programConfig = new File(aArgs[1]).getCanonicalPath();
 
         BeanFactory factory = new StandaloneCrawlerBeanFactory();
         Notifier notifier = factory.find(Notifier.class);
-        new KissCrawler(START_URL, SOCKET_TIMEOUT, crawlerConfig, programConfig, notifier, new Report());
+        new KissCrawler(START_URL, SOCKET_TIMEOUT, crawlerConfig,
+                programConfig, notifier, new Report());
     }
-    
+
     /**
      * Constructs the crawler. This retrieves the TV guide by crawling the KiSS
      * EPG guide, filters the guide for interesting programs, tries to record
@@ -106,19 +108,24 @@ public class KissCrawler {
      *            Configuration file for the crawler.
      * @param aProgramConfig
      *            Configuration file describing interesting shows.
-     * @param aNotifier Object used to send notifications of the results.           
-     * @param aReport Report to use. 
+     * @param aNotifier
+     *            Object used to send notifications of the results.
+     * @param aReport
+     *            Report to use.
      * @throws IOException
      *             In case of problems reading files.
-     * @throws NotificationException In case notification fails.
-     * @throws PageException In case of problems retrieving the TV guide. 
+     * @throws NotificationException
+     *             In case notification fails.
+     * @throws PageException
+     *             In case of problems retrieving the TV guide.
      */
-    public KissCrawler(String aCrawlerConfig,
-            String aProgramConfig, Notifier aNotifier, Report aReport) throws IOException, NotificationException, PageException {
-        this(START_URL, SOCKET_TIMEOUT, aCrawlerConfig, aProgramConfig, aNotifier, aReport);
+    public KissCrawler(String aCrawlerConfig, String aProgramConfig,
+            Notifier aNotifier, Report aReport) throws IOException,
+            NotificationException, PageException {
+        this(START_URL, SOCKET_TIMEOUT, aCrawlerConfig, aProgramConfig,
+                aNotifier, aReport);
     }
 
-
     /**
      * Constructs the crawler. This retrieves the TV guide by crawling the KiSS
      * EPG guide, filters the guide for interesting programs, tries to record
@@ -126,27 +133,35 @@ public class KissCrawler {
      * 
      * @param aStartUrl
      *            Start URL of the electronic programme guide.
-     * @param aSocketTimeout Socket timeout to use. 
+     * @param aSocketTimeout
+     *            Socket timeout to use.
      * @param aCrawlerConfig
      *            Configuration file for the crawler.
      * @param aProgramConfig
      *            Configuration file describing interesting shows.
-     * @param aNotifier Object used to send notifications of the results.           
-     * @param aReport Report to use. 
+     * @param aNotifier
+     *            Object used to send notifications of the results.
+     * @param aReport
+     *            Report to use.
      * @throws IOException
      *             In case of problems reading files.
-     * @throws NotificationException In case notification fails.
-     * @throws PageException In case of problems retrieving the TV guide.   
+     * @throws NotificationException
+     *             In case notification fails.
+     * @throws PageException
+     *             In case of problems retrieving the TV guide.
      */
-    public KissCrawler(String aStartUrl, int aSocketTimeout, String aCrawlerConfig,
-            String aProgramConfig, Notifier aNotifier, Report aReport) throws IOException, NotificationException, PageException {
+    public KissCrawler(String aStartUrl, int aSocketTimeout,
+            String aCrawlerConfig, String aProgramConfig, Notifier aNotifier,
+            Report aReport) throws IOException, NotificationException,
+            PageException {
 
         _pattern = Pattern.compile(TIME_REGEX);
 
         try {
             HttpClient client = new HttpClient();
             // client.getHostConfiguration().setProxy("127.0.0.1", 3128);
-            client.getParams().setParameter("http.socket.timeout", SOCKET_TIMEOUT);
+            client.getParams().setParameter("http.socket.timeout",
+                    SOCKET_TIMEOUT);
 
             XslTransformer transformer = new XslTransformer(
                     new ClasspathUriResolver());
@@ -163,12 +178,11 @@ public class KissCrawler {
                 TVGuide guide = createGuide(page, aReport);
                 PrintVisitor printer = new PrintVisitor(System.out);
                 guide.accept(printer);
-                processResults(programFilters, guide, aNotifier,
-                        aReport);
+                processResults(programFilters, guide, aNotifier, aReport);
             } catch (PageException e) {
                 aReport.addMessage("Problem getting TV guide", e);
                 LOG.info("Problem getting TV guide", e);
-                throw e; 
+                throw e;
             }
             aNotifier.send(aReport.asXml());
         } finally {
@@ -260,12 +274,14 @@ public class KissCrawler {
      * @param aReport
      *            Report to use.
      * @return TV guide.
-     * @throws PageException In case of problem getting the tv guide.
+     * @throws PageException
+     *             In case of problem getting the tv guide.
      */
-    private TVGuide createGuide(Page aPage, Report aReport) throws PageException {
+    private TVGuide createGuide(Page aPage, Report aReport)
+            throws PageException {
         LOG.info("Obtaining full TV guide");
         Action[] actions = aPage.getActions();
-        if ( actions.length == 0 ) { 
+        if (actions.length == 0) {
             LOG.error("No channels found");
             throw new PageException("No channels found");
         }
@@ -273,13 +289,13 @@ public class KissCrawler {
         for (Action action : actions) {
             try {
                 LOG.info("Getting channel info for '" + action.getName() + "'");
-                Action rightNow = action.execute().getAction("right-now");
-                if (rightNow == null) {
+                Action tomorrow = action.execute().getAction("tomorrow");
+                if (tomorrow == null) {
                     throw new PageException("Channel summary page for '"
                             + action.getName()
                             + "' does not contain required information");
                 }
-                Channel channel = createChannel(action.getName(), rightNow
+                Channel channel = createChannel(action.getName(), tomorrow
                         .execute(), aReport);
                 channels.add(channel);
                 if (SystemProperties.isDebugMode()) {
@@ -319,18 +335,25 @@ public class KissCrawler {
                 TimeInterval interval = new TimeInterval(begin, end);
                 String description = "";
                 String keywords = "";
+
                 if (!SystemProperties.isNoProgramDetailsRequired()) {
-                    try {
-                        Page programInfo = action.execute();
-                        description = programInfo.getContent().element(
-                                "description").getText().trim();
-                        keywords = programInfo.getContent().element("keywords")
-                                .getText().trim();
-                    } catch (PageException e) {
-                        String msg =   "Program details could not be determined for '"
-                            + action.getName() + "'";
-                        aReport.addMessage(msg, e);
-                        LOG.warn(msg, e);
+                    Element descriptionElem = action.getContent().element(
+                            "description");
+                    if (descriptionElem == null) {
+                        try {
+                            Page programInfo = action.execute();
+                            description = programInfo.getContent().element(
+                                    "description").getText().trim();
+                            keywords = programInfo.getContent().element(
+                                    "keywords").getText().trim();
+                        } catch (PageException e) {
+                            String msg = "Program details could not be determined for '"
+                                    + action.getName() + "'";
+                            aReport.addMessage(msg, e);
+                            LOG.warn(msg, e);
+                        }
+                    } else {
+                        description = descriptionElem.getTextTrim();
                     }
                 }
                 Program program = new Program(aChannel, action.getName(),