More robustness: a detailed report is now always sent, even if crawling fails.
[utils] / crawler / kiss / src / org / wamblee / crawler / kiss / main / KissCrawler.java
index ad8de40205a2bbf34b91acb1ade106292cc3b1d2..0c01a9bcc3e9a712df3a3553a54c265b1e4b7cc0 100644 (file)
@@ -112,28 +112,39 @@ public class KissCrawler {
      *             In case of problems sending a mail notification.
      */
     public KissCrawler(String aStartUrl, String aCrawlerConfig,
-            String aProgramConfig) throws IOException, MessagingException {
+            String aProgramConfig) throws IOException, NotificationException {
 
         _pattern = Pattern.compile(TIME_REGEX);
 
         try {
             HttpClient client = new HttpClient();
             // client.getHostConfiguration().setProxy("127.0.0.1", 3128);
-            
-            XslTransformer transformer = new XslTransformer(new ClasspathUriResolver());
+
+            XslTransformer transformer = new XslTransformer(
+                    new ClasspathUriResolver());
 
             Crawler crawler = createCrawler(aCrawlerConfig, client, transformer);
             InputStream programConfigFile = new FileInputStream(new File(
                     aProgramConfig));
-            ProgramConfigurationParser parser = new ProgramConfigurationParser(transformer);
+            ProgramConfigurationParser parser = new ProgramConfigurationParser(
+                    transformer);
             parser.parse(programConfigFile);
             List<ProgramFilter> programFilters = parser.getFilters();
 
-            Page page = getStartPage(aStartUrl, crawler);
-            TVGuide guide = createGuide(page);
-            PrintVisitor printer = new PrintVisitor(System.out);
-            guide.accept(printer);
-            processResults(programFilters, guide, parser.getNotifier());
+            Report report = new Report();
+
+            try {
+                Page page = getStartPage(aStartUrl, crawler, report);
+                TVGuide guide = createGuide(page, report);
+                PrintVisitor printer = new PrintVisitor(System.out);
+                guide.accept(printer);
+                processResults(programFilters, guide, parser.getNotifier(),
+                        report);
+            } catch (PageException e) {
+                report.addMessage("Problem getting TV guide", e);
+                LOG.info("Problem getting TV guide", e);
+            }
+            parser.getNotifier().send(report.asXml());
         } finally {
             System.out.println("Crawler finished");
         }
@@ -150,8 +161,8 @@ public class KissCrawler {
      *             In case of problems sending a summary mail.
      */
     private void processResults(List<ProgramFilter> aProgramCondition,
-            TVGuide aGuide, Notifier aNotifier) throws MessagingException {
-        ProgramActionExecutor executor = new ProgramActionExecutor();
+            TVGuide aGuide, Notifier aNotifier, Report aReport) {
+        ProgramActionExecutor executor = new ProgramActionExecutor(aReport);
         for (ProgramFilter filter : aProgramCondition) {
             List<Program> programs = filter.apply(aGuide);
             ProgramAction action = filter.getAction();
@@ -160,11 +171,7 @@ public class KissCrawler {
             }
         }
         executor.commit();
-        try {
-            aNotifier.send(executor.getReport());
-        } catch (NotificationException e) {
-            throw new RuntimeException(e);
-        }
+
     }
 
     /**
@@ -180,8 +187,8 @@ public class KissCrawler {
      * @throws FileNotFoundException
      *             In case configuration files cannot be found.
      */
-    private Crawler createCrawler(String aCrawlerConfig, 
-            HttpClient aClient, XslTransformer aTransformer) throws FileNotFoundException {
+    private Crawler createCrawler(String aCrawlerConfig, HttpClient aClient,
+            XslTransformer aTransformer) throws FileNotFoundException {
         ConfigurationParser parser = new ConfigurationParser(aTransformer);
         InputStream crawlerConfigFile = new FileInputStream(new File(
                 aCrawlerConfig));
@@ -198,15 +205,23 @@ public class KissCrawler {
      *            URL of the electronic programme guide.
      * @param aCrawler
      *            Crawler to use.
+     * @param aReport
+     *            Report for collecting problems (not referenced in this method body).
      * @return Starting page.
      */
-    private Page getStartPage(String aStartUrl, Crawler aCrawler) {
+    private Page getStartPage(String aStartUrl, Crawler aCrawler, Report aReport)
+            throws PageException {
         try {
             Page page = aCrawler.getPage(aStartUrl);
-            return page.getAction("channels-favorites").execute();
+            Action favorites = page.getAction("channels-favorites");
+            if (favorites == null) {
+                String msg = "Channels favorites action not found on start page";
+                throw new PageException(msg);
+            }
+            return favorites.execute();
         } catch (PageException e) {
-            throw new RuntimeException(
-                    "Could not login to electronic program guide", e);
+            String msg = "Could not login to electronic programme guide.";
+            throw new PageException(msg, e);
         }
     }
 
@@ -215,22 +230,32 @@ public class KissCrawler {
      * 
      * @param aPage
      *            Starting page.
+     * @param aReport
+     *            Report to which problems obtaining channel information are added.
      * @return TV guide.
      */
-    private TVGuide createGuide(Page aPage) {
+    private TVGuide createGuide(Page aPage, Report aReport) {
         LOG.info("Obtaining full TV guide");
         Action[] actions = aPage.getActions();
         List<Channel> channels = new ArrayList<Channel>();
         for (Action action : actions) {
             try {
                 LOG.info("Getting channel info for '" + action.getName() + "'");
-                Channel channel = createChannel(action.getName(), action
-                        .execute().getAction("right-now").execute());
+                Action rightNow = action.execute().getAction("right-now");
+                if (rightNow == null) {
+                    throw new PageException("Channel summary page for '"
+                            + action.getName()
+                            + "' does not contain required information");
+                }
+                Channel channel = createChannel(action.getName(), rightNow
+                        .execute(), aReport);
                 channels.add(channel);
                 if (SystemProperties.isDebugMode()) {
                     break; // Only one channel is crawled.
                 }
             } catch (PageException e) {
+                aReport.addMessage("Could not create channel information for '"
+                        + action.getName() + "'");
                 LOG.error("Could not create channel information for '"
                         + action.getName() + "'", e);
             }
@@ -247,7 +272,7 @@ public class KissCrawler {
      *            Starting page for the channel.
      * @return Channel.
      */
-    private Channel createChannel(String aChannel, Page aPage) {
+    private Channel createChannel(String aChannel, Page aPage, Report aReport) {
         LOG.info("Obtaining program for " + aChannel);
         Action[] programActions = aPage.getActions();
         List<Program> programs = new ArrayList<Program>();
@@ -270,9 +295,10 @@ public class KissCrawler {
                         keywords = programInfo.getContent().element("keywords")
                                 .getText().trim();
                     } catch (PageException e) {
-                        LOG.warn(
-                                "Program details could not be determined for '"
-                                        + action.getName() + "'", e);
+                        String msg =   "Program details could not be determined for '"
+                            + action.getName() + "'";
+                        aReport.addMessage(msg, e);
+                        LOG.warn(msg, e);
                     }
                 }
                 Program program = new Program(aChannel, action.getName(),