From a43d898e496060a177addb62bf874c96dc6d46ff Mon Sep 17 00:00:00 2001 From: Erik Brakkee Date: Sun, 30 Apr 2006 19:01:36 +0000 Subject: [PATCH] --- crawler/kiss/build.xml | 1 + crawler/kiss/conf/kiss/programs.xml | 2 +- crawler/kiss/conf/kiss/run.sh | 4 +- crawler/kiss/deps.xml | 2 +- .../crawler/kiss/main/KissCrawler.java | 38 +++++---- .../kiss/main/ProgramActionExecutor.java | 3 +- .../kiss/main/ProgramConfigurationParser.java | 79 ------------------- .../kiss/notification/MailNotifier.java | 14 ++++ .../crawler/kiss/notification/Notifier.java | 18 +++++ crawler/kissweb/WebRoot/WEB-INF/overview.jsp | 16 +++- crawler/kissweb/src/beanRefContext.xml | 1 + .../kissweb/src/org.wamblee.crawler.kiss.xml | 5 +- .../kiss/scheduling/CrawlerExecutor.java | 7 +- .../kiss/scheduling/CrawlerExecutorImpl.java | 19 +++-- .../kiss/scheduling/CrawlerScheduler.java | 22 +++++- ...rawlerSchedule.java => CrawlerStatus.java} | 44 ++++++++--- .../kiss/scheduling/quartz/CrawlerJob.java | 6 +- .../crawler/kiss/servlet/Application.java | 1 - .../crawler/kiss/servlet/CrawlerServlet.java | 33 +++++--- 19 files changed, 174 insertions(+), 141 deletions(-) rename crawler/kissweb/src/org/wamblee/crawler/kiss/scheduling/{CrawlerSchedule.java => CrawlerStatus.java} (82%) diff --git a/crawler/kiss/build.xml b/crawler/kiss/build.xml index 2223c428..bae9b1e1 100644 --- a/crawler/kiss/build.xml +++ b/crawler/kiss/build.xml @@ -48,6 +48,7 @@ + diff --git a/crawler/kiss/conf/kiss/programs.xml b/crawler/kiss/conf/kiss/programs.xml index d53254c0..ac87857b 100644 --- a/crawler/kiss/conf/kiss/programs.xml +++ b/crawler/kiss/conf/kiss/programs.xml @@ -72,7 +72,7 @@ 9 - dr.*who + ((dr)|(doct.*)).*who diff --git a/crawler/kiss/conf/kiss/run.sh b/crawler/kiss/conf/kiss/run.sh index 5278c563..f04c0742 100755 --- a/crawler/kiss/conf/kiss/run.sh +++ b/crawler/kiss/conf/kiss/run.sh @@ -1,6 +1,6 @@ #!/bin/ksh -cd $( dirname $0 ) +cd $( dirname $0 )/../conf CP="" for i in ../lib/*.jar @@ -10,5 +10,5 @@ done set -x java -classpath $CP org.wamblee.crawler.kiss.main.KissCrawler \ - ../conf/config.xml ../conf/programs.xml + config.xml programs.xml diff --git a/crawler/kiss/deps.xml b/crawler/kiss/deps.xml index 3c0e0cce..6e9095ab 100644 --- a/crawler/kiss/deps.xml +++ b/crawler/kiss/deps.xml @@ -1,7 +1,7 @@ + depends="logging.d,mail.d,commons-email.d,commons-beanutils.d,commons-codec.d,dom4j.d,xerces.d,httpclient.d,jtidy.d,wamblee.support.d,wamblee.crawler.d,spring.d"> diff --git a/crawler/kiss/src/org/wamblee/crawler/kiss/main/KissCrawler.java b/crawler/kiss/src/org/wamblee/crawler/kiss/main/KissCrawler.java index 969c5b23..b0be9ef6 100644 --- a/crawler/kiss/src/org/wamblee/crawler/kiss/main/KissCrawler.java +++ b/crawler/kiss/src/org/wamblee/crawler/kiss/main/KissCrawler.java @@ -46,6 +46,7 @@ import org.wamblee.crawler.kiss.guide.Time; import org.wamblee.crawler.kiss.guide.TimeInterval; import org.wamblee.crawler.kiss.notification.NotificationException; import org.wamblee.crawler.kiss.notification.Notifier; +import org.wamblee.general.BeanFactory; import org.wamblee.xml.ClasspathUriResolver; import org.wamblee.xml.XslTransformer; @@ -89,7 +90,10 @@ public class KissCrawler { public static void main(String[] aArgs) throws Exception { String crawlerConfig = new File(aArgs[0]).getCanonicalPath(); String programConfig = new File(aArgs[1]).getCanonicalPath(); - new KissCrawler(START_URL, SOCKET_TIMEOUT, crawlerConfig, programConfig); + + BeanFactory factory = new StandaloneCrawlerBeanFactory(); + Notifier notifier = factory.find(Notifier.class); + new KissCrawler(START_URL, SOCKET_TIMEOUT, crawlerConfig, programConfig, notifier, new Report()); } /** @@ -101,14 +105,16 @@ public class KissCrawler { * Configuration file for the crawler. * @param aProgramConfig * Configuration file describing interesting shows. + * @param aNotifier Object used to send notifications of the results. + * @param aReport Report to use. * @throws IOException * In case of problems reading files. - * @throws MessagingException - * In case of problems sending a mail notification. + * @throws NotificationException In case notification fails. + * @throws PageException In case of problems retrieving the TV guide. */ public KissCrawler(String aCrawlerConfig, - String aProgramConfig) throws IOException, NotificationException, PageException { - this(START_URL, SOCKET_TIMEOUT, aCrawlerConfig, aProgramConfig); + String aProgramConfig, Notifier aNotifier, Report aReport) throws IOException, NotificationException, PageException { + this(START_URL, SOCKET_TIMEOUT, aCrawlerConfig, aProgramConfig, aNotifier, aReport); } @@ -124,13 +130,15 @@ public class KissCrawler { * Configuration file for the crawler. * @param aProgramConfig * Configuration file describing interesting shows. + * @param aNotifier Object used to send notifications of the results. + * @param aReport Report to use. * @throws IOException * In case of problems reading files. - * @throws MessagingException - * In case of problems sending a mail notification. + * @throws NotificationException In case notification fails. + * @throws PageException In case of problems retrieving the TV guide. */ public KissCrawler(String aStartUrl, int aSocketTimeout, String aCrawlerConfig, - String aProgramConfig) throws IOException, NotificationException, PageException { + String aProgramConfig, Notifier aNotifier, Report aReport) throws IOException, NotificationException, PageException { _pattern = Pattern.compile(TIME_REGEX); @@ -150,21 +158,19 @@ public class KissCrawler { parser.parse(programConfigFile); List programFilters = parser.getFilters(); - Report report = new Report(); - try { - Page page = getStartPage(aStartUrl, crawler, report); - TVGuide guide = createGuide(page, report); + Page page = getStartPage(aStartUrl, crawler, aReport); + TVGuide guide = createGuide(page, aReport); PrintVisitor printer = new PrintVisitor(System.out); guide.accept(printer); - processResults(programFilters, guide, parser.getNotifier(), - report); + processResults(programFilters, guide, aNotifier, + aReport); } catch (PageException e) { - report.addMessage("Problem getting TV guide", e); + aReport.addMessage("Problem getting TV guide", e); LOG.info("Problem getting TV guide", e); throw e; } - parser.getNotifier().send(report.asXml()); + aNotifier.send(aReport.asXml()); } finally { System.out.println("Crawler finished"); } diff --git a/crawler/kiss/src/org/wamblee/crawler/kiss/main/ProgramActionExecutor.java b/crawler/kiss/src/org/wamblee/crawler/kiss/main/ProgramActionExecutor.java index fae06bda..68a4c13d 100644 --- a/crawler/kiss/src/org/wamblee/crawler/kiss/main/ProgramActionExecutor.java +++ b/crawler/kiss/src/org/wamblee/crawler/kiss/main/ProgramActionExecutor.java @@ -50,7 +50,8 @@ public class ProgramActionExecutor { /** * Constructs the program action executor. - * + * + * @param aReport Report to use. */ public ProgramActionExecutor(Report aReport) { _showsToRecord = new TreeMap>(); diff --git a/crawler/kiss/src/org/wamblee/crawler/kiss/main/ProgramConfigurationParser.java b/crawler/kiss/src/org/wamblee/crawler/kiss/main/ProgramConfigurationParser.java index 2e22f351..fc41cde4 100644 --- a/crawler/kiss/src/org/wamblee/crawler/kiss/main/ProgramConfigurationParser.java +++ b/crawler/kiss/src/org/wamblee/crawler/kiss/main/ProgramConfigurationParser.java @@ -30,9 +30,6 @@ import org.wamblee.conditions.AndCondition; import org.wamblee.conditions.Condition; import org.wamblee.conditions.PropertyRegexCondition; import org.wamblee.crawler.kiss.guide.Program; -import org.wamblee.crawler.kiss.notification.MailNotifier; -import org.wamblee.crawler.kiss.notification.MailServer; -import org.wamblee.crawler.kiss.notification.Notifier; import org.wamblee.xml.XslTransformer; /** @@ -43,14 +40,6 @@ class ProgramConfigurationParser { private static final int DEFAULT_PRIORITY = 1; - private static final String ELEM_PASSWORD = "password"; - - private static final String ELEM_USERNAME = "username"; - - private static final String ELEM_PORT = "port"; - - private static final String ELEM_HOST = "host"; - // Formatting configuration. private static final String ELEM_FORMAT = "format"; @@ -58,18 +47,6 @@ class ProgramConfigurationParser { private static final String ELEM_HTML = "html"; - // Mail server configuration. - - private static final String ELEM_NOTIFICATION = "notification"; - - private static final String ELEM_SMTP = "smtp"; - - private static final String ELEM_SUBJECT = "subject"; - - private static final String ELEM_TO = "to"; - - private static final String ELEM_FROM = "from"; - // Configuration of interesting programs. private static final String ELEM_PROGRAM = "program"; @@ -88,11 +65,8 @@ class ProgramConfigurationParser { private XslTransformer _transformer; - private Notifier _notifier; - ProgramConfigurationParser(XslTransformer aTransformer) { _filters = null; - _notifier = null; _transformer = aTransformer; } @@ -151,55 +125,11 @@ class ProgramConfigurationParser { filters.add(new ProgramFilter(condition, action)); } _filters = filters; - - Element notifier = root.element(ELEM_NOTIFICATION); - _notifier = parseNotifier(notifier); - } catch (DocumentException e) { throw new RuntimeException("Error parsing program configuraiton", e); } } - /** - * Parses the notifier - * - * @return Notifier - */ - private Notifier parseNotifier(Element aNotifier) { - String from = aNotifier.elementTextTrim(ELEM_FROM); - String to = aNotifier.elementTextTrim(ELEM_TO); - String subject = aNotifier.elementTextTrim(ELEM_SUBJECT); - - Element smtp = aNotifier.element(ELEM_SMTP); - MailServer server = parseMailServer(smtp); - - Element format = aNotifier.element(ELEM_FORMAT); - String htmlXslt = format.elementTextTrim(ELEM_HTML); - String textXslt = format.elementTextTrim(ELEM_TEXT); - - return new MailNotifier(from, to, subject, htmlXslt, textXslt, server, _transformer); - } - - /** - * Parses the mail server from the XML. - * - * @param aSmtp - * Mail server configuration. - * @return Mail server. - */ - private MailServer parseMailServer(Element aSmtp) { - String host = aSmtp.elementTextTrim(ELEM_HOST); - Element portElem = aSmtp.element(ELEM_PORT); - int port = DEFAULT_SMTP_PORT; - if (portElem != null) { - port = Integer.valueOf(portElem.getTextTrim()); - } - String username = aSmtp.elementTextTrim(ELEM_USERNAME); - String password = aSmtp.elementTextTrim(ELEM_PASSWORD); - - return new MailServer(host, port, username, password); - } - /** * Returns the list of program filters. * @@ -208,13 +138,4 @@ class ProgramConfigurationParser { public List getFilters() { return _filters; } - - /** - * Returns the notifier to use. - * - * @return Notifier. - */ - public Notifier getNotifier() { - return _notifier; - } } diff --git a/crawler/kiss/src/org/wamblee/crawler/kiss/notification/MailNotifier.java b/crawler/kiss/src/org/wamblee/crawler/kiss/notification/MailNotifier.java index 3b6431d7..e4646572 100644 --- a/crawler/kiss/src/org/wamblee/crawler/kiss/notification/MailNotifier.java +++ b/crawler/kiss/src/org/wamblee/crawler/kiss/notification/MailNotifier.java @@ -128,4 +128,18 @@ public class MailNotifier implements Notifier { String reportXmlText = aReport.asXML(); return _transformer.textTransform(reportXmlText.getBytes(), _transformer.resolve(aXslt)); } + + /* (non-Javadoc) + * @see org.wamblee.crawler.kiss.notification.Notifier#asHtml(org.dom4j.Element) + */ + public String asHtml(Element aReport) throws IOException, TransformerException { + return transformReport(aReport, _htmlXslt); + } + + /* (non-Javadoc) + * @see org.wamblee.crawler.kiss.notification.Notifier#asText(org.dom4j.Element) + */ + public String asText(Element aReport) throws IOException, TransformerException { + return transformReport(aReport, _textXslt); + } } diff --git a/crawler/kiss/src/org/wamblee/crawler/kiss/notification/Notifier.java b/crawler/kiss/src/org/wamblee/crawler/kiss/notification/Notifier.java index ef35b6d4..eda91ba9 100644 --- a/crawler/kiss/src/org/wamblee/crawler/kiss/notification/Notifier.java +++ b/crawler/kiss/src/org/wamblee/crawler/kiss/notification/Notifier.java @@ -15,6 +15,10 @@ */ package org.wamblee.crawler.kiss.notification; +import java.io.IOException; + +import javax.xml.transform.TransformerException; + import org.dom4j.Element; /** @@ -30,4 +34,18 @@ public interface Notifier { * Report to send. */ void send(Element aReport) throws NotificationException; + + /** + * Converts the report to html. + * @param aReport Report to convert. + * @return + */ + String asHtml(Element aReport) throws IOException, TransformerException; + + /** + * Converts the report to text. + * @param aReport Report to convert. + * @return + */ + String asText(Element aReport) throws IOException, TransformerException; } diff --git a/crawler/kissweb/WebRoot/WEB-INF/overview.jsp b/crawler/kissweb/WebRoot/WEB-INF/overview.jsp index bca8cd3c..ce749492 100644 --- a/crawler/kissweb/WebRoot/WEB-INF/overview.jsp +++ b/crawler/kissweb/WebRoot/WEB-INF/overview.jsp @@ -30,7 +30,8 @@ - + + Last executed at: @@ -46,14 +47,23 @@ - + Last message: - + + + + Last report: + + + details + + +
diff --git a/crawler/kissweb/src/beanRefContext.xml b/crawler/kissweb/src/beanRefContext.xml index 0881652f..f1a89bd4 100644 --- a/crawler/kissweb/src/beanRefContext.xml +++ b/crawler/kissweb/src/beanRefContext.xml @@ -7,6 +7,7 @@ class="org.springframework.context.support.ClassPathXmlApplicationContext"> + org.wamblee.crawler.notification.xml org.wamblee.crawler.kiss.xml diff --git a/crawler/kissweb/src/org.wamblee.crawler.kiss.xml b/crawler/kissweb/src/org.wamblee.crawler.kiss.xml index 01e88591..325ef251 100644 --- a/crawler/kissweb/src/org.wamblee.crawler.kiss.xml +++ b/crawler/kissweb/src/org.wamblee.crawler.kiss.xml @@ -14,12 +14,13 @@ class="org.wamblee.crawler.kiss.scheduling.CrawlerExecutorImpl"> /home/erik/crawler/config.xml /home/erik/crawler/programs.xml + - + diff --git a/crawler/kissweb/src/org/wamblee/crawler/kiss/scheduling/CrawlerExecutor.java b/crawler/kissweb/src/org/wamblee/crawler/kiss/scheduling/CrawlerExecutor.java index 428ba983..c5dba6fd 100644 --- a/crawler/kissweb/src/org/wamblee/crawler/kiss/scheduling/CrawlerExecutor.java +++ b/crawler/kissweb/src/org/wamblee/crawler/kiss/scheduling/CrawlerExecutor.java @@ -18,6 +18,8 @@ package org.wamblee.crawler.kiss.scheduling; import java.util.Date; +import org.wamblee.crawler.kiss.main.Report; + /** * Encapsulates the actual execution of the crawler. * This interface makes it possible to test the scheduling logic @@ -28,8 +30,9 @@ public interface CrawlerExecutor { /** * Executes the crawler. - * @param aDate Date the crawler is being triggered. + * @param aDate Date the crawler is being triggered. + * @param The report from the crawler. * @throws Exception */ - void execute(Date aDate) throws Exception; + void execute(Date aDate, Report aReport) throws Exception; } diff --git a/crawler/kissweb/src/org/wamblee/crawler/kiss/scheduling/CrawlerExecutorImpl.java b/crawler/kissweb/src/org/wamblee/crawler/kiss/scheduling/CrawlerExecutorImpl.java index 35d3f923..ad178276 100644 --- a/crawler/kissweb/src/org/wamblee/crawler/kiss/scheduling/CrawlerExecutorImpl.java +++ b/crawler/kissweb/src/org/wamblee/crawler/kiss/scheduling/CrawlerExecutorImpl.java @@ -19,6 +19,8 @@ package org.wamblee.crawler.kiss.scheduling; import java.util.Date; import org.wamblee.crawler.kiss.main.KissCrawler; +import org.wamblee.crawler.kiss.main.Report; +import org.wamblee.crawler.kiss.notification.Notifier; /** * Implementation which executes the KiSS crawler for retrieving web content. @@ -26,22 +28,25 @@ import org.wamblee.crawler.kiss.main.KissCrawler; public class CrawlerExecutorImpl implements CrawlerExecutor { private String _crawlerConfig; - private String _programConfig; - + private String _programConfig; + private Notifier _notifier; + /** * Constructs the crawler executor. * @param aCrawlerConfig Crawler configuration file. - * @param aProgramConfig Program configuration file. + * @param aProgramConfig Program configuration file. + * @param aNotifier Object used to send notifications. */ - public CrawlerExecutorImpl(String aCrawlerConfig, String aProgramConfig) { + public CrawlerExecutorImpl(String aCrawlerConfig, String aProgramConfig, Notifier aNotifier) { _crawlerConfig = aCrawlerConfig; - _programConfig = aProgramConfig; + _programConfig = aProgramConfig; + _notifier = aNotifier; } /* (non-Javadoc) * @see org.wamblee.crawler.kiss.scheduling.CrawlerScheduler.CrawlerExecutor#execute(java.util.Date) */ - public void execute(Date aDate) throws Exception { - KissCrawler crawler = new KissCrawler(_crawlerConfig, _programConfig); + public void execute(Date aDate, Report aReport) throws Exception { + KissCrawler crawler = new KissCrawler(_crawlerConfig, _programConfig, _notifier, aReport); } } diff --git a/crawler/kissweb/src/org/wamblee/crawler/kiss/scheduling/CrawlerScheduler.java b/crawler/kissweb/src/org/wamblee/crawler/kiss/scheduling/CrawlerScheduler.java index e529da07..fefb8d98 100644 --- a/crawler/kissweb/src/org/wamblee/crawler/kiss/scheduling/CrawlerScheduler.java +++ b/crawler/kissweb/src/org/wamblee/crawler/kiss/scheduling/CrawlerScheduler.java @@ -17,16 +17,32 @@ package org.wamblee.crawler.kiss.scheduling; /** - * + * Interface to the scheduler specific for working with the crawler. */ public interface CrawlerScheduler { + /** + * Initializes the scheduler. + * @throws Exception In case of problems. + */ void initialize() throws Exception; + /** + * Checks if the crawler is running. + * @return True iff the crawler is running. + * @throws Exception In case of problems. + */ boolean isCrawlerRunning() throws Exception; - + + /** + * Schedules the crawler for immediate execution. + * @throws Exception In case of problems. + */ void scheduleNow() throws Exception; + /** + * Shuts down the scheduler. + * @throws Exception In case of problems. + */ void shutdown() throws Exception; - } diff --git a/crawler/kissweb/src/org/wamblee/crawler/kiss/scheduling/CrawlerSchedule.java b/crawler/kissweb/src/org/wamblee/crawler/kiss/scheduling/CrawlerStatus.java similarity index 82% rename from crawler/kissweb/src/org/wamblee/crawler/kiss/scheduling/CrawlerSchedule.java rename to crawler/kissweb/src/org/wamblee/crawler/kiss/scheduling/CrawlerStatus.java index 5121f920..065a2294 100644 --- a/crawler/kissweb/src/org/wamblee/crawler/kiss/scheduling/CrawlerSchedule.java +++ b/crawler/kissweb/src/org/wamblee/crawler/kiss/scheduling/CrawlerStatus.java @@ -22,6 +22,7 @@ import java.util.Date; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.wamblee.crawler.kiss.main.Report; /** * This class encapsulates the logic for deciding whether to @@ -30,16 +31,18 @@ import org.apache.commons.logging.LogFactory; * more complex logic for determining whether to run the * crawler. */ -public class CrawlerSchedule implements Serializable { +public class CrawlerStatus implements Serializable { - private static final Log LOG = LogFactory.getLog(CrawlerSchedule.class); + private static final Log LOG = LogFactory.getLog(CrawlerStatus.class); private CrawlerExecutor _crawler; private Date _lastExecuted; private boolean _lastResult; private Exception _lastException; + private Report _lastReport; private int _hourMin; private int _hourMax; + private boolean _mustExecute; /** * Constructs the scheduler. @@ -53,13 +56,24 @@ public class CrawlerSchedule implements Serializable { * @param aHourMin The crawler may only run if hour >= aHourMin * @param aHourMax The crawler may only run if hour <= aHourMax */ - public CrawlerSchedule(CrawlerExecutor aCrawler, int aHourMin, int aHourMax) { + public CrawlerStatus(CrawlerExecutor aCrawler, int aHourMin, int aHourMax) { _crawler = aCrawler; _lastExecuted = new Date(); _lastResult = true; // the crawler will automatically run the next day. - _lastException = null; + _lastException = null; + _lastReport = null; _hourMin = aHourMin; _hourMax = aHourMax; + _mustExecute = false; + } + + /** + * Determines whether or not the crawler must be run the next time it is triggered. + * @param aMustExecute If true then the crawler will run the next time it is triggered + * by the scheduler. + */ + public void setMustExecute(boolean aMustExecute) { + _mustExecute = aMustExecute; } /** @@ -71,8 +85,9 @@ public class CrawlerSchedule implements Serializable { if (mustExecute(aDate)) { LOG.info("Executing crawler at " + aDate); - try { - _crawler.execute(aDate); + Report report = new Report(); + try { + _crawler.execute(aDate, report); _lastResult = true; _lastException = null; } catch (Exception e) { @@ -80,6 +95,7 @@ public class CrawlerSchedule implements Serializable { _lastException = e; } finally { _lastExecuted = aDate; + _lastReport = report; } } } @@ -92,10 +108,6 @@ public class CrawlerSchedule implements Serializable { return _lastExecuted; } - public void setLastExecuted(Date aDate) { - _lastExecuted = aDate; - } - /** * Gets the result of the last execution. * @return True iff last execution was a success. @@ -113,12 +125,24 @@ public class CrawlerSchedule implements Serializable { return _lastException; } + /** + * Gets the last report from the scheduler. + * @return Report. + */ + public Report getLastReport() { + return _lastReport; + } + /** * Determines whether or not the crawler must be run. * @param aDate Current time. * @return True iff the crawler must be run. */ private boolean mustExecute(Date aDate) { + if (_mustExecute) { + _mustExecute = false; + return true; + } if ( _lastExecuted == null ) { return false; // crawler must be started manually at least once after deployment. } diff --git a/crawler/kissweb/src/org/wamblee/crawler/kiss/scheduling/quartz/CrawlerJob.java b/crawler/kissweb/src/org/wamblee/crawler/kiss/scheduling/quartz/CrawlerJob.java index b86f5a92..60e59e83 100644 --- a/crawler/kissweb/src/org/wamblee/crawler/kiss/scheduling/quartz/CrawlerJob.java +++ b/crawler/kissweb/src/org/wamblee/crawler/kiss/scheduling/quartz/CrawlerJob.java @@ -21,7 +21,7 @@ import org.apache.commons.logging.LogFactory; import org.quartz.JobExecutionContext; import org.quartz.JobExecutionException; import org.quartz.StatefulJob; -import org.wamblee.crawler.kiss.scheduling.CrawlerSchedule; +import org.wamblee.crawler.kiss.scheduling.CrawlerStatus; import org.wamblee.general.BeanKernel; /** @@ -48,8 +48,8 @@ public class CrawlerJob implements StatefulJob { throws JobExecutionException { LOG.info("Job triggered"); try { - CrawlerSchedule schedule = BeanKernel.getBeanFactory().find( - CrawlerSchedule.class); + CrawlerStatus schedule = BeanKernel.getBeanFactory().find( + CrawlerStatus.class); schedule.execute(aContext.getFireTime()); } catch (Exception e) { throw new JobExecutionException("Error executing crawler", e, false); diff --git a/crawler/kissweb/src/org/wamblee/crawler/kiss/servlet/Application.java b/crawler/kissweb/src/org/wamblee/crawler/kiss/servlet/Application.java index 19d3b6fa..9220a62f 100644 --- a/crawler/kissweb/src/org/wamblee/crawler/kiss/servlet/Application.java +++ b/crawler/kissweb/src/org/wamblee/crawler/kiss/servlet/Application.java @@ -19,7 +19,6 @@ package org.wamblee.crawler.kiss.servlet; import javax.servlet.ServletContextEvent; import javax.servlet.ServletContextListener; -import org.quartz.SchedulerException; import org.wamblee.crawler.kiss.scheduling.CrawlerScheduler; import org.wamblee.general.BeanKernel; diff --git a/crawler/kissweb/src/org/wamblee/crawler/kiss/servlet/CrawlerServlet.java b/crawler/kissweb/src/org/wamblee/crawler/kiss/servlet/CrawlerServlet.java index 479c60b7..1f9d08e5 100644 --- a/crawler/kissweb/src/org/wamblee/crawler/kiss/servlet/CrawlerServlet.java +++ b/crawler/kissweb/src/org/wamblee/crawler/kiss/servlet/CrawlerServlet.java @@ -17,15 +17,17 @@ package org.wamblee.crawler.kiss.servlet; import java.io.IOException; -import java.util.Date; +import java.io.OutputStream; import javax.servlet.ServletException; import javax.servlet.http.HttpServlet; import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; -import org.wamblee.crawler.kiss.scheduling.CrawlerSchedule; +import org.wamblee.crawler.kiss.main.Report; +import org.wamblee.crawler.kiss.notification.Notifier; import org.wamblee.crawler.kiss.scheduling.CrawlerScheduler; +import org.wamblee.crawler.kiss.scheduling.CrawlerStatus; import org.wamblee.general.BeanKernel; /** @@ -45,12 +47,21 @@ public class CrawlerServlet extends HttpServlet { CrawlerScheduler scheduler = BeanKernel.getBeanFactory().find( CrawlerScheduler.class); - CrawlerSchedule status = BeanKernel.getBeanFactory().find( - CrawlerSchedule.class); + CrawlerStatus status = BeanKernel.getBeanFactory().find( + CrawlerStatus.class); try { - if ( aRequest.getParameter("runnow") != null ) { - status.setLastExecuted(new Date(System.currentTimeMillis() - 24*3600*1000)); + if (aRequest.getParameter("details") != null) { + Report report = status.getLastReport(); + if (report != null) { + Notifier notifier = BeanKernel.getBeanFactory().find(Notifier.class); + OutputStream os = aResponse.getOutputStream(); + os.write(notifier.asHtml(report.asXml()).getBytes()); + return; + } + } + if (aRequest.getParameter("runnow") != null) { + status.setMustExecute(true); scheduler.scheduleNow(); aResponse.sendRedirect(""); return; @@ -59,10 +70,12 @@ public class CrawlerServlet extends HttpServlet { aRequest.setAttribute("lastExecuted", status.getLastExecuted()); aRequest.setAttribute("lastResult", status.getLastResult()); aRequest.setAttribute("lastException", status.getLastException()); - String msg = ""; - Throwable e = status.getLastException(); - while ( e != null ) { - msg = msg + e.getMessage() + "
"; + aRequest.setAttribute("lastReport", status.getLastReport()); + String msg = ""; + Throwable e = status.getLastException(); + while (e != null) { + msg = msg + e.getClass().getName() + ": " + e.getMessage() + + "
"; e = e.getCause(); } aRequest.setAttribute("lastMessage", msg); -- 2.31.1