X-Git-Url: http://wamblee.org/gitweb/?a=blobdiff_plain;f=trunk%2Fcrawler%2Fkissweb%2Fsrc%2Forg%2Fwamblee%2Fcrawler%2Fkiss%2Fscheduling%2FCrawlerStatus.java;fp=trunk%2Fcrawler%2Fkissweb%2Fsrc%2Forg%2Fwamblee%2Fcrawler%2Fkiss%2Fscheduling%2FCrawlerSchedule.java;h=065a229487b6d87fded759f7ba5e03fb2da6fba8;hb=4819218c6211c7ff0bcd4646c10b891abfc1a020;hp=5121f92058df76a437ca159e595c1b0e1f089866;hpb=1aa8a4b42297af33fe68811cacba7a3721b9ea2d;p=utils

diff --git a/trunk/crawler/kissweb/src/org/wamblee/crawler/kiss/scheduling/CrawlerSchedule.java b/trunk/crawler/kissweb/src/org/wamblee/crawler/kiss/scheduling/CrawlerStatus.java
similarity index 82%
rename from trunk/crawler/kissweb/src/org/wamblee/crawler/kiss/scheduling/CrawlerSchedule.java
rename to trunk/crawler/kissweb/src/org/wamblee/crawler/kiss/scheduling/CrawlerStatus.java
index 5121f920..065a2294 100644
--- a/trunk/crawler/kissweb/src/org/wamblee/crawler/kiss/scheduling/CrawlerSchedule.java
+++ b/trunk/crawler/kissweb/src/org/wamblee/crawler/kiss/scheduling/CrawlerStatus.java
@@ -22,6 +22,7 @@ import java.util.Date;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
+import org.wamblee.crawler.kiss.main.Report;
 
 /**
  * This class encapsulates the logic for deciding whether to
@@ -30,16 +31,18 @@ import org.apache.commons.logging.LogFactory;
  * more complex logic for determining whether to run the
  * crawler.
  */
-public class CrawlerSchedule implements Serializable {
+public class CrawlerStatus implements Serializable {
 
-    private static final Log LOG = LogFactory.getLog(CrawlerSchedule.class);
+    private static final Log LOG = LogFactory.getLog(CrawlerStatus.class);
 
     private CrawlerExecutor _crawler;
     private Date _lastExecuted;
     private boolean _lastResult;
     private Exception _lastException;
+    private Report _lastReport;
     private int _hourMin;
     private int _hourMax;
+    private boolean _mustExecute;
 
     /**
      * Constructs the scheduler.
@@ -53,13 +56,24 @@ public class CrawlerSchedule implements Serializable {
      * @param aHourMin The crawler may only run if hour >= aHourMin
      * @param aHourMax The crawler may only run if hour <= aHourMax
      */
-    public CrawlerSchedule(CrawlerExecutor aCrawler, int aHourMin, int aHourMax) {
+    public CrawlerStatus(CrawlerExecutor aCrawler, int aHourMin, int aHourMax) {
         _crawler = aCrawler;
         _lastExecuted = new Date();
         _lastResult = true; // the crawler will automatically run the next day.
-        _lastException = null;
+        _lastException = null;
+        _lastReport = null;
         _hourMin = aHourMin;
         _hourMax = aHourMax;
+        _mustExecute = false;
+    }
+
+    /**
+     * Determines whether or not the crawler must be run the next time it is triggered.
+     * @param aMustExecute If true then the crawler will run the next time it is triggered
+     *   by the scheduler.
+     */
+    public void setMustExecute(boolean aMustExecute) {
+        _mustExecute = aMustExecute;
     }
 
     /**
@@ -71,8 +85,9 @@
 
         if (mustExecute(aDate)) {
             LOG.info("Executing crawler at " + aDate);
-            try {
-                _crawler.execute(aDate);
+            Report report = new Report();
+            try {
+                _crawler.execute(aDate, report);
                 _lastResult = true;
                 _lastException = null;
             } catch (Exception e) {
@@ -80,6 +95,7 @@
                 _lastException = e;
             } finally {
                 _lastExecuted = aDate;
+                _lastReport = report;
             }
         }
     }
@@ -92,10 +108,6 @@
         return _lastExecuted;
     }
 
-    public void setLastExecuted(Date aDate) {
-        _lastExecuted = aDate;
-    }
-
     /**
      * Gets the result of the last execution.
      * @return True iff last execution was a success.
@@ -113,12 +125,24 @@
         return _lastException;
     }
 
+    /**
+     * Gets the last report from the scheduler.
+     * @return Report.
+     */
+    public Report getLastReport() {
+        return _lastReport;
+    }
+
     /**
      * Determines whether or not the crawler must be run.
      * @param aDate Current time.
      * @return True iff the crawler must be run.
      */
     private boolean mustExecute(Date aDate) {
+        if (_mustExecute) {
+            _mustExecute = false;
+            return true;
+        }
         if ( _lastExecuted == null ) {
             return false; // crawler must be started manually at least once after deployment.
         }
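The sketch below is not part of the patch; it is only a minimal illustration of how the renamed CrawlerStatus class might be driven, using the members visible in the diff (the constructor, setMustExecute, getLastExecuted and getLastReport). The class name CrawlerStatusSketch, the package placement, and the way the CrawlerExecutor instance is obtained are assumptions; the scheduler trigger that would normally invoke the class is not shown in the diff and is therefore omitted.

// Assumed to live in the same package as CrawlerStatus and CrawlerExecutor;
// the actual package of CrawlerExecutor is not visible in the diff.
package org.wamblee.crawler.kiss.scheduling;

import java.util.Date;

import org.wamblee.crawler.kiss.main.Report;

public class CrawlerStatusSketch {

    // aExecutor must be supplied by the application; how it is constructed
    // is not visible in the diff above.
    public static void sketch(CrawlerExecutor aExecutor) {
        // Per the constructor javadoc, the crawler may only run when the
        // hour is between 2 and 6 (inclusive).
        CrawlerStatus status = new CrawlerStatus(aExecutor, 2, 6);

        // Force a run on the next scheduler trigger, bypassing the normal
        // scheduling decision once.
        status.setMustExecute(true);

        // After a scheduler-triggered run, the outcome can be inspected.
        Date lastRun = status.getLastExecuted();
        Report lastReport = status.getLastReport(); // null until a run has completed
    }
}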