X-Git-Url: http://wamblee.org/gitweb/?a=blobdiff_plain;f=crawler%2Fkissweb%2Fsrc%2Forg%2Fwamblee%2Fcrawler%2Fkiss%2Fscheduling%2FCrawlerSchedule.java;h=5121f92058df76a437ca159e595c1b0e1f089866;hb=3c68ffbb4f2b908015301accc3f4473acc4619e2;hp=e341169a82e76e5cad7da3f6cacda4b7cc84097b;hpb=26ae3728e2da2d86fe42994b9223fe44690dbfd0;p=utils diff --git a/crawler/kissweb/src/org/wamblee/crawler/kiss/scheduling/CrawlerSchedule.java b/crawler/kissweb/src/org/wamblee/crawler/kiss/scheduling/CrawlerSchedule.java index e341169a..5121f920 100644 --- a/crawler/kissweb/src/org/wamblee/crawler/kiss/scheduling/CrawlerSchedule.java +++ b/crawler/kissweb/src/org/wamblee/crawler/kiss/scheduling/CrawlerSchedule.java @@ -43,14 +43,20 @@ public class CrawlerSchedule implements Serializable { /** * Constructs the scheduler. - * @param aCrawler The interface by which the crawler is executed. + * The crawler will run if it is triggered in the range between the minimum (included) + * and maximum (included) hour of the day if either + * + * @param aCrawler The interface through which the crawler is executed. * @param aHourMin The crawler may only run if hour >= aHourMin * @param aHourMax The crawler may only run if hour <= aHourMax */ public CrawlerSchedule(CrawlerExecutor aCrawler, int aHourMin, int aHourMax) { _crawler = aCrawler; - _lastExecuted = null; - _lastResult = false; + _lastExecuted = new Date(); + _lastResult = true; // the crawler will automatically run the next day. _lastException = null; _hourMin = aHourMin; _hourMax = aHourMax; @@ -62,6 +68,7 @@ public class CrawlerSchedule implements Serializable { * @param aDate Time at which we are executing now. */ public void execute(Date aDate) { + if (mustExecute(aDate)) { LOG.info("Executing crawler at " + aDate); try { @@ -85,6 +92,10 @@ public class CrawlerSchedule implements Serializable { return _lastExecuted; } + public void setLastExecuted(Date aDate) { + _lastExecuted = aDate; + } + /** * Gets the result of the last execution. * @return True iff last execution was a success. @@ -108,11 +119,14 @@ public class CrawlerSchedule implements Serializable { * @return True iff the crawler must be run. */ private boolean mustExecute(Date aDate) { + if ( _lastExecuted == null ) { + return false; // crawler must be started manually at least once after deployment. + } Calendar calendar = Calendar.getInstance(); calendar.setTime(aDate); int hour = calendar.get(Calendar.HOUR_OF_DAY); if ( hour < _hourMin ) { - return false; + return false; } if (hour > _hourMax ) { return false; @@ -128,17 +142,24 @@ public class CrawlerSchedule implements Serializable { return false; // already run successfully today. } + /** + * Determines if the last execution was on the same day. + * @param aDate Current time. + * @return True iff last execution was on the same day. + */ private boolean lastExecutionWasOnSameDay(Date aDate) { if ( _lastExecuted == null ) { return false; } int curDay = getDayOfYear(aDate); int lastDay = getDayOfYear(_lastExecuted); - return curDay == lastDay; + return curDay == lastDay; // check can be invalid only if scheduling interval is one year, + // which is ridiculous. } /** - * @param aDate + * Gets the day of the year + * @param aDate Date to compute day for. */ private int getDayOfYear(Date aDate) { Calendar calendar = Calendar.getInstance();