X-Git-Url: http://wamblee.org/gitweb/?a=blobdiff_plain;f=crawler%2Fkissweb%2Fsrc%2Forg%2Fwamblee%2Fcrawler%2Fkiss%2Fscheduling%2FCrawlerSchedule.java;h=e341169a82e76e5cad7da3f6cacda4b7cc84097b;hb=26ae3728e2da2d86fe42994b9223fe44690dbfd0;hp=09ada84da8d4f9bf0411ef1b6cb228a252ce4f27;hpb=0ee323ba44aa1653e9daf2e2ebe71ff15d9202f8;p=utils diff --git a/crawler/kissweb/src/org/wamblee/crawler/kiss/scheduling/CrawlerSchedule.java b/crawler/kissweb/src/org/wamblee/crawler/kiss/scheduling/CrawlerSchedule.java index 09ada84d..e341169a 100644 --- a/crawler/kissweb/src/org/wamblee/crawler/kiss/scheduling/CrawlerSchedule.java +++ b/crawler/kissweb/src/org/wamblee/crawler/kiss/scheduling/CrawlerSchedule.java @@ -20,6 +20,9 @@ import java.io.Serializable; import java.util.Calendar; import java.util.Date; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + /** * This class encapsulates the logic for deciding whether to * run the crawler. This provides the mechanism to keep the @@ -29,9 +32,12 @@ import java.util.Date; */ public class CrawlerSchedule implements Serializable { + private static final Log LOG = LogFactory.getLog(CrawlerSchedule.class); + private CrawlerExecutor _crawler; private Date _lastExecuted; - private Exception _lastResult; + private boolean _lastResult; + private Exception _lastException; private int _hourMin; private int _hourMax; @@ -44,7 +50,8 @@ public class CrawlerSchedule implements Serializable { public CrawlerSchedule(CrawlerExecutor aCrawler, int aHourMin, int aHourMax) { _crawler = aCrawler; _lastExecuted = null; - _lastResult = null; + _lastResult = false; + _lastException = null; _hourMin = aHourMin; _hourMax = aHourMax; } @@ -56,15 +63,18 @@ public class CrawlerSchedule implements Serializable { */ public void execute(Date aDate) { if (mustExecute(aDate)) { - try { - _lastResult = null; + LOG.info("Executing crawler at " + aDate); + try { _crawler.execute(aDate); + _lastResult = true; + _lastException = null; } catch (Exception e) { - _lastResult = e; + _lastResult = false; + _lastException = e; } finally { _lastExecuted = aDate; } - } + } } /** @@ -77,11 +87,19 @@ public class CrawlerSchedule implements Serializable { /** * Gets the result of the last execution. + * @return True iff last execution was a success. + */ + public boolean getLastResult() { + return _lastResult; + } + + /** + * Gets the exception thrown by the last execution. * @return null if the last execution was successful or an exception * otherwise. */ - public Exception getLastResult() { - return _lastResult; + public Exception getLastException() { + return _lastException; } /** @@ -99,13 +117,32 @@ public class CrawlerSchedule implements Serializable { if (hour > _hourMax ) { return false; } - if ( hour == _hourMin ) { + + if ( !lastExecutionWasOnSameDay(aDate)) { return true; // First execution of today. - } - if ( _lastResult != null ) { + } + // last execution was on the same day. + if ( !_lastResult ) { return true; // last execution of today was unsuccessful, retry. } - return false; // already run successfully today. } + + private boolean lastExecutionWasOnSameDay(Date aDate) { + if ( _lastExecuted == null ) { + return false; + } + int curDay = getDayOfYear(aDate); + int lastDay = getDayOfYear(_lastExecuted); + return curDay == lastDay; + } + + /** + * @param aDate + */ + private int getDayOfYear(Date aDate) { + Calendar calendar = Calendar.getInstance(); + calendar.setTime(aDate); + return calendar.get(Calendar.DAY_OF_YEAR); + } }