import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
+import org.wamblee.crawler.kiss.main.Report;
/**
* This class encapsulates the (possibly more complex) logic for
* determining whether to run the crawler.
*/
-public class CrawlerSchedule implements Serializable {
+public class CrawlerStatus implements Serializable {
- private static final Log LOG = LogFactory.getLog(CrawlerSchedule.class);
+ private static final Log LOG = LogFactory.getLog(CrawlerStatus.class);
private CrawlerExecutor _crawler;
private Date _lastExecuted;
private boolean _lastResult;
private Exception _lastException;
+ private Report _lastReport;
private int _hourMin;
private int _hourMax;
+ private boolean _mustExecute;
/**
* Constructs the scheduler.
+ * The initial state records the construction time as the last execution time, a
+ * successful last result, no last exception, no last report, and no pending
+ * forced execution.
+ * @param aCrawler The crawler executor to store; it is the object invoked when the crawler is run.
* @param aHourMin The crawler may only run if hour >= <code>aHourMin</code>
* @param aHourMax The crawler may only run if hour <= <code>aHourMax</code>
*/
- public CrawlerSchedule(CrawlerExecutor aCrawler, int aHourMin, int aHourMax) {
+ public CrawlerStatus(CrawlerExecutor aCrawler, int aHourMin, int aHourMax) {
_crawler = aCrawler;
_lastExecuted = new Date();
_lastResult = true; // the crawler will automatically run the next day.
- _lastException = null;
+ _lastException = null;
+ _lastReport = null;
_hourMin = aHourMin;
_hourMax = aHourMax;
+ _mustExecute = false;
+ }
+
+ /**
+ * Determines whether or not the crawler must be run the next time it is triggered.
+ * The flag is one-shot: the run check tests it before any other condition and
+ * clears it again as soon as it has been honored.
+ * @param aMustExecute If true then the crawler will run the next time it is triggered
+ * by the scheduler.
+ */
+ public void setMustExecute(boolean aMustExecute) {
+ _mustExecute = aMustExecute;
}
/**
if (mustExecute(aDate)) {
LOG.info("Executing crawler at " + aDate);
- try {
- _crawler.execute(aDate);
+ Report report = new Report();
+ try {
+ _crawler.execute(aDate, report);
_lastResult = true;
_lastException = null;
} catch (Exception e) {
_lastException = e;
} finally {
_lastExecuted = aDate;
+ _lastReport = report;
}
}
}
return _lastExecuted;
}
- public void setLastExecuted(Date aDate) {
- _lastExecuted = aDate;
- }
-
/**
* Gets the result of the last execution.
* @return True iff last execution was a success.
return _lastException;
}
+ /**
+ * Gets the last report from the scheduler.
+ * The report is stored whenever an execution finishes, including an execution
+ * that ended with an exception.
+ * @return Report of the most recent execution, or null if no execution has
+ * stored one since construction.
+ */
+ public Report getLastReport() {
+ return _lastReport;
+ }
+
/**
* Determines whether or not the crawler must be run.
* @param aDate Current time.
* @return True iff the crawler must be run.
*/
private boolean mustExecute(Date aDate) {
+ if (_mustExecute) {
+ _mustExecute = false;
+ return true;
+ }
if ( _lastExecuted == null ) {
return false; // crawler must be started manually at least once after deployment.
}