/* * Copyright 2006 the original author or authors. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.wamblee.crawler.kiss.scheduling; import java.io.Serializable; import java.util.Calendar; import java.util.Date; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.wamblee.crawler.kiss.main.Report; /** * This class encapsulates the logic for deciding whether to * run the crawler. This provides the mechanism to keep the * scheduler simple (e.g. scheduling every hour) and providing * more complex logic for determining whether to run the * crawler. */ public class CrawlerStatus implements Serializable { private static final Log LOG = LogFactory.getLog(CrawlerStatus.class); private CrawlerExecutor _crawler; private Date _lastExecuted; private boolean _lastResult; private Exception _lastException; private Report _lastReport; private int _hourMin; private int _hourMax; private boolean _mustExecute; /** * Constructs the scheduler. * The crawler will run if it is triggered in the range between the minimum (included) * and maximum (included) hour of the day if either *
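it is the first trigger on the current day or the last execution on the current day failed.
* @param aCrawler Executor through which the crawler is run.
* @param aHourMin The crawler may only run if hour >= aHourMin
* @param aHourMax The crawler may only run if hour <= aHourMax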
*/
public CrawlerStatus(CrawlerExecutor aCrawler, int aHourMin, int aHourMax) {
    // Collaborator and the allowed hour-of-day window (both bounds inclusive).
    _crawler = aCrawler;
    _hourMin = aHourMin;
    _hourMax = aHourMax;

    // Start out as if a successful run had just happened: the day of construction
    // counts as done, so the crawler will automatically run the next day.
    _lastExecuted = new Date();
    _lastResult = true;
    _lastException = null;
    _lastReport = null;

    // No forced run is pending until setMustExecute(true) is called.
    _mustExecute = false;
}
/**
 * Requests, or withdraws a request for, a forced run of the crawler.
 *
 * @param aMustExecute If true, the next trigger by the scheduler runs the crawler
 *            regardless of the hour window and of earlier runs on that day; if
 *            false, any pending request is cleared.
 */
public void setMustExecute(boolean aMustExecute) {
    this._mustExecute = aMustExecute;
}
/**
 * Entry point for the scheduled job. Decides whether the crawler has to run at the
 * given time and, if it does, runs it and records the outcome. This encapsulates the
 * rules for retrying and scheduling the crawler.
 *
 * @param aDate Time at which we are executing now.
 */
public void execute(Date aDate) {
    if (!mustExecute(aDate)) {
        return; // Nothing to do for this trigger.
    }

    LOG.info("Executing crawler at " + aDate);
    Report currentReport = new Report();
    try {
        _crawler.execute(aDate, currentReport);
        _lastResult = true;
        _lastException = null;
    } catch (Exception e) {
        // A failure is recorded rather than propagated; it can be inspected through
        // getLastResult() and getLastException().
        _lastResult = false;
        _lastException = e;
    } finally {
        // Time and report are recorded whatever the outcome of the run.
        _lastExecuted = aDate;
        _lastReport = currentReport;
    }
}
/**
 * Returns the time recorded for the most recent execution. Until the crawler has
 * been executed through this object, this is the time set by the constructor.
 *
 * @return Time of last execution.
 */
public Date getLastExecuted() {
    return this._lastExecuted;
}
/**
 * Returns the outcome of the most recent execution.
 *
 * @return True iff the last execution completed without throwing an exception; also
 *         true as long as no execution has been recorded yet.
 */
public boolean getLastResult() {
    return this._lastResult;
}
/**
 * Returns the exception that made the most recent execution fail.
 *
 * @return The exception caught during the last execution, or null if that execution
 *         was successful or no execution has been recorded yet.
 */
public Exception getLastException() {
    return this._lastException;
}
/**
 * Returns the report that was handed to the crawler during the most recent execution.
 *
 * @return The last report, or null if no execution has been recorded yet.
 */
public Report getLastReport() {
    return this._lastReport;
}
/**
 * Decides whether the crawler has to run for the current trigger.
 * <p>
 * A pending forced run always wins and is consumed by this call. Otherwise the
 * crawler runs only inside the configured hour window (bounds inclusive), and then
 * only if it has not run yet today or if today's last run failed.
 *
 * @param aDate Current time.
 * @return True iff the crawler must be run.
 */
private boolean mustExecute(Date aDate) {
    if (_mustExecute) {
        _mustExecute = false; // A forced run is one-shot: clear the request.
        return true;
    }
    if (_lastExecuted == null) {
        return false; // crawler must be started manually at least once after deployment.
    }

    Calendar now = Calendar.getInstance();
    now.setTime(aDate);
    int hourOfDay = now.get(Calendar.HOUR_OF_DAY);

    boolean insideWindow = _hourMin <= hourOfDay && hourOfDay <= _hourMax;
    if (!insideWindow) {
        return false;
    }

    // Inside the window: run on the first trigger of the day, or retry when the last
    // execution of today was unsuccessful. Otherwise it already ran successfully today.
    return !lastExecutionWasOnSameDay(aDate) || !_lastResult;
}
/**
 * Determines if the last execution was on the same calendar day as the given time.
 * <p>
 * Both the year and the day of the year are compared, so a last execution on the
 * same day number of an earlier year (for instance after a long downtime) is not
 * mistaken for an execution of today.
 *
 * @param aDate Current time.
 * @return True iff last execution was on the same day; false if no last execution
 *         time is known.
 */
private boolean lastExecutionWasOnSameDay(Date aDate) {
    if (_lastExecuted == null) {
        return false;
    }

    Calendar current = Calendar.getInstance();
    current.setTime(aDate);
    Calendar previous = Calendar.getInstance();
    previous.setTime(_lastExecuted);

    // The day of the year alone repeats every year, so the year must match as well.
    boolean sameYear = current.get(Calendar.YEAR) == previous.get(Calendar.YEAR);
    return sameYear && getDayOfYear(aDate) == getDayOfYear(_lastExecuted);
}
/**
 * Computes the day of the year for a date, using a calendar obtained from
 * {@link Calendar#getInstance()}.
 *
 * @param aDate Date to compute day for.
 * @return The value of {@link Calendar#DAY_OF_YEAR} for the given date.
 */
private int getDayOfYear(Date aDate) {
    Calendar cal = Calendar.getInstance();
    cal.setTime(aDate);
    int dayOfYear = cal.get(Calendar.DAY_OF_YEAR);
    return dayOfYear;
}
}