/*
* Copyright 2006 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.wamblee.crawler.kiss.scheduling;
import java.io.Serializable;
import java.util.Calendar;
import java.util.Date;
/**
 * Decides whether the crawler should actually be run each time the scheduler
 * fires. This keeps the scheduler itself simple (e.g. firing every hour)
 * while the more complex run/retry rules live in this class.
 *
 * <p>The rules, as implemented by {@link #execute(Date)}: the crawler only
 * runs when the hour of day lies in the inclusive range
 * {@code [hourMin, hourMax]}; it always runs during the {@code hourMin} hour,
 * and during later hours of the window it runs only when the previously
 * recorded result is a failure.
 */
public class CrawlerSchedule implements Serializable {

    // NOTE(review): the class is Serializable without an explicit
    // serialVersionUID, so the field declarations below are part of the
    // default serialized form and are deliberately left untouched.
    private CrawlerExecutor _crawler;

    private Date _lastExecuted;

    private Exception _lastResult;

    private int _hourMin;

    private int _hourMax;

    /**
     * Constructs the schedule. Initially there is no last execution time and
     * no last result.
     *
     * @param aCrawler The interface by which the crawler is executed.
     * @param aHourMin The crawler may only run if hour &gt;= aHourMin.
     * @param aHourMax The crawler may only run if hour &lt;= aHourMax.
     */
    public CrawlerSchedule(CrawlerExecutor aCrawler, int aHourMin, int aHourMax) {
        this._crawler = aCrawler;
        this._hourMin = aHourMin;
        this._hourMax = aHourMax;
        this._lastExecuted = null;
        this._lastResult = null;
    }

    /**
     * Called by a scheduled job. Determines whether the crawler must be run
     * now and, if so, runs it. This encapsulates the rules for retrying and
     * scheduling the crawler.
     *
     * <p>When a run is attempted, an {@link Exception} thrown by the crawler
     * is not propagated; it is stored and made available through
     * {@link #getLastResult()}. The time of the attempt is recorded whether
     * or not the attempt succeeded.
     *
     * @param aDate Time at which we are executing now.
     */
    public void execute(Date aDate) {
        if (!mustExecute(aDate)) {
            return; // Nothing to do at this time; recorded state is left as is.
        }

        // Start from a clean slate so that a successful run reports null.
        _lastResult = null;
        try {
            _crawler.execute(aDate);
        } catch (Exception failure) {
            // Remember the failure; it triggers a retry on a later invocation.
            _lastResult = failure;
            // NOTE(review): only Exception is caught. An Error thrown by the
            // crawler propagates to the caller and leaves the last result
            // null, i.e. indistinguishable from success - confirm intended.
        } finally {
            // NOTE(review): the caller's Date instance is stored as is (no
            // copy is made).
            _lastExecuted = aDate;
        }
    }

    /**
     * Gets the time the crawler was last executed.
     *
     * @return Time passed to the most recent call of {@link #execute(Date)}
     *         that actually attempted a run, or null if no run has been
     *         attempted yet.
     */
    public Date getLastExecuted() {
        return _lastExecuted;
    }

    /**
     * Gets the result of the last execution.
     *
     * @return null if the last execution was successful (or nothing has been
     *         executed yet), or the exception thrown by the crawler otherwise.
     */
    public Exception getLastResult() {
        return _lastResult;
    }

    /**
     * Determines whether or not the crawler must be run.
     *
     * @param aDate Current time.
     * @return True iff the crawler must be run.
     */
    private boolean mustExecute(Date aDate) {
        int currentHour = hourOfDay(aDate);

        boolean insideWindow = currentHour >= _hourMin && currentHour <= _hourMax;
        if (!insideWindow) {
            return false; // Too early or too late in the day.
        }

        // NOTE(review): every invocation during the hourMin hour qualifies,
        // so a scheduler firing more than once within that hour runs the
        // crawler each time - presumably fine for hourly scheduling.
        boolean firstRunOfToday = currentHour == _hourMin;

        // A recorded failure means the last attempt was unsuccessful: retry.
        boolean previousRunFailed = _lastResult != null;

        // NOTE(review): when neither holds this is treated as "already run
        // successfully today"; that is also the outcome if no run has taken
        // place at all since construction - confirm intended.
        return firstRunOfToday || previousRunFailed;
    }

    /**
     * Extracts the hour of day (0-23) from a point in time, using the
     * calendar for the default time zone and locale.
     *
     * @param aDate Point in time to inspect.
     * @return Hour of day of the given time.
     */
    private static int hourOfDay(Date aDate) {
        Calendar moment = Calendar.getInstance();
        moment.setTime(aDate);
        return moment.get(Calendar.HOUR_OF_DAY);
    }
}