/* * Copyright 2006 the original author or authors. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.wamblee.crawler.kiss.scheduling; import java.io.Serializable; import java.util.Calendar; import java.util.Date; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.wamblee.crawler.kiss.main.Report; /** * This class encapsulates the logic for deciding whether to * run the crawler. This provides the mechanism to keep the * scheduler simple (e.g. scheduling every hour) and providing * more complex logic for determining whether to run the * crawler. */ public class CrawlerStatus implements Serializable { private static final Log LOG = LogFactory.getLog(CrawlerStatus.class); private CrawlerExecutor _crawler; private Date _lastExecuted; private boolean _lastResult; private Exception _lastException; private Report _lastReport; private int _hourMin; private int _hourMax; private boolean _mustExecute; /** * Constructs the scheduler. * The crawler will run if it is triggered in the range between the minimum (included) * and maximum (included) hour of the day if either * * @param aCrawler The interface through which the crawler is executed. * @param aHourMin The crawler may only run if hour >= aHourMin * @param aHourMax The crawler may only run if hour <= aHourMax */ public CrawlerStatus(CrawlerExecutor aCrawler, int aHourMin, int aHourMax) { _crawler = aCrawler; _lastExecuted = new Date(); _lastResult = true; // the crawler will automatically run the next day. _lastException = null; _lastReport = null; _hourMin = aHourMin; _hourMax = aHourMax; _mustExecute = false; } /** * Determines whether or not the crawler must be run the next time it is triggered. * @param aMustExecute If true then the crawler will run the next time it is triggered * by the scheduler. */ public void setMustExecute(boolean aMustExecute) { _mustExecute = aMustExecute; } /** * Called by a scheduled job. This determines whether the crawler must be run or * not. This encapsulates the rukes for retrying and scheduling the crawler. * @param aDate Time at which we are executing now. */ public void execute(Date aDate) { if (mustExecute(aDate)) { LOG.info("Executing crawler at " + aDate); Report report = new Report(); try { _crawler.execute(aDate, report); _lastResult = true; _lastException = null; } catch (Exception e) { _lastResult = false; _lastException = e; } finally { _lastExecuted = aDate; _lastReport = report; } } } /** * Gets the time the crawler was last executed. * @return Time of last execution. */ public Date getLastExecuted() { return _lastExecuted; } /** * Gets the result of the last execution. * @return True iff last execution was a success. */ public boolean getLastResult() { return _lastResult; } /** * Gets the exception thrown by the last execution. * @return null if the last execution was successful or an exception * otherwise. */ public Exception getLastException() { return _lastException; } /** * Gets the last report from the scheduler. * @return Report. */ public Report getLastReport() { return _lastReport; } /** * Determines whether or not the crawler must be run. * @param aDate Current time. * @return True iff the crawler must be run. */ private boolean mustExecute(Date aDate) { if (_mustExecute) { _mustExecute = false; return true; } if ( _lastExecuted == null ) { return false; // crawler must be started manually at least once after deployment. } Calendar calendar = Calendar.getInstance(); calendar.setTime(aDate); int hour = calendar.get(Calendar.HOUR_OF_DAY); if ( hour < _hourMin ) { return false; } if (hour > _hourMax ) { return false; } if ( !lastExecutionWasOnSameDay(aDate)) { return true; // First execution of today. } // last execution was on the same day. if ( !_lastResult ) { return true; // last execution of today was unsuccessful, retry. } return false; // already run successfully today. } /** * Determines if the last execution was on the same day. * @param aDate Current time. * @return True iff last execution was on the same day. */ private boolean lastExecutionWasOnSameDay(Date aDate) { if ( _lastExecuted == null ) { return false; } int curDay = getDayOfYear(aDate); int lastDay = getDayOfYear(_lastExecuted); return curDay == lastDay; // check can be invalid only if scheduling interval is one year, // which is ridiculous. } /** * Gets the day of the year * @param aDate Date to compute day for. */ private int getDayOfYear(Date aDate) { Calendar calendar = Calendar.getInstance(); calendar.setTime(aDate); return calendar.get(Calendar.DAY_OF_YEAR); } }