2 * Copyright 2006 the original author or authors.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 package org.wamblee.crawler.kiss.scheduling;
19 import java.io.Serializable;
20 import java.util.Calendar;
21 import java.util.Date;
23 import org.apache.commons.logging.Log;
24 import org.apache.commons.logging.LogFactory;
27 * This class encapsulates the logic for deciding whether to
28 * run the crawler. This provides the mechanism to keep the
29 * scheduler simple (e.g. scheduling every hour) and providing
30 * more complex logic for determining whether to run the
33 public class CrawlerSchedule implements Serializable {
35 private static final Log LOG = LogFactory.getLog(CrawlerSchedule.class);
37 private CrawlerExecutor _crawler;
38 private Date _lastExecuted;
39 private boolean _lastResult;
40 private Exception _lastException;
45 * Constructs the scheduler.
46 * The crawler will run if it is triggered in the range between the minimum (included)
47 * and maximum (included) hour of the day if either
49 * <li>it is triggered for the first time on the current day.</li>
50 * <li>an earlier crawling attempt on the same day failed. </li>
52 * @param aCrawler The interface through which the crawler is executed.
53 * @param aHourMin The crawler may only run if hour >= <code>aHourMin</code>
54 * @param aHourMax The crawler may only run if hour <= <code>aHourMax</code>
56 public CrawlerSchedule(CrawlerExecutor aCrawler, int aHourMin, int aHourMax) {
58 _lastExecuted = new Date();
59 _lastResult = true; // the crawler will automatically run the next day.
60 _lastException = null;
66 * Called by a scheduled job. This determines whether the crawler must be run or
67 * not. This encapsulates the rukes for retrying and scheduling the crawler.
68 * @param aDate Time at which we are executing now.
70 public void execute(Date aDate) {
72 if (mustExecute(aDate)) {
73 LOG.info("Executing crawler at " + aDate);
75 _crawler.execute(aDate);
77 _lastException = null;
78 } catch (Exception e) {
82 _lastExecuted = aDate;
88 * Gets the time the crawler was last executed.
89 * @return Time of last execution.
91 public Date getLastExecuted() {
95 public void setLastExecuted(Date aDate) {
96 _lastExecuted = aDate;
100 * Gets the result of the last execution.
101 * @return True iff last execution was a success.
103 public boolean getLastResult() {
108 * Gets the exception thrown by the last execution.
109 * @return null if the last execution was successful or an exception
112 public Exception getLastException() {
113 return _lastException;
117 * Determines whether or not the crawler must be run.
118 * @param aDate Current time.
119 * @return True iff the crawler must be run.
121 private boolean mustExecute(Date aDate) {
122 if ( _lastExecuted == null ) {
123 return false; // crawler must be started manually at least once after deployment.
125 Calendar calendar = Calendar.getInstance();
126 calendar.setTime(aDate);
127 int hour = calendar.get(Calendar.HOUR_OF_DAY);
128 if ( hour < _hourMin ) {
131 if (hour > _hourMax ) {
135 if ( !lastExecutionWasOnSameDay(aDate)) {
136 return true; // First execution of today.
138 // last execution was on the same day.
139 if ( !_lastResult ) {
140 return true; // last execution of today was unsuccessful, retry.
142 return false; // already run successfully today.
146 * Determines if the last execution was on the same day.
147 * @param aDate Current time.
148 * @return True iff last execution was on the same day.
150 private boolean lastExecutionWasOnSameDay(Date aDate) {
151 if ( _lastExecuted == null ) {
154 int curDay = getDayOfYear(aDate);
155 int lastDay = getDayOfYear(_lastExecuted);
156 return curDay == lastDay; // check can be invalid only if scheduling interval is one year,
157 // which is ridiculous.
161 * Gets the day of the year
162 * @param aDate Date to compute day for.
164 private int getDayOfYear(Date aDate) {
165 Calendar calendar = Calendar.getInstance();
166 calendar.setTime(aDate);
167 return calendar.get(Calendar.DAY_OF_YEAR);