2 * Copyright 2006 the original author or authors.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 package org.wamblee.crawler.kiss.scheduling;
19 import java.io.Serializable;
20 import java.util.Calendar;
21 import java.util.Date;
23 import org.apache.commons.logging.Log;
24 import org.apache.commons.logging.LogFactory;
25 import org.wamblee.crawler.kiss.main.Report;
28 * This class encapsulates the logic for deciding whether to
29 * run the crawler. This provides the mechanism to keep the
30 * scheduler simple (e.g. scheduling every hour) while providing
31 * more complex logic for determining whether to run the crawler.
34 public class CrawlerStatus implements Serializable {
// Logger for this class.
36 private static final Log LOG = LogFactory.getLog(CrawlerStatus.class);
// Interface through which the crawler is executed.
// NOTE(review): the class is Serializable, so CrawlerExecutor and Report presumably
// need to be serializable as well — confirm.
38 private CrawlerExecutor _crawler;
// Time the crawler was last executed; set to the construction time initially and
// updated by execute().
39 private Date _lastExecuted;
// True iff the last execution was a success; starts out as true.
40 private boolean _lastResult;
// Exception recorded for the last execution; null when there is none.
41 private Exception _lastException;
// Last report. NOTE(review): presumably the report filled by the most recent run — confirm.
42 private Report _lastReport;
// Request to run the crawler the next time it is triggered; see setMustExecute().
45 private boolean _mustExecute;
48 * Constructs the scheduler.
49 * The crawler will run if it is triggered in the range between the minimum (included)
50 * and maximum (included) hour of the day if either
52 * <li>it is triggered for the first time on the current day.</li>
53 * <li>an earlier crawling attempt on the same day failed. </li>
55 * @param aCrawler The interface through which the crawler is executed.
56 * @param aHourMin The crawler may only run if hour >= <code>aHourMin</code>
57 * @param aHourMax The crawler may only run if hour <= <code>aHourMax</code>
59 public CrawlerStatus(CrawlerExecutor aCrawler, int aHourMin, int aHourMax) {
// NOTE(review): the lines reviewed here do not store aCrawler, aHourMin or aHourMax,
// although _crawler, _hourMin and _hourMax are read by execute() and mustExecute() —
// confirm that these assignments are present.
// The initial state is "executed successfully just now": with the last execution on
// the current day and a successful result, the once-per-day and retry rules in
// mustExecute() do not ask for another run today.
61 _lastExecuted = new Date();
62 _lastResult = true; // the crawler will automatically run the next day.
63 _lastException = null;
71 * Determines whether or not the crawler must be run the next time it is triggered.
72 * @param aMustExecute If true then the crawler will run the next time it is triggered
75 public void setMustExecute(boolean aMustExecute) {
76 _mustExecute = aMustExecute;
80 * Called by a scheduled job. This determines whether the crawler must be run or
81 * not. This encapsulates the rules for retrying and scheduling the crawler.
82 * @param aDate Time at which we are executing now.
84 public void execute(Date aDate) {
// Run only when mustExecute() allows a run for this trigger time.
86 if (mustExecute(aDate)) {
87 LOG.info("Executing crawler at " + aDate);
// A fresh report is created for every run and handed to the crawler together
// with the trigger time.
88 Report report = new Report();
90 _crawler.execute(aDate, report);
// Reached only when the crawler call above did not throw: no exception to report.
92 _lastException = null;
// Any exception thrown by the crawler is caught here rather than propagated to
// the scheduled job.
93 } catch (Exception e) {
// The trigger time is recorded as the time of the last execution.
97 _lastExecuted = aDate;
104 * Gets the time the crawler was last executed.
105 * @return Time of last execution.
107 public Date getLastExecuted() {
108 return _lastExecuted;
112 * Gets the result of the last execution.
113 * @return True iff last execution was a success.
115 public boolean getLastResult() {
120 * Gets the exception thrown by the last execution.
121 * @return null if the last execution was successful or an exception
124 public Exception getLastException() {
125 return _lastException;
129 * Gets the last report from the scheduler.
132 public Report getLastReport() {
137 * Determines whether or not the crawler must be run.
138 * @param aDate Current time.
139 * @return True iff the crawler must be run.
141 private boolean mustExecute(Date aDate) {
// The forced-run request is cleared here.
// NOTE(review): the condition guarding this reset is not among the lines reviewed —
// confirm it only applies when a run was requested through setMustExecute().
143 _mustExecute = false;
// No recorded execution time at all: no automatic run.
146 if ( _lastExecuted == null ) {
147 return false; // crawler must be started manually at least once after deployment.
// Hour of the day (0-23) of the trigger time, taken from a calendar in the default
// time zone and locale.
149 Calendar calendar = Calendar.getInstance();
150 calendar.setTime(aDate);
151 int hour = calendar.get(Calendar.HOUR_OF_DAY);
// Trigger hour lies before the allowed window.
152 if ( hour < _hourMin ) {
// Trigger hour lies after the allowed window.
155 if (hour > _hourMax ) {
// From here on the decision depends on the last execution: run once per day,
// and retry when the run of the same day failed.
159 if ( !lastExecutionWasOnSameDay(aDate)) {
160 return true; // First execution of today.
162 // last execution was on the same day.
163 if ( !_lastResult ) {
164 return true; // last execution of today was unsuccessful, retry.
166 return false; // already run successfully today.
170 * Determines if the last execution was on the same day.
171 * @param aDate Current time.
172 * @return True iff last execution was on the same day.
174 private boolean lastExecutionWasOnSameDay(Date aDate) {
175 if ( _lastExecuted == null ) {
178 int curDay = getDayOfYear(aDate);
179 int lastDay = getDayOfYear(_lastExecuted);
180 return curDay == lastDay; // check can be invalid only if scheduling interval is one year,
181 // which is ridiculous.
185 * Gets the day of the year
186 * @param aDate Date to compute day for.
188 private int getDayOfYear(Date aDate) {
189 Calendar calendar = Calendar.getInstance();
190 calendar.setTime(aDate);
191 return calendar.get(Calendar.DAY_OF_YEAR);