/*
 * Copyright 2006 the original author or authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
17 package org.wamblee.crawler.kiss.scheduling;
19 import java.io.Serializable;
20 import java.util.Calendar;
21 import java.util.Date;
23 import org.apache.commons.logging.Log;
24 import org.apache.commons.logging.LogFactory;
27 * This class encapsulates the logic for deciding whether to
28 * run the crawler. This provides the mechanism to keep the
29 * scheduler simple (e.g. scheduling every hour) and providing
30 * more complex logic for determining whether to run the
33 public class CrawlerSchedule implements Serializable {
35 private static final Log LOG = LogFactory.getLog(CrawlerSchedule.class);
37 private CrawlerExecutor _crawler;
38 private Date _lastExecuted;
39 private boolean _lastResult;
40 private Exception _lastException;
45 * Constructs the scheduler.
46 * @param aCrawler The interface by which the crawler is executed.
47 * @param aHourMin The crawler may only run if hour >= <code>aHourMin</code>
48 * @param aHourMax The crawler may only run if hour <= <code>aHourMax</code>
50 public CrawlerSchedule(CrawlerExecutor aCrawler, int aHourMin, int aHourMax) {
54 _lastException = null;
60 * Called by a scheduled job. This determines whether the crawler must be run or
61 * not. This encapsulates the rukes for retrying and scheduling the crawler.
62 * @param aDate Time at which we are executing now.
64 public void execute(Date aDate) {
65 if (mustExecute(aDate)) {
66 LOG.info("Executing crawler at " + aDate);
68 _crawler.execute(aDate);
70 _lastException = null;
71 } catch (Exception e) {
75 _lastExecuted = aDate;
81 * Gets the time the crawler was last executed.
82 * @return Time of last execution.
84 public Date getLastExecuted() {
89 * Gets the result of the last execution.
90 * @return True iff last execution was a success.
92 public boolean getLastResult() {
97 * Gets the exception thrown by the last execution.
98 * @return null if the last execution was successful or an exception
101 public Exception getLastException() {
102 return _lastException;
106 * Determines whether or not the crawler must be run.
107 * @param aDate Current time.
108 * @return True iff the crawler must be run.
110 private boolean mustExecute(Date aDate) {
111 Calendar calendar = Calendar.getInstance();
112 calendar.setTime(aDate);
113 int hour = calendar.get(Calendar.HOUR_OF_DAY);
114 if ( hour < _hourMin ) {
117 if (hour > _hourMax ) {
121 if ( !lastExecutionWasOnSameDay(aDate)) {
122 return true; // First execution of today.
124 // last execution was on the same day.
125 if ( !_lastResult ) {
126 return true; // last execution of today was unsuccessful, retry.
128 return false; // already run successfully today.
131 private boolean lastExecutionWasOnSameDay(Date aDate) {
132 if ( _lastExecuted == null ) {
135 int curDay = getDayOfYear(aDate);
136 int lastDay = getDayOfYear(_lastExecuted);
137 return curDay == lastDay;
143 private int getDayOfYear(Date aDate) {
144 Calendar calendar = Calendar.getInstance();
145 calendar.setTime(aDate);
146 return calendar.get(Calendar.DAY_OF_YEAR);