wamblee.org Git - utils/blob - crawler/kissweb/src/org/wamblee/crawler/kiss/scheduling/CrawlerSchedule.java

   1 /*
   2  * Copyright 2006 the original author or authors.
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at
   7  *
   8  *      http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16
  17 package org.wamblee.crawler.kiss.scheduling;
  18
  19 import java.io.Serializable;
  20 import java.util.Calendar;
  21 import java.util.Date;
  22
  23 /**
  24  * This class encapsulates the logic for deciding whether to
  25  * run the crawler. This provides the mechanism to keep the
  26  * scheduler simple (e.g. scheduling every hour) while providing
  27  * more complex logic for determining whether to run the
  28  * crawler.
  29  */
  30 public class CrawlerSchedule implements Serializable {
  31
  32     private CrawlerExecutor _crawler;
  33     private Date _lastExecuted;
  34     private Exception _lastResult;
  35     private int _hourMin;
  36     private int _hourMax;
  37
  38     /**
  39      * Constructs the scheduler.
  40      * @param aCrawler The interface by which the crawler is executed.
  41      * @param aHourMin The crawler may only run if hour &gt;= <code>aHourMin</code>
  42      * @param aHourMax The crawler may only run if hour &lt;= <code>aHourMax</code>
  43      */
  44     public CrawlerSchedule(CrawlerExecutor aCrawler, int aHourMin, int aHourMax) {
  45         _crawler = aCrawler;
  46         _lastExecuted = null;
  47         _lastResult = null;
  48         _hourMin = aHourMin;
  49         _hourMax = aHourMax;
  50     }
  51
  52     /**
  53      * Called by a scheduled job. This determines whether the crawler must be run or
  54      * not. This encapsulates the rukes for retrying and scheduling the crawler.
  55      * @param aDate Time at which we are executing now.
  56      */
  57     public void execute(Date aDate) {
  58         if (mustExecute(aDate)) {
  59             try {
  60                 _lastResult = null;
  61                 _crawler.execute(aDate);
  62             } catch (Exception e) {
  63                 _lastResult = e;
  64             } finally {
  65                 _lastExecuted = aDate;
  66             }
  67         }
  68     }
  69
  70     /**
  71      * Gets the time the crawler was last executed.
  72      * @return Time of last execution.
  73      */
  74     public Date getLastExecuted() {
  75         return _lastExecuted;
  76     }
  77
  78     /**
  79      * Gets the result of the last execution.
  80      * @return null if the last execution was successful or an exception
  81      *   otherwise.
  82      */
  83     public Exception getLastResult() {
  84         return _lastResult;
  85     }
  86
  87     /**
  88      * Determines whether or not the crawler must be run.
  89      * @param aDate Current time.
  90      * @return True iff the crawler must be run.
  91      */
  92     private boolean mustExecute(Date aDate) {
  93         Calendar calendar = Calendar.getInstance();
  94         calendar.setTime(aDate);
  95         int hour = calendar.get(Calendar.HOUR_OF_DAY);
  96         if ( hour < _hourMin ) {
  97             return false;
  98         }
  99         if (hour > _hourMax ) {
 100             return false;
 101         }
 102         if ( hour == _hourMin ) {
 103             return true; // First execution of today.
 104         }
 105         if ( _lastResult != null ) {
 106             return true; // last execution of today was unsuccessful, retry.
 107         }
 108
 109         return false; // already run successfully today.
 110     }
 111 }