X-Git-Url: http://wamblee.org/gitweb/?a=blobdiff_plain;f=crawler%2Fkissweb%2Fsrc%2Fmain%2Fjava%2Forg%2Fwamblee%2Fcrawler%2Fkiss%2Fscheduling%2FCrawlerStatus.java;fp=crawler%2Fkissweb%2Fsrc%2Fmain%2Fjava%2Forg%2Fwamblee%2Fcrawler%2Fkiss%2Fscheduling%2FCrawlerStatus.java;h=0000000000000000000000000000000000000000;hb=2ef561a4fbf29b65335f1558bfebd74733a6ddd9;hp=065a229487b6d87fded759f7ba5e03fb2da6fba8;hpb=54903ea538a09fdb1e2ee6dc37e89bb85aebfec4;p=utils diff --git a/crawler/kissweb/src/main/java/org/wamblee/crawler/kiss/scheduling/CrawlerStatus.java b/crawler/kissweb/src/main/java/org/wamblee/crawler/kiss/scheduling/CrawlerStatus.java deleted file mode 100644 index 065a2294..00000000 --- a/crawler/kissweb/src/main/java/org/wamblee/crawler/kiss/scheduling/CrawlerStatus.java +++ /dev/null @@ -1,193 +0,0 @@ -/* - * Copyright 2006 the original author or authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.wamblee.crawler.kiss.scheduling; - -import java.io.Serializable; -import java.util.Calendar; -import java.util.Date; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.wamblee.crawler.kiss.main.Report; - -/** - * This class encapsulates the logic for deciding whether to - * run the crawler. This provides the mechanism to keep the - * scheduler simple (e.g. scheduling every hour) and providing - * more complex logic for determining whether to run the - * crawler. 
- */ -public class CrawlerStatus implements Serializable { - - private static final Log LOG = LogFactory.getLog(CrawlerStatus.class); - - /** Executor through which the crawler is run. */ - private CrawlerExecutor _crawler; - /** Time recorded for the last execution; initialized to the construction time. */ - private Date _lastExecuted; - /** True iff the last execution completed without an exception; initially true. */ - private boolean _lastResult; - /** Exception thrown by the last execution, or null if it succeeded. */ - private Exception _lastException; - /** Report passed to the crawler during the last execution, or null before the first execution. */ - private Report _lastReport; - /** Lowest hour of the day (inclusive) at which an automatic run is allowed. */ - private int _hourMin; - /** Highest hour of the day (inclusive) at which an automatic run is allowed. */ - private int _hourMax; - /** One-shot flag: when set, the next trigger runs the crawler and clears the flag. */ - private boolean _mustExecute; - - /** - * Constructs the scheduler. - * The crawler will run if it is triggered in the range between the minimum (included) - * and maximum (included) hour of the day if either - * the last execution was not on the same day, or the last execution on that day failed. - * A run requested through setMustExecute(true) is performed at the next trigger - * regardless of these conditions. - * - * The construction time is recorded as a successful last execution, so the first - * automatic run does not take place on the day of construction. - * - * @param aCrawler The interface through which the crawler is executed. - * @param aHourMin The crawler may only run if hour >= aHourMin - * @param aHourMax The crawler may only run if hour <= aHourMax - */ - public CrawlerStatus(CrawlerExecutor aCrawler, int aHourMin, int aHourMax) { - _crawler = aCrawler; - _lastExecuted = new Date(); - _lastResult = true; // the crawler will automatically run the next day. - _lastException = null; - _lastReport = null; - _hourMin = aHourMin; - _hourMax = aHourMax; - _mustExecute = false; - } - - /** - * Determines whether or not the crawler must be run the next time it is triggered. - * A request is consumed by the next trigger and bypasses the hour range check. - * @param aMustExecute If true then the crawler will run the next time it is triggered - * by the scheduler. - */ - public void setMustExecute(boolean aMustExecute) { - _mustExecute = aMustExecute; - } - - /** - * Called by a scheduled job. This determines whether the crawler must be run or - * not. This encapsulates the rules for retrying and scheduling the crawler. - * If the crawler runs, an exception it throws is not propagated; it is stored and - * made available through getLastException(). The execution time and the report are - * recorded whether or not the run succeeded. - * @param aDate Time at which we are executing now. 
- */ - public void execute(Date aDate) { - - if (mustExecute(aDate)) { - LOG.info("Executing crawler at " + aDate); - Report report = new Report(); - try { - _crawler.execute(aDate, report); - _lastResult = true; - _lastException = null; - } catch (Exception e) { - _lastResult = false; - _lastException = e; - } finally { - _lastExecuted = aDate; - _lastReport = report; - } - } - } - - /** - * Gets the time the crawler was last executed. - * Before the first execution this is the time at which this object was constructed. - * @return Time of last execution. - */ - public Date getLastExecuted() { - return _lastExecuted; - } - - /** - * Gets the result of the last execution. - * The value is also true when the crawler has not been executed yet. - * @return True iff last execution was a success. - */ - public boolean getLastResult() { - return _lastResult; - } - - /** - * Gets the exception thrown by the last execution. - * @return null if the last execution was successful or an exception - * otherwise. - */ - public Exception getLastException() { - return _lastException; - } - - /** - * Gets the report that was passed to the crawler during the last execution. - * @return Report of the last execution, or null if the crawler has not been executed yet. - */ - public Report getLastReport() { - return _lastReport; - } - - /** - * Determines whether or not the crawler must be run. - * A pending forced execution is consumed (the flag is reset) and always wins. - * Otherwise the hour of aDate must lie within [_hourMin, _hourMax], and the crawler - * runs if the last execution was not on the same day or was unsuccessful. - * NOTE(review): the constructor always sets _lastExecuted, so the null check below - * looks unreachable in normal use; confirm before relying on it. - * @param aDate Current time. - * @return True iff the crawler must be run. - */ - private boolean mustExecute(Date aDate) { - if (_mustExecute) { - _mustExecute = false; - return true; - } - if ( _lastExecuted == null ) { - return false; // crawler must be started manually at least once after deployment. - } - Calendar calendar = Calendar.getInstance(); - calendar.setTime(aDate); - int hour = calendar.get(Calendar.HOUR_OF_DAY); - if ( hour < _hourMin ) { - return false; - } - if (hour > _hourMax ) { - return false; - } - - if ( !lastExecutionWasOnSameDay(aDate)) { - return true; // First execution of today. - } - // last execution was on the same day. - if ( !_lastResult ) { - return true; // last execution of today was unsuccessful, retry. - } - return false; // already run successfully today. 
- } - - /** - * Determines if the last execution was on the same day. - * Only the day of the year is compared; the year itself is not. - * @param aDate Current time. - * @return True iff last execution was on the same day; false if no last execution is recorded. - */ - private boolean lastExecutionWasOnSameDay(Date aDate) { - if ( _lastExecuted == null ) { - return false; - } - int curDay = getDayOfYear(aDate); - int lastDay = getDayOfYear(_lastExecuted); - return curDay == lastDay; // check can be invalid only if scheduling interval is one year, - // which is ridiculous. - } - - /** - * Gets the day of the year. - * @param aDate Date to compute day for. - * @return Value of Calendar.DAY_OF_YEAR for the given date. - */ - private int getDayOfYear(Date aDate) { - Calendar calendar = Calendar.getInstance(); - calendar.setTime(aDate); - return calendar.get(Calendar.DAY_OF_YEAR); - } -}