From c78140d07025961d92a4635b6e30f4bf66725746 Mon Sep 17 00:00:00 2001 From: erik Date: Sat, 18 Mar 2006 11:17:07 +0000 Subject: [PATCH] --- .../src/org/wamblee/crawler/PageType.java | 11 +++ .../org/wamblee/crawler/impl/ActionImpl.java | 13 ++++ .../crawler/impl/ConfigurationParser.java | 2 +- crawler/kiss/conf/kiss/programs.xml | 27 +++++-- .../org/wamblee/crawler/kiss/KissCrawler.java | 78 +++++++++++++++---- .../src/org/wamblee/crawler/kiss/Program.java | 41 ++++++++++ .../kiss/ProgramConfigurationParser.java | 45 ++++++++--- .../wamblee/crawler/kiss/ProgramFilter.java | 50 ++++++++++++ .../src/org/wamblee/crawler/kiss/Time.java | 64 +++++++++++---- .../wamblee/crawler/kiss/TimeInterval.java | 19 +++++ .../conditions/PropertyRegexCondition.java | 12 ++- 11 files changed, 313 insertions(+), 49 deletions(-) create mode 100644 crawler/kiss/src/org/wamblee/crawler/kiss/ProgramFilter.java diff --git a/crawler/basic/src/org/wamblee/crawler/PageType.java b/crawler/basic/src/org/wamblee/crawler/PageType.java index c23aa087..8320c0be 100644 --- a/crawler/basic/src/org/wamblee/crawler/PageType.java +++ b/crawler/basic/src/org/wamblee/crawler/PageType.java @@ -50,4 +50,15 @@ public class PageType { public String toString() { return "PageType(type='" + _type + "')"; } + + /* (non-Javadoc) + * @see java.lang.Object#equals(java.lang.Object) + */ + @Override + public boolean equals(Object obj) { + if (!(obj instanceof PageType)) { + return false; + } + return toString().equals(obj.toString()); + } } diff --git a/crawler/basic/src/org/wamblee/crawler/impl/ActionImpl.java b/crawler/basic/src/org/wamblee/crawler/impl/ActionImpl.java index c3663735..0ff4252d 100644 --- a/crawler/basic/src/org/wamblee/crawler/impl/ActionImpl.java +++ b/crawler/basic/src/org/wamblee/crawler/impl/ActionImpl.java @@ -113,4 +113,17 @@ public class ActionImpl implements Action { public Element getContent() { return _content; } + + /* (non-Javadoc) + * @see java.lang.Object#equals(java.lang.Object) + */ 
+ @Override + public boolean equals(Object obj) { + if ( !(obj instanceof ActionImpl )) { + return false; + } + ActionImpl action = (ActionImpl)obj; + return _reference.equals(action._reference) && + _type.equals(action._type); + } } diff --git a/crawler/basic/src/org/wamblee/crawler/impl/ConfigurationParser.java b/crawler/basic/src/org/wamblee/crawler/impl/ConfigurationParser.java index e9dc4013..6795bf1c 100644 --- a/crawler/basic/src/org/wamblee/crawler/impl/ConfigurationParser.java +++ b/crawler/basic/src/org/wamblee/crawler/impl/ConfigurationParser.java @@ -59,7 +59,7 @@ public class ConfigurationParser { private static final int MAX_TRIES = 3; - private static final int MAX_DELAY = 5000; + private static final int MAX_DELAY = 100; private PrintStream _os; diff --git a/crawler/kiss/conf/kiss/programs.xml b/crawler/kiss/conf/kiss/programs.xml index 8fb74e28..f6d5675d 100644 --- a/crawler/kiss/conf/kiss/programs.xml +++ b/crawler/kiss/conf/kiss/programs.xml @@ -1,35 +1,46 @@ - star.*gate + notify + horror - battlestar + notify + ((sci-fi)|(science fiction)) + + + + + star.*gate + + + + battlestar - star trek + star trek - shouf shouf + shouf shouf - red dwarf + red dwarf - top gear + top gear - brainiac + brainiac - lois.*clark + lois.*clark diff --git a/crawler/kiss/src/org/wamblee/crawler/kiss/KissCrawler.java b/crawler/kiss/src/org/wamblee/crawler/kiss/KissCrawler.java index 2f47c695..8fbd232d 100644 --- a/crawler/kiss/src/org/wamblee/crawler/kiss/KissCrawler.java +++ b/crawler/kiss/src/org/wamblee/crawler/kiss/KissCrawler.java @@ -28,6 +28,8 @@ import java.util.Date; import java.util.EnumMap; import java.util.List; import java.util.Properties; +import java.util.Set; +import java.util.TreeSet; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -41,7 +43,6 @@ import javax.mail.internet.MimeMessage; import org.apache.commons.httpclient.HttpClient; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import 
org.wamblee.conditions.Condition; import org.wamblee.crawler.Action; import org.wamblee.crawler.Configuration; import org.wamblee.crawler.Crawler; @@ -139,9 +140,9 @@ public class KissCrawler { InputStream programConfigFile = new FileInputStream(new File( aProgramConfig)); - Condition programCondition = new ProgramConfigurationParser() + List programFilters = new ProgramConfigurationParser() .parse(programConfigFile); - recordInterestingShows(programCondition, guide); + recordInterestingShows(programFilters, guide); } finally { os.flush(); os.close(); @@ -159,31 +160,55 @@ public class KissCrawler { * @throws MessagingException * In case of problems sending a summary mail. */ - private void recordInterestingShows(Condition aProgramCondition, + private void recordInterestingShows(List aProgramCondition, TVGuide aGuide) throws MessagingException { - MatchVisitor matcher = new MatchVisitor(aProgramCondition); - aGuide.accept(matcher); - List programs = matcher.getMatches(); - EnumMap> messages = new EnumMap>( - RecordingResult.class); - for (RecordingResult result : RecordingResult.values()) { - messages.put(result, new ArrayList()); - } - for (Program program : programs) { - Program.RecordingResult result = program.record(); - messages.get(result).add(program); + + Set showsToRecord = new TreeSet(new Program.TimeSorter()); + Set interestingShows = new TreeSet(new Program.TimeSorter()); + + for (ProgramFilter filter : aProgramCondition) { + List programs = filter.apply(aGuide); + switch (filter.getAction()) { + case RECORD: { + for (Program program: programs) { + showsToRecord.add(program); + } + break; + } + case NOTIFY: { + for (Program program: programs) { + if ( program.isRecordingPossible()) { + interestingShows.add(program); + } + } + break; + } + default: { + throw new RuntimeException("Unknown action '" + filter.getAction() + "'"); + } + } } + + EnumMap> messages = recordShows(showsToRecord); + String msg = "Summary of KiSS crawler: \n\n\n"; for 
(RecordingResult result : RecordingResult.values()) { if (messages.get(result).size() > 0) { msg += result.getDescription() + "\n\n"; for (Program program : messages.get(result)) { - msg += program + "\n"; + msg += program + "\n\n"; } } } - if (programs.size() == 0) { + + if ( interestingShows.size() > 0 ) { + msg += "Possibly interesting shows:\n\n"; + for (Program program: interestingShows) { + msg += program + "\n\n"; + } + } + if (showsToRecord.size() + interestingShows.size() == 0) { msg += "No suitable programs found"; } @@ -191,6 +216,25 @@ public class KissCrawler { sendMail(msg); } + /** + * Records shows. + * @param showsToRecord Shows to record. + * @return Recording results. + */ + private EnumMap> recordShows(Set showsToRecord) { + EnumMap> messages = new EnumMap>( + RecordingResult.class); + for (RecordingResult result : RecordingResult.values()) { + messages.put(result, new ArrayList()); + } + + for (Program program : showsToRecord) { + Program.RecordingResult result = program.record(); + messages.get(result).add(program); + } + return messages; + } + /** * Creates the crawler. 
* diff --git a/crawler/kiss/src/org/wamblee/crawler/kiss/Program.java b/crawler/kiss/src/org/wamblee/crawler/kiss/Program.java index 83531ff7..7cb86cfd 100644 --- a/crawler/kiss/src/org/wamblee/crawler/kiss/Program.java +++ b/crawler/kiss/src/org/wamblee/crawler/kiss/Program.java @@ -16,6 +16,8 @@ package org.wamblee.crawler.kiss; +import java.util.Comparator; + import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.wamblee.crawler.Action; @@ -27,6 +29,16 @@ import org.wamblee.crawler.PageException; */ public class Program { + /** Orders programs by begin time; programs beginning at the same time are ordered by their textual form so that a sorted set does not drop one of them as a duplicate. */ + public static class TimeSorter implements Comparator<Program> { + + /* (non-Javadoc) @see java.util.Comparator#compare(T, T) */ + public int compare(Program o1, Program o2) { + int byBegin = o1.getInterval().getBegin().compareTo(o2.getInterval().getBegin()); + return byBegin != 0 ? byBegin : o1.toString().compareTo(o2.toString()); + } + } + private static final Log LOG = LogFactory.getLog(Program.class); /** @@ -161,6 +173,22 @@ public class Program { public TimeInterval getInterval() { return _interval; } + + /** + * Checks if recording is possible. + * @return True iff recording is possible. + */ + public boolean isRecordingPossible() { + try { + Action record = _programInfo.execute().getAction(RECORD_ACTION); + if (record == null) { + return false; + } + return true; + } catch (PageException e) { + return false; + } + } /** * Records the show. 
@@ -202,4 +230,17 @@ public class Program { + ")" + "\n" + (INDENT + _description).replaceAll("\n", "\n" + INDENT); } + + /* (non-Javadoc) + * @see java.lang.Object#equals(java.lang.Object) + */ + @Override + public boolean equals(Object obj) { + if ( !(obj instanceof Program)) { + return false; + } + Program program = (Program)obj; + return getName().equals(program.getName()) && + _programInfo.equals(program._programInfo); + } } diff --git a/crawler/kiss/src/org/wamblee/crawler/kiss/ProgramConfigurationParser.java b/crawler/kiss/src/org/wamblee/crawler/kiss/ProgramConfigurationParser.java index 5615ced2..b8d5c678 100644 --- a/crawler/kiss/src/org/wamblee/crawler/kiss/ProgramConfigurationParser.java +++ b/crawler/kiss/src/org/wamblee/crawler/kiss/ProgramConfigurationParser.java @@ -21,21 +21,28 @@ import java.util.ArrayList; import java.util.Iterator; import java.util.List; +import org.dom4j.Attribute; import org.dom4j.Document; import org.dom4j.DocumentException; import org.dom4j.Element; import org.dom4j.io.SAXReader; +import org.wamblee.conditions.AndCondition; import org.wamblee.conditions.Condition; -import org.wamblee.conditions.OrCondition; +import org.wamblee.conditions.PropertyRegexCondition; +import org.wamblee.crawler.kiss.ProgramFilter.ProgramAction; /** * Parse the configuration of desired programs. */ -public class ProgramConfigurationParser { +class ProgramConfigurationParser { private static final String ELEM_PROGRAM = "program"; - private static final String ELEM_PATTERN = "name"; + private static final String ELEM_PATTERN = "match"; + + private static final String ELEM_ACTION = "action"; + + private static final String ACTION_NOTIFY = "notify"; /** * Parses the condition used to match the desired programs. @@ -44,21 +51,41 @@ public class ProgramConfigurationParser { * Input stream to parse from. * @return Condition. 
*/ - Condition parse(InputStream aStream) { + List parse(InputStream aStream) { + List filters = new ArrayList(); try { SAXReader reader = new SAXReader(); Document document = reader.read(aStream); Element root = document.getRootElement(); - List> conditions = new ArrayList>(); for (Iterator i = root.elementIterator(ELEM_PROGRAM); i.hasNext();) { Element program = (Element) i.next(); - String pattern = ".*" + program.element(ELEM_PATTERN).getText() - + ".*"; - conditions.add(new ProgramNameMatcher(pattern)); + + Element actionElem = program.element(ELEM_ACTION); + ProgramAction action = ProgramAction.RECORD; + if (actionElem != null) { + if (actionElem.getText().equals(ACTION_NOTIFY)) { + action = ProgramAction.NOTIFY; + } + } + List> regexConditions = + new ArrayList>(); + for (Iterator j = program.elementIterator(ELEM_PATTERN); j.hasNext(); ) { + Element patternElem = (Element)j.next(); + String fieldName = "name"; + Attribute fieldAttribute = patternElem.attribute("field"); + if ( fieldAttribute != null ) { + fieldName = fieldAttribute.getText(); + } + String pattern = ".*" + patternElem.getText() + + ".*"; + regexConditions.add(new PropertyRegexCondition(fieldName, pattern, true)); + } + Condition condition = new AndCondition(regexConditions); + filters.add(new ProgramFilter(condition, action)); } - return new OrCondition(conditions); + return filters; } catch (DocumentException e) { throw new RuntimeException("Error parsing program configuraiton", e); } diff --git a/crawler/kiss/src/org/wamblee/crawler/kiss/ProgramFilter.java b/crawler/kiss/src/org/wamblee/crawler/kiss/ProgramFilter.java new file mode 100644 index 00000000..3f65847b --- /dev/null +++ b/crawler/kiss/src/org/wamblee/crawler/kiss/ProgramFilter.java @@ -0,0 +1,50 @@ +/* + * Copyright 2005 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.wamblee.crawler.kiss; + +import java.util.List; + +import org.wamblee.conditions.Condition; + + +/** + * Obtains a list of interesting programs from a TV guide and decides + * what to do with them. + */ +public class ProgramFilter { + /** What to do with a program selected by the filter. */ + public enum ProgramAction { RECORD, NOTIFY } + /** Condition a program must satisfy to be selected. */ + private final Condition<Program> _condition; + /** Action associated with the selected programs. */ + private final ProgramAction _action; + /** Constructs the filter from a condition and the action for matching programs. */ + public ProgramFilter(Condition<Program> aCondition, ProgramAction aAction) { + _condition = aCondition; + _action = aAction; + } + /** Gets the action associated with this filter. */ + public ProgramAction getAction() { + return _action; + } + /** Runs a MatchVisitor built on the condition over the guide and returns its matches. */ + public List<Program> apply(TVGuide aGuide) { + MatchVisitor matcher = new MatchVisitor(_condition); + aGuide.accept(matcher); + return matcher.getMatches(); + } +} diff --git a/crawler/kiss/src/org/wamblee/crawler/kiss/Time.java b/crawler/kiss/src/org/wamblee/crawler/kiss/Time.java index 6679223b..16cae4f0 100644 --- a/crawler/kiss/src/org/wamblee/crawler/kiss/Time.java +++ b/crawler/kiss/src/org/wamblee/crawler/kiss/Time.java @@ -20,29 +20,32 @@ import java.text.DecimalFormat; import java.text.NumberFormat; /** - * TIme at which a program starts or ends. + * Time at which a program starts or ends. */ -public class Time { +public class Time implements Comparable { /** - * Number of seconds per minute. + * Number of minutes per hour (the constant's name is a misnomer). */ private static final double SECONDS_PER_MINUTE = 60.0; /** - * Hour of the time. + * Hour of the time. */ private int _hour; /** - * Minute of the hour. + * Minute of the hour. */ private int _minute; /** - * Constructs the time. - * @param aHour Hour. 
- * @param aMinute Minute. + * Constructs the time. + * + * @param aHour + * Hour. + * @param aMinute + * Minute. */ public Time(int aHour, int aMinute) { _hour = aHour; @@ -50,8 +53,9 @@ public class Time { } /** - * Gets the hour. - * @return Hour. + * Gets the hour. + * + * @return Hour. */ public int getHour() { return _hour; @@ -59,7 +63,8 @@ public class Time { /** * Gets te minute. - * @return Minute. + * + * @return Minute. */ public int getMinute() { return _minute; @@ -77,10 +82,43 @@ public class Time { } /** - * Convert time to floating point value. Useful for comparing two times. - * @return Converted value. + * Convert time to floating point value. Useful for comparing two times. + * + * @return Converted value. */ float asFloat() { return (float) _hour + (float) _minute / (float) SECONDS_PER_MINUTE; } + + /* + * (non-Javadoc) + * + * @see java.lang.Object#equals(java.lang.Object) + */ + @Override + public boolean equals(Object obj) { + if ( !(obj instanceof Time )) { + return false; + } + return toString().equals(obj.toString()); + } + + /* (non-Javadoc) + * @see java.lang.Comparable#compareTo(T) + */ + public int compareTo(Object o) { + if ( !(o instanceof Time)) { + throw new ClassCastException("object not an instance of Time"); + } + Time time = (Time)o; + return Float.compare(asFloat(), time.asFloat()); + } + + /* (non-Javadoc) + * @see java.lang.Object#hashCode() + */ + @Override + public int hashCode() { + return toString().hashCode(); + } } diff --git a/crawler/kiss/src/org/wamblee/crawler/kiss/TimeInterval.java b/crawler/kiss/src/org/wamblee/crawler/kiss/TimeInterval.java index 0dbd570b..d56e54e1 100644 --- a/crawler/kiss/src/org/wamblee/crawler/kiss/TimeInterval.java +++ b/crawler/kiss/src/org/wamblee/crawler/kiss/TimeInterval.java @@ -101,4 +101,23 @@ public class TimeInterval { boolean isUncertain() { return _begin.asFloat() > _end.asFloat(); } + + /* (non-Javadoc) + * @see java.lang.Object#equals(java.lang.Object) + */ + 
@Override + public boolean equals(Object obj) { + if ( !(obj instanceof TimeInterval)) { + return false; + } + return toString().equals(obj.toString()); + } + + /* (non-Javadoc) + * @see java.lang.Object#hashCode() + */ + @Override + public int hashCode() { + return _begin.hashCode(); + } } diff --git a/support/src/org/wamblee/conditions/PropertyRegexCondition.java b/support/src/org/wamblee/conditions/PropertyRegexCondition.java index 48df7995..60726dcc 100644 --- a/support/src/org/wamblee/conditions/PropertyRegexCondition.java +++ b/support/src/org/wamblee/conditions/PropertyRegexCondition.java @@ -38,14 +38,21 @@ public class PropertyRegexCondition implements Condition { */ private Pattern _regex; + /** + * Whether or not to convert the value to lowercase before matching. + */ + private boolean _tolower; + /** * Constructs the condition. * @param aProperty Name of the property to examine. * @param aRegex Regular expression to use. + * @param aTolower Whether or not to convert the value to lowercase before matching. */ - public PropertyRegexCondition(String aProperty, String aRegex) { + public PropertyRegexCondition(String aProperty, String aRegex, boolean aTolower) { _property = aProperty; _regex = Pattern.compile(aRegex); + _tolower = aTolower; } /* (non-Javadoc) @@ -54,6 +61,9 @@ public class PropertyRegexCondition implements Condition { public boolean matches(T aObject) { try { String value = PropertyUtils.getProperty(aObject, _property) + ""; + if ( _tolower ) { + value = value.toLowerCase(); + } Matcher matcher = _regex.matcher(value); return matcher.matches(); } catch (IllegalAccessException e) { -- 2.31.1