(no commit message)
author Erik Brakkee <erik@brakkee.org>
Sat, 18 Mar 2006 11:17:07 +0000 (11:17 +0000)
committer Erik Brakkee <erik@brakkee.org>
Sat, 18 Mar 2006 11:17:07 +0000 (11:17 +0000)
crawler/basic/src/org/wamblee/crawler/PageType.java
crawler/basic/src/org/wamblee/crawler/impl/ActionImpl.java
crawler/basic/src/org/wamblee/crawler/impl/ConfigurationParser.java
crawler/kiss/conf/kiss/programs.xml
crawler/kiss/src/org/wamblee/crawler/kiss/KissCrawler.java
crawler/kiss/src/org/wamblee/crawler/kiss/Program.java
crawler/kiss/src/org/wamblee/crawler/kiss/ProgramConfigurationParser.java
crawler/kiss/src/org/wamblee/crawler/kiss/ProgramFilter.java [new file with mode: 0644]
crawler/kiss/src/org/wamblee/crawler/kiss/Time.java
crawler/kiss/src/org/wamblee/crawler/kiss/TimeInterval.java
support/src/org/wamblee/conditions/PropertyRegexCondition.java

index c23aa08784adb3831149d0ad278f2fd819ad3853..8320c0bea732142a40c0df81e3cbec549e27f2a9 100644 (file)
@@ -50,4 +50,15 @@ public class PageType {
     public String toString() {
         return "PageType(type='" + _type + "')";
     }
+    
+    /* (non-Javadoc)
+     * @see java.lang.Object#equals(java.lang.Object)
+     */
+    @Override
+    public boolean equals(Object obj) {
+        if (!(obj instanceof PageType)) { 
+            return false; 
+        }
+        return toString().equals(obj.toString());
+    }
 }
index c36637353b6ef2a1588c67e85b629ef3035db9f2..0ff4252d98804712981f46f842748e0bc024215b 100644 (file)
@@ -113,4 +113,17 @@ public class ActionImpl implements Action {
     public Element getContent() {
         return _content;
     }
+    
+    /* (non-Javadoc)
+     * @see java.lang.Object#equals(java.lang.Object)
+     */
+    @Override
+    public boolean equals(Object obj) {
+        if ( !(obj instanceof ActionImpl )) { 
+            return false; 
+        }
+        ActionImpl action = (ActionImpl)obj; 
+        return _reference.equals(action._reference) && 
+               _type.equals(action._type);
+    }
 }
index e9dc4013b5c3025817e5c6daaba1fe5537733fba..6795bf1c6c1fa1830d14e2902147f5d3816d31b4 100644 (file)
@@ -59,7 +59,7 @@ public class ConfigurationParser {
 
     private static final int MAX_TRIES = 3;
 
-    private static final int MAX_DELAY = 5000;
+    private static final int MAX_DELAY = 100;
 
     private PrintStream _os;
     
index 8fb74e2866c52a427d3c45de2850e7138048bb03..f6d5675d8d6096dfc0651c0307b850ec0582e4be 100644 (file)
@@ -1,35 +1,46 @@
 <programs>
   
   <program>
-    <name>star.*gate</name>
+    <action>notify</action>
+    <match field="description">horror</match>
   </program>
   
   <program>
-    <name>battlestar</name>
+    <action>notify</action>
+    <match field="description">((sci-fi)|(science fiction))</match>
+  </program>
+  
+  
+  <program>
+    <match>star.*gate</match>
+  </program>
+  
+  <program>
+    <match>battlestar</match>
   </program>
 
   <program>
-    <name>star trek</name>
+    <match>star trek</match>
   </program>
   
   <program>
-    <name>shouf shouf</name>
+    <match>shouf shouf</match>
   </program>
   
   <program>
-    <name>red dwarf</name>
+    <match>red dwarf</match>
   </program>
   
   <program>
-    <name>top gear</name>
+    <match>top gear</match>
   </program>
   
   <program>
-    <name>brainiac</name>
+    <match>brainiac</match>
   </program>
   
   <program>
-    <name>lois.*clark</name>
+    <match>lois.*clark</match>
   </program>
   
 </programs>
index 2f47c695a7a747b4ad00fa870d4b4923726f7a1a..8fbd232dcba2c685a4c981e75416b87d8512a3bf 100644 (file)
@@ -28,6 +28,8 @@ import java.util.Date;
 import java.util.EnumMap;
 import java.util.List;
 import java.util.Properties;
+import java.util.Set;
+import java.util.TreeSet;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
@@ -41,7 +43,6 @@ import javax.mail.internet.MimeMessage;
 import org.apache.commons.httpclient.HttpClient;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
-import org.wamblee.conditions.Condition;
 import org.wamblee.crawler.Action;
 import org.wamblee.crawler.Configuration;
 import org.wamblee.crawler.Crawler;
@@ -139,9 +140,9 @@ public class KissCrawler {
 
             InputStream programConfigFile = new FileInputStream(new File(
                     aProgramConfig));
-            Condition<Program> programCondition = new ProgramConfigurationParser()
+            List<ProgramFilter> programFilters = new ProgramConfigurationParser()
                     .parse(programConfigFile);
-            recordInterestingShows(programCondition, guide);
+            recordInterestingShows(programFilters, guide);
         } finally {
             os.flush();
             os.close();
@@ -159,31 +160,55 @@ public class KissCrawler {
      * @throws MessagingException
      *             In case of problems sending a summary mail.
      */
-    private void recordInterestingShows(Condition<Program> aProgramCondition,
+    private void recordInterestingShows(List<ProgramFilter> aProgramCondition,
             TVGuide aGuide) throws MessagingException {
-        MatchVisitor matcher = new MatchVisitor(aProgramCondition);
-        aGuide.accept(matcher);
-        List<Program> programs = matcher.getMatches();
-        EnumMap<RecordingResult, List<Program>> messages = new EnumMap<RecordingResult, List<Program>>(
-                RecordingResult.class);
-        for (RecordingResult result : RecordingResult.values()) {
-            messages.put(result, new ArrayList<Program>());
-        }
-        for (Program program : programs) {
-            Program.RecordingResult result = program.record();
-            messages.get(result).add(program);
+
+        Set<Program> showsToRecord = new TreeSet<Program>(new Program.TimeSorter());
+        Set<Program> interestingShows = new TreeSet<Program>(new Program.TimeSorter());
+       
+        for (ProgramFilter filter : aProgramCondition) {
+            List<Program> programs = filter.apply(aGuide); 
+            switch (filter.getAction()) {
+            case RECORD: {
+                for (Program program: programs) { 
+                    showsToRecord.add(program);
+                }
+                break; 
+            }
+            case NOTIFY: { 
+                for (Program program: programs) {
+                    if ( program.isRecordingPossible()) { 
+                        interestingShows.add(program);
+                    }
+                }
+                break;
+            }
+            default: { 
+                throw new RuntimeException("Unknown action '" + filter.getAction() + "'"); 
+            }
+            }
         }
+
+        EnumMap<RecordingResult, List<Program>> messages = recordShows(showsToRecord);
+        
         String msg = "Summary of KiSS crawler: \n\n\n";
 
         for (RecordingResult result : RecordingResult.values()) {
             if (messages.get(result).size() > 0) {
                 msg += result.getDescription() + "\n\n";
                 for (Program program : messages.get(result)) {
-                    msg += program + "\n";
+                    msg += program + "\n\n";
                 }
             }
         }
-        if (programs.size() == 0) {
+        
+        if ( interestingShows.size() > 0 ) { 
+            msg += "Possibly interesting shows:\n\n"; 
+            for (Program program: interestingShows) { 
+                msg += program + "\n\n";
+            }
+        }
+        if (showsToRecord.size()  + interestingShows.size() == 0) {
             msg += "No suitable programs found";
         }
 
@@ -191,6 +216,25 @@ public class KissCrawler {
         sendMail(msg);
     }
 
+    /**
+     * Attempts to record every given show and groups the shows by the
+     * outcome of the recording attempt.
+     * 
+     * @param showsToRecord
+     *            Shows to record, visited in the iteration order of the set.
+     * @return Map with an entry for every recording result; each value lists
+     *         the shows whose recording attempt produced that result
+     *         (possibly an empty list).
+     */
+    private EnumMap<RecordingResult, List<Program>> recordShows(Set<Program> showsToRecord) {
+        EnumMap<RecordingResult, List<Program>> outcomes = 
+            new EnumMap<RecordingResult, List<Program>>(RecordingResult.class);
+
+        // Start with an empty bucket for every possible result so that
+        // callers can look up any result without a null check.
+        RecordingResult[] allResults = RecordingResult.values();
+        for (int i = 0; i < allResults.length; i++) {
+            outcomes.put(allResults[i], new ArrayList<Program>());
+        }
+
+        for (Program show : showsToRecord) {
+            List<Program> bucket = outcomes.get(show.record());
+            bucket.add(show);
+        }
+        return outcomes;
+    }
+
     /**
      * Creates the crawler.
      * 
index 83531ff75100d4040f2c1572d9f44cb28ee84247..7cb86cfdeecea92ccb66b2f2a3a4eae7b5e485a8 100644 (file)
@@ -16,6 +16,8 @@
 
 package org.wamblee.crawler.kiss;
 
+import java.util.Comparator;
+
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.wamblee.crawler.Action;
@@ -27,6 +29,16 @@ import org.wamblee.crawler.PageException;
  */
 public class Program {
     
+    public static class TimeSorter implements Comparator<Program> { 
+     
+        /* (non-Javadoc)
+         * @see java.util.Comparator#compare(T, T)
+         */
+        public int compare(Program o1, Program o2) { 
+            return o1.getInterval().getBegin().compareTo(o2.getInterval().getBegin());
+        }
+    }
+    
     private static final Log LOG = LogFactory.getLog(Program.class);
 
     /**
@@ -161,6 +173,22 @@ public class Program {
     public TimeInterval getInterval() {
         return _interval;
     }
+    
+    /**
+     * Checks if recording is possible. 
+     * @return True iff recording is possible. 
+     */
+    public boolean isRecordingPossible() { 
+        try {
+            Action record = _programInfo.execute().getAction(RECORD_ACTION);
+            if (record == null) {
+                return false; 
+            }
+            return true; 
+        } catch (PageException e) {
+            return false; 
+        }
+    }
 
     /**
      * Records the show.
@@ -202,4 +230,17 @@ public class Program {
                 + ")" + "\n"
                 + (INDENT + _description).replaceAll("\n", "\n" + INDENT);
     }
+    
+    /* (non-Javadoc)
+     * @see java.lang.Object#equals(java.lang.Object)
+     */
+    @Override
+    public boolean equals(Object obj) {
+        if ( !(obj instanceof Program)) { 
+            return false; 
+        }
+        Program program = (Program)obj; 
+        return getName().equals(program.getName()) && 
+               _programInfo.equals(program._programInfo);
+    }
 }
index 5615ced2f6973d973c4cbcc623f35fa0a5f4bbc4..b8d5c67863adcd9406c3bae2fc76d6f11f86b829 100644 (file)
@@ -21,21 +21,28 @@ import java.util.ArrayList;
 import java.util.Iterator;
 import java.util.List;
 
+import org.dom4j.Attribute;
 import org.dom4j.Document;
 import org.dom4j.DocumentException;
 import org.dom4j.Element;
 import org.dom4j.io.SAXReader;
+import org.wamblee.conditions.AndCondition;
 import org.wamblee.conditions.Condition;
-import org.wamblee.conditions.OrCondition;
+import org.wamblee.conditions.PropertyRegexCondition;
+import org.wamblee.crawler.kiss.ProgramFilter.ProgramAction;
 
 /**
  * Parse the configuration of desired programs.
  */
-public class ProgramConfigurationParser {
+class ProgramConfigurationParser {
 
     private static final String ELEM_PROGRAM = "program";
 
-    private static final String ELEM_PATTERN = "name";
+    private static final String ELEM_PATTERN = "match";
+
+    private static final String ELEM_ACTION = "action";
+
+    private static final String ACTION_NOTIFY = "notify";
 
     /**
      * Parses the program filters (match conditions plus the action to take) for the desired programs.
@@ -44,21 +51,41 @@ public class ProgramConfigurationParser {
      *            Input stream to parse from.
      * @return List of program filters, one for each program element in the input.
      */
-    Condition<Program> parse(InputStream aStream) {
+    List<ProgramFilter> parse(InputStream aStream) {
+        List<ProgramFilter> filters = new ArrayList<ProgramFilter>();
         try {
             SAXReader reader = new SAXReader();
             Document document = reader.read(aStream);
 
             Element root = document.getRootElement();
-            List<Condition<Program>> conditions = new ArrayList<Condition<Program>>();
 
             for (Iterator i = root.elementIterator(ELEM_PROGRAM); i.hasNext();) {
                 Element program = (Element) i.next();
-                String pattern = ".*" + program.element(ELEM_PATTERN).getText()
-                        + ".*";
-                conditions.add(new ProgramNameMatcher(pattern));
+
+                Element actionElem = program.element(ELEM_ACTION);
+                ProgramAction action = ProgramAction.RECORD;
+                if (actionElem != null) {
+                    if (actionElem.getText().equals(ACTION_NOTIFY)) {
+                        action = ProgramAction.NOTIFY;
+                    }
+                }
+                List<Condition<Program>> regexConditions = 
+                    new ArrayList<Condition<Program>>();
+                for (Iterator j = program.elementIterator(ELEM_PATTERN); j.hasNext(); ) {
+                    Element patternElem = (Element)j.next();
+                    String fieldName = "name"; 
+                    Attribute fieldAttribute = patternElem.attribute("field"); 
+                    if ( fieldAttribute != null ) { 
+                        fieldName = fieldAttribute.getText(); 
+                    }
+                    String pattern = ".*" + patternElem.getText()
+                    + ".*";
+                    regexConditions.add(new PropertyRegexCondition<Program>(fieldName, pattern, true));
+                }
+                Condition<Program> condition = new AndCondition<Program>(regexConditions);
+                filters.add(new ProgramFilter(condition, action));
             }
-            return new OrCondition<Program>(conditions);
+            return filters;
         } catch (DocumentException e) {
             throw new RuntimeException("Error parsing program configuraiton", e);
         }
diff --git a/crawler/kiss/src/org/wamblee/crawler/kiss/ProgramFilter.java b/crawler/kiss/src/org/wamblee/crawler/kiss/ProgramFilter.java
new file mode 100644 (file)
index 0000000..3f65847
--- /dev/null
@@ -0,0 +1,50 @@
+/*
+ * Copyright 2005 the original author or authors.
+ * 
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * 
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ 
+
+package org.wamblee.crawler.kiss;
+
+import java.util.List;
+
+import org.wamblee.conditions.Condition;
+
+
+/**
+ * Obtains a list of interesting programs from a TV guide and decides
+ * what to do with them.  
+ */
+public class ProgramFilter {
+    
+    public enum ProgramAction { RECORD, NOTIFY }; 
+
+    private Condition<Program> _condition; 
+    
+    private ProgramAction _action; 
+    
+    public ProgramFilter(Condition<Program> aCondition, ProgramAction aAction) { 
+        _condition = aCondition; 
+        _action = aAction; 
+    }
+    
+    public ProgramAction getAction() { 
+        return _action;
+    }
+    
+    public List<Program> apply(TVGuide aGuide) { 
+        MatchVisitor matcher = new MatchVisitor(_condition);
+        aGuide.accept(matcher);
+        return matcher.getMatches(); 
+    }
+}
index 6679223bf3af77d14d3d462ca6f9a0e9e61bac88..16cae4f08d176ef3628dbf20299b9a65c7757825 100644 (file)
@@ -20,29 +20,32 @@ import java.text.DecimalFormat;
 import java.text.NumberFormat;
 
 /**
- * TIme at which a program starts or ends. 
+ * Time at which a program starts or ends.
  */
-public class Time {
+public class Time implements Comparable {
 
     /**
-     * Number of seconds per minute. 
+     * Number of minutes per hour (60), despite the constant's name; asFloat() divides the minute value by it.
      */
     private static final double SECONDS_PER_MINUTE = 60.0;
 
     /**
-     * Hour of the time. 
+     * Hour of the time.
      */
     private int _hour;
 
     /**
-     * Minute of the hour. 
+     * Minute of the hour.
      */
     private int _minute;
 
     /**
-     * Constructs the time. 
-     * @param aHour Hour. 
-     * @param aMinute Minute. 
+     * Constructs the time.
+     * 
+     * @param aHour
+     *            Hour.
+     * @param aMinute
+     *            Minute.
      */
     public Time(int aHour, int aMinute) {
         _hour = aHour;
@@ -50,8 +53,9 @@ public class Time {
     }
 
     /**
-     * Gets the hour. 
-     * @return Hour. 
+     * Gets the hour.
+     * 
+     * @return Hour.
      */
     public int getHour() {
         return _hour;
@@ -59,7 +63,8 @@ public class Time {
 
     /**
      * Gets the minute.
-     * @return Minute. 
+     * 
+     * @return Minute.
      */
     public int getMinute() {
         return _minute;
@@ -77,10 +82,43 @@ public class Time {
     }
 
     /**
-     * Convert time to floating point value. Useful for comparing two times. 
-     * @return Converted value. 
+     * Convert time to floating point value. Useful for comparing two times.
+     * 
+     * @return Converted value.
      */
     float asFloat() {
         return (float) _hour + (float) _minute / (float) SECONDS_PER_MINUTE;
     }
+
+    /*
+     * (non-Javadoc)
+     * 
+     * @see java.lang.Object#equals(java.lang.Object)
+     */
+    @Override
+    public boolean equals(Object obj) {
+        if ( !(obj instanceof Time )) { 
+            return false; 
+        }
+        return toString().equals(obj.toString());
+    }
+    
+    /* (non-Javadoc)
+     * @see java.lang.Comparable#compareTo(T)
+     */
+    public int compareTo(Object o) {
+        if ( !(o instanceof Time)) { 
+            throw new RuntimeException("object not an instance of Time"); 
+        }
+        Time time = (Time)o; 
+        return new Float(asFloat()).compareTo(new Float(time.asFloat()));
+    }
+    
+    /* (non-Javadoc)
+     * @see java.lang.Object#hashCode()
+     */
+    @Override
+    public int hashCode() {
+        return toString().hashCode();
+    }
 }
index 0dbd570b70f6481190c66330b3d06e8b8e0863e7..d56e54e17cb65ebcf374494a2c676acb1a3268de 100644 (file)
@@ -101,4 +101,23 @@ public class TimeInterval {
     boolean isUncertain() {
         return _begin.asFloat() > _end.asFloat();
     }
+    
+    /* (non-Javadoc)
+     * @see java.lang.Object#equals(java.lang.Object)j
+     */
+    @Override
+    public boolean equals(Object obj) {
+        if ( !(obj instanceof TimeInterval)) { 
+            return false; 
+        } 
+        return obj.toString().equals(obj.toString());
+    }
+    
+    /* (non-Javadoc)
+     * @see java.lang.Object#hashCode()
+     */
+    @Override
+    public int hashCode() {
+        return _begin.hashCode();
+    }
 }
index 48df7995cd0222547b7a52a8c6dbb4acb6f843d0..60726dcc540b205f012e7baf79c3410bfbda1c63 100644 (file)
@@ -38,14 +38,21 @@ public class PropertyRegexCondition<T> implements Condition<T> {
      */
     private Pattern _regex;
     
+    /**
+     * Whether or not to convert the value to lowercase before matching. 
+     */
+    private boolean _tolower; 
+    
     /**
      * Constructs the condition. 
      * @param aProperty Name of the property to examine. 
      * @param aRegex Regular expression to use. 
+     * @param aTolower Whether or not to convert the value to lowercase before matching. 
      */
-    public PropertyRegexCondition(String aProperty, String aRegex) {
+    public PropertyRegexCondition(String aProperty, String aRegex, boolean aTolower) {
         _property = aProperty;
         _regex = Pattern.compile(aRegex);
+        _tolower = aTolower;
     }
 
     /* (non-Javadoc)
@@ -54,6 +61,9 @@ public class PropertyRegexCondition<T> implements Condition<T> {
     public boolean matches(T aObject) {
         try {
             String value = PropertyUtils.getProperty(aObject, _property) + "";
+            if ( _tolower ) { 
+                value = value.toLowerCase(); 
+            }
             Matcher matcher = _regex.matcher(value); 
             return matcher.matches(); 
         } catch (IllegalAccessException e) {