From: Erik Brakkee Date: Sun, 19 Mar 2006 00:44:19 +0000 (+0000) Subject: (no commit message) X-Git-Tag: wamblee-utils-0.7~1110 X-Git-Url: http://wamblee.org/gitweb/?a=commitdiff_plain;h=a641296cc35afce34a55bab521150446ea896798;p=utils --- diff --git a/crawler/basic/src/org/wamblee/crawler/impl/CrawlerImpl.java b/crawler/basic/src/org/wamblee/crawler/impl/CrawlerImpl.java index 5633078c..9f985dc4 100644 --- a/crawler/basic/src/org/wamblee/crawler/impl/CrawlerImpl.java +++ b/crawler/basic/src/org/wamblee/crawler/impl/CrawlerImpl.java @@ -36,8 +36,6 @@ public class CrawlerImpl implements Crawler { private static final Log LOG = LogFactory.getLog(CrawlerImpl.class); - private static final int MAX_DELAY = 5000; - private HttpClient _client; private Configuration _config; diff --git a/crawler/kiss/src/org/wamblee/crawler/kiss/InterestingProgramAction.java b/crawler/kiss/src/org/wamblee/crawler/kiss/InterestingProgramAction.java new file mode 100644 index 00000000..5cb9925f --- /dev/null +++ b/crawler/kiss/src/org/wamblee/crawler/kiss/InterestingProgramAction.java @@ -0,0 +1,48 @@ +/* + * Copyright 2005 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.wamblee.crawler.kiss; + +/** + * Represents an action to execute for an interesting program. + */ +public class InterestingProgramAction implements ProgramAction { + + /** + * Category under which the interesting program is listed. 
+ */ + private String _category; + + /** + * Constructs the action. + * + * @param aCategory + * Category of the program. Useful for structuring the output. + */ + public InterestingProgramAction(String aCategory) { + _category = aCategory; + } + + /* + * (non-Javadoc) + * + * @see org.wamblee.crawler.kiss.ProgramAction#execute(org.wamblee.crawler.kiss.Program, + * org.wamblee.crawler.kiss.ProgramActionExecutor) + */ + public void execute(Program aProgram, ProgramActionExecutor aReport) { + aReport.interestingProgram(_category, aProgram); + } +} diff --git a/crawler/kiss/src/org/wamblee/crawler/kiss/KissCrawler.java b/crawler/kiss/src/org/wamblee/crawler/kiss/KissCrawler.java index d302fb6e..5922cc42 100644 --- a/crawler/kiss/src/org/wamblee/crawler/kiss/KissCrawler.java +++ b/crawler/kiss/src/org/wamblee/crawler/kiss/KissCrawler.java @@ -25,7 +25,6 @@ import java.io.InputStream; import java.io.PrintStream; import java.util.ArrayList; import java.util.Date; -import java.util.EnumMap; import java.util.List; import java.util.Properties; import java.util.Set; @@ -50,7 +49,6 @@ import org.wamblee.crawler.Page; import org.wamblee.crawler.PageException; import org.wamblee.crawler.impl.ConfigurationParser; import org.wamblee.crawler.impl.CrawlerImpl; -import org.wamblee.crawler.kiss.Program.RecordingResult; /** * The KiSS crawler for automatic recording of interesting TV shows. @@ -141,9 +139,7 @@ public class KissCrawler { TVGuide guide = createGuide(page); PrintVisitor printer = new PrintVisitor(System.out); guide.accept(printer); - - - recordInterestingShows(programFilters, guide); + processResults(programFilters, guide); } finally { os.flush(); os.close(); @@ -161,81 +157,22 @@ public class KissCrawler { * @throws MessagingException * In case of problems sending a summary mail. 
*/ - private void recordInterestingShows(List aProgramCondition, + private void processResults(List aProgramCondition, TVGuide aGuide) throws MessagingException { - - Set showsToRecord = new TreeSet(new Program.TimeSorter()); - Set interestingShows = new TreeSet(new Program.TimeSorter()); - + ProgramActionExecutor executor = new ProgramActionExecutor(); for (ProgramFilter filter : aProgramCondition) { - List programs = filter.apply(aGuide); - switch (filter.getAction()) { - case RECORD: { - for (Program program: programs) { - showsToRecord.add(program); - } - break; - } - case NOTIFY: { - for (Program program: programs) { - if ( program.isRecordingPossible()) { - interestingShows.add(program); - } - } - break; - } - default: { - throw new RuntimeException("Unknown action '" + filter.getAction() + "'"); - } + List programs = filter.apply(aGuide); + ProgramAction action = filter.getAction(); + for (Program program: programs) { + action.execute(program, executor); } } - - EnumMap> messages = recordShows(showsToRecord); - - String msg = "Summary of KiSS crawler: \n\n\n"; - - for (RecordingResult result : RecordingResult.values()) { - if (messages.get(result).size() > 0) { - msg += result.getDescription() + "\n\n"; - for (Program program : messages.get(result)) { - msg += program + "\n\n"; - } - } - } - - if ( interestingShows.size() > 0 ) { - msg += "Possibly interesting shows:\n\n"; - for (Program program: interestingShows) { - msg += program + "\n\n"; - } - } - if (showsToRecord.size() + interestingShows.size() == 0) { - msg += "No suitable programs found"; - } - + executor.commit(); + String msg = executor.getReport(); System.out.println(msg); sendMail(msg); } - /** - * Records shows. - * @param showsToRecord Shows to record. - * @return Recording results. 
- */ - private EnumMap> recordShows(Set showsToRecord) { - EnumMap> messages = new EnumMap>( - RecordingResult.class); - for (RecordingResult result : RecordingResult.values()) { - messages.put(result, new ArrayList()); - } - - for (Program program : showsToRecord) { - Program.RecordingResult result = program.record(); - messages.get(result).add(program); - } - return messages; - } - /** * Creates the crawler. * diff --git a/crawler/kiss/src/org/wamblee/crawler/kiss/ProgramAction.java b/crawler/kiss/src/org/wamblee/crawler/kiss/ProgramAction.java new file mode 100644 index 00000000..e7395b58 --- /dev/null +++ b/crawler/kiss/src/org/wamblee/crawler/kiss/ProgramAction.java @@ -0,0 +1,30 @@ +/* + * Copyright 2005 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.wamblee.crawler.kiss; + +/** + * Represents an action configured for a program. + */ +public interface ProgramAction { + + /** + * Executes the action. + * @param aProgram Program to execute the action for. + * @param aReport Report to use. 
+ */ + void execute(Program aProgram, ProgramActionExecutor aReport); +} diff --git a/crawler/kiss/src/org/wamblee/crawler/kiss/ProgramActionExecutor.java b/crawler/kiss/src/org/wamblee/crawler/kiss/ProgramActionExecutor.java new file mode 100644 index 00000000..3865dcc8 --- /dev/null +++ b/crawler/kiss/src/org/wamblee/crawler/kiss/ProgramActionExecutor.java @@ -0,0 +1,139 @@ +/* + * Copyright 2005 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.wamblee.crawler.kiss; + +import java.util.ArrayList; +import java.util.EnumMap; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.TreeMap; +import java.util.TreeSet; + +import org.wamblee.crawler.kiss.Program.RecordingResult; + +/** + * Provides execution of actions for programs. Actions use + * this class to tell the executor what to do. The executor then decides + * on exactly what to do and in what order and makes decisions in case + * of conflicts. + */ +public class ProgramActionExecutor { + + /** + * A map of category name to a set of programs. Useful for displaying the output of + * possibly interesting programs on a per category basis. + */ + private Map> _interestingShows; + + /** + * Set of programs to record. + */ + private Set _showsToRecord; + + /** + * Map of recording result to a set of programs. + */ + private EnumMap> _recordings; + + /** + * Constructs the program action executor. 
+ * + */ + public ProgramActionExecutor() { + _interestingShows = new TreeMap>(); + _showsToRecord = new TreeSet(new Program.TimeSorter()); + _recordings = new EnumMap>( + RecordingResult.class); + for (RecordingResult result : RecordingResult.values()) { + _recordings.put(result, new TreeSet(new Program.TimeSorter())); + } + } + + /** + * Called by an action to indicate the desire to record a program. + * @param aPriority Priority of the program. Used to resolve conflicts. + * @param aProgram Program to record. + */ + public void recordProgram(int aPriority, Program aProgram) { + _showsToRecord.add(aProgram); + } + + /** + * Called by an action to indicate that a program is interesting. + * @param aCategory Category of the program. + * @param aProgram Program. + */ + public void interestingProgram(String aCategory, Program aProgram) { + Set programs = _interestingShows.get(aCategory); + if ( programs == null ) { + programs = new TreeSet(new Program.TimeSorter()); + _interestingShows.put(aCategory, programs); + } + programs.add(aProgram); + } + + /** + * Makes sure that the actions are performed. + * + */ + public void commit() { + for (Program program: _showsToRecord) { + RecordingResult result = program.record(); + _recordings.get(result).add(program); + } + } + + /** + * Gets the report describing what was done. + * @return Report. 
+ */ + public String getReport() { + StringBuffer msg = new StringBuffer("Summary of KiSS crawler: \n\n\n"); + + boolean printed = false; + + for (RecordingResult result : RecordingResult.values()) { + if (_recordings.get(result).size() > 0) { + msg.append(result.getDescription() + "\n\n"); + for (Program program : _recordings.get(result)) { + msg.append(program + "\n\n"); + printed = true; + } + } + } + + if ( _interestingShows.size() > 0 ) { + msg.append("Possibly interesting shows:\n\n"); + for (String category: _interestingShows.keySet()) { + if ( category.length() > 0 ) { + msg.append("Category: " + category + "\n\n"); + } + for (Program program: _interestingShows.get(category)) { + msg.append(program + "\n\n"); + printed = true; + } + } + + } + if (!printed) { + msg.append("No suitable programs found"); + } + + return msg.toString(); + } +} diff --git a/crawler/kiss/src/org/wamblee/crawler/kiss/ProgramConfigurationParser.java b/crawler/kiss/src/org/wamblee/crawler/kiss/ProgramConfigurationParser.java index 2c05b827..5d9f578d 100644 --- a/crawler/kiss/src/org/wamblee/crawler/kiss/ProgramConfigurationParser.java +++ b/crawler/kiss/src/org/wamblee/crawler/kiss/ProgramConfigurationParser.java @@ -29,7 +29,6 @@ import org.dom4j.io.SAXReader; import org.wamblee.conditions.AndCondition; import org.wamblee.conditions.Condition; import org.wamblee.conditions.PropertyRegexCondition; -import org.wamblee.crawler.kiss.ProgramFilter.ProgramAction; /** * Parse the configuration of desired programs. 
@@ -63,10 +62,10 @@ class ProgramConfigurationParser { Element program = (Element) i.next(); Element actionElem = program.element(ELEM_ACTION); - ProgramAction action = ProgramAction.RECORD; + ProgramAction action = new RecordProgramAction(); if (actionElem != null) { if (actionElem.getText().equals(ACTION_NOTIFY)) { - action = ProgramAction.NOTIFY; + action = new InterestingProgramAction(""); } } List> regexConditions = diff --git a/crawler/kiss/src/org/wamblee/crawler/kiss/ProgramFilter.java b/crawler/kiss/src/org/wamblee/crawler/kiss/ProgramFilter.java index 3f65847b..5f66e0bd 100644 --- a/crawler/kiss/src/org/wamblee/crawler/kiss/ProgramFilter.java +++ b/crawler/kiss/src/org/wamblee/crawler/kiss/ProgramFilter.java @@ -27,8 +27,6 @@ import org.wamblee.conditions.Condition; */ public class ProgramFilter { - public enum ProgramAction { RECORD, NOTIFY }; - private Condition _condition; private ProgramAction _action; diff --git a/crawler/kiss/src/org/wamblee/crawler/kiss/ProgramNameMatcher.java b/crawler/kiss/src/org/wamblee/crawler/kiss/ProgramNameMatcher.java deleted file mode 100644 index 867d5b38..00000000 --- a/crawler/kiss/src/org/wamblee/crawler/kiss/ProgramNameMatcher.java +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Copyright 2005 the original author or authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.wamblee.crawler.kiss; - -import java.util.regex.Matcher; -import java.util.regex.Pattern; - -import org.wamblee.conditions.Condition; - -/** - * Match programs based on a regular expression for the name in lower case. - */ -public class ProgramNameMatcher implements Condition { - - /** - * Pattern which describes interesting programs. - */ - private Pattern _pattern; - - /** - * Constructs the matcher. - * @param aPattern Pattern that describes interesting programs. - */ - public ProgramNameMatcher(String aPattern) { - _pattern = Pattern.compile(aPattern); - } - - /** - * Determines if the program name matches. - * @param aProgram Program. - * @return True iff the program name matches. - */ - public boolean matches(Program aProgram) { - Matcher matcher = _pattern.matcher(aProgram.getName().toLowerCase()); - return matcher.matches(); - } -} diff --git a/crawler/kiss/src/org/wamblee/crawler/kiss/RecordProgramAction.java b/crawler/kiss/src/org/wamblee/crawler/kiss/RecordProgramAction.java new file mode 100644 index 00000000..ac784f67 --- /dev/null +++ b/crawler/kiss/src/org/wamblee/crawler/kiss/RecordProgramAction.java @@ -0,0 +1,43 @@ +/* + * Copyright 2005 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.wamblee.crawler.kiss; + +import org.wamblee.crawler.kiss.Program.RecordingResult; + +/** + * Represents an action to record a program. 
+ */ +public class RecordProgramAction implements ProgramAction { + + private int _priority; + + /** + * Sets the priority of the action. + * NOTE(review): declared as a void method named ReportProgramAction, so it is not a constructor of RecordProgramAction; confirm the intent. + */ + public void ReportProgramAction(int aPriority) { + _priority = aPriority; + } + + /* (non-Javadoc) + * @see org.wamblee.crawler.kiss.ProgramAction#execute(org.wamblee.crawler.kiss.Program, org.wamblee.crawler.kiss.ProgramActionExecutor) + */ + public void execute(Program aProgram, ProgramActionExecutor aReport) { + aReport.recordProgram(_priority, aProgram); + } + +}