import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Date;
+import java.util.EnumMap;
import java.util.List;
import java.util.Properties;
import java.util.regex.Matcher;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
-import org.dom4j.Element;
import org.wamblee.conditions.Condition;
import org.wamblee.crawler.Action;
import org.wamblee.crawler.Configuration;
import org.wamblee.crawler.PageException;
import org.wamblee.crawler.impl.ConfigurationParser;
import org.wamblee.crawler.impl.CrawlerImpl;
+import org.wamblee.crawler.kiss.Program.RecordingResult;
/**
- * The KiSS crawler for automatic recording of interesting TV shows.
+ * The KiSS crawler for automatic recording of interesting TV shows.
*
*/
public class KissCrawler {
private static final Log LOG = LogFactory.getLog(KissCrawler.class);
/**
- * Log file name for the crawler.
+ * Log file name for the crawler.
*/
private static final String LOG_FILE = "kiss.log";
/**
- * Start URL of the electronic programme guide.
+ * Start URL of the electronic programme guide.
*/
private static final String START_URL = "http://epg.kml.kiss-technology.com/login_core.php";
/**
- * Crawler configuration file.
+ * Crawler configuration file.
*/
private static final String CRAWLER_CONFIG = "config.xml";
/**
- * Configuration file describing interesting programs.
+ * Configuration file describing interesting programs.
*/
private static final String PROGRAM_CONFIG = "programs.xml";
/**
- * Regular expression for matching time interval strings in the
- * retrieved pages.
+ * Regular expression for matching time interval strings in the retrieved
+ * pages.
*/
private static final String TIME_REGEX = "([0-9]{2}):([0-9]{2})[^0-9]*([0-9]{2}):([0-9]{2}).*";
/**
- * Compiled pattern for the time regular expression.
+ * Compiled pattern for the time regular expression.
*/
private Pattern _pattern;
/**
- * Runs the KiSS crawler.
- * @param aArgs Arguments, currently all ignored because they are hardcoded.
- * @throws Exception In case of problems.
+ * Runs the KiSS crawler.
+ *
+ * @param aArgs
+ * Arguments, currently all ignored because they are hardcoded.
+ * @throws Exception
+ * In case of problems.
*/
public static void main(String[] aArgs) throws Exception {
new KissCrawler(START_URL, CRAWLER_CONFIG, PROGRAM_CONFIG);
}
-
+
/**
- * Constructs the crawler. This retrieves the TV guide by crawling the
- * KiSS EPG guide, filters the guide for interesting programs, tries to
- * record them, and sends a summary mail to the user.
- * @param aStartUrl Start URL of the electronic programme guide.
- * @param aCrawlerConfig Configuration file for the crawler.
- * @param aProgramConfig Configuration file describing interesting shows.
- * @throws IOException In case of problems reading files.
- * @throws MessagingException In case of problems sending a mail notification.
+ * Constructs the crawler. This retrieves the TV guide by crawling the KiSS
+ * EPG guide, filters the guide for interesting programs, tries to record
+ * them, and sends a summary mail to the user.
+ *
+ * @param aStartUrl
+ * Start URL of the electronic programme guide.
+ * @param aCrawlerConfig
+ * Configuration file for the crawler.
+ * @param aProgramConfig
+ * Configuration file describing interesting shows.
+ * @throws IOException
+ * In case of problems reading files.
+ * @throws MessagingException
+ * In case of problems sending a mail notification.
*/
public KissCrawler(String aStartUrl, String aCrawlerConfig,
String aProgramConfig) throws IOException, MessagingException {
try {
HttpClient client = new HttpClient();
- client.getHostConfiguration().setProxy("127.0.0.1", 3128);
+ // client.getHostConfiguration().setProxy("127.0.0.1", 3128);
Crawler crawler = createCrawler(aCrawlerConfig, os, client);
}
/**
- * Records interesting shows.
- * @param aProgramCondition Condition determining which shows are interesting.
- * @param aGuide Television guide.
- * @throws MessagingException In case of problems sending a summary mail.
+ * Records interesting shows.
+ *
+ * @param aProgramCondition
+ * Condition determining which shows are interesting.
+ * @param aGuide
+ * Television guide.
+ * @throws MessagingException
+ * In case of problems sending a summary mail.
*/
private void recordInterestingShows(Condition<Program> aProgramCondition,
TVGuide aGuide) throws MessagingException {
MatchVisitor matcher = new MatchVisitor(aProgramCondition);
aGuide.accept(matcher);
List<Program> programs = matcher.getMatches();
- String recorded = "";
- String notRecorded = "";
- String failures = "";
+ EnumMap<RecordingResult, List<Program>> messages = new EnumMap<RecordingResult, List<Program>>(
+ RecordingResult.class);
+ for (RecordingResult result: RecordingResult.values()) {
+ messages.put(result, new ArrayList<Program>());
+ }
for (Program program : programs) {
try {
- boolean result = program.record();
- if (result) {
- recorded += "\n" + program;
- } else {
- notRecorded += "\n" + program;
- }
+ Program.RecordingResult result = program.record();
+ messages.get(result).add(program);
} catch (PageException e) {
LOG.info("Attempt to record " + program + " failed.");
- failures += "\n" + program.toString() + ": " + e.getMessage();
+ messages.get(RecordingResult.ERROR).add(program);
}
}
String msg = "Summary of KiSS crawler: \n\n\n";
-
- if (recorded.length() > 0) {
- msg += "Recorded programs:\n\n" + recorded + "\n\n";
- }
- if (notRecorded.length() > 0) {
- msg += "Not recorded programs:\n\n" + notRecorded + "\n\n";
- }
- if (recorded.length() == 0 && notRecorded.length() == 0) {
- msg += "No suitable programs found";
+
+
+ for (RecordingResult result: RecordingResult.values()) {
+ if ( messages.get(result).size() > 0 ) {
+ msg += result.getDescription() + "\n\n";
+ for (Program program: messages.get(result)) {
+ msg += program + "\n";
+ }
+ }
}
- if (failures.length() > 0) {
- msg += "Failures:\n\n" + failures;
+ if ( programs.size() == 0 ) {
+ msg += "No suitable programs found";
}
+
System.out.println(msg);
sendMail(msg);
}
/**
- * Creates the crawler.
- * @param aCrawlerConfig Crawler configuration file.
- * @param aOs Logging output stream for the crawler.
- * @param aClient HTTP Client to use.
- * @return Crawler.
- * @throws FileNotFoundException In case configuration files cannot be found.
+ * Creates the crawler.
+ *
+ * @param aCrawlerConfig
+ * Crawler configuration file.
+ * @param aOs
+ * Logging output stream for the crawler.
+ * @param aClient
+ * HTTP Client to use.
+ * @return Crawler.
+ * @throws FileNotFoundException
+ * In case configuration files cannot be found.
*/
private Crawler createCrawler(String aCrawlerConfig, PrintStream aOs,
HttpClient aClient) throws FileNotFoundException {
}
/**
- * Gets the start page of the electronic programme guide. This involves login and
- * navigation to a suitable start page after logging in.
- * @param aStartUrl URL of the electronic programme guide.
- * @param aCrawler Crawler to use.
- * @return Starting page.
+ * Gets the start page of the electronic programme guide. This involves
+ * login and navigation to a suitable start page after logging in.
+ *
+ * @param aStartUrl
+ * URL of the electronic programme guide.
+ * @param aCrawler
+ * Crawler to use.
+ * @return Starting page.
*/
private Page getStartPage(String aStartUrl, Crawler aCrawler) {
try {
}
/**
- * Creates the TV guide by web crawling.
- * @param aPage Starting page.
- * @return TV guide.
+ * Creates the TV guide by web crawling.
+ *
+ * @param aPage
+ * Starting page.
+ * @return TV guide.
*/
private TVGuide createGuide(Page aPage) {
LOG.info("Obtaining full TV guide");
Channel channel = createChannel(action.getName(), action
.execute().getAction("right-now").execute());
channels.add(channel);
+ if ( SystemProperties.isDebugMode() ) {
+ break; // Only one channel is crawled.
+ }
} catch (PageException e) {
LOG.error("Could not create channel information for '"
+ action.getName() + "'", e);
}
/**
- * Create channel information for a specific channel.
- * @param aChannel Channel name.
- * @param aPage Starting page for the channel.
- * @return Channel.
+ * Create channel information for a specific channel.
+ *
+ * @param aChannel
+ * Channel name.
+ * @param aPage
+ * Starting page for the channel.
+ * @return Channel.
*/
private Channel createChannel(String aChannel, Page aPage) {
LOG.info("Obtaining program for " + aChannel);
Time end = new Time(Integer.parseInt(matcher.group(3)), Integer
.parseInt(matcher.group(4)));
TimeInterval interval = new TimeInterval(begin, end);
- // Page programInfo = action.execute();
- // String description =
- // programInfo.getContent().element("description").getText().trim();
- // String keywords =
- // programInfo.getContent().element("keywords").getText().trim();
String description = "";
String keywords = "";
+ if (!SystemProperties.isNoProgramDetailsRequired()) {
+     // Fetching the details page is best effort: when it fails, the
+     // program is still created below with the empty description and
+     // keywords assigned above.
+     try {
+         Page programInfo = action.execute();
+         description = programInfo.getContent().element(
+                 "description").getText().trim();
+         keywords = programInfo.getContent().element("keywords")
+                 .getText().trim();
+     } catch (PageException e) {
+         // Pass the exception to the logger so the cause of the
+         // failure is not lost.
+         LOG.warn("Program details could not be determined for '"
+                 + action.getName() + "'", e);
+     }
+ }
Program program = new Program(aChannel, action.getName(),
description, keywords, interval, action);
}
/**
- * Sends a summary mail to the user.
- * @param aText Text of the mail.
- * @throws MessagingException In case of problems sending mail.
+ * Sends a summary mail to the user.
+ *
+ * @param aText
+ * Text of the mail.
+ * @throws MessagingException
+ * In case of problems sending mail.
*/
private void sendMail(String aText) throws MessagingException {
Properties props = new Properties();
message.setText(aText);
Transport.send(message);
}
+
}
--- /dev/null
+/*
+ * Copyright 2005 the original author or authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.wamblee.conditions;
+
+import java.lang.reflect.InvocationTargetException;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.apache.commons.beanutils.PropertyUtils;
+
+/**
+ * Condition to check whether a given property value matches a certain
+ * regular expression.
+ */
+public class PropertyRegexCondition<T> implements Condition<T> {
+
+    /**
+     * Name of the property whose value is tested.
+     */
+    private String _property;
+
+    /**
+     * Compiled regular expression the property value is matched against.
+     */
+    private Pattern _regex;
+
+    /**
+     * Constructs the condition.
+     *
+     * @param aProperty
+     *            Name of the property to examine.
+     * @param aRegex
+     *            Regular expression to use; it is compiled once, here.
+     */
+    public PropertyRegexCondition(String aProperty, String aRegex) {
+        _regex = Pattern.compile(aRegex);
+        _property = aProperty;
+    }
+
+    /*
+     * (non-Javadoc)
+     *
+     * @see org.wamblee.conditions.Condition#matches(T)
+     */
+    public boolean matches(T aObject) {
+        Object propertyValue;
+        try {
+            propertyValue = PropertyUtils.getProperty(aObject, _property);
+        } catch (IllegalAccessException accessProblem) {
+            throw new RuntimeException(accessProblem.getMessage(),
+                    accessProblem);
+        } catch (InvocationTargetException getterFailure) {
+            throw new RuntimeException(getterFailure.getMessage(),
+                    getterFailure);
+        } catch (NoSuchMethodException missingGetter) {
+            throw new RuntimeException(missingGetter.getMessage(),
+                    missingGetter);
+        }
+
+        // String concatenation renders a null property value as the text
+        // "null"; the expression has to match that whole text.
+        String text = propertyValue + "";
+        Matcher wholeValue = _regex.matcher(text);
+        return wholeValue.matches();
+    }
+}