X-Git-Url: http://wamblee.org/gitweb/?a=blobdiff_plain;f=crawler%2Fkiss%2Fsrc%2Forg%2Fwamblee%2Fcrawler%2Fkiss%2FKissCrawler.java;h=2f47c695a7a747b4ad00fa870d4b4923726f7a1a;hb=b71170b295ce9b576b62c2e325bc0f787c5a0826;hp=fc076a5abaf7294e4fc96d747ee39674955d999e;hpb=8f086d32cd6945b54c00eeb6b701b272304a3287;p=utils diff --git a/crawler/kiss/src/org/wamblee/crawler/kiss/KissCrawler.java b/crawler/kiss/src/org/wamblee/crawler/kiss/KissCrawler.java index fc076a5a..2f47c695 100644 --- a/crawler/kiss/src/org/wamblee/crawler/kiss/KissCrawler.java +++ b/crawler/kiss/src/org/wamblee/crawler/kiss/KissCrawler.java @@ -25,6 +25,7 @@ import java.io.InputStream; import java.io.PrintStream; import java.util.ArrayList; import java.util.Date; +import java.util.EnumMap; import java.util.List; import java.util.Properties; import java.util.regex.Matcher; @@ -40,7 +41,6 @@ import javax.mail.internet.MimeMessage; import org.apache.commons.httpclient.HttpClient; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.dom4j.Element; import org.wamblee.conditions.Condition; import org.wamblee.crawler.Action; import org.wamblee.crawler.Configuration; @@ -49,9 +49,10 @@ import org.wamblee.crawler.Page; import org.wamblee.crawler.PageException; import org.wamblee.crawler.impl.ConfigurationParser; import org.wamblee.crawler.impl.CrawlerImpl; +import org.wamblee.crawler.kiss.Program.RecordingResult; /** - * The KiSS crawler for automatic recording of interesting TV shows. + * The KiSS crawler for automatic recording of interesting TV shows. * */ public class KissCrawler { @@ -59,54 +60,63 @@ public class KissCrawler { private static final Log LOG = LogFactory.getLog(KissCrawler.class); /** - * Log file name for the crawler. + * Log file name for the crawler. */ private static final String LOG_FILE = "kiss.log"; /** - * Start URL of the electronic programme guide. + * Start URL of the electronic programme guide. */ private static final String START_URL = "http://epg.kml.kiss-technology.com/login_core.php"; /** - * Crawler configuration file. + * Crawler configuration file. */ private static final String CRAWLER_CONFIG = "config.xml"; /** - * Configuration file describing interesting programs. + * Configuration file describing interesting programs. */ private static final String PROGRAM_CONFIG = "programs.xml"; /** - * Regular expression for matching time interval strings in the - * retrieved pages. + * Regular expression for matching time interval strings in the retrieved + * pages. */ private static final String TIME_REGEX = "([0-9]{2}):([0-9]{2})[^0-9]*([0-9]{2}):([0-9]{2}).*"; /** - * Compiled pattern for the time regular expression. + * Compiled pattern for the time regular expression. */ private Pattern _pattern; /** - * Runs the KiSS crawler. - * @param aArgs Arguments, currently all ignored because they are hardcoded. - * @throws Exception In case of problems. + * Runs the KiSS crawler. + * + * @param aArgs + * Arguments, currently all ignored because they are hardcoded. + * @throws Exception + * In case of problems. */ public static void main(String[] aArgs) throws Exception { new KissCrawler(START_URL, CRAWLER_CONFIG, PROGRAM_CONFIG); } - + /** - * Constructs the crawler. This retrieves the TV guide by crawling the - * KiSS EPG guide, filters the guide for interesting programs, tries to - * record them, and sends a summary mail to the user. - * @param aStartUrl Start URL of the electronic programme guide. - * @param aCrawlerConfig Configuration file for the crawler. - * @param aProgramConfig Configuration file describing interesting shows. - * @throws IOException In case of problems reading files. - * @throws MessagingException In case of problems sending a mail notification. + * Constructs the crawler. This retrieves the TV guide by crawling the KiSS + * EPG guide, filters the guide for interesting programs, tries to record + * them, and sends a summary mail to the user. + * + * @param aStartUrl + * Start URL of the electronic programme guide. + * @param aCrawlerConfig + * Configuration file for the crawler. + * @param aProgramConfig + * Configuration file describing interesting shows. + * @throws IOException + * In case of problems reading files. + * @throws MessagingException + * In case of problems sending a mail notification. */ public KissCrawler(String aStartUrl, String aCrawlerConfig, String aProgramConfig) throws IOException, MessagingException { @@ -118,7 +128,7 @@ public class KissCrawler { try { HttpClient client = new HttpClient(); - client.getHostConfiguration().setProxy("127.0.0.1", 3128); + // client.getHostConfiguration().setProxy("127.0.0.1", 3128); Crawler crawler = createCrawler(aCrawlerConfig, os, client); @@ -140,57 +150,59 @@ public class KissCrawler { } /** - * Records interesting shows. - * @param aProgramCondition Condition determining which shows are interesting. - * @param aGuide Television guide. - * @throws MessagingException In case of problems sending a summary mail. + * Records interesting shows. + * + * @param aProgramCondition + * Condition determining which shows are interesting. + * @param aGuide + * Television guide. + * @throws MessagingException + * In case of problems sending a summary mail. */ private void recordInterestingShows(Condition aProgramCondition, TVGuide aGuide) throws MessagingException { MatchVisitor matcher = new MatchVisitor(aProgramCondition); aGuide.accept(matcher); List programs = matcher.getMatches(); - String recorded = ""; - String notRecorded = ""; - String failures = ""; + EnumMap> messages = new EnumMap>( + RecordingResult.class); + for (RecordingResult result : RecordingResult.values()) { + messages.put(result, new ArrayList()); + } for (Program program : programs) { - try { - boolean result = program.record(); - if (result) { - recorded += "\n" + program; - } else { - notRecorded += "\n" + program; - } - } catch (PageException e) { - LOG.info("Attempt to record " + program + " failed."); - failures += "\n" + program.toString() + ": " + e.getMessage(); - } + Program.RecordingResult result = program.record(); + messages.get(result).add(program); } String msg = "Summary of KiSS crawler: \n\n\n"; - if (recorded.length() > 0) { - msg += "Recorded programs:\n\n" + recorded + "\n\n"; - } - if (notRecorded.length() > 0) { - msg += "Not recorded programs:\n\n" + notRecorded + "\n\n"; + for (RecordingResult result : RecordingResult.values()) { + if (messages.get(result).size() > 0) { + msg += result.getDescription() + "\n\n"; + for (Program program : messages.get(result)) { + msg += program + "\n"; + } + } } - if (recorded.length() == 0 && notRecorded.length() == 0) { + if (programs.size() == 0) { msg += "No suitable programs found"; } - if (failures.length() > 0) { - msg += "Failures:\n\n" + failures; - } + System.out.println(msg); sendMail(msg); } /** - * Creates the crawler. - * @param aCrawlerConfig Crawler configuration file. - * @param aOs Logging output stream for the crawler. - * @param aClient HTTP Client to use. - * @return Crawler. - * @throws FileNotFoundException In case configuration files cannot be found. + * Creates the crawler. + * + * @param aCrawlerConfig + * Crawler configuration file. + * @param aOs + * Logging output stream for the crawler. + * @param aClient + * HTTP Client to use. + * @return Crawler. + * @throws FileNotFoundException + * In case configuration files cannot be found. */ private Crawler createCrawler(String aCrawlerConfig, PrintStream aOs, HttpClient aClient) throws FileNotFoundException { @@ -203,11 +215,14 @@ public class KissCrawler { } /** - * Gets the start page of the electronic programme guide. This involves login and - * navigation to a suitable start page after logging in. - * @param aStartUrl URL of the electronic programme guide. - * @param aCrawler Crawler to use. - * @return Starting page. + * Gets the start page of the electronic programme guide. This involves + * login and navigation to a suitable start page after logging in. + * + * @param aStartUrl + * URL of the electronic programme guide. + * @param aCrawler + * Crawler to use. + * @return Starting page. */ private Page getStartPage(String aStartUrl, Crawler aCrawler) { try { @@ -220,9 +235,11 @@ public class KissCrawler { } /** - * Creates the TV guide by web crawling. - * @param aPage Starting page. - * @return TV guide. + * Creates the TV guide by web crawling. + * + * @param aPage + * Starting page. + * @return TV guide. */ private TVGuide createGuide(Page aPage) { LOG.info("Obtaining full TV guide"); @@ -234,6 +251,9 @@ public class KissCrawler { Channel channel = createChannel(action.getName(), action .execute().getAction("right-now").execute()); channels.add(channel); + if (SystemProperties.isDebugMode()) { + break; // Only one channel is crawled. + } } catch (PageException e) { LOG.error("Could not create channel information for '" + action.getName() + "'", e); @@ -243,10 +263,13 @@ public class KissCrawler { } /** - * Create channel information for a specific channel. - * @param aChannel Channel name. - * @param aPage Starting page for the channel. - * @return Channel. + * Create channel information for a specific channel. + * + * @param aChannel + * Channel name. + * @param aPage + * Starting page for the channel. + * @return Channel. */ private Channel createChannel(String aChannel, Page aPage) { LOG.info("Obtaining program for " + aChannel); @@ -261,13 +284,21 @@ public class KissCrawler { Time end = new Time(Integer.parseInt(matcher.group(3)), Integer .parseInt(matcher.group(4))); TimeInterval interval = new TimeInterval(begin, end); - // Page programInfo = action.execute(); - // String description = - // programInfo.getContent().element("description").getText().trim(); - // String keywords = - // programInfo.getContent().element("keywords").getText().trim(); String description = ""; String keywords = ""; + if (!SystemProperties.isNoProgramDetailsRequired()) { + try { + Page programInfo = action.execute(); + description = programInfo.getContent().element( + "description").getText().trim(); + keywords = programInfo.getContent().element("keywords") + .getText().trim(); + } catch (PageException e) { + LOG + .warn("Program details coul dnot be determined for '" + + action.getName() + "'"); + } + } Program program = new Program(aChannel, action.getName(), description, keywords, interval, action); @@ -279,9 +310,12 @@ public class KissCrawler { } /** - * Sends a summary mail to the user. - * @param aText Text of the mail. - * @throws MessagingException In case of problems sending mail. + * Sends a summary mail to the user. + * + * @param aText + * Text of the mail. + * @throws MessagingException + * In case of problems sending mail. */ private void sendMail(String aText) throws MessagingException { Properties props = new Properties(); @@ -300,4 +334,5 @@ public class KissCrawler { message.setText(aText); Transport.send(message); } + }