--- /dev/null
+/*
+ * Copyright 2005 the original author or authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.wamblee.crawler.kiss.main;
+
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.PrintStream;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Date;
+import java.util.List;
+import java.util.Properties;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import javax.mail.MessagingException;
+import javax.mail.Session;
+import javax.mail.internet.InternetAddress;
+import javax.xml.transform.TransformerException;
+
+import org.apache.commons.httpclient.HttpClient;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.commons.mail.EmailException;
+import org.apache.commons.mail.HtmlEmail;
+import org.apache.xml.serialize.OutputFormat;
+import org.apache.xml.serialize.XMLSerializer;
+import org.w3c.dom.Document;
+import org.wamblee.crawler.Action;
+import org.wamblee.crawler.Configuration;
+import org.wamblee.crawler.Crawler;
+import org.wamblee.crawler.Page;
+import org.wamblee.crawler.PageException;
+import org.wamblee.crawler.impl.ConfigurationParser;
+import org.wamblee.crawler.impl.CrawlerImpl;
+import org.wamblee.crawler.kiss.guide.Channel;
+import org.wamblee.crawler.kiss.guide.PrintVisitor;
+import org.wamblee.crawler.kiss.guide.Program;
+import org.wamblee.crawler.kiss.guide.TVGuide;
+import org.wamblee.crawler.kiss.guide.Time;
+import org.wamblee.crawler.kiss.guide.TimeInterval;
+import org.wamblee.crawler.kiss.notification.NotificationException;
+import org.wamblee.crawler.kiss.notification.Notifier;
+import org.wamblee.io.FileResource;
+import org.wamblee.xml.XSLT;
+
+/**
+ * The KiSS crawler for automatic recording of interesting TV shows.
+ *
+ */
+public class KissCrawler {
+
+ /**
+  * Logger for this class.
+  */
+ private static final Log LOG = LogFactory.getLog(KissCrawler.class);
+
+ /**
+  * Name of the file that receives the crawler's output. The constructor
+  * opens it (relative to the working directory) and hands the resulting
+  * stream to the crawler configuration parser.
+  */
+ private static final String LOG_FILE = "kiss.log";
+
+ /**
+  * Start URL of the electronic programme guide, used by {@code main}.
+  */
+ private static final String START_URL = "http://epg.kml.kiss-technology.com/login_core.php";
+
+ /**
+  * Name of the crawler configuration file, used by {@code main}.
+  */
+ private static final String CRAWLER_CONFIG = "config.xml";
+
+ /**
+  * Name of the configuration file describing interesting programs, used by
+  * {@code main}.
+  */
+ private static final String PROGRAM_CONFIG = "programs.xml";
+
+ /**
+  * Regular expression for matching time interval strings in the retrieved
+  * pages. It expects a begin time {@code HH:MM}, any run of non-digit
+  * characters, an end time {@code HH:MM} and arbitrary trailing text.
+  * Groups 1 and 2 capture the begin hour and minute, groups 3 and 4 the end
+  * hour and minute.
+  */
+ private static final String TIME_REGEX = "([0-9]{2}):([0-9]{2})[^0-9]*([0-9]{2}):([0-9]{2}).*";
+
+ /**
+  * Compiled pattern for {@link #TIME_REGEX}; assigned in the constructor.
+  */
+ private Pattern _pattern;
+
+ /**
+  * Program entry point. Starts a complete crawler run by constructing a
+  * {@link KissCrawler} with the built-in start URL and configuration file
+  * names.
+  *
+  * @param aArgs
+  *            Command line arguments; not used because all settings are
+  *            hardcoded.
+  * @throws Exception
+  *             In case of problems.
+  */
+ public static void main(String[] aArgs) throws Exception {
+     // All work is done by the constructor; the instance itself is not needed.
+     new KissCrawler(
+             START_URL,
+             CRAWLER_CONFIG,
+             PROGRAM_CONFIG);
+ }
+
+ /**
+  * Constructs the crawler and immediately performs a complete run: it
+  * creates the crawler from its configuration, parses the program
+  * configuration, retrieves the TV guide by crawling the KiSS EPG guide,
+  * prints the guide to standard output, applies the program filters and
+  * sends the resulting report through the configured notifier.
+  *
+  * Crawler output is written to {@link #LOG_FILE}; that file is always
+  * flushed and closed, even when the run fails.
+  *
+  * @param aStartUrl
+  *            Start URL of the electronic programme guide.
+  * @param aCrawlerConfig
+  *            Configuration file for the crawler.
+  * @param aProgramConfig
+  *            Configuration file describing interesting shows.
+  * @throws IOException
+  *             In case of problems reading files.
+  * @throws MessagingException
+  *             In case of problems sending a mail notification.
+  */
+ public KissCrawler(String aStartUrl, String aCrawlerConfig,
+         String aProgramConfig) throws IOException, MessagingException {
+
+     _pattern = Pattern.compile(TIME_REGEX);
+
+     FileOutputStream fos = new FileOutputStream(new File(LOG_FILE));
+     PrintStream os = new PrintStream(fos);
+
+     try {
+         HttpClient client = new HttpClient();
+         //client.getHostConfiguration().setProxy("127.0.0.1", 3128);
+
+         Crawler crawler = createCrawler(aCrawlerConfig, os, client);
+
+         ProgramConfigurationParser parser = new ProgramConfigurationParser();
+         InputStream programConfigFile = new FileInputStream(new File(
+                 aProgramConfig));
+         try {
+             parser.parse(programConfigFile);
+         } finally {
+             // Release the file handle as soon as parsing is done, whether
+             // or not it succeeded.
+             programConfigFile.close();
+         }
+         List<ProgramFilter> programFilters = parser.getFilters();
+
+         Page page = getStartPage(aStartUrl, crawler);
+         TVGuide guide = createGuide(page);
+         PrintVisitor printer = new PrintVisitor(System.out);
+         guide.accept(printer);
+         processResults(programFilters, guide, parser.getNotifier());
+     } finally {
+         // Closing the PrintStream also closes the underlying file stream.
+         os.flush();
+         os.close();
+         System.out.println("Output written on '" + LOG_FILE + "'");
+     }
+ }
+
+ /**
+ * Records interesting shows.
+ *
+ * @param aProgramCondition
+ * Condition determining which shows are interesting.
+ * @param aGuide
+ * Television guide.
+ * @throws MessagingException
+ * In case of problems sending a summary mail.
+ */
+ private void processResults(List<ProgramFilter> aProgramCondition,
+ TVGuide aGuide, Notifier aNotifier) throws MessagingException {
+ ProgramActionExecutor executor = new ProgramActionExecutor();
+ for (ProgramFilter filter : aProgramCondition) {
+ List<Program> programs = filter.apply(aGuide);
+ ProgramAction action = filter.getAction();
+ for (Program program : programs) {
+ action.execute(program, executor);
+ }
+ }
+ executor.commit();
+ try {
+ aNotifier.send(executor.getXmlReport());
+ } catch (NotificationException e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ /**
+  * Creates the crawler from its configuration file. The configuration file
+  * is closed again once the crawler has been created or creation has
+  * failed.
+  *
+  * @param aCrawlerConfig
+  *            Crawler configuration file.
+  * @param aOs
+  *            Logging output stream for the crawler.
+  * @param aClient
+  *            HTTP Client to use.
+  * @return Crawler.
+  * @throws FileNotFoundException
+  *             In case configuration files cannot be found.
+  */
+ private Crawler createCrawler(String aCrawlerConfig, PrintStream aOs,
+         HttpClient aClient) throws FileNotFoundException {
+     ConfigurationParser parser = new ConfigurationParser(aOs);
+     InputStream crawlerConfigFile = new FileInputStream(new File(
+             aCrawlerConfig));
+     try {
+         Configuration config = parser.parse(crawlerConfigFile);
+         return new CrawlerImpl(aClient, config);
+     } finally {
+         try {
+             crawlerConfigFile.close();
+         } catch (IOException e) {
+             // A failed close must not hide the result (or failure) of
+             // parsing, and the method signature only allows
+             // FileNotFoundException, so log it and carry on.
+             LOG.warn("Could not close crawler configuration file '"
+                     + aCrawlerConfig + "'", e);
+         }
+     }
+ }
+
+ /**
+ * Gets the start page of the electronic programme guide. This involves
+ * login and navigation to a suitable start page after logging in.
+ *
+ * @param aStartUrl
+ * URL of the electronic programme guide.
+ * @param aCrawler
+ * Crawler to use.
+ * @return Starting page.
+ */
+ private Page getStartPage(String aStartUrl, Crawler aCrawler) {
+ try {
+ Page page = aCrawler.getPage(aStartUrl);
+ return page.getAction("channels-favorites").execute();
+ } catch (PageException e) {
+ throw new RuntimeException(
+ "Could not login to electronic program guide", e);
+ }
+ }
+
+ /**
+  * Builds the TV guide by web crawling. Every action on the starting page
+  * is treated as one channel: the action is executed, the "right-now"
+  * action of the resulting page is followed, and the page reached that way
+  * is turned into a {@link Channel}. A channel that fails with a
+  * {@link PageException} is logged and left out of the guide. In debug mode
+  * crawling stops after the first channel that was obtained successfully.
+  *
+  * @param aPage
+  *            Starting page.
+  * @return TV guide.
+  */
+ private TVGuide createGuide(Page aPage) {
+     LOG.info("Obtaining full TV guide");
+     Action[] channelActions = aPage.getActions();
+     List<Channel> collected = new ArrayList<Channel>();
+     for (int i = 0; i < channelActions.length; i++) {
+         Action channelAction = channelActions[i];
+         try {
+             LOG.info("Getting channel info for '" + channelAction.getName() + "'");
+             String channelName = channelAction.getName();
+             Page channelPage = channelAction.execute();
+             Page rightNowPage = channelPage.getAction("right-now").execute();
+             collected.add(createChannel(channelName, rightNowPage));
+             if (SystemProperties.isDebugMode()) {
+                 break; // Only one channel is crawled.
+             }
+         } catch (PageException pageFailure) {
+             LOG.error("Could not create channel information for '"
+                     + channelAction.getName() + "'", pageFailure);
+         }
+     }
+     return new TVGuide(collected);
+ }
+
+ /**
+  * Creates the channel information for a specific channel. Each action on
+  * the given page is a candidate program: its "time" element is matched
+  * against the time interval pattern, and actions whose time text does not
+  * match are skipped. Unless program details are switched off through the
+  * system properties, the action is executed to read the "description" and
+  * "keywords" elements of the program page; if that fails with a
+  * {@link PageException} a warning is logged and the program is still added
+  * with whatever details were obtained so far.
+  *
+  * @param aChannel
+  *            Channel name.
+  * @param aPage
+  *            Starting page for the channel.
+  * @return Channel.
+  */
+ private Channel createChannel(String aChannel, Page aPage) {
+     LOG.info("Obtaining program for " + aChannel);
+     Action[] programActions = aPage.getActions();
+     List<Program> collected = new ArrayList<Program>();
+     for (int i = 0; i < programActions.length; i++) {
+         Action programAction = programActions[i];
+         String timeText = programAction.getContent().element("time")
+                 .getText().trim();
+         Matcher timeMatch = _pattern.matcher(timeText);
+         if (!timeMatch.matches()) {
+             // No recognizable time interval: not a program entry.
+             continue;
+         }
+
+         Time from = new Time(Integer.parseInt(timeMatch.group(1)),
+                 Integer.parseInt(timeMatch.group(2)));
+         Time until = new Time(Integer.parseInt(timeMatch.group(3)),
+                 Integer.parseInt(timeMatch.group(4)));
+         TimeInterval slot = new TimeInterval(from, until);
+
+         String description = "";
+         String keywords = "";
+         boolean detailsWanted = !SystemProperties.isNoProgramDetailsRequired();
+         if (detailsWanted) {
+             try {
+                 Page detailsPage = programAction.execute();
+                 description = detailsPage.getContent().element(
+                         "description").getText().trim();
+                 keywords = detailsPage.getContent().element("keywords")
+                         .getText().trim();
+             } catch (PageException pageFailure) {
+                 LOG.warn(
+                         "Program details could not be determined for '"
+                                 + programAction.getName() + "'", pageFailure);
+             }
+         }
+
+         Program found = new Program(aChannel, programAction.getName(),
+                 description, keywords, slot, programAction);
+         LOG.info("Got program " + found);
+         collected.add(found);
+     }
+     return new Channel(aChannel, collected);
+ }
+
+ /**
+  * Sends a summary mail to the user. The mail carries the executor's plain
+  * text report as the text part and, as the HTML part, the executor's XML
+  * report transformed with the stylesheet "reportToHtml.xsl". Both reports
+  * are also printed to standard output.
+  *
+  * This method is not invoked from within this class; reporting is done
+  * through the notifier in {@code processResults}.
+  *
+  * @param aExecutor
+  *            Executor providing the text and XML reports to send.
+  * @throws MessagingException
+  *             In case of problems sending mail.
+  */
+ private void sendMail(ProgramActionExecutor aExecutor) throws MessagingException {
+     String textReport = aExecutor.getReport();
+     // Generate the XML once and reuse it for both logging and the mail.
+     String xmlReport = aExecutor.getXmlReport().asXML();
+     System.out.println("Text report: \n" + textReport);
+     System.out.println("XML report:\n" + xmlReport);
+
+     Properties props = new Properties();
+     props.put("mail.transport.protocol", "smtp");
+     props.put("mail.smtp.host", "falcon");
+     props.put("mail.smtp.port", "25");
+
+     Session mailSession = Session.getInstance(props);
+     InternetAddress from = new InternetAddress("erik@brakkee.org");
+
+     HtmlEmail mail = new HtmlEmail();
+     mail.setMailSession(mailSession);
+     try {
+         mail.setFrom("erik@brakkee.org");
+         mail.setTo(Arrays.asList(new InternetAddress[] { from }));
+         mail.setSentDate(new Date());
+         mail.setSubject("KiSS Crawler Update");
+
+         // Use an explicit charset instead of the platform default so that
+         // non-ASCII characters survive on any platform.
+         // NOTE(review): assumes the XML report declares UTF-8 (or no
+         // encoding) in its XML declaration - confirm against getXmlReport().
+         Document document = new XSLT().transform(xmlReport.getBytes("UTF-8"),
+                 new FileResource(new File("reportToHtml.xsl")));
+         ByteArrayOutputStream xhtml = new ByteArrayOutputStream();
+         XMLSerializer serializer = new XMLSerializer(xhtml, new OutputFormat());
+         serializer.serialize(document);
+         // A default OutputFormat serializes as UTF-8, so decode as UTF-8.
+         mail.setHtmlMsg(xhtml.toString("UTF-8"));
+         mail.setTextMsg(textReport);
+         mail.send();
+     } catch (EmailException e) {
+         throw new RuntimeException(e);
+     } catch (TransformerException e) {
+         throw new RuntimeException(e);
+     } catch (IOException e) {
+         throw new RuntimeException(e);
+     }
+ }
+
+}