package org.wamblee.crawler.kiss.main;
-import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
-import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
-import java.io.PrintStream;
import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Date;
import java.util.List;
-import java.util.Properties;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.mail.MessagingException;
-import javax.mail.Session;
-import javax.mail.internet.InternetAddress;
-import javax.xml.transform.TransformerException;
import org.apache.commons.httpclient.HttpClient;
+import org.apache.commons.httpclient.NameValuePair;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
-import org.apache.commons.mail.EmailException;
-import org.apache.commons.mail.HtmlEmail;
-import org.apache.xml.serialize.OutputFormat;
-import org.apache.xml.serialize.XMLSerializer;
-import org.w3c.dom.Document;
import org.wamblee.crawler.Action;
import org.wamblee.crawler.Configuration;
import org.wamblee.crawler.Crawler;
import org.wamblee.crawler.kiss.guide.TimeInterval;
import org.wamblee.crawler.kiss.notification.NotificationException;
import org.wamblee.crawler.kiss.notification.Notifier;
-import org.wamblee.io.FileResource;
-import org.wamblee.xml.XSLT;
+import org.wamblee.general.BeanFactory;
+import org.wamblee.xml.ClasspathUriResolver;
+import org.wamblee.xml.XslTransformer;
/**
* The KiSS crawler for automatic recording of interesting TV shows.
private static final Log LOG = LogFactory.getLog(KissCrawler.class);
- /**
- * Log file name for the crawler.
- */
- private static final String LOG_FILE = "kiss.log";
-
/**
* Start URL of the electronic programme guide.
*/
- private static final String START_URL = "http://epg.kml.kiss-technology.com/login_core.php";
-
- /**
- * Crawler configuration file.
- */
- private static final String CRAWLER_CONFIG = "config.xml";
-
+ private static final String START_URL = "http://epg.kml.kiss-technology.com/login.php";
+
/**
- * Configuration file describing interesting programs.
+ * Default socket timeout to use.
*/
- private static final String PROGRAM_CONFIG = "programs.xml";
+ private static final int SOCKET_TIMEOUT = 10000;
/**
* Regular expression for matching time interval strings in the retrieved
* pages.
*/
- private static final String TIME_REGEX = "([0-9]{2}):([0-9]{2})[^0-9]*([0-9]{2}):([0-9]{2}).*";
+ private static final String TIME_REGEX = "[^0-9]*([0-9]{2}):([0-9]{2})[^0-9]*([0-9]{2}):([0-9]{2}).*";
/**
* Compiled pattern for the time regular expression.
* In case of problems.
*/
public static void main(String[] aArgs) throws Exception {
- new KissCrawler(START_URL, CRAWLER_CONFIG, PROGRAM_CONFIG);
+ // Both configuration files are mandatory: fail with a usage message
+ // instead of an ArrayIndexOutOfBoundsException when one is missing.
+ if (aArgs.length < 2) {
+ throw new IllegalArgumentException(
+ "Usage: KissCrawler <crawler-config> <program-config>");
+ }
+ String crawlerConfig = new File(aArgs[0]).getCanonicalPath();
+ String programConfig = new File(aArgs[1]).getCanonicalPath();
+
+ BeanFactory factory = new StandaloneCrawlerBeanFactory();
+ Notifier notifier = factory.find(Notifier.class);
+ new KissCrawler(START_URL, SOCKET_TIMEOUT, crawlerConfig, programConfig, notifier, new Report());
+ }
+
+ /**
+ * Constructs the crawler. This retrieves the TV guide by crawling the KiSS
+ * EPG guide, filters the guide for interesting programs, tries to record
+ * them, and sends a summary mail to the user.
+ *
+ * Convenience constructor: delegates to the full constructor, passing the
+ * default start URL (START_URL) and default socket timeout (SOCKET_TIMEOUT).
+ *
+ * @param aCrawlerConfig
+ * Configuration file for the crawler.
+ * @param aProgramConfig
+ * Configuration file describing interesting shows.
+ * @param aNotifier Object used to send notifications of the results.
+ * @param aReport Report to use.
+ * @throws IOException
+ * In case of problems reading files.
+ * @throws NotificationException In case notification fails.
+ * @throws PageException In case of problems retrieving the TV guide.
+ */
+ public KissCrawler(String aCrawlerConfig,
+ String aProgramConfig, Notifier aNotifier, Report aReport) throws IOException, NotificationException, PageException {
+ this(START_URL, SOCKET_TIMEOUT, aCrawlerConfig, aProgramConfig, aNotifier, aReport);
}
+
/**
* Constructs the crawler. This retrieves the TV guide by crawling the KiSS
* EPG guide, filters the guide for interesting programs, tries to record
*
* @param aStartUrl
* Start URL of the electronic programme guide.
+ * @param aSocketTimeout Socket timeout to use.
* @param aCrawlerConfig
* Configuration file for the crawler.
* @param aProgramConfig
* Configuration file describing interesting shows.
+ * @param aNotifier Object used to send notifications of the results.
+ * @param aReport Report to use.
* @throws IOException
* In case of problems reading files.
- * @throws MessagingException
- * In case of problems sending a mail notification.
+ * @throws NotificationException In case notification fails.
+ * @throws PageException In case of problems retrieving the TV guide.
*/
- public KissCrawler(String aStartUrl, String aCrawlerConfig,
- String aProgramConfig) throws IOException, MessagingException {
+ public KissCrawler(String aStartUrl, int aSocketTimeout, String aCrawlerConfig,
+ String aProgramConfig, Notifier aNotifier, Report aReport) throws IOException, NotificationException, PageException {
_pattern = Pattern.compile(TIME_REGEX);
- FileOutputStream fos = new FileOutputStream(new File(LOG_FILE));
- PrintStream os = new PrintStream(fos);
-
try {
HttpClient client = new HttpClient();
- //client.getHostConfiguration().setProxy("127.0.0.1", 3128);
+ // client.getHostConfiguration().setProxy("127.0.0.1", 3128);
+ // Honour the timeout supplied by the caller; the SOCKET_TIMEOUT
+ // constant is only the default used by the convenience constructor.
+ client.getParams().setParameter("http.socket.timeout", aSocketTimeout);
+
+ XslTransformer transformer = new XslTransformer(
+ new ClasspathUriResolver());
- Crawler crawler = createCrawler(aCrawlerConfig, os, client);
+ Crawler crawler = createCrawler(aCrawlerConfig, client, transformer);
InputStream programConfigFile = new FileInputStream(new File(
aProgramConfig));
ProgramConfigurationParser parser = new ProgramConfigurationParser();
parser.parse(programConfigFile);
- List<ProgramFilter> programFilters = parser.getFilters();
-
- Page page = getStartPage(aStartUrl, crawler);
- TVGuide guide = createGuide(page);
- PrintVisitor printer = new PrintVisitor(System.out);
- guide.accept(printer);
- processResults(programFilters, guide, parser.getNotifier());
+ List<ProgramFilter> programFilters = parser.getFilters();
+
+ try {
+ Page page = getStartPage(aStartUrl, crawler, aReport);
+ TVGuide guide = createGuide(page, aReport);
+ PrintVisitor printer = new PrintVisitor(System.out);
+ guide.accept(printer);
+ processResults(programFilters, guide, aNotifier,
+ aReport);
+ } catch (PageException e) {
+ // NOTE(review): the failure is recorded in the report, but the
+ // rethrow skips the send() below, so that report is never
+ // delivered - confirm this is intended.
+ aReport.addMessage("Problem getting TV guide", e);
+ LOG.info("Problem getting TV guide", e);
+ throw e;
+ }
+ aNotifier.send(aReport.asXml());
} finally {
- os.flush();
- os.close();
- System.out.println("Output written on '" + LOG_FILE + "'");
+ System.out.println("Crawler finished");
}
}
* In case of problems sending a summary mail.
*/
private void processResults(List<ProgramFilter> aProgramCondition,
- TVGuide aGuide, Notifier aNotifier) throws MessagingException {
- ProgramActionExecutor executor = new ProgramActionExecutor();
+ TVGuide aGuide, Notifier aNotifier, Report aReport) {
+ ProgramActionExecutor executor = new ProgramActionExecutor(aReport);
for (ProgramFilter filter : aProgramCondition) {
List<Program> programs = filter.apply(aGuide);
ProgramAction action = filter.getAction();
}
}
executor.commit();
- try {
- aNotifier.send(executor.getXmlReport());
- } catch (NotificationException e) {
- throw new RuntimeException(e);
- }
+
}
/**
* @throws FileNotFoundException
* In case configuration files cannot be found.
*/
- private Crawler createCrawler(String aCrawlerConfig, PrintStream aOs,
- HttpClient aClient) throws FileNotFoundException {
- ConfigurationParser parser = new ConfigurationParser(aOs);
+ private Crawler createCrawler(String aCrawlerConfig, HttpClient aClient,
+ XslTransformer aTransformer) throws FileNotFoundException {
+ ConfigurationParser parser = new ConfigurationParser(aTransformer);
InputStream crawlerConfigFile = new FileInputStream(new File(
aCrawlerConfig));
Configuration config = parser.parse(crawlerConfigFile);
* URL of the electronic programme guide.
* @param aCrawler
* Crawler to use.
+ * @param aReport
+ * Report to use.
* @return Starting page.
*/
- private Page getStartPage(String aStartUrl, Crawler aCrawler) {
+ private Page getStartPage(String aStartUrl, Crawler aCrawler, Report aReport)
+ throws PageException {
try {
- Page page = aCrawler.getPage(aStartUrl);
- return page.getAction("channels-favorites").execute();
+ Page page = aCrawler.getPage(aStartUrl, new NameValuePair[0]);
+ // getAction() yields null for an unknown action (see the favorites
+ // check below), so guard the login action the same way instead of
+ // failing with a NullPointerException.
+ Action login = page.getAction("login");
+ if (login == null) {
+ String msg = "Login action not found on start page";
+ throw new PageException(msg);
+ }
+ page = login.execute();
+ Action favorites = page.getAction("channels-favorites");
+ if (favorites == null) {
+ String msg = "Channels favorites action not found on start page";
+ throw new PageException(msg);
+ }
+ return favorites.execute();
} catch (PageException e) {
- throw new RuntimeException(
- "Could not login to electronic program guide", e);
+ // Wrap every failure on the login path in one uniform message; the
+ // original problem stays attached as the cause.
+ String msg = "Could not complete login to electronic programme guide.";
+ throw new PageException(msg, e);
}
}
*
* @param aPage
* Starting page.
+ * @param aReport
+ * Report to use.
* @return TV guide.
+ * @throws PageException In case of problem getting the tv guide.
*/
- private TVGuide createGuide(Page aPage) {
+ private TVGuide createGuide(Page aPage, Report aReport) throws PageException {
LOG.info("Obtaining full TV guide");
Action[] actions = aPage.getActions();
+ if ( actions.length == 0 ) {
+ LOG.error("No channels found");
+ throw new PageException("No channels found");
+ }
List<Channel> channels = new ArrayList<Channel>();
for (Action action : actions) {
try {
LOG.info("Getting channel info for '" + action.getName() + "'");
- Channel channel = createChannel(action.getName(), action
- .execute().getAction("right-now").execute());
+ Action rightNow = action.execute().getAction("right-now");
+ if (rightNow == null) {
+ throw new PageException("Channel summary page for '"
+ + action.getName()
+ + "' does not contain required information");
+ }
+ Channel channel = createChannel(action.getName(), rightNow
+ .execute(), aReport);
channels.add(channel);
if (SystemProperties.isDebugMode()) {
break; // Only one channel is crawled.
}
} catch (PageException e) {
+ aReport.addMessage("Could not create channel information for '"
+ + action.getName() + "'");
LOG.error("Could not create channel information for '"
+ action.getName() + "'", e);
}
* Starting page for the channel.
* @return Channel.
*/
- private Channel createChannel(String aChannel, Page aPage) {
+ private Channel createChannel(String aChannel, Page aPage, Report aReport) {
LOG.info("Obtaining program for " + aChannel);
Action[] programActions = aPage.getActions();
List<Program> programs = new ArrayList<Program>();
keywords = programInfo.getContent().element("keywords")
.getText().trim();
} catch (PageException e) {
- LOG.warn(
- "Program details could not be determined for '"
- + action.getName() + "'", e);
+ String msg = "Program details could not be determined for '"
+ + action.getName() + "'";
+ aReport.addMessage(msg, e);
+ LOG.warn(msg, e);
}
}
Program program = new Program(aChannel, action.getName(),
}
return new Channel(aChannel, programs);
}
-
- /**
- * Sends a summary mail to the user.
- *
- * @param aText
- * Text of the mail.
- * @throws MessagingException
- * In case of problems sending mail.
- */
- private void sendMail(ProgramActionExecutor aExecutor) throws MessagingException {
- String textReport = aExecutor.getReport();
- System.out.println("Text report: \n" + textReport);
- System.out.println("XML report:\n" + aExecutor.getXmlReport().asXML());
-
-
- Properties props = new Properties();
- props.put("mail.transport.protocol", "smtp");
- props.put("mail.smtp.host", "falcon");
- props.put("mail.smtp.port", "25");
-
- Session mailSession = Session.getInstance(props);
- InternetAddress from = new InternetAddress("erik@brakkee.org");
-
- HtmlEmail mail = new HtmlEmail();
- mail.setMailSession(mailSession);
- try {
- mail.setFrom("erik@brakkee.org");
- mail.setTo(Arrays.asList(new InternetAddress[] { from }));
- mail.setSentDate(new Date());
- mail.setSubject("KiSS Crawler Update");
- String html = aExecutor.getXmlReport().asXML();
- Document document = new XSLT().transform(html.getBytes(), new FileResource(new File("reportToHtml.xsl")));
- ByteArrayOutputStream xhtml = new ByteArrayOutputStream();
- XMLSerializer serializer = new XMLSerializer(xhtml, new OutputFormat());
- serializer.serialize(document);
- mail.setHtmlMsg(xhtml.toString());
- mail.setTextMsg(textReport);
- mail.send();
- } catch (EmailException e) {
- throw new RuntimeException(e);
- } catch (TransformerException e) {
- throw new RuntimeException(e);
- } catch (IOException e) {
- throw new RuntimeException(e);
- }
- }
-
}