import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
-import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
-import java.io.PrintStream;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import org.wamblee.crawler.kiss.guide.TimeInterval;
import org.wamblee.crawler.kiss.notification.NotificationException;
import org.wamblee.crawler.kiss.notification.Notifier;
+import org.wamblee.general.BeanFactory;
+import org.wamblee.xml.ClasspathUriResolver;
+import org.wamblee.xml.XslTransformer;
/**
* The KiSS crawler for automatic recording of interesting TV shows.
private static final Log LOG = LogFactory.getLog(KissCrawler.class);
- /**
- * Log file name for the crawler.
- */
- private static final String LOG_FILE = "kiss.log";
-
/**
* Start URL of the electronic programme guide.
*/
private static final String START_URL = "http://epg.kml.kiss-technology.com/login_core.php";
-
+
/**
- * Crawler configuration file.
+ * Default socket timeout, used when the caller does not supply its own
+ * value (see main and the constructor without a timeout argument).
+ * NOTE(review): presumably milliseconds, as expected by the HttpClient
+ * "http.socket.timeout" parameter - confirm.
*/
- private static final String CRAWLER_CONFIG = "config.xml";
-
- /**
- * Configuration file describing interesting programs.
- */
- private static final String PROGRAM_CONFIG = "programs.xml";
+ private static final int SOCKET_TIMEOUT = 10000;
/**
* Regular expression for matching time interval strings in the retrieved
* In case of problems.
*/
public static void main(String[] aArgs) throws Exception {
- new KissCrawler(START_URL, CRAWLER_CONFIG, PROGRAM_CONFIG);
+ // Both configuration files are mandatory: fail with a usage message
+ // instead of a bare ArrayIndexOutOfBoundsException when one is missing.
+ if (aArgs == null || aArgs.length < 2) {
+ throw new IllegalArgumentException(
+ "Usage: KissCrawler <crawler config file> <program config file>");
+ }
+ String crawlerConfig = new File(aArgs[0]).getCanonicalPath();
+ String programConfig = new File(aArgs[1]).getCanonicalPath();
+
+ // The notifier is looked up through the bean factory rather than being
+ // read from the program configuration file.
+ BeanFactory factory = new StandaloneCrawlerBeanFactory();
+ Notifier notifier = factory.find(Notifier.class);
+ new KissCrawler(START_URL, SOCKET_TIMEOUT, crawlerConfig, programConfig, notifier, new Report());
+ }
+
+ /**
+ * Convenience constructor that runs the crawler against the default
+ * start URL ({@link #START_URL}) with the default socket timeout
+ * ({@link #SOCKET_TIMEOUT}). All work is delegated to the constructor
+ * that takes the start URL and socket timeout explicitly.
+ *
+ * @param aCrawlerConfig Configuration file for the crawler.
+ * @param aProgramConfig Configuration file describing interesting shows.
+ * @param aNotifier Object used to send notifications of the results.
+ * @param aReport Report to use.
+ * @throws IOException In case of problems reading files.
+ * @throws NotificationException In case notification fails.
+ * @throws PageException In case of problems retrieving the TV guide.
+ */
+ public KissCrawler(String aCrawlerConfig, String aProgramConfig,
+ Notifier aNotifier, Report aReport)
+ throws IOException, NotificationException, PageException {
+ this(START_URL, SOCKET_TIMEOUT, aCrawlerConfig, aProgramConfig,
+ aNotifier, aReport);
}
+
/**
* Constructs the crawler. This retrieves the TV guide by crawling the KiSS
* EPG guide, filters the guide for interesting programs, tries to record
*
* @param aStartUrl
* Start URL of the electronic programme guide.
+ * @param aSocketTimeout Socket timeout to use.
* @param aCrawlerConfig
* Configuration file for the crawler.
* @param aProgramConfig
* Configuration file describing interesting shows.
+ * @param aNotifier Object used to send notifications of the results.
+ * @param aReport Report to use.
* @throws IOException
* In case of problems reading files.
- * @throws MessagingException
- * In case of problems sending a mail notification.
+ * @throws NotificationException In case notification fails.
+ * @throws PageException In case of problems retrieving the TV guide.
*/
- public KissCrawler(String aStartUrl, String aCrawlerConfig,
- String aProgramConfig) throws IOException, MessagingException {
+ public KissCrawler(String aStartUrl, int aSocketTimeout, String aCrawlerConfig,
+ String aProgramConfig, Notifier aNotifier, Report aReport) throws IOException, NotificationException, PageException {
_pattern = Pattern.compile(TIME_REGEX);
- FileOutputStream fos = new FileOutputStream(new File(LOG_FILE));
- PrintStream os = new PrintStream(fos);
-
try {
HttpClient client = new HttpClient();
- //client.getHostConfiguration().setProxy("127.0.0.1", 3128);
+ // client.getHostConfiguration().setProxy("127.0.0.1", 3128);
+ // Honour the timeout supplied by the caller; the SOCKET_TIMEOUT
+ // constant is only the default that callers may pass in.
+ client.getParams().setParameter("http.socket.timeout", aSocketTimeout);
+
+ XslTransformer transformer = new XslTransformer(
+ new ClasspathUriResolver());
- Crawler crawler = createCrawler(aCrawlerConfig, os, client);
+ Crawler crawler = createCrawler(aCrawlerConfig, client, transformer);
InputStream programConfigFile = new FileInputStream(new File(
aProgramConfig));
ProgramConfigurationParser parser = new ProgramConfigurationParser();
+ // Release the file handle as soon as parsing is done, also when
+ // parsing fails.
+ try {
parser.parse(programConfigFile);
+ } finally {
+ programConfigFile.close();
+ }
- List<ProgramFilter> programFilters = parser.getFilters();
-
- Page page = getStartPage(aStartUrl, crawler);
- TVGuide guide = createGuide(page);
- PrintVisitor printer = new PrintVisitor(System.out);
- guide.accept(printer);
- processResults(programFilters, guide, parser.getNotifier());
+ List<ProgramFilter> programFilters = parser.getFilters();
+
+ try {
+ Page page = getStartPage(aStartUrl, crawler, aReport);
+ TVGuide guide = createGuide(page, aReport);
+ PrintVisitor printer = new PrintVisitor(System.out);
+ guide.accept(printer);
+ processResults(programFilters, guide, aNotifier,
+ aReport);
+ } catch (PageException e) {
+ // NOTE(review): the message is added to the report, but because
+ // the exception is rethrown the send() below is skipped, so this
+ // report is not delivered through the notifier - confirm intended.
+ aReport.addMessage("Problem getting TV guide", e);
+ LOG.info("Problem getting TV guide", e);
+ throw e;
+ }
+ aNotifier.send(aReport.asXml());
} finally {
- os.flush();
- os.close();
- System.out.println("Output written on '" + LOG_FILE + "'");
+ System.out.println("Crawler finished");
}
}
* In case of problems sending a summary mail.
*/
private void processResults(List<ProgramFilter> aProgramCondition,
- TVGuide aGuide, Notifier aNotifier) throws MessagingException {
- ProgramActionExecutor executor = new ProgramActionExecutor();
+ TVGuide aGuide, Notifier aNotifier, Report aReport) {
+ ProgramActionExecutor executor = new ProgramActionExecutor(aReport);
for (ProgramFilter filter : aProgramCondition) {
List<Program> programs = filter.apply(aGuide);
ProgramAction action = filter.getAction();
}
}
executor.commit();
- try {
- aNotifier.send(executor.getReport());
- } catch (NotificationException e) {
- throw new RuntimeException(e);
- }
+
}
/**
* @throws FileNotFoundException
* In case configuration files cannot be found.
*/
- private Crawler createCrawler(String aCrawlerConfig, PrintStream aOs,
- HttpClient aClient) throws FileNotFoundException {
- ConfigurationParser parser = new ConfigurationParser(aOs);
+ private Crawler createCrawler(String aCrawlerConfig, HttpClient aClient,
+ XslTransformer aTransformer) throws FileNotFoundException {
+ ConfigurationParser parser = new ConfigurationParser(aTransformer);
InputStream crawlerConfigFile = new FileInputStream(new File(
aCrawlerConfig));
Configuration config = parser.parse(crawlerConfigFile);
* URL of the electronic programme guide.
* @param aCrawler
* Crawler to use.
+ * @param aReport
+ * Report to use.
* @return Starting page.
*/
- private Page getStartPage(String aStartUrl, Crawler aCrawler) {
+ // NOTE(review): aReport is accepted but not used in this method body.
+ private Page getStartPage(String aStartUrl, Crawler aCrawler, Report aReport)
+ throws PageException {
try {
Page page = aCrawler.getPage(aStartUrl);
- return page.getAction("channels-favorites").execute();
+ // Guard against a missing action so the failure is reported as a
+ // PageException rather than a NullPointerException on execute().
+ Action favorites = page.getAction("channels-favorites");
+ if (favorites == null) {
+ String msg = "Channels favorites action not found on start page";
+ // Thrown inside the try block, so the handler below catches it
+ // and wraps it like any other page failure.
+ throw new PageException(msg);
+ }
+ return favorites.execute();
} catch (PageException e) {
- throw new RuntimeException(
- "Could not login to electronic program guide", e);
+ // Rethrown as a checked PageException (with the original as cause)
+ // instead of the RuntimeException used previously.
+ String msg = "Could not complete login to electronic programme guide.";
+ throw new PageException(msg, e);
}
}
*
* @param aPage
* Starting page.
+ * @param aReport
+ * Report to use.
* @return TV guide.
+ * @throws PageException In case of problem getting the tv guide.
*/
- private TVGuide createGuide(Page aPage) {
+ private TVGuide createGuide(Page aPage, Report aReport) throws PageException {
LOG.info("Obtaining full TV guide");
Action[] actions = aPage.getActions();
+ if ( actions.length == 0 ) {
+ LOG.error("No channels found");
+ throw new PageException("No channels found");
+ }
List<Channel> channels = new ArrayList<Channel>();
for (Action action : actions) {
try {
LOG.info("Getting channel info for '" + action.getName() + "'");
- Channel channel = createChannel(action.getName(), action
- .execute().getAction("right-now").execute());
+ Action rightNow = action.execute().getAction("right-now");
+ if (rightNow == null) {
+ throw new PageException("Channel summary page for '"
+ + action.getName()
+ + "' does not contain required information");
+ }
+ Channel channel = createChannel(action.getName(), rightNow
+ .execute(), aReport);
channels.add(channel);
if (SystemProperties.isDebugMode()) {
break; // Only one channel is crawled.
}
} catch (PageException e) {
+ aReport.addMessage("Could not create channel information for '"
+ + action.getName() + "'");
LOG.error("Could not create channel information for '"
+ action.getName() + "'", e);
}
* Starting page for the channel.
* @return Channel.
*/
- private Channel createChannel(String aChannel, Page aPage) {
+ private Channel createChannel(String aChannel, Page aPage, Report aReport) {
LOG.info("Obtaining program for " + aChannel);
Action[] programActions = aPage.getActions();
List<Program> programs = new ArrayList<Program>();
keywords = programInfo.getContent().element("keywords")
.getText().trim();
} catch (PageException e) {
- LOG.warn(
- "Program details could not be determined for '"
- + action.getName() + "'", e);
+ String msg = "Program details could not be determined for '"
+ + action.getName() + "'";
+ aReport.addMessage(msg, e);
+ LOG.warn(msg, e);
}
}
Program program = new Program(aChannel, action.getName(),