import javax.mail.MessagingException;
import org.apache.commons.httpclient.HttpClient;
+import org.apache.commons.httpclient.NameValuePair;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.wamblee.crawler.Action;
import org.wamblee.crawler.kiss.guide.TimeInterval;
import org.wamblee.crawler.kiss.notification.NotificationException;
import org.wamblee.crawler.kiss.notification.Notifier;
+import org.wamblee.general.BeanFactory;
import org.wamblee.xml.ClasspathUriResolver;
import org.wamblee.xml.XslTransformer;
/**
* Start URL of the electronic programme guide.
*/
- private static final String START_URL = "http://epg.kml.kiss-technology.com/login_core.php";
-
- /**
- * Crawler configuration file.
- */
- private static final String CRAWLER_CONFIG = "config.xml";
-
+ private static final String START_URL = "http://epg.kml.kiss-technology.com/login.php";
+
/**
- * Configuration file describing interesting programs.
+ * Default socket timeout to use.
*/
- private static final String PROGRAM_CONFIG = "programs.xml";
+ private static final int SOCKET_TIMEOUT = 10000;
/**
* Regular expression for matching time interval strings in the retrieved
* pages.
*/
- private static final String TIME_REGEX = "([0-9]{2}):([0-9]{2})[^0-9]*([0-9]{2}):([0-9]{2}).*";
+ private static final String TIME_REGEX = "[^0-9]*([0-9]{2}):([0-9]{2})[^0-9]*([0-9]{2}):([0-9]{2}).*";
/**
* Compiled pattern for the time regular expression.
* In case of problems.
*/
public static void main(String[] aArgs) throws Exception {
- new KissCrawler(START_URL, CRAWLER_CONFIG, PROGRAM_CONFIG);
+ // Both configuration files are mandatory: fail with a clear usage message
+ // rather than an ArrayIndexOutOfBoundsException when one is missing.
+ if (aArgs.length < 2) {
+     throw new IllegalArgumentException(
+         "Usage: KissCrawler <crawler-config-file> <program-config-file>");
+ }
+ String crawlerConfig = new File(aArgs[0]).getCanonicalPath();
+ String programConfig = new File(aArgs[1]).getCanonicalPath();
+
+ // The notifier is looked up from the standalone bean factory and handed
+ // to the crawler together with a fresh report.
+ BeanFactory factory = new StandaloneCrawlerBeanFactory();
+ Notifier notifier = factory.find(Notifier.class);
+ new KissCrawler(START_URL, SOCKET_TIMEOUT, crawlerConfig, programConfig, notifier, new Report());
+ }
+
+ /**
+ * Constructs the crawler. This retrieves the TV guide by crawling the KiSS
+ * EPG guide, filters the guide for interesting programs, tries to record
+ * them, and sends a summary mail to the user.
+ *
+ * Convenience overload: delegates to the full constructor, passing the
+ * default {@link #START_URL} and {@link #SOCKET_TIMEOUT}.
+ *
+ * @param aCrawlerConfig
+ * Configuration file for the crawler.
+ * @param aProgramConfig
+ * Configuration file describing interesting shows.
+ * @param aNotifier Object used to send notifications of the results.
+ * @param aReport Report to use.
+ * @throws IOException
+ * In case of problems reading files.
+ * @throws NotificationException In case notification fails.
+ * @throws PageException In case of problems retrieving the TV guide.
+ */
+ public KissCrawler(String aCrawlerConfig,
+ String aProgramConfig, Notifier aNotifier, Report aReport) throws IOException, NotificationException, PageException {
+ this(START_URL, SOCKET_TIMEOUT, aCrawlerConfig, aProgramConfig, aNotifier, aReport);
}
+
/**
* Constructs the crawler. This retrieves the TV guide by crawling the KiSS
* EPG guide, filters the guide for interesting programs, tries to record
*
* @param aStartUrl
* Start URL of the electronic programme guide.
+ * @param aSocketTimeout Socket timeout to use.
* @param aCrawlerConfig
* Configuration file for the crawler.
* @param aProgramConfig
* Configuration file describing interesting shows.
+ * @param aNotifier Object used to send notifications of the results.
+ * @param aReport Report to use.
* @throws IOException
* In case of problems reading files.
- * @throws MessagingException
- * In case of problems sending a mail notification.
+ * @throws NotificationException In case notification fails.
+ * @throws PageException In case of problems retrieving the TV guide.
*/
- public KissCrawler(String aStartUrl, String aCrawlerConfig,
- String aProgramConfig) throws IOException, NotificationException {
+ public KissCrawler(String aStartUrl, int aSocketTimeout, String aCrawlerConfig,
+ String aProgramConfig, Notifier aNotifier, Report aReport) throws IOException, NotificationException, PageException {
_pattern = Pattern.compile(TIME_REGEX);
try {
HttpClient client = new HttpClient();
// client.getHostConfiguration().setProxy("127.0.0.1", 3128);
+ // Honour the caller-supplied timeout; the SOCKET_TIMEOUT constant is only
+ // the default used by main() and the convenience constructor.
+ client.getParams().setParameter("http.socket.timeout", aSocketTimeout);
XslTransformer transformer = new XslTransformer(
new ClasspathUriResolver());
Crawler crawler = createCrawler(aCrawlerConfig, client, transformer);
InputStream programConfigFile = new FileInputStream(new File(
aProgramConfig));
- ProgramConfigurationParser parser = new ProgramConfigurationParser(
- transformer);
+ ProgramConfigurationParser parser = new ProgramConfigurationParser();
+ // Release the file handle as soon as parsing is done, even if it fails.
+ try {
parser.parse(programConfigFile);
+ } finally {
+     programConfigFile.close();
+ }
List<ProgramFilter> programFilters = parser.getFilters();
- Report report = new Report();
-
try {
- Page page = getStartPage(aStartUrl, crawler, report);
- TVGuide guide = createGuide(page, report);
+ Page page = getStartPage(aStartUrl, crawler, aReport);
+ TVGuide guide = createGuide(page, aReport);
PrintVisitor printer = new PrintVisitor(System.out);
guide.accept(printer);
- processResults(programFilters, guide, parser.getNotifier(),
- report);
+ processResults(programFilters, guide, aNotifier,
+ aReport);
} catch (PageException e) {
- report.addMessage("Problem getting TV guide", e);
+ aReport.addMessage("Problem getting TV guide", e);
LOG.info("Problem getting TV guide", e);
+ throw e;
}
- parser.getNotifier().send(report.asXml());
+ aNotifier.send(aReport.asXml());
} finally {
System.out.println("Crawler finished");
}
private Page getStartPage(String aStartUrl, Crawler aCrawler, Report aReport)
throws PageException {
try {
- Page page = aCrawler.getPage(aStartUrl);
+ Page page = aCrawler.getPage(aStartUrl, new NameValuePair[0]);
+ // getAction() can return null (the favorites lookup below is checked for
+ // it), so guard the login action too instead of risking a
+ // NullPointerException on a changed or unexpected start page.
+ Action login = page.getAction("login");
+ if (login == null) {
+     throw new PageException("Login action not found on start page");
+ }
+ page = login.execute();
Action favorites = page.getAction("channels-favorites");
if (favorites == null) {
String msg = "Channels favorites action not found on start page";
}
return favorites.execute();
} catch (PageException e) {
- String msg = "Could not login to electronic programme guide.";
+ String msg = "Could not complete login to electronic programme guide.";
throw new PageException(msg, e);
}
}
* @param aReport
* Report to use.
* @return TV guide.
+ * @throws PageException In case of problem getting the tv guide.
*/
- private TVGuide createGuide(Page aPage, Report aReport) {
+ private TVGuide createGuide(Page aPage, Report aReport) throws PageException {
LOG.info("Obtaining full TV guide");
Action[] actions = aPage.getActions();
+ if ( actions.length == 0 ) {
+ LOG.error("No channels found");
+ throw new PageException("No channels found");
+ }
List<Channel> channels = new ArrayList<Channel>();
for (Action action : actions) {
try {