import org.wamblee.crawler.kiss.guide.TimeInterval;
import org.wamblee.crawler.kiss.notification.NotificationException;
import org.wamblee.crawler.kiss.notification.Notifier;
+import org.wamblee.general.BeanFactory;
import org.wamblee.xml.ClasspathUriResolver;
import org.wamblee.xml.XslTransformer;
* Start URL of the electronic programme guide.
*/
private static final String START_URL = "http://epg.kml.kiss-technology.com/login_core.php";
+
+ /**
+ * Default socket timeout to use, in milliseconds.
+ */
+ private static final int SOCKET_TIMEOUT = 10000;
/**
* Regular expression for matching time interval strings in the retrieved
public static void main(String[] aArgs) throws Exception {
// aArgs[0] is the crawler configuration file and aArgs[1] the program
// configuration file; both are resolved to canonical paths up front.
String crawlerConfig = new File(aArgs[0]).getCanonicalPath();
String programConfig = new File(aArgs[1]).getCanonicalPath();
- new KissCrawler(START_URL, crawlerConfig, programConfig);
+
+ BeanFactory factory = new StandaloneCrawlerBeanFactory();
+ Notifier notifier = factory.find(Notifier.class);
+ new KissCrawler(START_URL, SOCKET_TIMEOUT, crawlerConfig, programConfig, notifier, new Report());
// The KissCrawler instance is intentionally discarded: the constructor
// itself performs the crawl and sends the report through the notifier.
}
/**
* Configuration file for the crawler.
* @param aProgramConfig
* Configuration file describing interesting shows.
+ * @param aNotifier Object used to send notifications of the results.
+ * @param aReport Report to use.
* @throws IOException
* In case of problems reading files.
- * @throws MessagingException
- * In case of problems sending a mail notification.
+ * @throws NotificationException In case notification fails.
+ * @throws PageException In case of problems retrieving the TV guide.
*/
public KissCrawler(String aCrawlerConfig,
- String aProgramConfig) throws IOException, NotificationException {
- this(START_URL, aCrawlerConfig, aProgramConfig);
+ String aProgramConfig, Notifier aNotifier, Report aReport) throws IOException, NotificationException, PageException {
+ this(START_URL, SOCKET_TIMEOUT, aCrawlerConfig, aProgramConfig, aNotifier, aReport);
// Convenience overload: delegates to the full constructor, supplying the
// class defaults START_URL and SOCKET_TIMEOUT for the first two arguments.
}
*
* @param aStartUrl
* Start URL of the electronic programme guide.
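+ * @param aSocketTimeout Socket timeout to use. NOTE(review): the visible constructor body passes the SOCKET_TIMEOUT constant to the HTTP client instead of this argument; confirm and fix.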
* @param aCrawlerConfig
* Configuration file for the crawler.
* @param aProgramConfig
* Configuration file describing interesting shows.
+ * @param aNotifier Object used to send notifications of the results.
+ * @param aReport Report to use.
* @throws IOException
* In case of problems reading files.
- * @throws MessagingException
- * In case of problems sending a mail notification.
+ * @throws NotificationException In case notification fails.
+ * @throws PageException In case of problems retrieving the TV guide.
*/
- public KissCrawler(String aStartUrl, String aCrawlerConfig,
- String aProgramConfig) throws IOException, NotificationException {
+ public KissCrawler(String aStartUrl, int aSocketTimeout, String aCrawlerConfig,
+ String aProgramConfig, Notifier aNotifier, Report aReport) throws IOException, NotificationException, PageException {
_pattern = Pattern.compile(TIME_REGEX);
try {
HttpClient client = new HttpClient();
// client.getHostConfiguration().setProxy("127.0.0.1", 3128);
+ client.getParams().setParameter("http.socket.timeout", SOCKET_TIMEOUT);
XslTransformer transformer = new XslTransformer(
new ClasspathUriResolver());
parser.parse(programConfigFile);
List<ProgramFilter> programFilters = parser.getFilters();
- Report report = new Report();
-
try {
- Page page = getStartPage(aStartUrl, crawler, report);
- TVGuide guide = createGuide(page, report);
+ Page page = getStartPage(aStartUrl, crawler, aReport);
+ TVGuide guide = createGuide(page, aReport);
PrintVisitor printer = new PrintVisitor(System.out);
guide.accept(printer);
- processResults(programFilters, guide, parser.getNotifier(),
- report);
+ processResults(programFilters, guide, aNotifier,
+ aReport);
} catch (PageException e) {
- report.addMessage("Problem getting TV guide", e);
+ aReport.addMessage("Problem getting TV guide", e);
LOG.info("Problem getting TV guide", e);
+ throw e;
}
- parser.getNotifier().send(report.asXml());
+ aNotifier.send(aReport.asXml());
} finally {
System.out.println("Crawler finished");
}