X-Git-Url: http://wamblee.org/gitweb/?a=blobdiff_plain;f=crawler%2Fkiss%2Fsrc%2Forg%2Fwamblee%2Fcrawler%2Fkiss%2Fmain%2FKissCrawler.java;h=3300e1299e71b6c4268d9ba9447b65d6df17fe8f;hb=5693cee5e48f72bf1f054feadb2bc7a6045f99e5;hp=3191ff24e627cffdd5d0eab3321e106c090a3ba9;hpb=e359f7995fcafb2ce18576be6f21345f9f7b1482;p=utils diff --git a/crawler/kiss/src/org/wamblee/crawler/kiss/main/KissCrawler.java b/crawler/kiss/src/org/wamblee/crawler/kiss/main/KissCrawler.java index 3191ff24..3300e129 100644 --- a/crawler/kiss/src/org/wamblee/crawler/kiss/main/KissCrawler.java +++ b/crawler/kiss/src/org/wamblee/crawler/kiss/main/KissCrawler.java @@ -32,6 +32,7 @@ import org.apache.commons.httpclient.HttpClient; import org.apache.commons.httpclient.NameValuePair; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.dom4j.Element; import org.wamblee.crawler.Action; import org.wamblee.crawler.Configuration; import org.wamblee.crawler.Crawler; @@ -63,11 +64,11 @@ public class KissCrawler { * Start URL of the electronic programme guide. */ private static final String START_URL = "http://epg.kml.kiss-technology.com/login.php"; - + /** - * Default socket timeout to use. + * Default socket timeout to use. */ - private static final int SOCKET_TIMEOUT = 10000; + private static final int SOCKET_TIMEOUT = 10000; /** * Regular expression for matching time interval strings in the retrieved @@ -89,14 +90,15 @@ public class KissCrawler { * In case of problems. */ public static void main(String[] aArgs) throws Exception { - String crawlerConfig = new File(aArgs[0]).getCanonicalPath(); - String programConfig = new File(aArgs[1]).getCanonicalPath(); + String crawlerConfig = new File(aArgs[0]).getCanonicalPath(); + String programConfig = new File(aArgs[1]).getCanonicalPath(); BeanFactory factory = new StandaloneCrawlerBeanFactory(); Notifier notifier = factory.find(Notifier.class); - new KissCrawler(START_URL, SOCKET_TIMEOUT, crawlerConfig, programConfig, notifier, new Report()); + new KissCrawler(START_URL, SOCKET_TIMEOUT, crawlerConfig, + programConfig, notifier, new Report()); } - + /** * Constructs the crawler. This retrieves the TV guide by crawling the KiSS * EPG guide, filters the guide for interesting programs, tries to record @@ -106,19 +108,24 @@ public class KissCrawler { * Configuration file for the crawler. * @param aProgramConfig * Configuration file describing interesting shows. - * @param aNotifier Object used to send notifications of the results. - * @param aReport Report to use. + * @param aNotifier + * Object used to send notifications of the results. + * @param aReport + * Report to use. * @throws IOException * In case of problems reading files. - * @throws NotificationException In case notification fails. - * @throws PageException In case of problems retrieving the TV guide. + * @throws NotificationException + * In case notification fails. + * @throws PageException + * In case of problems retrieving the TV guide. */ - public KissCrawler(String aCrawlerConfig, - String aProgramConfig, Notifier aNotifier, Report aReport) throws IOException, NotificationException, PageException { - this(START_URL, SOCKET_TIMEOUT, aCrawlerConfig, aProgramConfig, aNotifier, aReport); + public KissCrawler(String aCrawlerConfig, String aProgramConfig, + Notifier aNotifier, Report aReport) throws IOException, + NotificationException, PageException { + this(START_URL, SOCKET_TIMEOUT, aCrawlerConfig, aProgramConfig, + aNotifier, aReport); } - /** * Constructs the crawler. This retrieves the TV guide by crawling the KiSS * EPG guide, filters the guide for interesting programs, tries to record @@ -126,27 +133,35 @@ public class KissCrawler { * * @param aStartUrl * Start URL of the electronic programme guide. - * @param aSocketTimeout Socket timeout to use. + * @param aSocketTimeout + * Socket timeout to use. * @param aCrawlerConfig * Configuration file for the crawler. * @param aProgramConfig * Configuration file describing interesting shows. - * @param aNotifier Object used to send notifications of the results. - * @param aReport Report to use. + * @param aNotifier + * Object used to send notifications of the results. + * @param aReport + * Report to use. * @throws IOException * In case of problems reading files. - * @throws NotificationException In case notification fails. - * @throws PageException In case of problems retrieving the TV guide. + * @throws NotificationException + * In case notification fails. + * @throws PageException + * In case of problems retrieving the TV guide. */ - public KissCrawler(String aStartUrl, int aSocketTimeout, String aCrawlerConfig, - String aProgramConfig, Notifier aNotifier, Report aReport) throws IOException, NotificationException, PageException { + public KissCrawler(String aStartUrl, int aSocketTimeout, + String aCrawlerConfig, String aProgramConfig, Notifier aNotifier, + Report aReport) throws IOException, NotificationException, + PageException { _pattern = Pattern.compile(TIME_REGEX); try { HttpClient client = new HttpClient(); // client.getHostConfiguration().setProxy("127.0.0.1", 3128); - client.getParams().setParameter("http.socket.timeout", SOCKET_TIMEOUT); + client.getParams().setParameter("http.socket.timeout", + SOCKET_TIMEOUT); XslTransformer transformer = new XslTransformer( new ClasspathUriResolver()); @@ -163,12 +178,11 @@ public class KissCrawler { TVGuide guide = createGuide(page, aReport); PrintVisitor printer = new PrintVisitor(System.out); guide.accept(printer); - processResults(programFilters, guide, aNotifier, - aReport); + processResults(programFilters, guide, aNotifier, aReport); } catch (PageException e) { aReport.addMessage("Problem getting TV guide", e); LOG.info("Problem getting TV guide", e); - throw e; + throw e; } aNotifier.send(aReport.asXml()); } finally { @@ -260,12 +274,14 @@ public class KissCrawler { * @param aReport * Report to use. * @return TV guide. - * @throws PageException In case of problem getting the tv guide. + * @throws PageException + * In case of problem getting the tv guide. */ - private TVGuide createGuide(Page aPage, Report aReport) throws PageException { + private TVGuide createGuide(Page aPage, Report aReport) + throws PageException { LOG.info("Obtaining full TV guide"); Action[] actions = aPage.getActions(); - if ( actions.length == 0 ) { + if (actions.length == 0) { LOG.error("No channels found"); throw new PageException("No channels found"); } @@ -273,13 +289,13 @@ public class KissCrawler { for (Action action : actions) { try { LOG.info("Getting channel info for '" + action.getName() + "'"); - Action rightNow = action.execute().getAction("right-now"); - if (rightNow == null) { + Action tomorrow = action.execute().getAction("tomorrow"); + if (tomorrow == null) { throw new PageException("Channel summary page for '" + action.getName() + "' does not contain required information"); } - Channel channel = createChannel(action.getName(), rightNow + Channel channel = createChannel(action.getName(), tomorrow .execute(), aReport); channels.add(channel); if (SystemProperties.isDebugMode()) { @@ -319,18 +335,25 @@ public class KissCrawler { TimeInterval interval = new TimeInterval(begin, end); String description = ""; String keywords = ""; + if (!SystemProperties.isNoProgramDetailsRequired()) { - try { - Page programInfo = action.execute(); - description = programInfo.getContent().element( - "description").getText().trim(); - keywords = programInfo.getContent().element("keywords") - .getText().trim(); - } catch (PageException e) { - String msg = "Program details could not be determined for '" - + action.getName() + "'"; - aReport.addMessage(msg, e); - LOG.warn(msg, e); + Element descriptionElem = action.getContent().element( + "description"); + if (descriptionElem == null) { + try { + Page programInfo = action.execute(); + description = programInfo.getContent().element( + "description").getText().trim(); + keywords = programInfo.getContent().element( + "keywords").getText().trim(); + } catch (PageException e) { + String msg = "Program details could not be determined for '" + + action.getName() + "'"; + aReport.addMessage(msg, e); + LOG.warn(msg, e); + } + } else { + description = descriptionElem.getTextTrim(); } } Program program = new Program(aChannel, action.getName(),