X-Git-Url: http://wamblee.org/gitweb/?a=blobdiff_plain;f=crawler%2Fkiss%2Fsrc%2Forg%2Fwamblee%2Fcrawler%2Fkiss%2Fmain%2FKissCrawler.java;h=3191ff24e627cffdd5d0eab3321e106c090a3ba9;hb=e359f7995fcafb2ce18576be6f21345f9f7b1482;hp=b0be9ef6cede2918bbf842ae98617f6a0d87afc5;hpb=35f10e8a91e1660e23de1077bcbc9a5fbcaf8967;p=utils diff --git a/crawler/kiss/src/org/wamblee/crawler/kiss/main/KissCrawler.java b/crawler/kiss/src/org/wamblee/crawler/kiss/main/KissCrawler.java index b0be9ef6..3191ff24 100644 --- a/crawler/kiss/src/org/wamblee/crawler/kiss/main/KissCrawler.java +++ b/crawler/kiss/src/org/wamblee/crawler/kiss/main/KissCrawler.java @@ -29,6 +29,7 @@ import java.util.regex.Pattern; import javax.mail.MessagingException; import org.apache.commons.httpclient.HttpClient; +import org.apache.commons.httpclient.NameValuePair; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.wamblee.crawler.Action; @@ -61,7 +62,7 @@ public class KissCrawler { /** * Start URL of the electronic programme guide. */ - private static final String START_URL = "http://epg.kml.kiss-technology.com/login_core.php"; + private static final String START_URL = "http://epg.kml.kiss-technology.com/login.php"; /** * Default socket timeout to use. @@ -72,7 +73,7 @@ public class KissCrawler { * Regular expression for matching time interval strings in the retrieved * pages. */ - private static final String TIME_REGEX = "([0-9]{2}):([0-9]{2})[^0-9]*([0-9]{2}):([0-9]{2}).*"; + private static final String TIME_REGEX = "[^0-9]*([0-9]{2}):([0-9]{2})[^0-9]*([0-9]{2}):([0-9]{2}).*"; /** * Compiled pattern for the time regular expression. @@ -153,8 +154,7 @@ public class KissCrawler { Crawler crawler = createCrawler(aCrawlerConfig, client, transformer); InputStream programConfigFile = new FileInputStream(new File( aProgramConfig)); - ProgramConfigurationParser parser = new ProgramConfigurationParser( - transformer); + ProgramConfigurationParser parser = new ProgramConfigurationParser(); parser.parse(programConfigFile); List programFilters = parser.getFilters(); @@ -238,7 +238,8 @@ public class KissCrawler { private Page getStartPage(String aStartUrl, Crawler aCrawler, Report aReport) throws PageException { try { - Page page = aCrawler.getPage(aStartUrl); + Page page = aCrawler.getPage(aStartUrl, new NameValuePair[0]); + page = page.getAction("login").execute(); Action favorites = page.getAction("channels-favorites"); if (favorites == null) { String msg = "Channels favorites action not found on start page"; @@ -259,10 +260,15 @@ public class KissCrawler { * @param aReport * Report to use. * @return TV guide. + * @throws PageException In case of problem getting the tv guide. */ - private TVGuide createGuide(Page aPage, Report aReport) { + private TVGuide createGuide(Page aPage, Report aReport) throws PageException { LOG.info("Obtaining full TV guide"); Action[] actions = aPage.getActions(); + if ( actions.length == 0 ) { + LOG.error("No channels found"); + throw new PageException("No channels found"); + } List channels = new ArrayList(); for (Action action : actions) { try {