X-Git-Url: http://wamblee.org/gitweb/?a=blobdiff_plain;f=crawler%2Fkiss%2Fsrc%2Forg%2Fwamblee%2Fcrawler%2Fkiss%2FKissCrawler.java;h=298e94435a495c3357aaff8c1a5f7e7f56930e19;hb=d4bb47fd284738756cd112b788a49caa1a9d5c38;hp=dd9ba78d1d9a16e07ce956e256ce6922eb3e0a20;hpb=1785ad1948da7bf80f07d705c968726991507376;p=utils diff --git a/crawler/kiss/src/org/wamblee/crawler/kiss/KissCrawler.java b/crawler/kiss/src/org/wamblee/crawler/kiss/KissCrawler.java index dd9ba78d..298e9443 100644 --- a/crawler/kiss/src/org/wamblee/crawler/kiss/KissCrawler.java +++ b/crawler/kiss/src/org/wamblee/crawler/kiss/KissCrawler.java @@ -18,24 +18,36 @@ package org.wamblee.crawler.kiss; import java.io.File; import java.io.FileInputStream; +import java.io.FileNotFoundException; import java.io.FileOutputStream; +import java.io.IOException; import java.io.InputStream; import java.io.PrintStream; import java.util.ArrayList; +import java.util.Date; import java.util.List; +import java.util.Properties; import java.util.regex.Matcher; import java.util.regex.Pattern; +import javax.mail.Message; +import javax.mail.MessagingException; +import javax.mail.Session; +import javax.mail.Transport; +import javax.mail.internet.AddressException; +import javax.mail.internet.InternetAddress; +import javax.mail.internet.MimeMessage; + import org.apache.commons.httpclient.HttpClient; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.dom4j.Element; import org.wamblee.conditions.Condition; -import org.wamblee.conditions.OrCondition; import org.wamblee.crawler.Action; import org.wamblee.crawler.Configuration; import org.wamblee.crawler.Crawler; import org.wamblee.crawler.Page; +import org.wamblee.crawler.PageException; import org.wamblee.crawler.impl.ConfigurationParser; import org.wamblee.crawler.impl.CrawlerImpl; @@ -43,7 +55,7 @@ import org.wamblee.crawler.impl.CrawlerImpl; * */ public class KissCrawler { - + private static final Log LOG = LogFactory.getLog(KissCrawler.class); private static final String LOG_FILE = "kiss.log"; @@ -51,14 +63,16 @@ public class KissCrawler { private static final String START_URL = "http://epg.kml.kiss-technology.com/login_core.php"; private static final String CRAWLER_CONFIG = "config.xml"; - + private static final String PROGRAM_CONFIG = "programs.xml"; private static final String TIME_REGEX = "([0-9]{2}):([0-9]{2})[^0-9]*([0-9]{2}):([0-9]{2}).*"; private Pattern _pattern; - public KissCrawler(String aStartUrl, String aCrawlerConfig, String aProgramConfig) throws Exception { + public KissCrawler(String aStartUrl, String aCrawlerConfig, + String aProgramConfig) throws IOException, AddressException, + MessagingException { _pattern = Pattern.compile(TIME_REGEX); @@ -66,33 +80,21 @@ public class KissCrawler { PrintStream os = new PrintStream(fos); try { - ConfigurationParser parser = new ConfigurationParser(os); - InputStream crawlerConfigFile = new FileInputStream(new File(aCrawlerConfig)); - Configuration config = parser.parse(crawlerConfigFile); - - InputStream programConfigFile = new FileInputStream(new File(aProgramConfig)); - Condition programCondition = new ProgramConfigurationParser().parse(programConfigFile); - - HttpClient client = new HttpClient(); // client.getHostConfiguration().setProxy("localhost", 3128); - Crawler crawler = new CrawlerImpl(client, config); + Crawler crawler = createCrawler(aCrawlerConfig, os, client); - Page page = crawler.getPage(aStartUrl); - showPage(page); - page = page.getAction("channels-favorites").execute(); + Page page = getStartPage(aStartUrl, crawler); TVGuide guide = createGuide(page); PrintVisitor printer = new PrintVisitor(System.out); guide.accept(printer); - - MatchVisitor matcher = new MatchVisitor(programCondition); - guide.accept(matcher); - List programs = matcher.getMatches(); - for (Program program: programs) { - System.out.println("Found: " + program + " record: " + program.record() ); - } - + + InputStream programConfigFile = new FileInputStream(new File( + aProgramConfig)); + Condition programCondition = new ProgramConfigurationParser() + .parse(programConfigFile); + recordInterestingShows(programCondition, guide); } finally { os.flush(); os.close(); @@ -100,6 +102,83 @@ public class KissCrawler { } } + /** + * @param programCondition + * @param guide + * @throws AddressException + * @throws MessagingException + */ + private void recordInterestingShows(Condition programCondition, + TVGuide guide) throws AddressException, MessagingException { + MatchVisitor matcher = new MatchVisitor(programCondition); + guide.accept(matcher); + List programs = matcher.getMatches(); + String recorded = ""; + String notRecorded = ""; + String failures = ""; + for (Program program : programs) { + try { + boolean result = program.record(); + if (result) { + recorded += "\n" + program; + } else { + notRecorded += "\n" + program; + } + } catch (PageException e) { + LOG.info("Attempt to record " + program + " failed."); + failures += "\n" + program.toString() + ": " + e.getMessage(); + } + } + String msg = "Summary of KiSS crawler: \n\n\n"; + + if (recorded.length() > 0) { + msg += "Recorded programs:\n\n" + recorded + "\n\n"; + } + if (notRecorded.length() > 0) { + msg += "Not recorded programs:\n\n" + notRecorded + "\n\n"; + } + if (recorded.length() == 0 && notRecorded.length() == 0) { + msg += "No suitable programs found"; + } + if (failures.length() > 0) { + msg += "Failures:\n\n" + failures; + } + System.out.println(msg); + sendMail(msg); + } + + /** + * @param aCrawlerConfig + * @param os + * @param client + * @return + * @throws FileNotFoundException + */ + private Crawler createCrawler(String aCrawlerConfig, PrintStream os, + HttpClient client) throws FileNotFoundException { + ConfigurationParser parser = new ConfigurationParser(os); + InputStream crawlerConfigFile = new FileInputStream(new File( + aCrawlerConfig)); + Configuration config = parser.parse(crawlerConfigFile); + Crawler crawler = new CrawlerImpl(client, config); + return crawler; + } + + /** + * @param aStartUrl + * @param crawler + * @return + */ + private Page getStartPage(String aStartUrl, Crawler crawler) { + try { + Page page = crawler.getPage(aStartUrl); + return page.getAction("channels-favorites").execute(); + } catch (PageException e) { + throw new RuntimeException( + "Could not login to electronic program guide", e); + } + } + public static void main(String[] args) throws Exception { new KissCrawler(START_URL, CRAWLER_CONFIG, PROGRAM_CONFIG); } @@ -118,9 +197,15 @@ public class KissCrawler { Action[] actions = page.getActions(); List channels = new ArrayList(); for (Action action : actions) { - Channel channel = createChannel(action.getName(), action.execute() - .getAction("right-now").execute()); - channels.add(channel); + try { + LOG.info("Getting channel info for '" + action.getName() + "'"); + Channel channel = createChannel(action.getName(), action + .execute().getAction("right-now").execute()); + channels.add(channel); + } catch (PageException e) { + LOG.error("Could not create channel information for '" + + action.getName() + "'", e); + } } return new TVGuide(channels); } @@ -133,22 +218,44 @@ public class KissCrawler { String time = action.getContent().element("time").getText().trim(); Matcher matcher = _pattern.matcher(time); if (matcher.matches()) { - Time begin = new Time(Integer.parseInt(matcher.group(1)), - Integer.parseInt(matcher.group(2))); - Time end = new Time(Integer.parseInt(matcher.group(3)), - Integer.parseInt(matcher.group(4))); + Time begin = new Time(Integer.parseInt(matcher.group(1)), + Integer.parseInt(matcher.group(2))); + Time end = new Time(Integer.parseInt(matcher.group(3)), Integer + .parseInt(matcher.group(4))); TimeInterval interval = new TimeInterval(begin, end); - //Page programInfo = action.execute(); - //String description = programInfo.getContent().element("description").getText().trim(); - //String keywords = programInfo.getContent().element("keywords").getText().trim(); + // Page programInfo = action.execute(); + // String description = + // programInfo.getContent().element("description").getText().trim(); + // String keywords = + // programInfo.getContent().element("keywords").getText().trim(); String description = ""; String keywords = ""; - Program program = new Program(aChannel, action.getName(), description, keywords, interval, action); - + Program program = new Program(aChannel, action.getName(), + description, keywords, interval, action); + LOG.debug("Got program " + program); programs.add(program); } } return new Channel(aChannel, programs); } + + private void sendMail(String aText) throws AddressException, + MessagingException { + Properties props = new Properties(); + props.put("mail.transport.protocol", "smtp"); + props.put("mail.smtp.host", "falcon"); + props.put("mail.smtp.port", "25"); + + Session mailSession = Session.getInstance(props); + Message message = new MimeMessage(mailSession); + + message.setFrom(new InternetAddress("erik@brakkee.org")); + message.setRecipient(Message.RecipientType.TO, new InternetAddress( + "erik@brakkee.org")); + message.setSentDate(new Date()); + message.setSubject("KiSS crawler update"); + message.setText(aText); + Transport.send(message); + } }