X-Git-Url: http://wamblee.org/gitweb/?a=blobdiff_plain;f=crawler%2Fkiss%2Fsrc%2Forg%2Fwamblee%2Fcrawler%2Fkiss%2FKissCrawler.java;h=0549e48f3a8a99910512c8ca125f574a021d027c;hb=c0da3814aaa1e707d253202ceb44fa745c671de8;hp=8fbd232dcba2c685a4c981e75416b87d8512a3bf;hpb=45954ad938187b0c16d18c763ccfd43a349cb862;p=utils diff --git a/crawler/kiss/src/org/wamblee/crawler/kiss/KissCrawler.java b/crawler/kiss/src/org/wamblee/crawler/kiss/KissCrawler.java index 8fbd232d..0549e48f 100644 --- a/crawler/kiss/src/org/wamblee/crawler/kiss/KissCrawler.java +++ b/crawler/kiss/src/org/wamblee/crawler/kiss/KissCrawler.java @@ -16,6 +16,7 @@ package org.wamblee.crawler.kiss; +import java.io.ByteArrayOutputStream; import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; @@ -24,12 +25,10 @@ import java.io.IOException; import java.io.InputStream; import java.io.PrintStream; import java.util.ArrayList; +import java.util.Arrays; import java.util.Date; -import java.util.EnumMap; import java.util.List; import java.util.Properties; -import java.util.Set; -import java.util.TreeSet; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -39,10 +38,16 @@ import javax.mail.Session; import javax.mail.Transport; import javax.mail.internet.InternetAddress; import javax.mail.internet.MimeMessage; +import javax.xml.transform.TransformerException; import org.apache.commons.httpclient.HttpClient; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.commons.mail.EmailException; +import org.apache.commons.mail.HtmlEmail; +import org.apache.xml.serialize.OutputFormat; +import org.apache.xml.serialize.XMLSerializer; +import org.w3c.dom.Document; import org.wamblee.crawler.Action; import org.wamblee.crawler.Configuration; import org.wamblee.crawler.Crawler; @@ -50,7 +55,8 @@ import org.wamblee.crawler.Page; import org.wamblee.crawler.PageException; import org.wamblee.crawler.impl.ConfigurationParser; import org.wamblee.crawler.impl.CrawlerImpl; -import org.wamblee.crawler.kiss.Program.RecordingResult; +import org.wamblee.io.FileResource; +import org.wamblee.xml.XSLT; /** * The KiSS crawler for automatic recording of interesting TV shows. @@ -132,17 +138,16 @@ public class KissCrawler { // client.getHostConfiguration().setProxy("127.0.0.1", 3128); Crawler crawler = createCrawler(aCrawlerConfig, os, client); + InputStream programConfigFile = new FileInputStream(new File( + aProgramConfig)); + List programFilters = new ProgramConfigurationParser() + .parse(programConfigFile); Page page = getStartPage(aStartUrl, crawler); TVGuide guide = createGuide(page); PrintVisitor printer = new PrintVisitor(System.out); guide.accept(printer); - - InputStream programConfigFile = new FileInputStream(new File( - aProgramConfig)); - List programFilters = new ProgramConfigurationParser() - .parse(programConfigFile); - recordInterestingShows(programFilters, guide); + processResults(programFilters, guide); } finally { os.flush(); os.close(); @@ -160,79 +165,18 @@ public class KissCrawler { * @throws MessagingException * In case of problems sending a summary mail. */ - private void recordInterestingShows(List aProgramCondition, + private void processResults(List aProgramCondition, TVGuide aGuide) throws MessagingException { - - Set showsToRecord = new TreeSet(new Program.TimeSorter()); - Set interestingShows = new TreeSet(new Program.TimeSorter()); - + ProgramActionExecutor executor = new ProgramActionExecutor(); for (ProgramFilter filter : aProgramCondition) { - List programs = filter.apply(aGuide); - switch (filter.getAction()) { - case RECORD: { - for (Program program: programs) { - showsToRecord.add(program); - } - break; - } - case NOTIFY: { - for (Program program: programs) { - if ( program.isRecordingPossible()) { - interestingShows.add(program); - } - } - break; - } - default: { - throw new RuntimeException("Unknown action '" + filter.getAction() + "'"); - } + List programs = filter.apply(aGuide); + ProgramAction action = filter.getAction(); + for (Program program : programs) { + action.execute(program, executor); } } - - EnumMap> messages = recordShows(showsToRecord); - - String msg = "Summary of KiSS crawler: \n\n\n"; - - for (RecordingResult result : RecordingResult.values()) { - if (messages.get(result).size() > 0) { - msg += result.getDescription() + "\n\n"; - for (Program program : messages.get(result)) { - msg += program + "\n\n"; - } - } - } - - if ( interestingShows.size() > 0 ) { - msg += "Possibly interesting shows:\n\n"; - for (Program program: interestingShows) { - msg += program + "\n\n"; - } - } - if (showsToRecord.size() + interestingShows.size() == 0) { - msg += "No suitable programs found"; - } - - System.out.println(msg); - sendMail(msg); - } - - /** - * Records shows. - * @param showsToRecord Shows to record. - * @return Recording results. - */ - private EnumMap> recordShows(Set showsToRecord) { - EnumMap> messages = new EnumMap>( - RecordingResult.class); - for (RecordingResult result : RecordingResult.values()) { - messages.put(result, new ArrayList()); - } - - for (Program program : showsToRecord) { - Program.RecordingResult result = program.record(); - messages.get(result).add(program); - } - return messages; + executor.commit(); + sendMail(executor); } /** @@ -338,15 +282,15 @@ public class KissCrawler { keywords = programInfo.getContent().element("keywords") .getText().trim(); } catch (PageException e) { - LOG - .warn("Program details coul dnot be determined for '" - + action.getName() + "'"); + LOG.warn( + "Program details could not be determined for '" + + action.getName() + "'", e); } } Program program = new Program(aChannel, action.getName(), description, keywords, interval, action); - LOG.debug("Got program " + program); + LOG.info("Got program " + program); programs.add(program); } } @@ -361,22 +305,42 @@ public class KissCrawler { * @throws MessagingException * In case of problems sending mail. */ - private void sendMail(String aText) throws MessagingException { + private void sendMail(ProgramActionExecutor aExecutor) throws MessagingException { + String textReport = aExecutor.getReport(); + System.out.println("Text report: \n" + textReport); + System.out.println("XML report:\n" + aExecutor.getXmlReport().asXML()); + + Properties props = new Properties(); props.put("mail.transport.protocol", "smtp"); props.put("mail.smtp.host", "falcon"); props.put("mail.smtp.port", "25"); Session mailSession = Session.getInstance(props); - Message message = new MimeMessage(mailSession); - - message.setFrom(new InternetAddress("erik@brakkee.org")); - message.setRecipient(Message.RecipientType.TO, new InternetAddress( - "erik@brakkee.org")); - message.setSentDate(new Date()); - message.setSubject("KiSS crawler update"); - message.setText(aText); - Transport.send(message); + InternetAddress from = new InternetAddress("erik@brakkee.org"); + + HtmlEmail mail = new HtmlEmail(); + mail.setMailSession(mailSession); + try { + mail.setFrom("erik@brakkee.org"); + mail.setTo(Arrays.asList(new InternetAddress[] { from })); + mail.setSentDate(new Date()); + mail.setSubject("KiSS Crawler Update"); + String html = aExecutor.getXmlReport().asXML(); + Document document = new XSLT().transform(html.getBytes(), new FileResource(new File("reportToHtml.xsl"))); + ByteArrayOutputStream xhtml = new ByteArrayOutputStream(); + XMLSerializer serializer = new XMLSerializer(xhtml, new OutputFormat()); + serializer.serialize(document); + mail.setHtmlMsg(xhtml.toString()); + mail.setTextMsg(textReport); + mail.send(); + } catch (EmailException e) { + throw new RuntimeException(e); + } catch (TransformerException e) { + throw new RuntimeException(e); + } catch (IOException e) { + throw new RuntimeException(e); + } } }