X-Git-Url: http://wamblee.org/gitweb/?a=blobdiff_plain;f=crawler%2Fkiss%2Fsrc%2Forg%2Fwamblee%2Fcrawler%2Fkiss%2Fmain%2FKissCrawler.java;h=c522131d8bb8b1ddb49f9040bdc4b6463dfe1d9d;hb=0119c69858055c84592338a202e9d1b18510a29b;hp=6cdaa17a55da35f9aec05a0794cb03c5e63441c5;hpb=d85bc24e068a68a54786fae5dc71573607b3b0cb;p=utils diff --git a/crawler/kiss/src/org/wamblee/crawler/kiss/main/KissCrawler.java b/crawler/kiss/src/org/wamblee/crawler/kiss/main/KissCrawler.java index 6cdaa17a..c522131d 100644 --- a/crawler/kiss/src/org/wamblee/crawler/kiss/main/KissCrawler.java +++ b/crawler/kiss/src/org/wamblee/crawler/kiss/main/KissCrawler.java @@ -16,35 +16,21 @@ package org.wamblee.crawler.kiss.main; -import java.io.ByteArrayOutputStream; import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; -import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; -import java.io.PrintStream; import java.util.ArrayList; -import java.util.Arrays; -import java.util.Date; import java.util.List; -import java.util.Properties; import java.util.regex.Matcher; import java.util.regex.Pattern; import javax.mail.MessagingException; -import javax.mail.Session; -import javax.mail.internet.InternetAddress; -import javax.xml.transform.TransformerException; import org.apache.commons.httpclient.HttpClient; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.commons.mail.EmailException; -import org.apache.commons.mail.HtmlEmail; -import org.apache.xml.serialize.OutputFormat; -import org.apache.xml.serialize.XMLSerializer; -import org.w3c.dom.Document; import org.wamblee.crawler.Action; import org.wamblee.crawler.Configuration; import org.wamblee.crawler.Crawler; @@ -60,8 +46,6 @@ import org.wamblee.crawler.kiss.guide.Time; import org.wamblee.crawler.kiss.guide.TimeInterval; import org.wamblee.crawler.kiss.notification.NotificationException; import org.wamblee.crawler.kiss.notification.Notifier; -import org.wamblee.io.FileResource; -import org.wamblee.xml.XSLT; /** * The KiSS crawler for automatic recording of interesting TV shows. @@ -71,11 +55,6 @@ public class KissCrawler { private static final Log LOG = LogFactory.getLog(KissCrawler.class); - /** - * Log file name for the crawler. - */ - private static final String LOG_FILE = "kiss.log"; - /** * Start URL of the electronic programme guide. */ @@ -135,29 +114,24 @@ public class KissCrawler { _pattern = Pattern.compile(TIME_REGEX); - FileOutputStream fos = new FileOutputStream(new File(LOG_FILE)); - PrintStream os = new PrintStream(fos); - try { HttpClient client = new HttpClient(); - //client.getHostConfiguration().setProxy("127.0.0.1", 3128); + // client.getHostConfiguration().setProxy("127.0.0.1", 3128); - Crawler crawler = createCrawler(aCrawlerConfig, os, client); + Crawler crawler = createCrawler(aCrawlerConfig, client); InputStream programConfigFile = new FileInputStream(new File( aProgramConfig)); ProgramConfigurationParser parser = new ProgramConfigurationParser(); parser.parse(programConfigFile); - List programFilters = parser.getFilters(); - + List programFilters = parser.getFilters(); + Page page = getStartPage(aStartUrl, crawler); TVGuide guide = createGuide(page); PrintVisitor printer = new PrintVisitor(System.out); guide.accept(printer); processResults(programFilters, guide, parser.getNotifier()); } finally { - os.flush(); - os.close(); - System.out.println("Output written on '" + LOG_FILE + "'"); + System.out.println("Crawler finished"); } } @@ -183,8 +157,8 @@ public class KissCrawler { } executor.commit(); try { - aNotifier.send(executor.getXmlReport()); - } catch (NotificationException e) { + aNotifier.send(executor.getReport()); + } catch (NotificationException e) { throw new RuntimeException(e); } } @@ -202,9 +176,9 @@ public class KissCrawler { * @throws FileNotFoundException * In case configuration files cannot be found. */ - private Crawler createCrawler(String aCrawlerConfig, PrintStream aOs, + private Crawler createCrawler(String aCrawlerConfig, HttpClient aClient) throws FileNotFoundException { - ConfigurationParser parser = new ConfigurationParser(aOs); + ConfigurationParser parser = new ConfigurationParser(); InputStream crawlerConfigFile = new FileInputStream(new File( aCrawlerConfig)); Configuration config = parser.parse(crawlerConfigFile); @@ -306,51 +280,4 @@ public class KissCrawler { } return new Channel(aChannel, programs); } - - /** - * Sends a summary mail to the user. - * - * @param aText - * Text of the mail. - * @throws MessagingException - * In case of problems sending mail. - */ - private void sendMail(ProgramActionExecutor aExecutor) throws MessagingException { - String textReport = aExecutor.getReport(); - System.out.println("Text report: \n" + textReport); - System.out.println("XML report:\n" + aExecutor.getXmlReport().asXML()); - - - Properties props = new Properties(); - props.put("mail.transport.protocol", "smtp"); - props.put("mail.smtp.host", "falcon"); - props.put("mail.smtp.port", "25"); - - Session mailSession = Session.getInstance(props); - InternetAddress from = new InternetAddress("erik@brakkee.org"); - - HtmlEmail mail = new HtmlEmail(); - mail.setMailSession(mailSession); - try { - mail.setFrom("erik@brakkee.org"); - mail.setTo(Arrays.asList(new InternetAddress[] { from })); - mail.setSentDate(new Date()); - mail.setSubject("KiSS Crawler Update"); - String html = aExecutor.getXmlReport().asXML(); - Document document = new XSLT().transform(html.getBytes(), new FileResource(new File("reportToHtml.xsl"))); - ByteArrayOutputStream xhtml = new ByteArrayOutputStream(); - XMLSerializer serializer = new XMLSerializer(xhtml, new OutputFormat()); - serializer.serialize(document); - mail.setHtmlMsg(xhtml.toString()); - mail.setTextMsg(textReport); - mail.send(); - } catch (EmailException e) { - throw new RuntimeException(e); - } catch (TransformerException e) { - throw new RuntimeException(e); - } catch (IOException e) { - throw new RuntimeException(e); - } - } - }