import java.io.File;
import java.io.FileInputStream;
+import java.io.FileNotFoundException;
import java.io.FileOutputStream;
+import java.io.IOException;
import java.io.InputStream;
import java.io.PrintStream;
import java.util.ArrayList;
+import java.util.Date;
import java.util.List;
+import java.util.Properties;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
+import javax.mail.Message;
+import javax.mail.MessagingException;
+import javax.mail.Session;
+import javax.mail.Transport;
+import javax.mail.internet.AddressException;
+import javax.mail.internet.InternetAddress;
+import javax.mail.internet.MimeMessage;
+
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.dom4j.Element;
import org.wamblee.conditions.Condition;
-import org.wamblee.conditions.OrCondition;
import org.wamblee.crawler.Action;
import org.wamblee.crawler.Configuration;
import org.wamblee.crawler.Crawler;
import org.wamblee.crawler.Page;
+import org.wamblee.crawler.PageException;
import org.wamblee.crawler.impl.ConfigurationParser;
import org.wamblee.crawler.impl.CrawlerImpl;
*
*/
public class KissCrawler {
-
+
private static final Log LOG = LogFactory.getLog(KissCrawler.class);
private static final String LOG_FILE = "kiss.log";
private static final String START_URL = "http://epg.kml.kiss-technology.com/login_core.php";
private static final String CRAWLER_CONFIG = "config.xml";
-
+
private static final String PROGRAM_CONFIG = "programs.xml";
private static final String TIME_REGEX = "([0-9]{2}):([0-9]{2})[^0-9]*([0-9]{2}):([0-9]{2}).*";
private Pattern _pattern;
- public KissCrawler(String aStartUrl, String aCrawlerConfig, String aProgramConfig) throws Exception {
+ public KissCrawler(String aStartUrl, String aCrawlerConfig,
+ String aProgramConfig) throws IOException, AddressException,
+ MessagingException {
_pattern = Pattern.compile(TIME_REGEX);
PrintStream os = new PrintStream(fos);
try {
- ConfigurationParser parser = new ConfigurationParser(os);
- InputStream crawlerConfigFile = new FileInputStream(new File(aCrawlerConfig));
- Configuration config = parser.parse(crawlerConfigFile);
-
- InputStream programConfigFile = new FileInputStream(new File(aProgramConfig));
- Condition<Program> programCondition = new ProgramConfigurationParser().parse(programConfigFile);
-
-
HttpClient client = new HttpClient();
// client.getHostConfiguration().setProxy("localhost", 3128);
- Crawler crawler = new CrawlerImpl(client, config);
+ Crawler crawler = createCrawler(aCrawlerConfig, os, client);
- Page page = crawler.getPage(aStartUrl);
- showPage(page);
- page = page.getAction("channels-favorites").execute();
+ Page page = getStartPage(aStartUrl, crawler);
TVGuide guide = createGuide(page);
PrintVisitor printer = new PrintVisitor(System.out);
guide.accept(printer);
-
- MatchVisitor matcher = new MatchVisitor(programCondition);
- guide.accept(matcher);
- List<Program> programs = matcher.getMatches();
- for (Program program: programs) {
- System.out.println("Found: " + program + " record: " + program.record() );
- }
-
+
+ InputStream programConfigFile = new FileInputStream(new File(
+ aProgramConfig));
+ Condition<Program> programCondition = new ProgramConfigurationParser()
+ .parse(programConfigFile);
+ recordInterestingShows(programCondition, guide);
} finally {
os.flush();
os.close();
}
}
+    /**
+     * Tries to record every program in the guide that matches the given
+     * condition, then reports the outcome on standard output and by mail.
+     *
+     * @param programCondition
+     *            Condition used to select the programs to record.
+     * @param guide
+     *            TV guide that is searched for matching programs.
+     * @throws AddressException
+     *             Propagated from {@link #sendMail(String)}.
+     * @throws MessagingException
+     *             Propagated from {@link #sendMail(String)}.
+     */
+    private void recordInterestingShows(Condition<Program> programCondition,
+            TVGuide guide) throws AddressException, MessagingException {
+        MatchVisitor matcher = new MatchVisitor(programCondition);
+        guide.accept(matcher);
+        List<Program> programs = matcher.getMatches();
+
+        // Builders instead of String += so the accumulated text is not
+        // copied again for every matching program.
+        StringBuilder recorded = new StringBuilder();
+        StringBuilder notRecorded = new StringBuilder();
+        StringBuilder failures = new StringBuilder();
+        for (Program program : programs) {
+            try {
+                if (program.record()) {
+                    recorded.append("\n").append(program);
+                } else {
+                    notRecorded.append("\n").append(program);
+                }
+            } catch (PageException e) {
+                // A failure for one program must not stop the others; pass
+                // the exception to the logger so its cause is not lost.
+                LOG.info("Attempt to record " + program + " failed.", e);
+                failures.append("\n").append(program).append(": ").append(
+                        e.getMessage());
+            }
+        }
+
+        StringBuilder msg = new StringBuilder("Summary of KiSS crawler: \n\n\n");
+        if (recorded.length() > 0) {
+            msg.append("Recorded programs:\n\n").append(recorded).append(
+                    "\n\n");
+        }
+        if (notRecorded.length() > 0) {
+            msg.append("Not recorded programs:\n\n").append(notRecorded)
+                    .append("\n\n");
+        }
+        if (recorded.length() == 0 && notRecorded.length() == 0) {
+            // Terminated like the other sections so that a following
+            // "Failures:" section does not run into this line.
+            msg.append("No suitable programs found\n\n");
+        }
+        if (failures.length() > 0) {
+            msg.append("Failures:\n\n").append(failures);
+        }
+
+        String summary = msg.toString();
+        System.out.println(summary);
+        sendMail(summary);
+    }
+
+    /**
+     * Builds a crawler from the crawler configuration file.
+     *
+     * @param aCrawlerConfig
+     *            Path of the crawler configuration file.
+     * @param os
+     *            Stream handed to the configuration parser.
+     * @param client
+     *            HTTP client passed on to the crawler.
+     * @return Crawler created from the parsed configuration.
+     * @throws FileNotFoundException
+     *             If the configuration file cannot be opened.
+     */
+    private Crawler createCrawler(String aCrawlerConfig, PrintStream os,
+            HttpClient client) throws FileNotFoundException {
+        ConfigurationParser parser = new ConfigurationParser(os);
+        InputStream crawlerConfigFile = new FileInputStream(new File(
+                aCrawlerConfig));
+        try {
+            // NOTE(review): assumes parse() has consumed the stream by the
+            // time it returns, so closing it afterwards is safe - confirm.
+            Configuration config = parser.parse(crawlerConfigFile);
+            return new CrawlerImpl(client, config);
+        } finally {
+            // The stream was previously left open; release the file handle.
+            // A failing close is only logged so the declared exceptions of
+            // this method stay unchanged.
+            try {
+                crawlerConfigFile.close();
+            } catch (IOException e) {
+                LOG.warn("Could not close crawler configuration file '"
+                        + aCrawlerConfig + "'", e);
+            }
+        }
+    }
+
+    /**
+     * Retrieves the page at the start URL and executes its
+     * "channels-favorites" action.
+     *
+     * @param aStartUrl
+     *            URL of the page to retrieve first.
+     * @param crawler
+     *            Crawler used to retrieve the page.
+     * @return Page produced by executing the "channels-favorites" action.
+     * @throws RuntimeException
+     *             Wrapping the {@link PageException} if retrieving the page
+     *             or executing the action fails.
+     */
+    private Page getStartPage(String aStartUrl, Crawler crawler) {
+        Page favorites;
+        try {
+            Page startPage = crawler.getPage(aStartUrl);
+            favorites = startPage.getAction("channels-favorites").execute();
+        } catch (PageException e) {
+            throw new RuntimeException(
+                    "Could not login to electronic program guide", e);
+        }
+        return favorites;
+    }
+
/**
 * Runs the crawler with the built-in start URL and configuration file
 * names.
 *
 * @param args
 *            Command-line arguments; not used.
 * @throws Exception
 *             Any exception raised while the crawler runs.
 */
public static void main(String[] args) throws Exception {
// All the work happens in the constructor, so the instance is not kept.
new KissCrawler(START_URL, CRAWLER_CONFIG, PROGRAM_CONFIG);
}
Action[] actions = page.getActions();
List<Channel> channels = new ArrayList<Channel>();
for (Action action : actions) {
- Channel channel = createChannel(action.getName(), action.execute()
- .getAction("right-now").execute());
- channels.add(channel);
+ try {
+ LOG.info("Getting channel info for '" + action.getName() + "'");
+ Channel channel = createChannel(action.getName(), action
+ .execute().getAction("right-now").execute());
+ channels.add(channel);
+ } catch (PageException e) {
+ LOG.error("Could not create channel information for '"
+ + action.getName() + "'", e);
+ }
}
return new TVGuide(channels);
}
String time = action.getContent().element("time").getText().trim();
Matcher matcher = _pattern.matcher(time);
if (matcher.matches()) {
- Time begin = new Time(Integer.parseInt(matcher.group(1)),
- Integer.parseInt(matcher.group(2)));
- Time end = new Time(Integer.parseInt(matcher.group(3)),
- Integer.parseInt(matcher.group(4)));
+ Time begin = new Time(Integer.parseInt(matcher.group(1)),
+ Integer.parseInt(matcher.group(2)));
+ Time end = new Time(Integer.parseInt(matcher.group(3)), Integer
+ .parseInt(matcher.group(4)));
TimeInterval interval = new TimeInterval(begin, end);
- //Page programInfo = action.execute();
- //String description = programInfo.getContent().element("description").getText().trim();
- //String keywords = programInfo.getContent().element("keywords").getText().trim();
+ // Page programInfo = action.execute();
+ // String description =
+ // programInfo.getContent().element("description").getText().trim();
+ // String keywords =
+ // programInfo.getContent().element("keywords").getText().trim();
String description = "";
String keywords = "";
- Program program = new Program(aChannel, action.getName(), description, keywords, interval, action);
-
+ Program program = new Program(aChannel, action.getName(),
+ description, keywords, interval, action);
+
LOG.debug("Got program " + program);
programs.add(program);
}
}
return new Channel(aChannel, programs);
}
+
+    /**
+     * Mails the given text with the subject "KiSS crawler update", using the
+     * SMTP host, port and addresses fixed in this method.
+     *
+     * @param aText
+     *            Text to use as the content of the mail.
+     * @throws AddressException
+     *             Declared by the construction of the mail addresses.
+     * @throws MessagingException
+     *             If the message cannot be composed or sent.
+     */
+    private void sendMail(String aText) throws AddressException,
+            MessagingException {
+        Properties smtpSettings = new Properties();
+        smtpSettings.put("mail.transport.protocol", "smtp");
+        smtpSettings.put("mail.smtp.host", "falcon");
+        smtpSettings.put("mail.smtp.port", "25");
+
+        Message mail = new MimeMessage(Session.getInstance(smtpSettings));
+
+        // Sender and recipient are the same mailbox.
+        InternetAddress sender = new InternetAddress("erik@brakkee.org");
+        mail.setFrom(sender);
+        InternetAddress recipient = new InternetAddress("erik@brakkee.org");
+        mail.setRecipient(Message.RecipientType.TO, recipient);
+
+        mail.setSentDate(new Date());
+        mail.setSubject("KiSS crawler update");
+        mail.setText(aText);
+
+        Transport.send(mail);
+    }
}