/*
 * Copyright 2005 the original author or authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
17 package org.wamblee.crawler.kiss;
20 import java.io.FileInputStream;
21 import java.io.FileNotFoundException;
22 import java.io.FileOutputStream;
23 import java.io.IOException;
24 import java.io.InputStream;
25 import java.io.PrintStream;
26 import java.util.ArrayList;
27 import java.util.Date;
28 import java.util.EnumMap;
29 import java.util.List;
30 import java.util.Properties;
32 import java.util.TreeSet;
33 import java.util.regex.Matcher;
34 import java.util.regex.Pattern;
36 import javax.mail.Message;
37 import javax.mail.MessagingException;
38 import javax.mail.Session;
39 import javax.mail.Transport;
40 import javax.mail.internet.InternetAddress;
41 import javax.mail.internet.MimeMessage;
43 import org.apache.commons.httpclient.HttpClient;
44 import org.apache.commons.logging.Log;
45 import org.apache.commons.logging.LogFactory;
46 import org.wamblee.crawler.Action;
47 import org.wamblee.crawler.Configuration;
48 import org.wamblee.crawler.Crawler;
49 import org.wamblee.crawler.Page;
50 import org.wamblee.crawler.PageException;
51 import org.wamblee.crawler.impl.ConfigurationParser;
52 import org.wamblee.crawler.impl.CrawlerImpl;
53 import org.wamblee.crawler.kiss.Program.RecordingResult;
/**
 * The KiSS crawler for automatic recording of interesting TV shows.
 */
59 public class KissCrawler {
61 private static final Log LOG = LogFactory.getLog(KissCrawler.class);
64 * Log file name for the crawler.
66 private static final String LOG_FILE = "kiss.log";
69 * Start URL of the electronic programme guide.
71 private static final String START_URL = "http://epg.kml.kiss-technology.com/login_core.php";
74 * Crawler configuration file.
76 private static final String CRAWLER_CONFIG = "config.xml";
79 * Configuration file describing interesting programs.
81 private static final String PROGRAM_CONFIG = "programs.xml";
84 * Regular expression for matching time interval strings in the retrieved
87 private static final String TIME_REGEX = "([0-9]{2}):([0-9]{2})[^0-9]*([0-9]{2}):([0-9]{2}).*";
90 * Compiled pattern for the time regular expression.
92 private Pattern _pattern;
95 * Runs the KiSS crawler.
98 * Arguments, currently all ignored because they are hardcoded.
100 * In case of problems.
102 public static void main(String[] aArgs) throws Exception {
103 new KissCrawler(START_URL, CRAWLER_CONFIG, PROGRAM_CONFIG);
107 * Constructs the crawler. This retrieves the TV guide by crawling the KiSS
108 * EPG guide, filters the guide for interesting programs, tries to record
109 * them, and sends a summary mail to the user.
112 * Start URL of the electronic programme guide.
113 * @param aCrawlerConfig
114 * Configuration file for the crawler.
115 * @param aProgramConfig
116 * Configuration file describing interesting shows.
117 * @throws IOException
118 * In case of problems reading files.
119 * @throws MessagingException
120 * In case of problems sending a mail notification.
122 public KissCrawler(String aStartUrl, String aCrawlerConfig,
123 String aProgramConfig) throws IOException, MessagingException {
125 _pattern = Pattern.compile(TIME_REGEX);
127 FileOutputStream fos = new FileOutputStream(new File(LOG_FILE));
128 PrintStream os = new PrintStream(fos);
131 HttpClient client = new HttpClient();
132 // client.getHostConfiguration().setProxy("127.0.0.1", 3128);
134 Crawler crawler = createCrawler(aCrawlerConfig, os, client);
136 Page page = getStartPage(aStartUrl, crawler);
137 TVGuide guide = createGuide(page);
138 PrintVisitor printer = new PrintVisitor(System.out);
139 guide.accept(printer);
141 InputStream programConfigFile = new FileInputStream(new File(
143 List<ProgramFilter> programFilters = new ProgramConfigurationParser()
144 .parse(programConfigFile);
145 recordInterestingShows(programFilters, guide);
149 System.out.println("Output written on '" + LOG_FILE + "'");
/**
 * Records interesting shows.
 *
 * @param aProgramCondition
 *            Condition determining which shows are interesting.
 * @param aGuide
 *            TV guide the conditions are applied to.
 * @throws MessagingException
 *             In case of problems sending a summary mail.
 */
// Applies every filter to the guide, tries to record the shows selected for
// recording and prints a textual summary of the outcome on standard output.
163 private void recordInterestingShows(List<ProgramFilter> aProgramCondition,
164 TVGuide aGuide) throws MessagingException {
// Both result sets are TreeSets ordered by Program.TimeSorter.
166 Set<Program> showsToRecord = new TreeSet<Program>(new Program.TimeSorter());
167 Set<Program> interestingShows = new TreeSet<Program>(new Program.TimeSorter());
169 for (ProgramFilter filter : aProgramCondition) {
// Programs of the guide that this filter selects.
170 List<Program> programs = filter.apply(aGuide);
// NOTE(review): the case labels of this switch are not visible in this view.
// The first loop below adds every selected program to showsToRecord, the
// second adds only programs for which recording is possible to
// interestingShows, and the throw presumably belongs to the default branch.
// Confirm against the complete file.
171 switch (filter.getAction()) {
173 for (Program program: programs) {
174 showsToRecord.add(program);
179 for (Program program: programs) {
180 if ( program.isRecordingPossible()) {
181 interestingShows.add(program);
187 throw new RuntimeException("Unknown action '" + filter.getAction() + "'");
// recordShows returns one (possibly empty) program list per RecordingResult value.
192 EnumMap<RecordingResult, List<Program>> messages = recordShows(showsToRecord);
194 String msg = "Summary of KiSS crawler: \n\n\n";
// One section per recording result that has at least one program.
196 for (RecordingResult result : RecordingResult.values()) {
197 if (messages.get(result).size() > 0) {
198 msg += result.getDescription() + "\n\n";
199 for (Program program : messages.get(result)) {
200 msg += program + "\n\n";
// Shows that were only flagged as interesting get their own section.
205 if ( interestingShows.size() > 0 ) {
206 msg += "Possibly interesting shows:\n\n";
207 for (Program program: interestingShows) {
208 msg += program + "\n\n";
// Neither set contains anything: state that explicitly in the summary.
211 if (showsToRecord.size() + interestingShows.size() == 0) {
212 msg += "No suitable programs found";
215 System.out.println(msg);
// NOTE(review): no call to sendMail is visible in this view although the
// method declares MessagingException; confirm whether the summary is also
// mailed in the complete file.
221 * @param showsToRecord Shows to record.
222 * @return Recording results.
224 private EnumMap<RecordingResult, List<Program>> recordShows(Set<Program> showsToRecord) {
225 EnumMap<RecordingResult, List<Program>> messages = new EnumMap<RecordingResult, List<Program>>(
226 RecordingResult.class);
227 for (RecordingResult result : RecordingResult.values()) {
228 messages.put(result, new ArrayList<Program>());
231 for (Program program : showsToRecord) {
232 Program.RecordingResult result = program.record();
233 messages.get(result).add(program);
239 * Creates the crawler.
241 * @param aCrawlerConfig
242 * Crawler configuration file.
244 * Logging output stream for the crawler.
246 * HTTP Client to use.
248 * @throws FileNotFoundException
249 * In case configuration files cannot be found.
251 private Crawler createCrawler(String aCrawlerConfig, PrintStream aOs,
252 HttpClient aClient) throws FileNotFoundException {
253 ConfigurationParser parser = new ConfigurationParser(aOs);
254 InputStream crawlerConfigFile = new FileInputStream(new File(
256 Configuration config = parser.parse(crawlerConfigFile);
257 Crawler crawler = new CrawlerImpl(aClient, config);
262 * Gets the start page of the electronic programme guide. This involves
263 * login and navigation to a suitable start page after logging in.
266 * URL of the electronic programme guide.
269 * @return Starting page.
271 private Page getStartPage(String aStartUrl, Crawler aCrawler) {
273 Page page = aCrawler.getPage(aStartUrl);
274 return page.getAction("channels-favorites").execute();
275 } catch (PageException e) {
276 throw new RuntimeException(
277 "Could not login to electronic program guide", e);
282 * Creates the TV guide by web crawling.
288 private TVGuide createGuide(Page aPage) {
289 LOG.info("Obtaining full TV guide");
290 Action[] actions = aPage.getActions();
291 List<Channel> channels = new ArrayList<Channel>();
292 for (Action action : actions) {
294 LOG.info("Getting channel info for '" + action.getName() + "'");
295 Channel channel = createChannel(action.getName(), action
296 .execute().getAction("right-now").execute());
297 channels.add(channel);
298 if (SystemProperties.isDebugMode()) {
299 break; // Only one channel is crawled.
301 } catch (PageException e) {
302 LOG.error("Could not create channel information for '"
303 + action.getName() + "'", e);
306 return new TVGuide(channels);
310 * Create channel information for a specific channel.
315 * Starting page for the channel.
318 private Channel createChannel(String aChannel, Page aPage) {
319 LOG.info("Obtaining program for " + aChannel);
320 Action[] programActions = aPage.getActions();
321 List<Program> programs = new ArrayList<Program>();
322 for (Action action : programActions) {
323 String time = action.getContent().element("time").getText().trim();
324 Matcher matcher = _pattern.matcher(time);
325 if (matcher.matches()) {
326 Time begin = new Time(Integer.parseInt(matcher.group(1)),
327 Integer.parseInt(matcher.group(2)));
328 Time end = new Time(Integer.parseInt(matcher.group(3)), Integer
329 .parseInt(matcher.group(4)));
330 TimeInterval interval = new TimeInterval(begin, end);
331 String description = "";
332 String keywords = "";
333 if (!SystemProperties.isNoProgramDetailsRequired()) {
335 Page programInfo = action.execute();
336 description = programInfo.getContent().element(
337 "description").getText().trim();
338 keywords = programInfo.getContent().element("keywords")
340 } catch (PageException e) {
342 .warn("Program details coul dnot be determined for '"
343 + action.getName() + "'");
346 Program program = new Program(aChannel, action.getName(),
347 description, keywords, interval, action);
349 LOG.debug("Got program " + program);
350 programs.add(program);
353 return new Channel(aChannel, programs);
357 * Sends a summary mail to the user.
361 * @throws MessagingException
362 * In case of problems sending mail.
364 private void sendMail(String aText) throws MessagingException {
365 Properties props = new Properties();
366 props.put("mail.transport.protocol", "smtp");
367 props.put("mail.smtp.host", "falcon");
368 props.put("mail.smtp.port", "25");
370 Session mailSession = Session.getInstance(props);
371 Message message = new MimeMessage(mailSession);
373 message.setFrom(new InternetAddress("erik@brakkee.org"));
374 message.setRecipient(Message.RecipientType.TO, new InternetAddress(
375 "erik@brakkee.org"));
376 message.setSentDate(new Date());
377 message.setSubject("KiSS crawler update");
378 message.setText(aText);
379 Transport.send(message);