git://wamblee.org
/
utils
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
(no commit message)
[utils]
/
crawler
/
kiss
/
src
/
org
/
wamblee
/
crawler
/
kiss
/
main
/
KissCrawler.java
diff --git a/crawler/kiss/src/org/wamblee/crawler/kiss/main/KissCrawler.java b/crawler/kiss/src/org/wamblee/crawler/kiss/main/KissCrawler.java
index 5125f407d45094b4bc71698ef47c5fe375691ed0..3191ff24e627cffdd5d0eab3321e106c090a3ba9 100644
(file)
--- a/crawler/kiss/src/org/wamblee/crawler/kiss/main/KissCrawler.java
+++ b/crawler/kiss/src/org/wamblee/crawler/kiss/main/KissCrawler.java
@@ -29,6 +29,7 @@
import java.util.regex.Pattern;
import javax.mail.MessagingException;
import org.apache.commons.httpclient.HttpClient;
import javax.mail.MessagingException;
import org.apache.commons.httpclient.HttpClient;
+import org.apache.commons.httpclient.NameValuePair;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.wamblee.crawler.Action;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.wamblee.crawler.Action;
@@ -61,7 +62,7 @@
public class KissCrawler {
/**
* Start URL of the electronic programme guide.
*/
/**
* Start URL of the electronic programme guide.
*/
- private static final String START_URL = "http://epg.kml.kiss-technology.com/login_core.php";
+ private static final String START_URL = "http://epg.kml.kiss-technology.com/login.php";
/**
* Default socket timeout to use.
/**
* Default socket timeout to use.
@@ -72,7 +73,7 @@
public class KissCrawler {
* Regular expression for matching time interval strings in the retrieved
* pages.
*/
* Regular expression for matching time interval strings in the retrieved
* pages.
*/
- private static final String TIME_REGEX = "([0-9]{2}):([0-9]{2})[^0-9]*([0-9]{2}):([0-9]{2}).*";
+ private static final String TIME_REGEX = "[^0-9]*([0-9]{2}):([0-9]{2})[^0-9]*([0-9]{2}):([0-9]{2}).*";
/**
* Compiled pattern for the time regular expression.
/**
* Compiled pattern for the time regular expression.
@@ -237,7 +238,8 @@
public class KissCrawler {
private Page getStartPage(String aStartUrl, Crawler aCrawler, Report aReport)
throws PageException {
try {
private Page getStartPage(String aStartUrl, Crawler aCrawler, Report aReport)
throws PageException {
try {
- Page page = aCrawler.getPage(aStartUrl);
+ Page page = aCrawler.getPage(aStartUrl, new NameValuePair[0]);
+ page = page.getAction("login").execute();
Action favorites = page.getAction("channels-favorites");
if (favorites == null) {
String msg = "Channels favorites action not found on start page";
Action favorites = page.getAction("channels-favorites");
if (favorites == null) {
String msg = "Channels favorites action not found on start page";