git://wamblee.org
/
utils
/ commitdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
| commitdiff |
tree
raw
|
patch
|
inline
| side by side (parent:
ed76ddf
)
(no commit message)
author
Erik Brakkee
<erik@brakkee.org>
Sat, 18 Mar 2006 11:51:51 +0000
(11:51 +0000)
committer
Erik Brakkee
<erik@brakkee.org>
Sat, 18 Mar 2006 11:51:51 +0000
(11:51 +0000)
crawler/kiss/conf/kiss/programs.xml
patch
|
blob
|
history
crawler/kiss/src/org/wamblee/crawler/kiss/KissCrawler.java
patch
|
blob
|
history
crawler/kiss/src/org/wamblee/crawler/kiss/ProgramConfigurationParser.java
patch
|
blob
|
history
diff --git
a/crawler/kiss/conf/kiss/programs.xml
b/crawler/kiss/conf/kiss/programs.xml
index f6d5675d8d6096dfc0651c0307b850ec0582e4be..0fa865e9e8d41934809ae82138eed1eb2c83eb28 100644
(file)
--- a/
crawler/kiss/conf/kiss/programs.xml
+++ b/
crawler/kiss/conf/kiss/programs.xml
@@ -7,9 +7,13 @@
<program>
<action>notify</action>
<program>
<action>notify</action>
- <match field="description">((sci-fi)|(science fiction))</match>
+ <match field="description">(sci-fi)|(science fiction)</match>
</program>
</program>
+ <program>
+ <action>notify</action>
+ <match>(zembla)|(uur.*wolf)</match>
+ </program>
<program>
<match>star.*gate</match>
<program>
<match>star.*gate</match>
diff --git
a/crawler/kiss/src/org/wamblee/crawler/kiss/KissCrawler.java
b/crawler/kiss/src/org/wamblee/crawler/kiss/KissCrawler.java
index 8fbd232dcba2c685a4c981e75416b87d8512a3bf..d302fb6e915b8e5bffcf9228837ef015a6fbd453 100644
(file)
--- a/
crawler/kiss/src/org/wamblee/crawler/kiss/KissCrawler.java
+++ b/
crawler/kiss/src/org/wamblee/crawler/kiss/KissCrawler.java
@@ -132,16 +132,17 @@
public class KissCrawler {
// client.getHostConfiguration().setProxy("127.0.0.1", 3128);
Crawler crawler = createCrawler(aCrawlerConfig, os, client);
// client.getHostConfiguration().setProxy("127.0.0.1", 3128);
Crawler crawler = createCrawler(aCrawlerConfig, os, client);
+ InputStream programConfigFile = new FileInputStream(new File(
+ aProgramConfig));
+ List<ProgramFilter> programFilters = new ProgramConfigurationParser()
+ .parse(programConfigFile);
Page page = getStartPage(aStartUrl, crawler);
TVGuide guide = createGuide(page);
PrintVisitor printer = new PrintVisitor(System.out);
guide.accept(printer);
Page page = getStartPage(aStartUrl, crawler);
TVGuide guide = createGuide(page);
PrintVisitor printer = new PrintVisitor(System.out);
guide.accept(printer);
- InputStream programConfigFile = new FileInputStream(new File(
- aProgramConfig));
- List<ProgramFilter> programFilters = new ProgramConfigurationParser()
- .parse(programConfigFile);
+
recordInterestingShows(programFilters, guide);
} finally {
os.flush();
recordInterestingShows(programFilters, guide);
} finally {
os.flush();
diff --git
a/crawler/kiss/src/org/wamblee/crawler/kiss/ProgramConfigurationParser.java
b/crawler/kiss/src/org/wamblee/crawler/kiss/ProgramConfigurationParser.java
index b8d5c67863adcd9406c3bae2fc76d6f11f86b829..2c05b827233a82888c34be1452ef07e8ed5ff269 100644
(file)
--- a/
crawler/kiss/src/org/wamblee/crawler/kiss/ProgramConfigurationParser.java
+++ b/
crawler/kiss/src/org/wamblee/crawler/kiss/ProgramConfigurationParser.java
@@ -78,8 +78,8 @@
class ProgramConfigurationParser {
if ( fieldAttribute != null ) {
fieldName = fieldAttribute.getText();
}
if ( fieldAttribute != null ) {
fieldName = fieldAttribute.getText();
}
- String pattern = ".*" + patternElem.getText()
- + ".*";
+ String pattern = ".*(" + patternElem.getText()
+ + ").*";
regexConditions.add(new PropertyRegexCondition<Program>(fieldName, pattern, true));
}
Condition<Program> condition = new AndCondition<Program>(regexConditions);
regexConditions.add(new PropertyRegexCondition<Program>(fieldName, pattern, true));
}
Condition<Program> condition = new AndCondition<Program>(regexConditions);