fc41cde40aae4340b1f338f523a875433fdcc875
[utils] / crawler / kiss / src / org / wamblee / crawler / kiss / main / ProgramConfigurationParser.java
1 /*
2  * Copyright 2005 the original author or authors.
3  * 
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  * 
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  * 
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 package org.wamblee.crawler.kiss.main;
18
19 import java.io.InputStream;
20 import java.util.ArrayList;
21 import java.util.Iterator;
22 import java.util.List;
23
24 import org.dom4j.Attribute;
25 import org.dom4j.Document;
26 import org.dom4j.DocumentException;
27 import org.dom4j.Element;
28 import org.dom4j.io.SAXReader;
29 import org.wamblee.conditions.AndCondition;
30 import org.wamblee.conditions.Condition;
31 import org.wamblee.conditions.PropertyRegexCondition;
32 import org.wamblee.crawler.kiss.guide.Program;
33 import org.wamblee.xml.XslTransformer;
34
35 /**
36  * Parse the configuration of desired programs.
37  */
38 class ProgramConfigurationParser {
39     private static final int DEFAULT_SMTP_PORT = 25;
40     
41     private static final int DEFAULT_PRIORITY = 1; 
42
43     // Formatting configuration.
44     private static final String ELEM_FORMAT = "format";
45
46     private static final String ELEM_TEXT = "text";
47
48     private static final String ELEM_HTML = "html";
49
50     // Configuration of interesting programs.
51
52     private static final String ELEM_PROGRAM = "program";
53     
54     private static final String ELEM_PRIORITY = "priority";
55
56     private static final String ELEM_PATTERN = "match";
57
58     private static final String ELEM_ACTION = "action";
59
60     private static final String ELEM_CATEGORY = "category";
61
62     private static final String ACTION_NOTIFY = "notify";
63
64     private List<ProgramFilter> _filters;
65     
66     private XslTransformer _transformer;
67
68     ProgramConfigurationParser(XslTransformer aTransformer) {
69         _filters = null;
70         _transformer = aTransformer;
71     }
72
73     /**
74      * Parses the condition used to match the desired programs.
75      * 
76      * @param aStream
77      *            Input stream to parse from.
78      * @return Condition.
79      */
80     void parse(InputStream aStream) {
81         List<ProgramFilter> filters = new ArrayList<ProgramFilter>();
82         try {
83             SAXReader reader = new SAXReader();
84             Document document = reader.read(aStream);
85
86             Element root = document.getRootElement();
87
88             for (Iterator i = root.elementIterator(ELEM_PROGRAM); i.hasNext();) {
89                 Element program = (Element) i.next();
90
91                 Element categoryElem = program.element(ELEM_CATEGORY);
92                 String category = "";
93                 if (categoryElem != null) {
94                     category = categoryElem.getText().trim();
95                 }
96
97                 Element actionElem = program.element(ELEM_ACTION);
98                 int priority = DEFAULT_PRIORITY; 
99                 String priorityString = program.elementTextTrim(ELEM_PRIORITY);
100                 if ( priorityString != null ) { 
101                     priority = Integer.valueOf(priorityString);
102                 }
103                 ProgramAction action = new RecordProgramAction(priority);
104                 if (actionElem != null) {
105                     if (actionElem.getText().equals(ACTION_NOTIFY)) {
106                         action = new InterestingProgramAction(category);
107                     }
108                 }
109
110                 List<Condition<Program>> regexConditions = new ArrayList<Condition<Program>>();
111                 for (Iterator j = program.elementIterator(ELEM_PATTERN); j
112                         .hasNext();) {
113                     Element patternElem = (Element) j.next();
114                     String fieldName = "name";
115                     Attribute fieldAttribute = patternElem.attribute("field");
116                     if (fieldAttribute != null) {
117                         fieldName = fieldAttribute.getText();
118                     }
119                     String pattern = ".*(" + patternElem.getText() + ").*";
120                     regexConditions.add(new PropertyRegexCondition<Program>(
121                             fieldName, pattern, true));
122                 }
123                 Condition<Program> condition = new AndCondition<Program>(
124                         regexConditions);
125                 filters.add(new ProgramFilter(condition, action));
126             }
127             _filters = filters;
128         } catch (DocumentException e) {
129             throw new RuntimeException("Error parsing program configuraiton", e);
130         }
131     }
132
133     /**
134      * Returns the list of program filters.
135      * 
136      * @return Filter list.
137      */
138     public List<ProgramFilter> getFilters() {
139         return _filters;
140     }
141 }