<classpathentry output="support/bin" kind="src" path="support/src"/>
<classpathentry output="support/testbin" kind="src" path="support/test"/>
<classpathentry kind="lib" path="crawler/basic/lib/external/commons-httpclient-3.0.jar"/>
- <classpathentry kind="lib" path="crawler/basic/lib/external/commons-logging-1.0.2.jar"/>
- <classpathentry kind="lib" path="crawler/basic/lib/external/dom4j-1.6.jar"/>
<classpathentry kind="lib" path="crawler/basic/lib/external/jtidy-4aug2000r7-dev.jar"/>
- <classpathentry kind="lib" path="crawler/basic/lib/external/log4j-1.2.9.jar"/>
<classpathentry kind="lib" path="crawler/basic/lib/test/dbunit-2.1.jar"/>
- <classpathentry kind="lib" path="crawler/basic/lib/test/emma_ant-2.0.5312.jar"/>
- <classpathentry kind="lib" path="crawler/basic/lib/test/emma-2.0.5312.jar"/>
- <classpathentry kind="lib" path="crawler/basic/lib/test/jdbc2_0-stdext.jar"/>
<classpathentry kind="lib" path="crawler/basic/lib/test/jmock-1.0.1.jar"/>
<classpathentry kind="lib" path="crawler/basic/lib/test/jmock-cglib-1.0.1.jar"/>
<classpathentry kind="lib" path="crawler/basic/lib/test/jta.jar"/>
<classpathentry kind="lib" path="crawler/basic/lib/test/junit-3.8.1.jar"/>
<classpathentry kind="lib" path="crawler/kiss/lib/external/commons-httpclient-3.0.jar"/>
- <classpathentry kind="lib" path="crawler/kiss/lib/external/commons-logging-1.0.2.jar"/>
- <classpathentry kind="lib" path="crawler/kiss/lib/external/dom4j-1.6.jar"/>
<classpathentry kind="lib" path="crawler/kiss/lib/external/jtidy-4aug2000r7-dev.jar"/>
- <classpathentry kind="lib" path="crawler/kiss/lib/external/log4j-1.2.9.jar"/>
<classpathentry kind="lib" path="crawler/kiss/lib/test/dbunit-2.1.jar"/>
- <classpathentry kind="lib" path="crawler/kiss/lib/test/emma_ant-2.0.5312.jar"/>
- <classpathentry kind="lib" path="crawler/kiss/lib/test/emma-2.0.5312.jar"/>
- <classpathentry kind="lib" path="crawler/kiss/lib/test/jdbc2_0-stdext.jar"/>
<classpathentry kind="lib" path="crawler/kiss/lib/test/jmock-1.0.1.jar"/>
<classpathentry kind="lib" path="crawler/kiss/lib/test/jmock-cglib-1.0.1.jar"/>
<classpathentry kind="lib" path="crawler/kiss/lib/test/jta.jar"/>
<classpathentry kind="lib" path="lib/special/hibernate/antlr-2.7.5H3.jar"/>
<classpathentry kind="lib" path="lib/special/hibernate/asm.jar"/>
<classpathentry kind="lib" path="lib/special/hibernate/asm-attrs.jar"/>
- <classpathentry kind="lib" path="lib/special/test/jdbc2_0-stdext.jar"/>
<classpathentry kind="lib" path="lib/special/test/jta.jar"/>
- <classpathentry kind="lib" path="support/lib/external/commons-logging-1.0.2.jar"/>
<classpathentry kind="lib" path="support/lib/external/dom4j-1.6.jar"/>
<classpathentry kind="lib" path="support/lib/external/ehcache-1.1.jar"/>
<classpathentry kind="lib" path="support/lib/external/log4j-1.2.9.jar"/>
<classpathentry kind="lib" path="support/lib/test/asm-attrs.jar"/>
<classpathentry kind="lib" path="support/lib/test/cglib-2.1.jar"/>
<classpathentry kind="lib" path="support/lib/test/dbunit-2.1.jar"/>
- <classpathentry kind="lib" path="support/lib/test/emma_ant-2.0.5312.jar"/>
- <classpathentry kind="lib" path="support/lib/test/emma-2.0.5312.jar"/>
<classpathentry kind="lib" path="support/lib/test/hibernate-3.0.5.jar"/>
- <classpathentry kind="lib" path="support/lib/test/jdbc2_0-stdext.jar"/>
<classpathentry kind="lib" path="support/lib/test/jmock-1.0.1.jar"/>
<classpathentry kind="lib" path="support/lib/test/jmock-cglib-1.0.1.jar"/>
<classpathentry kind="lib" path="support/lib/test/jta.jar"/>
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
-import java.io.PrintStream;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
private String _xslt;
- private PrintStream _os;
-
/**
* Constructs the request.
*
* Request parameters to use.
* @param aXslt
* XSLT used to convert the response.
- * @param aOs
- * Output stream for logging (if null then no logging is done).
*/
protected AbstractPageRequest(int aMaxTries, int aMaxDelay,
- NameValuePair[] aParams, String aXslt, PrintStream aOs) {
+ NameValuePair[] aParams, String aXslt) {
if (aParams == null) {
throw new IllegalArgumentException("aParams is null");
}
_maxDelay = aMaxDelay;
_params = aParams;
_xslt = aXslt;
- _os = aOs;
}
/*
Document transformed = new XSLT().transform(xhtmlData,
new FileResource(new File(_xslt)));
- _os.println("Transformed result is: ");
+ ByteArrayOutputStream os = new ByteArrayOutputStream();
Transformer transformer = TransformerFactory.newInstance()
.newTransformer();
transformer.setParameter(OutputKeys.INDENT, "yes");
transformer.setParameter(OutputKeys.METHOD, "xml");
transformer.transform(new DOMSource(transformed), new StreamResult(
- _os));
-
+ os));
+ LOG.debug("Transformed result is \n" + os.toString());
return transformed;
} catch (TransformerConfigurationException e) {
throw new RuntimeException(e.getMessage(), e);
tidy.setXHTML(true);
tidy.setQuiet(true);
tidy.setShowWarnings(false);
- if (_os != null) {
- _os.println("Content of '" + aMethod.getURI() + "'");
- _os.println();
- }
+
// We write the jtidy output to XML since the DOM tree it produces is
// not namespace aware and namespace awareness is required by XSLT.
// An alternative is to configure namespace awareness of the XML parser
// in a system wide way.
- Document w3cDoc = tidy.parseDOM(aMethod.getResponseBodyAsStream(), _os);
+ ByteArrayOutputStream os = new ByteArrayOutputStream();
+ Document w3cDoc = tidy.parseDOM(aMethod.getResponseBodyAsStream(), os);
DOMUtility.removeDuplicateAttributes(w3cDoc);
+ LOG.debug("Content of response is \n" + os.toString());
ByteArrayOutputStream xhtml = new ByteArrayOutputStream();
XMLSerializer serializer = new XMLSerializer(xhtml, new OutputFormat());
serializer.serialize(w3cDoc);
xhtml.flush();
- if (_os != null) {
- _os.println();
- }
+
return xhtml.toByteArray();
}
package org.wamblee.crawler;
import java.io.IOException;
-import java.io.PrintStream;
import javax.xml.transform.TransformerException;
* @param aXslt XSLT to use.
*/
public GetPageRequest(int aMaxTries, int aMaxDelay, NameValuePair[] aParams, String aXslt) {
- super(aMaxTries, aMaxDelay, aParams, aXslt, null);
+ super(aMaxTries, aMaxDelay, aParams, aXslt);
}
-
- /**
- * Constructs the request.
- * @param aMaxTries Maximum number of retries.
- * @param aMaxDelay Maximum delay before executing the request.
- * @param aParams Request parameters to use.
- * @param aXslt XSLT to use.
- * @param aOs Logging output stream to use.
- */
- public GetPageRequest(int aMaxTries, int aMaxDelay, NameValuePair[] aParams, String aXslt, PrintStream aOs) {
- super(aMaxTries, aMaxDelay, aParams, aXslt, aOs);
- }
-
+
/*
* (non-Javadoc)
*
package org.wamblee.crawler;
import java.io.IOException;
-import java.io.PrintStream;
import javax.xml.transform.TransformerException;
* @param aXslt XSLT to use.
*/
public PostPageRequest(int aMaxTries, int aMaxDelay, NameValuePair[] aParams, String aXslt) {
- super(aMaxTries, aMaxDelay, aParams, aXslt, null);
- }
-
- /**
- * Constructs the request.
- * @param aMaxTries Maximum number of retries.
- * @param aMaxDelay Maximum delay before executing the request.
- * @param aParams Request parameters to use.
- * @param aXslt XSLT to use.
- * @param aOs Logging output stream to use.
- */
- public PostPageRequest(int aMaxTries, int aMaxDelay, NameValuePair[] aParams, String aXslt,
- PrintStream aOs) {
- super(aMaxTries, aMaxDelay, aParams, aXslt, aOs);
+ super(aMaxTries, aMaxDelay, aParams, aXslt);
}
/*
PageRequest request;
if (METHOD_POST.equals(method)) {
request = new PostPageRequest(MAX_TRIES, MAX_DELAY, paramsArray,
- xslt, _os);
+ xslt);
} else if (METHOD_GET.equals(method) || method == null) {
request = new GetPageRequest(MAX_TRIES, MAX_DELAY, paramsArray,
- xslt, _os);
+ xslt);
} else {
throw new RuntimeException("Unknown request method '" + method
+ "'. Only " + METHOD_GET + " and " + METHOD_POST
<programs>
-
+
<program>
+ <category>horror</category>
<action>notify</action>
<match field="description">horror</match>
</program>
<program>
+ <category>films</category>
<action>notify</action>
<match field="keywords">film</match>
<match field="description">horror|actie|thriller</match>
</program>
<program>
+ <category>science fiction</category>
<action>notify</action>
<match field="description">(sci-fi)|(science fiction)</match>
</program>
<program>
+ <category>documentaires</category>
<action>notify</action>
<match>(zembla)|(uur.*wolf)</match>
</program>
</program>
<program>
+ <action>notify</action>
<match>brainiac</match>
</program>
import java.util.Date;
import java.util.List;
import java.util.Properties;
-import java.util.Set;
-import java.util.TreeSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
*/
public class Program {
+ /**
+ * Lexicographical comparison of programs based on (time, title, channel).
+ * <p>
+ * Programs are ordered by the begin of their interval first. Programs with
+ * the same begin are ordered by name, and programs that also share a name
+ * are ordered by channel, so the result is 0 only if all three compare equal.
+ * <p>
+ * No null checks are done on the programs or on the values being compared.
+ */
public static class TimeSorter implements Comparator<Program> {
/* (non-Javadoc)
* @see java.util.Comparator#compare(T, T)
*/
public int compare(Program o1, Program o2) {
- return o1.getInterval().getBegin().compareTo(o2.getInterval().getBegin());
+ int value = o1.getInterval().getBegin().compareTo(o2.getInterval().getBegin());
+ if ( value != 0 ) {
+ return value;
+ }
+ value = o1.getName().compareTo(o2.getName());
+ if (value != 0 ) {
+ return value;
+ }
+ return o1.getChannel().compareTo(o2.getChannel());
}
}
*/
public RecordingResult record() {
LOG.info("Recording " + this);
+ if ( SystemProperties.isRecordDisabled() ) {
+ return RecordingResult.OK;
+ }
try {
Action record = _programInfo.execute().getAction(RECORD_ACTION);
if (record == null) {
package org.wamblee.crawler.kiss;
-import java.util.ArrayList;
import java.util.EnumMap;
-import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
private static final String ELEM_PATTERN = "match";
private static final String ELEM_ACTION = "action";
+
+ private static final String ELEM_CATEGORY = "category";
private static final String ACTION_NOTIFY = "notify";
for (Iterator i = root.elementIterator(ELEM_PROGRAM); i.hasNext();) {
Element program = (Element) i.next();
+ Element categoryElem = program.element(ELEM_CATEGORY);
+ String category = "";
+ if ( categoryElem != null ) {
+ category = categoryElem.getText().trim();
+ }
+
Element actionElem = program.element(ELEM_ACTION);
ProgramAction action = new RecordProgramAction();
if (actionElem != null) {
if (actionElem.getText().equals(ACTION_NOTIFY)) {
- action = new InterestingProgramAction("");
+ action = new InterestingProgramAction(category);
}
}
+
List<Condition<Program>> regexConditions =
new ArrayList<Condition<Program>>();
for (Iterator j = program.elementIterator(ELEM_PATTERN); j.hasNext(); ) {
private static final String DEBUG_PROPERTY = "kiss.debug";
private static final String NO_PROGRAM_DETAILS = "kiss.nodetails";
+ private static final String DISABLE_RECORD = "kiss.norecord";
/**
* Disabled constructor.
public static boolean isNoProgramDetailsRequired() {
    // Only the presence of the system property matters; its value is not inspected.
    final String configured = System.getProperties().getProperty(NO_PROGRAM_DETAILS);
    return configured != null;
}
+
+
+ /**
+ * Determines if recording is disabled.
+ * <p>
+ * Recording counts as disabled as soon as the system property
+ * <code>kiss.norecord</code> is defined; its value is not examined.
+ * @return True iff no recording should be done.
+ */
+ public static boolean isRecordDisabled() {
+ return System.getProperties().getProperty(DISABLE_RECORD) != null;
+ }
+
}