From: Erik Brakkee Date: Mon, 12 Apr 2010 20:02:23 +0000 (+0000) Subject: Moved crawler to X-Git-Tag: wamblee-utils-0.7~619 X-Git-Url: http://wamblee.org/gitweb/?a=commitdiff_plain;h=2ef561a4fbf29b65335f1558bfebd74733a6ddd9;p=utils Moved crawler to https://wamblee.org/svn/public/crawler --- diff --git a/crawler/ABOUT.txt b/crawler/ABOUT.txt deleted file mode 100644 index ba9a7ad3..00000000 --- a/crawler/ABOUT.txt +++ /dev/null @@ -1,2 +0,0 @@ -This directory contains a generic web crawler (basic directory) and several useful implementations build on top of this. - diff --git a/crawler/basic/ABOUT.txt b/crawler/basic/ABOUT.txt deleted file mode 100644 index b61c613d..00000000 --- a/crawler/basic/ABOUT.txt +++ /dev/null @@ -1,9 +0,0 @@ -This is a general library for implementing a web crawler. - -The crawler works by retrieving an HTML page and transforming the HTML -(content + presentation) into content using XSLT stylesheets. Using a convention -for links in the converted content, it becomes possible to build a generic interface on the retrieved pages for navigating through the content. - -A configuration file determines how a certain page must be retrieved and transformed. 
- - diff --git a/crawler/basic/deps.xml b/crawler/basic/deps.xml deleted file mode 100644 index e1a94e91..00000000 --- a/crawler/basic/deps.xml +++ /dev/null @@ -1,24 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - diff --git a/crawler/basic/pom.xml b/crawler/basic/pom.xml deleted file mode 100644 index 8b964239..00000000 --- a/crawler/basic/pom.xml +++ /dev/null @@ -1,39 +0,0 @@ - - - - org.wamblee - wamblee-crawler - 0.2-SNAPSHOT - - - 4.0.0 - org.wamblee - wamblee-crawler-basic - jar - /crawler/basic - http://wamblee.org - - - org.wamblee - wamblee-support-general - 0.2-SNAPSHOT - - - - commons-httpclient - commons-httpclient - - - jtidy - jtidy - - - dom4j - dom4j - - - - - diff --git a/crawler/basic/src/log4j.properties b/crawler/basic/src/log4j.properties deleted file mode 100644 index ab710b36..00000000 --- a/crawler/basic/src/log4j.properties +++ /dev/null @@ -1,56 +0,0 @@ - -############################################################################################ -# Default configuration file for log4j. -# -# This properties file is used if no other configuration if log4j is done explicitly. 
-############################################################################################ - - -# Root logger reports everything and uses the console appender -log4j.rootLogger=ERROR, console - -# Log level for wamblee.org -log4j.logger.org.wamblee=DEBUG -log4j.logger.org.wamblee.usermgt.UserAdministrationImplTest=INFO -log4j.logger.org.wamblee.security.authorization=ERROR -log4j.logger.org.wamblee.cache=INFO - - -log4j.logger.org.springframework=ERROR -log4j.logger.net.sf.ehcache=WARN - -# Default log level for hibernate -log4j.logger.org.hibernate=ERROR -log4j.logger.org.hibernate3=ERROR - -log4j.appender.console=org.apache.log4j.ConsoleAppender -log4j.appender.console.layout=org.apache.log4j.PatternLayout -log4j.appender.console.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n - -###################################################################################### -# Hibernate SQL logging, switch the log level to DEBUG to see the output. -###################################################################################### - -log4j.logger.org.wamblee.test.SpringTestCase=ERROR, console -log4j.additivity.org.wamblee.test.SpringTestCase=false - -# Logging for queries. -log4j.logger.org.hibernate.SQL=ERROR, sql -log4j.additivity.org.hibernate.SQL=false - -# Logging for query parameters and return values. -log4j.logger.org.hibernate.type=ERROR, sqltype -log4j.additivity.org.hibernate.type=false - -# Appender for the queries -log4j.appender.sql=org.apache.log4j.ConsoleAppender -log4j.appender.sql.layout=org.apache.log4j.PatternLayout -log4j.appender.sql.layout.ConversionPattern=%n%-4r [%t] SQL: %x - %m%n - -# Appender to show the actual parameters and return values of the queries. 
-log4j.appender.sqltype=org.apache.log4j.ConsoleAppender -log4j.appender.sqltype.layout=org.apache.log4j.PatternLayout -log4j.appender.sqltype.layout.ConversionPattern=%-4r [%t] SQL: %x - %m%n - - - diff --git a/crawler/basic/src/main/java/org/wamblee/crawler/AbstractPageRequest.java b/crawler/basic/src/main/java/org/wamblee/crawler/AbstractPageRequest.java deleted file mode 100644 index 6b75e48e..00000000 --- a/crawler/basic/src/main/java/org/wamblee/crawler/AbstractPageRequest.java +++ /dev/null @@ -1,292 +0,0 @@ -/* - * Copyright 2005 the original author or authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.wamblee.crawler; - -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; - -import javax.xml.transform.OutputKeys; -import javax.xml.transform.Transformer; -import javax.xml.transform.TransformerConfigurationException; -import javax.xml.transform.TransformerException; -import javax.xml.transform.TransformerFactory; -import javax.xml.transform.dom.DOMSource; -import javax.xml.transform.stream.StreamResult; - -import org.apache.commons.httpclient.Header; -import org.apache.commons.httpclient.HttpClient; -import org.apache.commons.httpclient.HttpMethod; -import org.apache.commons.httpclient.HttpStatus; -import org.apache.commons.httpclient.NameValuePair; -import org.apache.commons.httpclient.methods.GetMethod; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.xml.serialize.OutputFormat; -import org.apache.xml.serialize.XMLSerializer; -import org.w3c.dom.Document; -import org.w3c.tidy.Tidy; -import org.wamblee.xml.DomUtils; -import org.wamblee.xml.XslTransformer; - -/** - * General support claas for all kinds of requests. - * - * @author Erik Brakkee - */ -public abstract class AbstractPageRequest implements PageRequest { - - private static final Log LOG = LogFactory.getLog(AbstractPageRequest.class); - - private static final String REDIRECT_HEADER = "Location"; - - private int _maxTries; - - private int _maxDelay; - - private NameValuePair[] _params; - - private NameValuePair[] _headers; - - private String _xslt; - - private XslTransformer _transformer; - - /** - * Constructs the request. - * - * @param aMaxTries - * Maximum retries to perform. - * @param aMaxDelay - * Maximum delay before executing a request. - * @param aParams - * Request parameters to use. - * @param aHeaders - * Request headers to use. - * @param aXslt - * XSLT used to convert the response. 
- */ - protected AbstractPageRequest(int aMaxTries, int aMaxDelay, - NameValuePair[] aParams, NameValuePair[] aHeaders, String aXslt, XslTransformer aTransformer) { - if (aParams == null) { - throw new IllegalArgumentException("aParams is null"); - } - if (aHeaders == null) { - throw new IllegalArgumentException("aHeaders is null"); - } - if (aXslt == null) { - throw new IllegalArgumentException("aXslt is null"); - } - _maxTries = aMaxTries; - _maxDelay = aMaxDelay; - _params = aParams; - _headers = aHeaders; - _xslt = aXslt; - _transformer = aTransformer; - } - - /* - * (non-Javadoc) - * - * @see org.wamblee.crawler.PageRequest#overrideXslt(java.lang.String) - */ - public void overrideXslt(String aXslt) { - _xslt = aXslt; - } - - /** - * Gets the parameters for the request. - * - * @param aParams Additional parameters to use, obtained from another page, most likely as - * hidden form fields. - * @return Request parameters. - */ - protected NameValuePair[] getParameters(NameValuePair[] aParams) { - List params = new ArrayList(); - params.addAll(Arrays.asList(_params)); - params.addAll(Arrays.asList(aParams)); - return params.toArray(new NameValuePair[0]); - } - - /** - * Gets the headers for the request. - * @return Request headers. - */ - protected NameValuePair[] getHeaders() { - return _headers; - } - - /** - * Executes the request with a random delay and with a maximum number of - * retries. - * - * @param aClient - * HTTP client to use. - * @param aMethod - * Method representing the request. - * @return XML document describing the response. - * @throws IOException - * In case of IO problems. - * @throws TransformerException - * In case transformation of the HTML to XML fails. 
- */ - protected Document executeMethod(HttpClient aClient, HttpMethod aMethod) - throws IOException, TransformerException { - - for (NameValuePair header: getHeaders()) { - aMethod.setRequestHeader(header.getName(), header.getValue()); - } - - int triesLeft = _maxTries; - while (triesLeft > 0) { - triesLeft--; - try { - return executeMethodWithoutRetries(aClient, aMethod); - } catch (TransformerException e) { - if (triesLeft == 0) { - throw e; - } - } - } - throw new RuntimeException("Code should never reach this point"); - } - - /** - * Executes the request without doing any retries in case XSLT - * transformation fails. - * - * @param aClient - * HTTP client to use. - * @param aMethod - * Method to execute. - * @return XML document containing the result. - * @throws IOException - * In case of IO problems. - * @throws TransformerException - * In case transformation of the result to XML fails. - */ - protected Document executeMethodWithoutRetries(HttpClient aClient, - HttpMethod aMethod) throws IOException, TransformerException { - try { - aMethod = executeWithRedirects(aClient, aMethod); - byte[] xhtmlData = getXhtml(aMethod); - - - Document transformed = _transformer.transform(xhtmlData, - _transformer.resolve(_xslt)); - ByteArrayOutputStream os = new ByteArrayOutputStream(); - Transformer transformer = TransformerFactory.newInstance() - .newTransformer(); - transformer.setParameter(OutputKeys.INDENT, "yes"); - transformer.setParameter(OutputKeys.METHOD, "xml"); - transformer.transform(new DOMSource(transformed), new StreamResult( - os)); - LOG.debug("Transformed result is \n" + os.toString()); - return transformed; - } catch (TransformerConfigurationException e) { - throw new TransformerException("Transformer configuration problem", e); - } finally { - // Release the connection. - aMethod.releaseConnection(); - } - } - - /** - * Gets the result of the HTTP method as an XHTML document. - * - * @param aMethod - * Method to invoke. 
- * @return XHTML as a byte array. - * @throws IOException - * In case of problems obtaining the XHTML. - */ - private byte[] getXhtml(HttpMethod aMethod) throws IOException { - // Transform the HTML into wellformed XML. - Tidy tidy = new Tidy(); - tidy.setXHTML(true); - tidy.setQuiet(true); - tidy.setShowWarnings(false); - - // We write the jtidy output to XML since the DOM tree it produces is - // not namespace aware and namespace awareness is required by XSLT. - // An alternative is to configure namespace awareness of the XML parser - // in a system wide way. - ByteArrayOutputStream os = new ByteArrayOutputStream(); - Document w3cDoc = tidy.parseDOM(aMethod.getResponseBodyAsStream(), os); - DomUtils.removeDuplicateAttributes(w3cDoc); - LOG.debug("Content of response is \n" + os.toString()); - - ByteArrayOutputStream xhtml = new ByteArrayOutputStream(); - XMLSerializer serializer = new XMLSerializer(xhtml, new OutputFormat()); - serializer.serialize(w3cDoc); - xhtml.flush(); - - return xhtml.toByteArray(); - } - - /** - * Sleeps for a random time but no more than the maximum delay. - * - */ - private void delay() { - try { - Thread.sleep((long) ((float) _maxDelay * Math.random())); - } catch (InterruptedException e) { - return; // to satisfy checkstyle - } - } - - /** - * Executes the request and follows redirects if needed. - * - * @param aClient - * HTTP client to use. - * @param aMethod - * Method to use. - * @return Final HTTP method used (differs from the parameter passed in in - * case of redirection). - * @throws IOException - * In case of network problems. 
- */ - private HttpMethod executeWithRedirects(HttpClient aClient, - HttpMethod aMethod) throws IOException { - delay(); - int statusCode = aClient.executeMethod(aMethod); - - switch (statusCode) { - case HttpStatus.SC_OK: { - return aMethod; - } - case HttpStatus.SC_MOVED_PERMANENTLY: - case HttpStatus.SC_MOVED_TEMPORARILY: - case HttpStatus.SC_SEE_OTHER: { - aMethod.releaseConnection(); - Header header = aMethod.getResponseHeader(REDIRECT_HEADER); - aMethod = new GetMethod(header.getValue()); - return executeWithRedirects(aClient, aMethod); // TODO protect - // against infinite - // recursion. - } - default: { - throw new IOException("Method failed: " - + aMethod.getStatusLine()); - } - } - } -} diff --git a/crawler/basic/src/main/java/org/wamblee/crawler/Action.java b/crawler/basic/src/main/java/org/wamblee/crawler/Action.java deleted file mode 100644 index 1125a0ee..00000000 --- a/crawler/basic/src/main/java/org/wamblee/crawler/Action.java +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Copyright 2005 the original author or authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.wamblee.crawler; - -import org.dom4j.Element; - -/** - * An action defined on a page. - * - * @author Erik Brakkee - */ -public interface Action { - - /** - * The name of the action. - * @return Action name. - */ - String getName(); - - /** - * Executes the action. - * @return New page as a result of the action. 
- * @throws PageException In case of an error obtaining the page. - */ - Page execute() throws PageException; - - /** - * Gets a description of the action. THe element returned is the action element - * itself. - * @return Content as XML. - */ - Element getContent(); -} diff --git a/crawler/basic/src/main/java/org/wamblee/crawler/Configuration.java b/crawler/basic/src/main/java/org/wamblee/crawler/Configuration.java deleted file mode 100644 index 0e7da0b2..00000000 --- a/crawler/basic/src/main/java/org/wamblee/crawler/Configuration.java +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Copyright 2005 the original author or authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.wamblee.crawler; - -/** - * Configuration which determines how a specific page must be retrieved and - * what transformation should be applied to it. - * - * @author Erik Brakkee - */ -public interface Configuration { - - /** - * Gets the page request based on the URL. - * @param aUrl Url of the page to retrieve. - * @return Page request. - */ - PageRequest getRequest(String aUrl); - - /** - * Gets the page request based on the type of the page instead - * of on the URL. - * @param aType Type of page. - * @return Page request. 
- */ - PageRequest getRequest(PageType aType); -} diff --git a/crawler/basic/src/main/java/org/wamblee/crawler/Crawler.java b/crawler/basic/src/main/java/org/wamblee/crawler/Crawler.java deleted file mode 100644 index a5f2c1d3..00000000 --- a/crawler/basic/src/main/java/org/wamblee/crawler/Crawler.java +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Copyright 2005 the original author or authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.wamblee.crawler; - -import org.apache.commons.httpclient.NameValuePair; - - -/** - * The object that actually obtains pages based on URL. - * - * @author Erik Brakkee - */ -public interface Crawler { - - /** - * Gets the content for a specific page. - * @param aUrl Url of page. - * @param aParameters Paremeters to supply. - * @return Page to retrieve. - * @throws PageException In case of problems retrieving the page. - */ - Page getPage(String aUrl, NameValuePair[] aParameters) throws PageException; - - /** - * Gets the content for a specific page. - * @param aUrl Url of page. - * @param aParameters Parameters to supply. - * @param aType Type of page. - * @return Page. - * @throws PageException In case of problems retrieving the page. 
- */ - Page getPage(String aUrl, NameValuePair[] aParameters, PageType aType) throws PageException; -} diff --git a/crawler/basic/src/main/java/org/wamblee/crawler/GetPageRequest.java b/crawler/basic/src/main/java/org/wamblee/crawler/GetPageRequest.java deleted file mode 100644 index 16eb94c7..00000000 --- a/crawler/basic/src/main/java/org/wamblee/crawler/GetPageRequest.java +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Copyright 2005 the original author or authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.wamblee.crawler; - -import java.io.IOException; - -import javax.xml.transform.TransformerException; - -import org.apache.commons.httpclient.HttpClient; -import org.apache.commons.httpclient.HttpMethod; -import org.apache.commons.httpclient.NameValuePair; -import org.apache.commons.httpclient.methods.GetMethod; -import org.w3c.dom.Document; -import org.wamblee.xml.XslTransformer; - -/** - * Gets a page by issueing a get request. - * - * @author Erik Brakkee - */ -public class GetPageRequest extends AbstractPageRequest { - - /** - * Constructs the request. - * @param aMaxTries Maximum number of retries. - * @param aMaxDelay Maximum delay before executing the request. - * @param aParams Request parameters to use. - * @param aHeaders Request headers to use. - * @param aXslt XSLT to use. 
- */ - public GetPageRequest(int aMaxTries, int aMaxDelay, NameValuePair[] aParams, - NameValuePair[] aHeaders, String aXslt, XslTransformer aTransformer) { - super(aMaxTries, aMaxDelay, aParams, aHeaders, aXslt, aTransformer); - } - - /* - * (non-Javadoc) - * - * @see org.wamblee.crawler.PageRequest#getPage(org.apache.commons.httpclient.HttpClient) - */ - public Document execute(String aUrl, NameValuePair[] aParams, HttpClient aClient) - throws PageException { - HttpMethod method = new GetMethod(aUrl); - NameValuePair[] params = getParameters(aParams); - if (params.length > 0) { - String oldQueryString = method.getQueryString(); - method.setQueryString(params); - String queryString = method.getQueryString(); - if (oldQueryString.length() > 0) { - queryString = queryString + '&' + oldQueryString; - method.setQueryString(queryString); - } - } - try { - return executeMethod(aClient, method); - } catch (TransformerException e) { - throw new PageException("Transformation problem for url " + aUrl, e); - } catch (IOException e) { - throw new PageException("Problem getting " + aUrl, e); - } - } - -} diff --git a/crawler/basic/src/main/java/org/wamblee/crawler/Page.java b/crawler/basic/src/main/java/org/wamblee/crawler/Page.java deleted file mode 100644 index 09ab7bc2..00000000 --- a/crawler/basic/src/main/java/org/wamblee/crawler/Page.java +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Copyright 2005 the original author or authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.wamblee.crawler; - -import org.dom4j.Element; - -/** - * Represents a retrieved page. - * - * @author Erik Brakkee - */ -public interface Page { - - /** - * Gets the content of the page as raw XML. - * @return Page content. - */ - Element getContent(); - - /** - * Obtains the links available on the page. - * @return Link names. - */ - Action[] getActions(); - - /** - * Gets the named action. Only works if the action name is unique. - * @param aName Name of the action. - * @return Action object. - */ - Action getAction(String aName); -} diff --git a/crawler/basic/src/main/java/org/wamblee/crawler/PageException.java b/crawler/basic/src/main/java/org/wamblee/crawler/PageException.java deleted file mode 100644 index 35273e7e..00000000 --- a/crawler/basic/src/main/java/org/wamblee/crawler/PageException.java +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Copyright 2005 the original author or authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.wamblee.crawler; - -/** - * Exception thrown when there is a problem in retrieving or transforming the - * page. - * - * @author Erik Brakkee - */ -public class PageException extends Exception { - - /** - * Constructs the exception. - * @param aMsg Message. - */ - public PageException(String aMsg) { - super(aMsg); - } - - /** - * Constructs the exception. - * @param aMsg Message. 
- * @param aCause Cause of the exception. - */ - public PageException(String aMsg, Throwable aCause) { - super(aMsg, aCause); - } -} diff --git a/crawler/basic/src/main/java/org/wamblee/crawler/PageRequest.java b/crawler/basic/src/main/java/org/wamblee/crawler/PageRequest.java deleted file mode 100644 index 2a4e05fd..00000000 --- a/crawler/basic/src/main/java/org/wamblee/crawler/PageRequest.java +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Copyright 2005 the original author or authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.wamblee.crawler; - -import org.apache.commons.httpclient.HttpClient; -import org.apache.commons.httpclient.NameValuePair; -import org.w3c.dom.Document; - -/** - * Represents a specific request to obtain and transform a page. - * - * @author Erik Brakkee - */ -public interface PageRequest { - - /** - * Gets a page as an XML document. - * @param aUrl Url of the page. - * @param aParams Additional parameters to supply. - * @param aClient Http client to use. - * @return Client. - * @throws PageException In case of problems retrieving the page. - */ - Document execute(String aUrl, NameValuePair[] aParams, HttpClient aClient) throws PageException; - - /** - * Overrides the Xslt to use. This is used when the transformed page specifies - * the page type explicitly for an action. - * @param aXslt Xslt to use. 
- */ - void overrideXslt(String aXslt); -} diff --git a/crawler/basic/src/main/java/org/wamblee/crawler/PageType.java b/crawler/basic/src/main/java/org/wamblee/crawler/PageType.java deleted file mode 100644 index 4fc7f0d8..00000000 --- a/crawler/basic/src/main/java/org/wamblee/crawler/PageType.java +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Copyright 2005 the original author or authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.wamblee.crawler; - -/** - * Represents the type of a page determining how the HTML should be transformed into - * XML. - * - * @author Erik Brakkee - */ -public class PageType { - - /** - * Type string. - */ - private String _type; - - /** - * Constructs the type. - * @param aType Type. - */ - public PageType(String aType) { - _type = aType; - } - - /** - * Gets the type. - * @return Type. 
- */ - public String getType() { - return _type; - } - - /* (non-Javadoc) - * @see java.lang.Object#toString() - */ - @Override - public String toString() { - return "PageType(type='" + _type + "')"; - } - - /* (non-Javadoc) - * @see java.lang.Object#equals(java.lang.Object) - */ - @Override - public boolean equals(Object obj) { - if (!(obj instanceof PageType)) { - return false; - } - return toString().equals(obj.toString()); - } -} diff --git a/crawler/basic/src/main/java/org/wamblee/crawler/PostPageRequest.java b/crawler/basic/src/main/java/org/wamblee/crawler/PostPageRequest.java deleted file mode 100644 index 62845403..00000000 --- a/crawler/basic/src/main/java/org/wamblee/crawler/PostPageRequest.java +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Copyright 2005 the original author or authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.wamblee.crawler; - -import java.io.IOException; - -import javax.xml.transform.TransformerException; - -import org.apache.commons.httpclient.HttpClient; -import org.apache.commons.httpclient.NameValuePair; -import org.apache.commons.httpclient.methods.PostMethod; -import org.w3c.dom.Document; -import org.wamblee.xml.XslTransformer; - -/** - * Retrieving pages using the post method. - * - * @author Erik Brakkee - */ -public class PostPageRequest extends AbstractPageRequest { - - /** - * Constructs the request. - * @param aMaxTries Maximum number of retries. 
- * @param aMaxDelay Maximum delay before executing the request. - * @param aParams Request parameters to use. - * @param aHeaders Request headers to use. - * @param aXslt XSLT to use. - */ - public PostPageRequest(int aMaxTries, int aMaxDelay, - NameValuePair[] aParams, - NameValuePair[] aHeaders, - String aXslt, XslTransformer aTransformer) { - super(aMaxTries, aMaxDelay, aParams, aHeaders, aXslt, aTransformer); - } - - /* - * (non-Javadoc) - * - * @see org.wamblee.crawler.PageRequest#execute(java.lang.String, - * org.apache.commons.httpclient.HttpClient) - */ - public Document execute(String aUrl, NameValuePair[] aParams, HttpClient aClient) - throws PageException { - PostMethod method = new PostMethod(aUrl); - method.addParameters(getParameters(aParams)); - try { - return executeMethod(aClient, method); - } catch (TransformerException e) { - throw new PageException("Transformation problem for url " + aUrl, e); - } catch (IOException e) { - throw new PageException("Problem getting page " + aUrl, e); - } - } - -} diff --git a/crawler/basic/src/main/java/org/wamblee/crawler/impl/ActionImpl.java b/crawler/basic/src/main/java/org/wamblee/crawler/impl/ActionImpl.java deleted file mode 100644 index 34972c59..00000000 --- a/crawler/basic/src/main/java/org/wamblee/crawler/impl/ActionImpl.java +++ /dev/null @@ -1,138 +0,0 @@ -/* - * Copyright 2005 the original author or authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.wamblee.crawler.impl; - -import org.apache.commons.httpclient.NameValuePair; -import org.dom4j.Element; -import org.wamblee.crawler.Action; -import org.wamblee.crawler.Crawler; -import org.wamblee.crawler.Page; -import org.wamblee.crawler.PageException; -import org.wamblee.crawler.PageType; - -/** - * Action implementation. - * - * @author Erik Brakkee - */ -public class ActionImpl implements Action { - - private Crawler _crawler; - - private Element _content; - - private String _name; - - private String _reference; - - private PageType _type; - - private NameValuePair[] _parameters; - - /** - * Constructs the action. - * - * @param aCrawler - * Crawler to use. - * @param aContent - * Content of the action element in the page where the action - * occurs. - * @param aName - * Name of the action. - * @param aReference - * URL of the reference. - * @param aParameters Parameters to use for the action. - */ - public ActionImpl(Crawler aCrawler, Element aContent, String aName, - String aReference, NameValuePair[] aParameters) { - _crawler = aCrawler; - _content = aContent; - _name = aName; - _reference = aReference; - _type = null; - _parameters = aParameters; - } - - /** - * Constructs the action. - * - * @param aCrawler - * Crawler to use. - * @param aContent - * Content of the action element in the page where the action - * occurs. - * @param aName - * Name of the action. - * @param aReference - * URL of the reference. - * @param aType - * Type of the referenced page. - * @param aParameters Parameters to use. 
- */ - public ActionImpl(Crawler aCrawler, Element aContent, String aName, - String aReference, PageType aType, NameValuePair[] aParameters) { - _crawler = aCrawler; - _content = aContent; - _name = aName; - _reference = aReference; - _type = aType; - _parameters = aParameters; - } - - /* - * (non-Javadoc) - * - * @see org.wamblee.crawler.Action#getName() - */ - public String getName() { - return _name; - } - - /* - * (non-Javadoc) - * - * @see org.wamblee.crawler.Action#execute() - */ - public Page execute() throws PageException { - if (_type == null) { - return _crawler.getPage(_reference, _parameters); - } - return _crawler.getPage(_reference, _parameters, _type); - } - - /* - * (non-Javadoc) - * - * @see org.wamblee.crawler.Action#getContent() - */ - public Element getContent() { - return _content; - } - - /* (non-Javadoc) - * @see java.lang.Object#equals(java.lang.Object) - */ - @Override - public boolean equals(Object obj) { - if ( !(obj instanceof ActionImpl )) { - return false; - } - ActionImpl action = (ActionImpl)obj; - return _reference.equals(action._reference) && - _type.equals(action._type); - } -} diff --git a/crawler/basic/src/main/java/org/wamblee/crawler/impl/App.java b/crawler/basic/src/main/java/org/wamblee/crawler/impl/App.java deleted file mode 100644 index da9056ce..00000000 --- a/crawler/basic/src/main/java/org/wamblee/crawler/impl/App.java +++ /dev/null @@ -1,118 +0,0 @@ -package org.wamblee.crawler.impl; - -import java.io.File; -import java.io.FileInputStream; -import java.io.InputStream; - -import org.apache.commons.httpclient.HttpClient; -import org.apache.commons.httpclient.NameValuePair; -import org.dom4j.Element; -import org.wamblee.crawler.Action; -import org.wamblee.crawler.Configuration; -import org.wamblee.crawler.Crawler; -import org.wamblee.crawler.Page; -import org.wamblee.crawler.PageException; -import org.wamblee.xml.XslTransformer; - -/* - * Copyright 2005 the original author or authors. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * Test application which uses the crawler. - * - * @author Erik Brakkee - */ -public final class App { - - /** - * Disabled constructor. - * - */ - private App() { - // Empty - } - - /** - * Runs a test program. - * - * @param aArgs - * Arguments. First argument is the crawler config file name and - * second argument is the start url. - * @throws Exception - * In case of problems. - */ - public static void main(String[] aArgs) throws Exception { - String configFileName = aArgs[0]; - String starturl = aArgs[1]; - - ConfigurationParser parser = new ConfigurationParser(new XslTransformer()); - InputStream configFile = new FileInputStream(new File(configFileName)); - Configuration config = parser.parse(configFile); - - HttpClient client = new HttpClient(); - // client.getHostConfiguration().setProxy("localhost", 3128); - - Crawler crawler = new CrawlerImpl(client, config); - - System.out.println("Retrieving: " + starturl); - Page page = crawler.getPage(starturl, new NameValuePair[0]); - showPage(page); - page = page.getAction("channels-favorites").execute(); - recordInterestingShows(page); - showPage(page); - page = page.getAction("Nederland 1").execute(); - showPage(page); - page = page.getAction("right-now").execute(); - showPage(page); - page = page.getAction("Het elfde uur").execute(); - showPage(page); - } - - /** - * @param starturl - * @param crawler - */ - private static void showPage(Page 
aPage) { - Action[] links = aPage.getActions(); - for (Action link : links) { - System.out.println("Link found '" + link.getName() + "'"); - } - Element element = aPage.getContent(); - System.out.println("Retrieved content: " + element.asXML()); - } - - private static void recordInterestingShows(Page page) throws PageException { - Action[] channels = page.getActions(); - for (Action channel : channels) { - examineChannel(channel.getName(), channel.execute().getAction( - "right-now").execute()); - } - } - - private static void examineChannel(String aChannel, Page aPage) - throws PageException { - Action[] programs = aPage.getActions(); - for (Action program : programs) { - System.out.println(aChannel + " - " + program.getName()); - if (program.getName().toLowerCase().matches(".*babe.*")) { - Page programPage = program.execute(); - Action record = programPage.getAction("record"); - System.out.println("Recording possible: " + record != null); - } - } - } - -} diff --git a/crawler/basic/src/main/java/org/wamblee/crawler/impl/ConfigItem.java b/crawler/basic/src/main/java/org/wamblee/crawler/impl/ConfigItem.java deleted file mode 100644 index e42a16c4..00000000 --- a/crawler/basic/src/main/java/org/wamblee/crawler/impl/ConfigItem.java +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright 2005 the original author or authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.wamblee.crawler.impl; - -import java.util.regex.Pattern; - -/** - * Configuration item for obtaining an object in case a pattern matches. - * - * @author Erik Brakkee - */ -class ConfigItem { - - private Pattern _pattern; - - private ValueType _value; - - /** - * Constructs the item. - * @param aPattern Pattern. - * @param aValue Value. - */ - protected ConfigItem(String aPattern, ValueType aValue) { - _pattern = Pattern.compile(aPattern); - _value = aValue; - } - - /** - * Returns the object in case the value matches. - * @param aValue Value to match. - * @return Object in case there is a match, null otherwise. - */ - protected ValueType match(String aValue) { - if (!_pattern.matcher(aValue).matches()) { - return null; - } - return _value; - } - -} diff --git a/crawler/basic/src/main/java/org/wamblee/crawler/impl/ConfigurationImpl.java b/crawler/basic/src/main/java/org/wamblee/crawler/impl/ConfigurationImpl.java deleted file mode 100644 index ca329334..00000000 --- a/crawler/basic/src/main/java/org/wamblee/crawler/impl/ConfigurationImpl.java +++ /dev/null @@ -1,79 +0,0 @@ -/* - * Copyright 2005 the original author or authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.wamblee.crawler.impl; - -import java.util.List; - -import org.wamblee.crawler.Configuration; -import org.wamblee.crawler.PageRequest; -import org.wamblee.crawler.PageType; - -/** - * Implementation of the configuration for the crawler. 
- * - * @author Erik Brakkee - */ -public class ConfigurationImpl implements Configuration { - - private List _urlConfig; - - private List _pageTypeConfig; - - /** - * Constructs the configuration. - * @param aUrlConfig List of URL configuration elements. - * @param aPageTypeConfig List of page type configuration elements. - */ - public ConfigurationImpl(List aUrlConfig, - List aPageTypeConfig) { - _urlConfig = aUrlConfig; - _pageTypeConfig = aPageTypeConfig; - } - - /* - * (non-Javadoc) - * - * @see org.wamblee.crawler.Configuration#getRequest(java.lang.String) - */ - public PageRequest getRequest(String aUrl) { - - for (UrlConfig config : _urlConfig) { - PageRequest request = config.getRequest(aUrl); - if (request != null) { - return request; - } - } - throw new RuntimeException("No configuration matched the URL '" + aUrl - + "'"); - } - - /* - * (non-Javadoc) - * - * @see org.wamblee.crawler.Configuration#getRequest(org.wamblee.crawler.PageType) - */ - public PageRequest getRequest(PageType aType) { - for (PageTypeConfig config : _pageTypeConfig) { - PageRequest request = config.getRequest(aType.getType()); - if (request != null) { - return request; - } - } - throw new RuntimeException("No configuration matched type '" + aType - + "'"); - } -} diff --git a/crawler/basic/src/main/java/org/wamblee/crawler/impl/ConfigurationParser.java b/crawler/basic/src/main/java/org/wamblee/crawler/impl/ConfigurationParser.java deleted file mode 100644 index ab4d7539..00000000 --- a/crawler/basic/src/main/java/org/wamblee/crawler/impl/ConfigurationParser.java +++ /dev/null @@ -1,200 +0,0 @@ -/* - * Copyright 2005 the original author or authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.wamblee.crawler.impl; - -import java.io.InputStream; -import java.util.ArrayList; -import java.util.Iterator; -import java.util.List; - -import org.apache.commons.httpclient.NameValuePair; -import org.dom4j.Document; -import org.dom4j.DocumentException; -import org.dom4j.Element; -import org.dom4j.io.SAXReader; -import org.wamblee.crawler.Configuration; -import org.wamblee.crawler.GetPageRequest; -import org.wamblee.crawler.PageRequest; -import org.wamblee.crawler.PostPageRequest; -import org.wamblee.xml.XslTransformer; - -/** - * Parsing of the configuration from an XML file. - * - * @author Erik Brakkee - */ -public class ConfigurationParser { - - private static final String ELEM_URL = "url"; - - private static final String ELEM_TYPE = "type"; - - private static final String ELEM_PATTERN = "pattern"; - - private static final String ELEM_METHOD = "method"; - - private static final String ELEM_XSLT = "xslt"; - - private static final String ELEM_PARAM = "param"; - - private static final String ELEM_HEADER = "header"; - - private static final String AT_NAME = "name"; - - private static final String AT_VALUE = "value"; - - private static final String METHOD_POST = "post"; - - private static final String METHOD_GET = "get"; - - private static final int MAX_TRIES = 3; - - private static final int MAX_DELAY = 10000; - - private XslTransformer _transformer; - - /** - * Constructs the configuration parser. 
- */ - public ConfigurationParser(XslTransformer aTransformer) { - _transformer = aTransformer; - } - - /** - * Parses the configuration from an input stream. - * @param aStream Input file. - * @return Configuration. - */ - public Configuration parse(InputStream aStream) { - try { - SAXReader reader = new SAXReader(); - Document document = reader.read(aStream); - - Element root = document.getRootElement(); - List urlConfigs = parseUrlConfigs(root); - List pageTypeConfigs = parsePageTypeConfigs(root); - return new ConfigurationImpl(urlConfigs, pageTypeConfigs); - } catch (DocumentException e) { - throw new RuntimeException("Problem parsing config file", e); - } - } - - /** - * Parses the URL-based configuration. - * @param aRoot Root of the configuration file document. - * @return List of URL-based configurations. - */ - private List parseUrlConfigs(Element aRoot) { - List configs = new ArrayList(); - for (Iterator i = aRoot.elementIterator(ELEM_URL); i.hasNext();) { - Element url = (Element) i.next(); - UrlConfig config = parseUrlConfig(url); - configs.add(config); - } - return configs; - } - - /** - * Parses the page type based configurations. - * @param aRoot Root of the configuration file document. - * @return LIst of page type based configurations. - */ - private List parsePageTypeConfigs(Element aRoot) { - List configs = new ArrayList(); - for (Iterator i = aRoot.elementIterator(ELEM_TYPE); i.hasNext();) { - Element url = (Element) i.next(); - PageTypeConfig config = parsePageTypeConfig(url); - configs.add(config); - } - return configs; - } - - /** - * Parses a URL-based configuration. - * @param aUrlElem Configuration element. - * @return Configuration. - */ - private UrlConfig parseUrlConfig(Element aUrlElem) { - String pattern = aUrlElem.elementText(ELEM_PATTERN); - PageRequest request = parseRequestConfig(aUrlElem); - return new UrlConfig(pattern, request); - } - - /** - * Parses a page type based configuration. - * @param aTypeElem Configuration element. 
- * @return Configuration. - */ - private PageTypeConfig parsePageTypeConfig(Element aTypeElem) { - String pattern = aTypeElem.elementText(ELEM_PATTERN); - PageRequest request = parseRequestConfig(aTypeElem); - return new PageTypeConfig(pattern, request); - } - - /** - * Parses a request configuration describing how to execute requests. - * @param aElem Configuration element. - * @return Page request. - */ - private PageRequest parseRequestConfig(Element aElem) { - String method = aElem.elementText(ELEM_METHOD); - String xslt = aElem.elementText(ELEM_XSLT); - List params = parseNameValuePairs(aElem, ELEM_PARAM); - List headers = parseNameValuePairs(aElem, ELEM_HEADER); - - NameValuePair[] paramsArray = params.toArray(new NameValuePair[0]); - NameValuePair[] headersArray = headers.toArray(new NameValuePair[0]); - PageRequest request; - if (METHOD_POST.equals(method)) { - request = new PostPageRequest(MAX_TRIES, MAX_DELAY, paramsArray, headersArray, - xslt, _transformer); - } else if (METHOD_GET.equals(method) || method == null) { - request = new GetPageRequest(MAX_TRIES, MAX_DELAY, paramsArray, headersArray, - xslt, _transformer); - } else { - throw new RuntimeException("Unknown request method '" + method - + "'. Only " + METHOD_GET + " and " + METHOD_POST - + " are supported"); - } - return request; - } - - /** - * @param aElem - * @return - */ - private List parseNameValuePairs(Element aElem, String aElemName) { - List headers = new ArrayList(); - for (Iterator i = aElem.elementIterator(aElemName); i.hasNext();) { - Element paramElem = (Element) i.next(); - NameValuePair header = parseParameter(paramElem); - headers.add(header); - } - return headers; - } - - /** - * Parses a parameter definition. - * @param aParam Parameter. - * @return Name value pair describing a parameter. 
- */ - private NameValuePair parseParameter(Element aParam) { - String name = aParam.attributeValue(AT_NAME); - String value = aParam.attributeValue(AT_VALUE); - return new NameValuePair(name, value); - } -} diff --git a/crawler/basic/src/main/java/org/wamblee/crawler/impl/CrawlerImpl.java b/crawler/basic/src/main/java/org/wamblee/crawler/impl/CrawlerImpl.java deleted file mode 100644 index fd02bfb4..00000000 --- a/crawler/basic/src/main/java/org/wamblee/crawler/impl/CrawlerImpl.java +++ /dev/null @@ -1,110 +0,0 @@ -/* - * Copyright 2005 the original author or authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.wamblee.crawler.impl; - -import org.apache.commons.httpclient.HttpClient; -import org.apache.commons.httpclient.NameValuePair; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.dom4j.Element; -import org.dom4j.io.DOMReader; -import org.w3c.dom.Document; -import org.wamblee.crawler.Configuration; -import org.wamblee.crawler.Crawler; -import org.wamblee.crawler.Page; -import org.wamblee.crawler.PageException; -import org.wamblee.crawler.PageRequest; -import org.wamblee.crawler.PageType; - -/** - * Crawler implementation. - * - * @author Erik Brakkee - */ -public class CrawlerImpl implements Crawler { - - private static final Log LOG = LogFactory.getLog(CrawlerImpl.class); - - private HttpClient _client; - - private Configuration _config; - - /** - * Constructs the crawler. 
- * - * @param aClient - * Http client to use. - * @param aConfig - * Configuration. - */ - public CrawlerImpl(HttpClient aClient, Configuration aConfig) { - _client = aClient; - _config = aConfig; - } - - /* - * (non-Javadoc) - * - * @see org.wamblee.crawler.Crawler#getPage(java.lang.String) - */ - public Page getPage(String aUrl, NameValuePair[] aParams) throws PageException { - LOG.debug("Getting page: url = '" + aUrl + "'"); - PageRequest request = _config.getRequest(aUrl); - Document content = request.execute(aUrl, aParams, _client); - return transformToDom4jDoc(aUrl, content); - } - - /* - * (non-Javadoc) - * - * @see org.wamblee.crawler.Crawler#getPage(java.lang.String, - * java.lang.String) - */ - public Page getPage(String aUrl, NameValuePair[] aParams, PageType aType) throws PageException { - LOG.debug("Getting page: url = '" + aUrl + "', type = '" + aType + "'"); - PageRequest request = _config.getRequest(aType); - Document content = request.execute(aUrl, aParams, _client); - return transformToDom4jDoc(aUrl, content); - } - - /** - * Converts a w3c DOM document to a page object. - * @param content DOM document. - * @return - */ - private Page transformToDom4jDoc(String aUrl, Document content) { - DOMReader reader = new DOMReader(); - org.dom4j.Document dom4jDoc = reader.read(content); - Element root = dom4jDoc.getRootElement(); - dom4jDoc.remove(root); - - return new PageImpl(aUrl, this, replaceReferencesWithContent(root)); - } - - /** - * Perform crawling. Find references in the retrieved content and replace - * them by the content they refer to by retrieving the appropriate pages as - * well. - * - * @param content - * Content which must be made complete. - * @return Fully processed content. - */ - private Element replaceReferencesWithContent(Element content) { - return content; // TODO implement. 
- } -} diff --git a/crawler/basic/src/main/java/org/wamblee/crawler/impl/PageImpl.java b/crawler/basic/src/main/java/org/wamblee/crawler/impl/PageImpl.java deleted file mode 100644 index e27c1c65..00000000 --- a/crawler/basic/src/main/java/org/wamblee/crawler/impl/PageImpl.java +++ /dev/null @@ -1,162 +0,0 @@ -/* - * Copyright 2005 the original author or authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.wamblee.crawler.impl; - -import java.net.MalformedURLException; -import java.net.URL; -import java.util.ArrayList; -import java.util.List; - -import org.apache.commons.httpclient.NameValuePair; -import org.dom4j.DocumentHelper; -import org.dom4j.Element; -import org.dom4j.XPath; -import org.wamblee.crawler.Action; -import org.wamblee.crawler.Crawler; -import org.wamblee.crawler.Page; -import org.wamblee.crawler.PageType; - -/** - * Page implementation. - * - * @author Erik Brakkee - */ -public class PageImpl implements Page { - - private static final String ELEM_NAME = "action"; - - private static final String ATT_NAME = "name"; - - private static final String ATT_HREF = "reference"; - - private static final String ATT_TYPE = "type"; - - private static final String ELEM_PARAM = "param"; - - private static final String ATT_VALUE = "value"; - - private String _href; - - private Crawler _crawler; - - private Element _content; - - private Action[] _actions; - - /** - * Constructs a page. 
- * - * @param aContent - */ - public PageImpl(String aHref, Crawler aCrawler, Element aContent) { - _href = aHref; - _crawler = aCrawler; - _content = aContent; - _actions = computeActions(); - } - - /* - * (non-Javadoc) - * - * @see org.wamblee.crawler.Page#getLinkNames() - */ - private Action[] computeActions() { - XPath xpath = DocumentHelper.createXPath(ELEM_NAME); - List results = (List) xpath.selectNodes(_content); - List names = new ArrayList(); - for (Element elem : results) { - String name = elem.attributeValue(ATT_NAME); - String href = elem.attributeValue(ATT_HREF); - String type = elem.attributeValue(ATT_TYPE); - NameValuePair[] params = getMandatoryParameters(elem); - href = absolutizeHref(_href, href); - if (type == null) { - names.add(new ActionImpl(_crawler, elem, name, href, params)); - } else { - names.add(new ActionImpl(_crawler, elem, name, href, - new PageType(type), params)); - } - } - return names.toArray(new Action[0]); - } - - /** - * Absolutize the hyperlink - * @param aPageHref Absolute page reference. - * @param aLinkHref Possibly relative link reference. - * @return Absolute hyperlink. - */ - private String absolutizeHref(String aPageHref, String aLinkHref) { - - try { - URL pageUrl = new URL(aPageHref); - URL newUrl = new URL(pageUrl, aLinkHref); - return newUrl.toString(); // TODO need to use URL instead of String throughout the code. 
- } catch (MalformedURLException e) { - throw new RuntimeException("Malformed URL", e); - } - } - - private NameValuePair[] getMandatoryParameters(Element aAction) { - List result = new ArrayList(); - for (Element param: (List)aAction.elements(ELEM_PARAM)) { - String name = param.attributeValue(ATT_NAME); - String value = param.attributeValue(ATT_VALUE); - result.add(new NameValuePair(name, value)); - } - return result.toArray(new NameValuePair[0]); - } - - /* - * (non-Javadoc) - * - * @see org.wamblee.crawler.Page#getContent() - */ - public Element getContent() { - return _content; - } - - /* - * (non-Javadoc) - * - * @see org.wamblee.crawler.Page#getActions() - */ - public Action[] getActions() { - return _actions; - } - - /* - * (non-Javadoc) - * - * @see org.wamblee.crawler.Page#getAction(java.lang.String) - */ - public Action getAction(String aName) { - List results = new ArrayList(); - for (Action action : _actions) { - if (action.getName().equals(aName)) { - results.add(action); - } - } - if (results.size() == 0) { - return null; - } - if (results.size() > 1) { - throw new RuntimeException("Duplicate action '" + aName + "'"); - } - return results.get(0); - } -} diff --git a/crawler/basic/src/main/java/org/wamblee/crawler/impl/PageTypeConfig.java b/crawler/basic/src/main/java/org/wamblee/crawler/impl/PageTypeConfig.java deleted file mode 100644 index c9438515..00000000 --- a/crawler/basic/src/main/java/org/wamblee/crawler/impl/PageTypeConfig.java +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Copyright 2005 the original author or authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.wamblee.crawler.impl; - -import org.wamblee.crawler.PageRequest; - -/** - * Page type configuration. - * - * @author Erik Brakkee - */ -public class PageTypeConfig extends ConfigItem { - - /** - * Constructs the configuration. - * @param aPattern Page type pattern. - * @param aRequest Page request. - */ - public PageTypeConfig(String aPattern, PageRequest aRequest) { - super(aPattern, aRequest); - } - - /** - * Returns the request in case the type matches. - * @param aType Page type. - * @return Request if the type matches, null otherwise. - */ - public PageRequest getRequest(String aType) { - return match(aType); - } -} diff --git a/crawler/basic/src/main/java/org/wamblee/crawler/impl/UrlConfig.java b/crawler/basic/src/main/java/org/wamblee/crawler/impl/UrlConfig.java deleted file mode 100644 index c9d35554..00000000 --- a/crawler/basic/src/main/java/org/wamblee/crawler/impl/UrlConfig.java +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Copyright 2005 the original author or authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.wamblee.crawler.impl; - -import org.wamblee.crawler.PageRequest; - -/** - * Represents the configuration for specific URLs. - * - * @author Erik Brakkee - */ -public class UrlConfig extends ConfigItem { - /** - * Constructs the information for how to perform a request for a specific - * URL. - * - * @param aPattern - * Pattern that the URL must match. - * @param aRequest - * Request that must be executed to retrieve the URL. - */ - public UrlConfig(String aPattern, PageRequest aRequest) { - super(aPattern, aRequest); - } - - /** - * Gets the request to execute. - * - * @return Request, or null if the URL does not match. - */ - public PageRequest getRequest(String aUrl) { - return match(aUrl); - } -} diff --git a/crawler/basic/src/main/java/org/wamblee/crawler/impl/package.html b/crawler/basic/src/main/java/org/wamblee/crawler/impl/package.html deleted file mode 100644 index 2ba3c928..00000000 --- a/crawler/basic/src/main/java/org/wamblee/crawler/impl/package.html +++ /dev/null @@ -1,15 +0,0 @@ - - - - wamblee.org - - - -This package provides the implementations of the web crawler interfaces. - - - -@since - - - - diff --git a/crawler/basic/src/main/java/org/wamblee/crawler/package.html b/crawler/basic/src/main/java/org/wamblee/crawler/package.html deleted file mode 100644 index 6423f15b..00000000 --- a/crawler/basic/src/main/java/org/wamblee/crawler/package.html +++ /dev/null @@ -1,15 +0,0 @@ - - - - wamblee.org - - - -This package provides the basic interfaces for a web crawler. - - - -@since - - - - diff --git a/crawler/kiss/ABOUT.txt b/crawler/kiss/ABOUT.txt deleted file mode 100644 index 4f23b392..00000000 --- a/crawler/kiss/ABOUT.txt +++ /dev/null @@ -1,5 +0,0 @@ -This is a crawler for the KiSS Electronic Program Guide that can be used for instance with the KiSS DP558 hard-disc recorder. 
It uses the basic crawler for its implementation. - -Based on preferences for recording programs, the crawler automatically records programs that are scheduled to run on the same day. This saves a lot of manual work in recording programs. - -The final idea is to define one's own interests in television programs and have the crawler record them automatically or send notifications of possibly interesting programs. Whether programs should be recorded can be determined by several criteria such as program title, channel, time of day, and keywords in the description. diff --git a/crawler/kiss/conf/kiss/config.xml.example b/crawler/kiss/conf/kiss/config.xml.example deleted file mode 100644 index 37ae6667..00000000 --- a/crawler/kiss/conf/kiss/config.xml.example +++ /dev/null @@ -1,57 +0,0 @@ - - - - - login - post - login-graphic.xsl - - - - - -
- - - - channels-favorites - channels-favorites-graphic.xsl - - - - channel-overview - channel-overview.xsl - - - - right-now - channel-right-now-graphic.xsl - - - - tomorrow - channel-right-now-graphic.xsl - - - - program-info - program-info-mobile.xsl - - - - recorded - recorded.xsl - - - - http://epg.kml.kiss-technology.com/login.php - mainpage.xsl - - - - .* - get - identity.xsl - - - diff --git a/crawler/kiss/conf/kiss/org.wamblee.crawler.properties b/crawler/kiss/conf/kiss/org.wamblee.crawler.properties deleted file mode 100644 index f26a4329..00000000 --- a/crawler/kiss/conf/kiss/org.wamblee.crawler.properties +++ /dev/null @@ -1,17 +0,0 @@ - - -############################################################################ -# Mail server configuration -############################################################################ -org.wamblee.crawler.smtp.host=falcon -org.wamblee.crawler.smtp.port=25 -org.wamblee.crawler.smtp.username= -org.wamblee.crawler.smtp.password= - -############################################################################ -# Mail notification configuration -############################################################################ -org.wamblee.crawler.notification.from=kiss@wamblee.org -org.wamblee.crawler.notification.to=erik@bladibla.org -org.wamblee.crawler.notification.subject=Recording summary for tomorrow - diff --git a/crawler/kiss/conf/kiss/programs.xml b/crawler/kiss/conf/kiss/programs.xml deleted file mode 100644 index 538ce257..00000000 --- a/crawler/kiss/conf/kiss/programs.xml +++ /dev/null @@ -1,125 +0,0 @@ - - - - horror - notify - horror - - - - horror - notify - the.*ghost.*whisperer - - - - films - notify - film - horror|actie|thriller - - - - wetenschap - notify - wetenschap - - - - science fiction - notify - sf-|(sci-fi)|(science fiction) - - - - invasion - - - - notify - documentaires - (zembla)|(uur.*wolf)|(andere tijden)|(de.*leugen.*regeert) - - - - 20 - star.*gate - - - - six.*feet.*under - - - - 11 - 
battlestar - - - - 10 - star trek - - - - 9 - ((dr)|(doct.*)).*who - - - - 8 - little britain - - - - 9 - the.*x.*files - - - - 9 - buffy.*vampire.*slayer - - - - notify - de.*wereld.*draait.*door - - - - 8 - jag - - - - 5 - shouf shouf - - - - red dwarf - - - - top gear - - - - bedreigde.*paradijzen - - - - wie is de baas - - - - wetenschap - notify - brainiac - - - - auto - wegmisbruikers|(blik.*op.*weg) - - - diff --git a/crawler/kiss/conf/kiss/run.bat b/crawler/kiss/conf/kiss/run.bat deleted file mode 100755 index a992c423..00000000 --- a/crawler/kiss/conf/kiss/run.bat +++ /dev/null @@ -1,11 +0,0 @@ - - - -cd ../conf - - -set CP=.;../lib/wamblee-support-0.2-SNAPSHOT.jar;../lib/wamblee-crawler-kiss-0.2-SNAPSHOT.jar - - -java -classpath %CP% org.wamblee.crawler.kiss.main.KissCrawlerBootstrapper ../lib config.xml programs.xml - diff --git a/crawler/kiss/conf/kiss/run.sh b/crawler/kiss/conf/kiss/run.sh deleted file mode 100755 index e788ed42..00000000 --- a/crawler/kiss/conf/kiss/run.sh +++ /dev/null @@ -1,10 +0,0 @@ -#!/bin/ksh - -cd $( dirname $0 )/../conf - -CP=.:../lib/wamblee-support-0.2-SNAPSHOT.jar:../lib/wamblee-crawler-kiss-0.2-SNAPSHOT.jar - -set -x -java -classpath $CP org.wamblee.crawler.kiss.main.KissCrawlerBootstrapper \ - ../lib config.xml programs.xml - diff --git a/crawler/kiss/conf/xml/channel-overview.xml b/crawler/kiss/conf/xml/channel-overview.xml deleted file mode 100644 index 528737ce..00000000 --- a/crawler/kiss/conf/xml/channel-overview.xml +++ /dev/null @@ -1,56 +0,0 @@ - - - - - - -KiSS - Nederland 1 * - - -d> -

 Nederland 1 *

- -

What's on?

- - -Right now - 12:11, Monday 13th March
- -Evening - Starting 20:00
- -Afternoon - Starting 16:00
- -Noon - Starting 12:00
- -Morning - Starting 6:00
- -Tomorrow  Wednesday  Thursday  Friday  Saturday  Sunday 
-
- [  -Back ] [ Home ] [ Logout ]
- 12:11, Monday 13th March - - - diff --git a/crawler/kiss/conf/xml/channel-right-now-graphic.xml b/crawler/kiss/conf/xml/channel-right-now-graphic.xml deleted file mode 100644 index 4e71ee99..00000000 --- a/crawler/kiss/conf/xml/channel-right-now-graphic.xml +++ /dev/null @@ -1,749 +0,0 @@ - - - - - - - - - - -KiSS - Nederland 1 - - -
-
- - - -

- -
- - - - - - -
-

 Nederland 1

- -

-
- - - - - - - -
-What's On Now? -Favorite Shows -Movies -Sports -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 Wednesday 23rd Page -1/1   -Next 
-
 23:15 - -00:00 
-
- - - -Karels keuze
-
 00:00 - -00:30 
-
- - - -Nederland helpt
-
 00:30 - -00:55 
-
- - - -Man beet hond
-
 00:55 - -06:45 
-
- - - -Nacht-tv: Netwerk herhaling
-
 06:45 - -07:00 
-
- - - -Nederland in beweging
-
 07:00 - -09:00 
-
- - - -NOS-Journaal
-
 09:00 - -09:10 
-
- - - -NOS-Journaal
-
 09:10 - -09:30 
-
- - - -Nederland in beweging
-
 09:30 - -09:55 
-
- - - -That's the question
-
 09:55 - -11:00 
-
- - - -TROS Zomeravondcafé
-
 11:00 - -11:30 
-
- - - -Andere tijden
-
 11:30 - -12:00 
-
- - - -De vloer op
-
 12:00 - -12:10 
-
- - - -NOS-Journaal
-
 12:10 - -12:40 
-
- - - -Man beet hond
-
 12:40 - -13:00 
-
- - - -Lingo
-
 13:00 - -13:10 
-
- - - -NOS-Journaal
-
 13:10 - -13:15 
-
- - - -NOS-Sportjournaal
-
 13:15 - -14:05 
-
- - - -Warzone: Congo
-
 14:05 - -14:35 
-
- - - -Wroeten
-
 14:35 - -15:14 
-
- - - -Spraakmakende zaken
-
 15:14 - -15:16 
-
- - - -Wilde Ganzen
-
 15:16 - -16:00 
-
- - - -Opsporing verzocht
-
 16:00 - -16:05 
-
- - - -NOS-Journaal
-
 16:05 - -17:00 
-
- - - -Denkend aan Showroom
-
 17:00 - -17:05 
-
- - - -NOS-Journaal
-
 17:05 - -17:35 
-
- - - -God voor...
-
 17:35 - -18:30 
-
- - - -Himalaya with Michael Palin
-
 18:30 - -19:00 
-
- - - -That's the question
-
 19:00 - -19:30 
-
- - - -Man beet hond
-
 19:30 - -20:00 
-
- - - -De confrontatie
-
 20:00 - -20:30 
-
- - - -NOS-Journaal
-
 20:30 - -21:00 
-
- - - -Netwerk
-
 21:00 - -22:35 
-
- - - -KRO Detectives: Judge John Deed
-
 22:35 - -23:05 
-
- - - -Dochters + moeders
-
 23:05 - -23:32 
-
- - - -De God van Nederland
-
 23:32 - -23:40 
-
- - - -Wilde Ganzen
-
 23:40 - -00:05 
-
- - - -Man beet hond
-
 00:05 - -06:45 
-
- - - -Nacht-tv: Netwerk herhaling
- -
- - - - - - - - - - - - - - - - - - -
   Currently -showing
-
  Favorite
-
  Future showing
-
  Scheduled -recording
-
  Movie
-
 
- - - - - -

- -BackHomeLogout  22:38, Wednesday 23rd August
- - - diff --git a/crawler/kiss/conf/xml/channel-right-now-mobile-output.xml b/crawler/kiss/conf/xml/channel-right-now-mobile-output.xml deleted file mode 100644 index eca47bcd..00000000 --- a/crawler/kiss/conf/xml/channel-right-now-mobile-output.xml +++ /dev/null @@ -1,15 +0,0 @@ - - - - - - - - - - - - - - - diff --git a/crawler/kiss/conf/xml/channel-right-now-mobile.xml b/crawler/kiss/conf/xml/channel-right-now-mobile.xml deleted file mode 100644 index 969c7ade..00000000 --- a/crawler/kiss/conf/xml/channel-right-now-mobile.xml +++ /dev/null @@ -1,411 +0,0 @@ - - - - - - - - -KiSS - Nederland 1 - - - - -

Nederland 1

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 Monday 21st Page -1/1   -Next 
-
 22:30 - -00:15 
-
-KRO Filmtheater: Hollywood ending
-
 00:15 - -06:45 
-
-NOS-Tekst tv
-
 06:45 - -07:00 
-
-Nederland in beweging
-
 07:00 - -09:00 
-
-NOS-Journaal
-
 09:00 - -09:10 
-
-NOS-Journaal
-
 09:10 - -09:30 
-
-Nederland in beweging
-
 09:30 - -10:00 
-
-That's the question
-
 10:00 - -11:00 
-
-TROS Muziekfeest in de ArenA 2004
-
 11:00 - -11:25 
-
-Hello Goodbye
-
 11:25 - -12:00 
-
-Wat Zou JIJ Doen?
-
 12:00 - -12:10 
-
-NOS-Journaal
-
 12:10 - -12:40 
-
-Taxi
-
 12:40 - -13:00 
-
-Lingo
-
 13:00 - -13:10 
-
-NOS-Journaal
-
 13:10 - -13:20 
-
-NOS-Sportjournaal
-
 13:20 - -14:05 
-
-Warzone: Zuid-Afrika
-
 14:05 - -15:20 
-
-Tien maal Mozart
-
 15:20 - -16:00 
-
-Kruispunt
-
 16:00 - -16:05 
-
-NOS-Journaal
-
 16:05 - -17:00 
-
-Oppassen & wegwezen
-
 17:00 - -17:05 
-
-NOS-Journaal
-
 17:05 - -17:30 
-
-Schepper & co
-
 17:30 - -18:30 
-
-Himalaya with Michael Palin
-
 18:30 - -18:55 
-
-That's the question
-
 18:55 - -19:30 
-
-Man beet hond
-
 19:30 - -20:00 
-
-Ingang Oost
-
 20:00 - -20:30 
-
-NOS-Journaal
-
 20:30 - -21:00 
-
-Netwerk
-
 21:00 - -22:00 
-
-Memories Tour d'Amour
-
 22:00 - -22:30 
-
-Hello Goodbye
-
 22:30 - -23:00 
-
-Villa historica
-
 23:00 - -23:45 
-
  -NCRV Dokument: Het recht om te sterven
-
 23:45 - -00:40 
-
  -Apocalyps Vietnam
-
 00:40 - -01:10 
-
  -Man beet hond
-
 01:10 - -06:45 
-
  -Nacht-tv: Netwerk herhaling
 Monday 21st Page -1/1   -Next 
- - - - - -

-
- -Back Home Logout 23:13, Monday 21st August
- - diff --git a/crawler/kiss/conf/xml/channel-right-now-output.xml b/crawler/kiss/conf/xml/channel-right-now-output.xml deleted file mode 100644 index 6597e6ad..00000000 --- a/crawler/kiss/conf/xml/channel-right-now-output.xml +++ /dev/null @@ -1,112 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -BackHomeLogout diff --git a/crawler/kiss/conf/xml/channel-right-now.xml b/crawler/kiss/conf/xml/channel-right-now.xml deleted file mode 100644 index e6fd80d3..00000000 --- a/crawler/kiss/conf/xml/channel-right-now.xml +++ /dev/null @@ -1,166 +0,0 @@ - - - - - -KiSS - Nederland 1 - - - -

Nederland 1

- -Monday 13th
-Page 1/1   ->> 
-
- 23:55 - 00:40 - -Wintertijd
-00:50 - 06:15 - -Nacht-tv: Netwerk herhalingen
-06:45 - 06:59 - -Nederland in beweging
-06:59 - 09:00 - -Goedemorgen Nederland
- 07:00 - 07:10 - -NOS-Journaal
-07:10 - 07:30 - -Goedemorgen Nederland
-07:30 - 07:40 - -NOS-Journaal
-07:40 - 08:00 - -Goedemorgen Nederland
-08:00 - 08:10 - -NOS-Journaal
-08:10 - 08:30 - -Goedemorgen Nederland
- 08:30 - 08:40 - -NOS-Journaal
-08:40 - 09:00 - -Goedemorgen Nederland
-09:00 - 09:10 - -NOS-Journaal
-09:10 - 09:30 - -Nederland in beweging
-09:30 - 09:55 - -That's the question
-09:55 - 10:50 - -Schoondochter gezocht
- 10:50 - 11:15 - -Blauw bloed
-11:15 - 12:00 - -Appeltje voor de dorst
-12:00 - 12:10 - -NOS-Journaal
-12:10 - 12:35 - -Man bijt hond
-12:35 - 12:57 - -Voor alle fans: Drukwerk
-12:57 - 13:00 - -Trekking Lingo
- 13:00 - 13:10 - -NOS-Journaal
-13:10 - 13:20 - -NOS-Sportjournaal
-13:20 - 14:15 - -Buitenhof
-14:15 - 14:55 - -Hoge bomen in de misdaad
-14:55 - 15:20 - -AVRO Dierenpark
-15:20 - 16:00 - -Kruispunt
- 16:00 - 16:05 - -NOS-Journaal
-16:05 - 16:30 - -Helden van nu: Vrijwilligers in de gezondheidszorg
-16:30 - 17:00 - -Leven met verlies
-17:00 - 17:10 - -NOS-Journaal
-17:10 - 17:35 - -Schepper & co
- 17:35 - 18:30 - -MAX & Catherine
-18:30 - 18:55 - -That's the question
-18:55 - 19:25 - -Man bijt hond
-19:25 - 20:00 - -Ingang Oost
-20:00 - 20:30 - -NOS-Journaal
- 20:30 - 21:05 - -Netwerk
-21:05 - 22:05 - -Memories
-22:05 - 22:55 - -Keyzer & De Boer Advocaten
-22:55 - 23:50 - -NCRV Dokument: Een familie van vaders
-23:50 - 00:20 - -Blauw bloed
- 00:20 - 00:50 - -Man bijt hond
-00:50 - 06:45 - -Nacht-tv: Netwerk herhaling
-
-
-
- [  -Back ] [ Home ] [ Logout ]
- 12:04, Monday 13th March - - - diff --git a/crawler/kiss/conf/xml/channels-favorites-graphic-output.xml b/crawler/kiss/conf/xml/channels-favorites-graphic-output.xml deleted file mode 100644 index 4c26a2b7..00000000 --- a/crawler/kiss/conf/xml/channels-favorites-graphic-output.xml +++ /dev/null @@ -1,35 +0,0 @@ - - - - - - - - - - - - - - - - - - diff --git a/crawler/kiss/conf/xml/channels-favorites-graphic.xml b/crawler/kiss/conf/xml/channels-favorites-graphic.xml deleted file mode 100644 index c07fa370..00000000 --- a/crawler/kiss/conf/xml/channels-favorites-graphic.xml +++ /dev/null @@ -1,416 +0,0 @@ - - - - - - - - KiSS - Favorite Channels - - -

- - - - -

- -
- - - - - - -
-

Favorite Channels

- -

-

- - - - - - - -
- - - What's On Now? - Favorite Shows - Movies - Sports - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-   - Prev -   Page 1/2   - Next -  
-
-
 NL1 - - Nederland 1 - - - -   - -   - -
 NL2 - - Nederland 2 - - - -   - -   - -
 NL3 - - Nederland 3 - - - -   - -   - -
 NET5 - - Net5 - - - -   - -   - -
 RTL4 - - RTL4 - - - -   - -   - -
 SBS6 - - SBS6 - - - -   - -   - -
 RTL5 - - RTL5 - - - -   - -   - -
 DISC - - Discovery Channel - - - -   - -   - -
 RTL7 - - RTL7 - - - -   - -   - -
 VERO - - Veronica - - - -   - -   - -
 MTV - - MTV - - - -   - -   - -
 BOX - - TheBox - - - -   - -   - -
 ESPO - - Eurosport - - - -   - -   - -
 CNN - - CNN - - - -   - -   - -
 BBC1 - - BBC1 - - - -   - -   - -
- - - - - -

-
- - Back - - Home - - Logout -   17:15, Wednesday 23rd August
- - diff --git a/crawler/kiss/conf/xml/channels-favorites-output.xml b/crawler/kiss/conf/xml/channels-favorites-output.xml deleted file mode 100644 index cc8195d3..00000000 --- a/crawler/kiss/conf/xml/channels-favorites-output.xml +++ /dev/null @@ -1,34 +0,0 @@ - - << >> << >>BackHomeLogout diff --git a/crawler/kiss/conf/xml/channels-favorites.xml b/crawler/kiss/conf/xml/channels-favorites.xml deleted file mode 100644 index 4021c21f..00000000 --- a/crawler/kiss/conf/xml/channels-favorites.xml +++ /dev/null @@ -1,83 +0,0 @@ - - - - - -KiSS - Favorite Channels - - - -

Favorite Channels

- -  -<<  Page 1/1   ->> 
-
- NL1 - -Nederland 1
-NL2 - -Nederland 2
-NL3 - -Nederland 3
- NET5 - -Net5
-RTL4 - -RTL4
-SBS6 - -SBS6
-RTL5 - -RTL5
- DISC - -Discovery Channel
-RTL7 - -RTL7
-VERO - -Veronica
-ESPO - -Eurosport
- MTV - -MTV
-BOX - -TheBox
-CNN - -CNN
-BBC1 - -BBC1
- BBC2 - -BBC2
-
-<<  Page 1/1   ->> 
-
- [ Back ] [ Home ] [ Logout ]
- 12:08, Monday 13th March - - - diff --git a/crawler/kiss/conf/xml/channels-whats-on.xml b/crawler/kiss/conf/xml/channels-whats-on.xml deleted file mode 100644 index 1869c114..00000000 --- a/crawler/kiss/conf/xml/channels-whats-on.xml +++ /dev/null @@ -1,53 +0,0 @@ - - - - - -KiSS - Favorite Channels - - - -

 Favorite Channels

- -

What's on?

- - -Right now - 11:43, Monday 13th March
- -Evening - Starting 20:00
- -Afternoon - Starting 16:00
- -Noon - Starting 12:00
- -Morning - Starting 6:00
- -Tomorrow  Wednesday  Thursday  Friday  Saturday  Sunday 
-
- [ Back ] [ Home ] [ Logout ]
- 11:43, Monday 13th March - - - diff --git a/crawler/kiss/conf/xml/login-graphic.xml b/crawler/kiss/conf/xml/login-graphic.xml deleted file mode 100644 index 1f983f66..00000000 --- a/crawler/kiss/conf/xml/login-graphic.xml +++ /dev/null @@ -1,215 +0,0 @@ - - - - - - - - KiSS - TV Guide - - -
-
-
- - - - - - -

-
-
-
- - - - - -
Welcome sf2np2ln9no1 / - web@brakkee.org   [  - Change email ]
-
-
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
  Favorite - Channels  
-
  -
  Favorite - Shows  
-
  -
  Movies  
-
- - - - - -
- - - -
- - - - -
  Sports  
-
- - - -
- - - - - -
-


- Recordings to be sent to the player: 0 [  - View ] [ Manual - recording ]
-
-

- -
-
- Time change: In order to see the correct time in the KiSS TV - Guide, do the following: - -
    -
  1. On your player press SETUP and set your timezone to reflect - summer time, for example for mainland Western Europe this should - be: "CEST (GMT+2)", UK and Ireland: "BST (GMT+1)", Finland and - Eastern Europe: "EEST (GMT+3)".
  2. - -
  3. Access the TV Guide at least once via your player.
  4. - -
  5. The KiSS TV Guide will now display the correct time for your tv - programs both via your player and through the web.
  6. -
- -
-
-
- KML favorites: 0 [  - View ]
-
-
- logout  16:29, - Wednesday 23rd August
- - diff --git a/crawler/kiss/conf/xml/login-mobile-output.xml b/crawler/kiss/conf/xml/login-mobile-output.xml deleted file mode 100644 index df82fb22..00000000 --- a/crawler/kiss/conf/xml/login-mobile-output.xml +++ /dev/null @@ -1,42 +0,0 @@ - - - - - - - - - - - - - - - - diff --git a/crawler/kiss/conf/xml/login-mobile.xml b/crawler/kiss/conf/xml/login-mobile.xml deleted file mode 100644 index 46ab328a..00000000 --- a/crawler/kiss/conf/xml/login-mobile.xml +++ /dev/null @@ -1,206 +0,0 @@ - - - - - - - -KiSS - TV Guide - - - - -

TV Guide

- - - - - -
Welcome sf2np2ln9no1 / -web@brakkee.org   [  -Change email ]
-
-
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
  Favorite -Channels  
-
- -
- -
- -
-
  Favorite -Shows  
-
- -
- -
- -
- -
-
  Movies  
-
- -
-
  Sports  
-
- -
- - - - - -
-


-Recordings to be sent to the player: 0 [  -View ] [ Manual -recording ]
-
-  22:19, Monday -21st August

-
- - diff --git a/crawler/kiss/conf/xml/login-output.xml b/crawler/kiss/conf/xml/login-output.xml deleted file mode 100644 index e9a01521..00000000 --- a/crawler/kiss/conf/xml/login-output.xml +++ /dev/null @@ -1,30 +0,0 @@ - - - - - - - - - - - - - - - - diff --git a/crawler/kiss/conf/xml/login.xml b/crawler/kiss/conf/xml/login.xml deleted file mode 100644 index 26370797..00000000 --- a/crawler/kiss/conf/xml/login.xml +++ /dev/null @@ -1,73 +0,0 @@ - - - - - - KiSS - TV Guide - - - - -

TV Guide

- - Welcome sf2np2ln9no1 / - erik@brakkee.org   [  - Change email ]
-
- - -

Favorite Channels

- - - What's on now?
- - What's on?
- - Favorites - -

Favorite Shows

- - - What's on?
- - Search a show
- - Favorites
- - Add a favorite - -

Movies

- - - What's on? - -

Sports

- - - What's on? - -


- Recordings to be sent to the player: 0
- [  - View ] [ Manual - recording ]
-
- [ Logout ]
- 21:09, Sunday 12th March

- - - \ No newline at end of file diff --git a/crawler/kiss/conf/xml/mainpage.xml b/crawler/kiss/conf/xml/mainpage.xml deleted file mode 100644 index 49ae9c8d..00000000 --- a/crawler/kiss/conf/xml/mainpage.xml +++ /dev/null @@ -1,167 +0,0 @@ - - - - - - - - KiSS Technology Online Portal - - - -
-

 Web Services:
-
-

-
- - - - - - - -
-

   Login

-
- -
- - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

-

-
Player ID 
- or Email: 
*
-
Password:  - **
-

-
 

-
Save - PlayerID ***
-

-
 

-
Desktop mode
- Mobile - mode
- Text mode

-

-

-

-

-
-
- -
[ Text version ]
- -
- - - - - - - - - - - - - - - - -
* You can find your - player id by pressing Menu on your remote control > Online - KML Services > Reveal PlayerID.
- You can use your email address instead of player id only when you - have already logged in once and associated your email address with - your player id.
** You must have - already configured your password by going to the EPG start page - > Configure > Set Password
*** Saving player - id / email requires cookies
- -
- - \ No newline at end of file diff --git a/crawler/kiss/conf/xml/program-info-mobile-output.xml b/crawler/kiss/conf/xml/program-info-mobile-output.xml deleted file mode 100644 index a5e5d796..00000000 --- a/crawler/kiss/conf/xml/program-info-mobile-output.xml +++ /dev/null @@ -1,5 +0,0 @@ -IMDbWhen is it on?  KRO Filmtheater: Hollywood ending    Film  Komisch filmdrama De regisseur Val Waxman was ooit erg succesvol. - Tegenwoordig regisseert hij echter alleen nog maar tv-commercials. Eindelijk krijgt hij weer - eens een aanbod om een grote film te maken. Het lot wil echter dat Val op dat moment tijdelijk - blind wordt, als resultaat van zijn paranoia. Hij probeert samen met enkele vrienden op de set - zijn handicap te verbergen. diff --git a/crawler/kiss/conf/xml/program-info-mobile.xml b/crawler/kiss/conf/xml/program-info-mobile.xml deleted file mode 100644 index 262c57ed..00000000 --- a/crawler/kiss/conf/xml/program-info-mobile.xml +++ /dev/null @@ -1,131 +0,0 @@ - - - - - - - - - KiSS - Program info - - - - -

 Program info

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -  KRO Filmtheater: Hollywood ending  - - - - -  Nederland 1   - Prev -    - Next -   - -
- - - - - - -
- Sunday 20th August - - -   Film  
- - - - - - -
- 22:30 - 00:15 - - (01:45 hours) - -
    
-
- - - - - - -
Komisch filmdrama De regisseur Val Waxman was ooit erg succesvol. - Tegenwoordig regisseert hij echter alleen nog maar tv-commercials. Eindelijk krijgt hij weer - eens een aanbod om een grote film te maken. Het lot wil echter dat Val op dat moment tijdelijk - blind wordt, als resultaat van zijn paranoia. Hij probeert samen met enkele vrienden op de set - zijn handicap te verbergen.
- - - - - - -
IMDb ]  [ When is it on? ] 
- - - - - -

-
- - Back -   - Home -   - Logout -  23:50, Monday 21st August
- - diff --git a/crawler/kiss/conf/xml/program-info-output.xml b/crawler/kiss/conf/xml/program-info-output.xml deleted file mode 100644 index f7366652..00000000 --- a/crawler/kiss/conf/xml/program-info-output.xml +++ /dev/null @@ -1,12 +0,0 @@ - -<< ->>IMDb - - When is it on? -BackHomeLogoutKruispunt - Religieus - - Achtergronden uit kerk en samenleving. - - - diff --git a/crawler/kiss/conf/xml/program-info.xml b/crawler/kiss/conf/xml/program-info.xml deleted file mode 100644 index be5cc438..00000000 --- a/crawler/kiss/conf/xml/program-info.xml +++ /dev/null @@ -1,47 +0,0 @@ - - - - - -KiSS - Program info - - - -

 Program info

- -

Kruispunt

- -

Nederland 1   -<<   ->>

- -Monday 13th March
- Religieus
- 15:20 - 16:00 (40 minutes)
-
- Achtergronden uit kerk en samenleving.
-
-
-[ IMDb ]  [ Record ]  [ When is it on? ]
-
- [  -Back ] [ Home ] [ Logout ]
- 12:16, Monday 13th March - - - diff --git a/crawler/kiss/conf/xml/record-alreadyrecorded.xml b/crawler/kiss/conf/xml/record-alreadyrecorded.xml deleted file mode 100644 index d1b5e575..00000000 --- a/crawler/kiss/conf/xml/record-alreadyrecorded.xml +++ /dev/null @@ -1,23 +0,0 @@ - - - - - -KiSS - TV Guide - - -

Error

- -Show is already in the recording queue!
-
-[ -Back ]
- - - diff --git a/crawler/kiss/conf/xml/record-conflict.xml b/crawler/kiss/conf/xml/record-conflict.xml deleted file mode 100644 index 78af4d02..00000000 --- a/crawler/kiss/conf/xml/record-conflict.xml +++ /dev/null @@ -1,25 +0,0 @@ - - - - - -KiSS - TV Guide - - -

Error

- -This show conflicts with a recording that is already in the -recording queue!
-
-[ -Back ]
- - - diff --git a/crawler/kiss/conf/xml/record-ok.xml b/crawler/kiss/conf/xml/record-ok.xml deleted file mode 100644 index 674c9e32..00000000 --- a/crawler/kiss/conf/xml/record-ok.xml +++ /dev/null @@ -1,74 +0,0 @@ - - - - - -KiSS - TV Guide - Recordings - - - -

TV Guide - Recordings

- -

Recordings already sent to player

- -SBS6 - Lois & Clark: The new adventures of Superman - - 08:00 - 09:00  - - Fri 17th  - -Delete
- VERO - Brainiac - - 19:40 - 20:10  - - Fri 17th  - -Delete - Error (Recording conflict)
-VERO - Brainiac -  19:40 - 20:10  --  Fri 17th  - -Delete - Error (Recording conflict)
- DISC - Brainiac - - 22:00 - 23:00  - - Fri 17th  - -Delete - Error (Recording conflict)
-DISC - Brainiac -  22:00 - 23:00  --  Fri 17th  - -Delete - Error (Recording conflict)
- NL3 - The Kumars at no. 42 - - 15:55 - 16:26  - - Sat 18th  - -Delete
-VERO - Stargate SG-1 - - 18:10 - 18:55  - - Sat 18th  - -Delete
- VERO - Battlestar Galactica - - 19:35 - 20:25  - - Sat 18th  - -Delete
-
- -

Recordings to be sent to player

- -NL1 - Samen tegen Kanker - - 20:30 - 22:25  - - Sat 18th  - -Delete
-
-
- [  -Back ] [ Home ] [ Logout ]
- 19:46, Friday 17th March - - - diff --git a/crawler/kiss/conf/xml/report.xml b/crawler/kiss/conf/xml/report.xml deleted file mode 100644 index afb27fc8..00000000 --- a/crawler/kiss/conf/xml/report.xml +++ /dev/null @@ -1,47 +0,0 @@ - - - - Hello world! - and another message - - - - - Wintertijd - Some description MINSK - De presidentsverkiezingen in Wit-Rusland zijn zondag met ruime cijfers gewonnen door zittend president Aleksandr Loekasjenko. Dat bleek zondag uit exitpolls uitgevoerd in opdracht van het totalitaire regime. Het staatshoofd zou kunnen rekenen op ruim 82 procent van de stemmen. Volgens de eerste gedeeltelijke uitslagen zou Loekasjenko zelfs kunnen rekenen op bijna 89 procent. - Documentaire - Nederland 1 - - 23:25 - 00:10 - - - - - - - Brainiac - Humor - science - Discovery Channel - - 23:30 - 00:15 - - - - - Andere tijden - Documentaire - docu - Nederland 1 - - 23:30 - 00:15 - - - - - - - diff --git a/crawler/kiss/deps.xml b/crawler/kiss/deps.xml deleted file mode 100644 index 6e9095ab..00000000 --- a/crawler/kiss/deps.xml +++ /dev/null @@ -1,27 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/crawler/kiss/docs/README.txt b/crawler/kiss/docs/README.txt deleted file mode 100644 index 9bc261b2..00000000 --- a/crawler/kiss/docs/README.txt +++ /dev/null @@ -1,7 +0,0 @@ -This is the base documentation directory. - -skinconf.xml # This file customizes Forrest for your project. In it, you - # tell forrest the project name, logo, copyright info, etc - -sitemap.xmap # Optional. This sitemap is consulted before all core sitemaps. - # See http://forrest.apache.org/docs/project-sitemap.html diff --git a/crawler/kiss/docs/classes/CatalogManager.properties b/crawler/kiss/docs/classes/CatalogManager.properties deleted file mode 100644 index af7b5ab3..00000000 --- a/crawler/kiss/docs/classes/CatalogManager.properties +++ /dev/null @@ -1,57 +0,0 @@ -# Copyright 2002-2005 The Apache Software Foundation or its licensors, -# as applicable. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -#======================================================================= -# CatalogManager.properties for Catalog Entity Resolver. -# -# This is the default properties file for your project. -# This facilitates local configuration of application-specific catalogs. -# If you have defined any local catalogs, then they will be loaded -# before Forrest's core catalogs. -# -# See the Apache Forrest documentation: -# http://forrest.apache.org/docs/your-project.html -# http://forrest.apache.org/docs/validation.html - -# verbosity: -# The level of messages for status/debug (messages go to standard output). -# The setting here is for your own local catalogs. -# The verbosity of Forrest's core catalogs is controlled via -# main/webapp/WEB-INF/cocoon.xconf -# -# The following messages are provided ... -# 0 = none -# 1 = ? (... not sure yet) -# 2 = 1+, Loading catalog, Resolved public, Resolved system -# 3 = 2+, Catalog does not exist, resolvePublic, resolveSystem -# 10 = 3+, List all catalog entries when loading a catalog -# (Cocoon also logs the "Resolved public" messages.) -verbosity=1 - -# catalogs ... list of additional catalogs to load -# (Note that Apache Forrest will automatically load its own default catalog -# from main/webapp/resources/schema/catalog.xcat) -# Use either full pathnames or relative pathnames. 
-# pathname separator is always semi-colon (;) regardless of operating system -# directory separator is always slash (/) regardless of operating system -catalogs=../resources/schema/catalog.xcat - -# relative-catalogs -# If false, relative catalog URIs are made absolute with respect to the -# base URI of the CatalogManager.properties file. This setting only -# applies to catalog URIs obtained from the catalogs property in the -# CatalogManager.properties file -# Example: relative-catalogs=[yes|no] -relative-catalogs=no diff --git a/crawler/kiss/docs/content/test1.html b/crawler/kiss/docs/content/test1.html deleted file mode 100644 index 1a174a8b..00000000 --- a/crawler/kiss/docs/content/test1.html +++ /dev/null @@ -1,37 +0,0 @@ - - - - Raw un-processed HTML page (test1) - - -

raw un-processed HTML page (test1)

-

- This raw HTML page is linked to from xdocs/samples/static.xml - and from xdocs/samples/linking.xml -

-

All linked-to pages (for example: - <a href="test2.html">) are - also available. -

-
-

- [return to Index]
- [return to Linking demonstration] -

- - diff --git a/crawler/kiss/docs/content/xdocs/details.html b/crawler/kiss/docs/content/xdocs/details.html deleted file mode 100644 index d977ac74..00000000 --- a/crawler/kiss/docs/content/xdocs/details.html +++ /dev/null @@ -1,194 +0,0 @@ - - - -KiSS crawler report - - -

KiSS crawler report

-

Successfully recorded programs

- - - - - - - - - - - - - - - -
18:00 - 18:55: Stargate SG-1 (Veronica/Serie/soap)
-
-Sf-serie SG-1 krijgt een aanbod van een buitenaardse wereld voor een wondermedicijn. -
-
19:25 - 20:25: Stargate SG-1 (Veronica/Serie/soap)
-
-Sf-serie Tijdens een vlucht van de nieuwe X-303, codenaam Prometheus, wordt het schip overmand door NID-agenten. -
-
-
-

-

-

Conflicts with other recorded programs

- - - - - - - - -
20:00 - 20:45: Doctor Who (BBC1/Drama)
-
-Madame de Pompadour finds the court at Versailles under attack from sinister clockwork killers. Her only hope of salvation lies with the man who has haunted her dreams since childhood - a mysterious stranger known only as the Doctor. -
-
-
-

-

-

Possibly interesting programs

-

-
- -
-

-

Category: documentaires

-

- - - - - - - - - - - - - - -
11:50 - 12:30: Zembla (Nederland 3/Documentaire)
-
- - -
-
13:10 - 13:35: Andere tijden (Nederland 3/Film)
-
-Geschiedenisrubriek met reportages over (bijna) vergeten gebeurtenissen uit de twintigste eeuw. De redactie gaat op zoek naar ooggetuigen en betrokkenen en naar historische filmbeelden om aan de hand daarvan de verhalen van vroeger opnieuw te vertellen. -
-
-
-

-

Category: films

-

- - - - - - - - - - - - - - - - - - - - -
13:10 - 13:35: Andere tijden (Nederland 3/Film)
- -
-Geschiedenisrubriek met reportages over (bijna) vergeten gebeurtenissen uit de twintigste eeuw. De redactie gaat op zoek naar ooggetuigen en betrokkenen en naar historische filmbeelden om aan de hand daarvan de verhalen van vroeger opnieuw te vertellen. -
-
22:00 - 06:00: Face off (Veronica/Film)
-
-Actiefilm Een FBI-agent wil kost wat het kost een psychotische terrorist pakken, die verantwoordelijk is voor de moord op zijn zoontje. In de strijd om de terrorist in te rekenen raakt deze in een coma. Hij heeft de agent echter nog net kunnen vertellen dat ergens in Los Angeles een bom verborgen ligt. Om op het spoor van deze bom te komen vragen regeringsfunctionarissen of de FBI-agent zich uit wil geven als de moordenaar van zijn zoon. Door middel van een medische ingreep worden de gezichten van beiden verwisseld met alle gevolgen van dien. - -
-
23:25 - 01:00: Gossip (Net5/Film)
-
-Thriller Drie studenten doen voor een schoolproject een proef over roddelen. Ze verspreiden een gerucht om te zien hoe lang het duurt voordat deze zich heeft verspreid. Maar wat begint als een onschuldige roddel, escaleert tot een groot misverstand en leidt zelfs tot een arrestatie wegens verkrachting. Het drietal beseft dat hun vooropgezete plan desastreuze gevolgen heeft en dat hun experiment niet meer te stoppen is. -
-
-
-

-

Category: science fiction

-

- - - - - - - - -
08:00 - 09:00: Lois & Clark: The new adventures of Superman (SBS6/Serie/soap)
-
-Sf-serie Lex Luthor ontwikkelt verschillende tests om de kracht van Superman te doorgronden. -
-
-
-

-

Category: wetenschap

-

- - - - - - - - - - - - - - - - - - - - - - - - - - - -
11:30 - 12:25: Triumph of life (RTL4/Documentaire)
-
-Serie documentaires over de evolutie. Al het leven op aarde is ooit ontstaan uit een organisme dat zich door omstandigheden heeft kunnen ontwikkelen tot een wezen dat zich wist voort te planten. Dit ingewikkelde proces voltrok zich miljarden jaren geleden en is sindsdien gaande. Het werd in de negentiende eeuw voor het eerst in kaart gebracht door de Britse bioloog Charles Darwin. Sindsdien wordt de evolutietheorie wetenschappelijk onderzocht en betwijfeld, maar het feit is dat het leven zich in diverse vormen blijft ontwikkelen. -
-
00:00 - 00:30: Sex sense: Bi way (Discovery Channel/Wetenschap)
-
-Documentaireserie Onderzoek naar de wetenschap van seksualiteit, gecombineerd met levendige beelden en ondeugende humor. -
-
00:30 - 01:00: Sex sense: Baring it all (Discovery Channel/Wetenschap)
-
- -Documentaireserie Onderzoek naar de wetenschap van seksualiteit, gecombineerd met levendige beelden en ondeugende humor. -
-
00:40 - 02:10: Top secret! (SBS6/Comedy)
-
-Filmkomedie De knappe jaren '50 rock-'n-roll-ster Nick Rivers is in Oost-Duitsland om op te treden. Daar wordt hij verliefd op de dochter van een ontvoerde wetenschapper en komt hij in contact met het Franse verzet. - -
-
-
-

- - diff --git a/crawler/kiss/docs/content/xdocs/hello.pdf b/crawler/kiss/docs/content/xdocs/hello.pdf deleted file mode 100644 index 5ca4f313..00000000 --- a/crawler/kiss/docs/content/xdocs/hello.pdf +++ /dev/null @@ -1,70 +0,0 @@ -%PDF-1.3 -%ª«¬­ -4 0 obj -<< /Type /Info -/Producer (FOP 0.20.4) >> -endobj -5 0 obj -<< /Length 203 /Filter [ /ASCII85Decode /FlateDecode ] - >> -stream -Gar'!]afWZ&;9q-MRA)RFnblL2&]tQSZsjOOT[ck2SQkp(bfQ[R7ZPq=U24c0dqq_i?B[A.0s\)5f5 -endstream -endobj -6 0 obj -<< /Type /Page -/Parent 1 0 R -/MediaBox [ 0 0 595 842 ] -/Resources 3 0 R -/Contents 5 0 R ->> -endobj -7 0 obj -<< /Type /Font -/Subtype /Type1 -/Name /F1 -/BaseFont /Helvetica -/Encoding /WinAnsiEncoding >> -endobj -8 0 obj -<< /Type /Font -/Subtype /Type1 -/Name /F5 -/BaseFont /Times-Roman -/Encoding /WinAnsiEncoding >> -endobj -1 0 obj -<< /Type /Pages -/Count 1 -/Kids [6 0 R ] >> -endobj -2 0 obj -<< /Type /Catalog -/Pages 1 0 R - >> -endobj -3 0 obj -<< -/Font << /F1 7 0 R /F5 8 0 R >> -/ProcSet [ /PDF /ImageC /Text ] >> -endobj -xref -0 9 -0000000000 65535 f -0000000687 00000 n -0000000745 00000 n -0000000795 00000 n -0000000015 00000 n -0000000071 00000 n -0000000365 00000 n -0000000471 00000 n -0000000578 00000 n -trailer -<< -/Size 9 -/Root 2 0 R -/Info 4 0 R ->> -startxref -883 -%%EOF diff --git a/crawler/kiss/docs/content/xdocs/images/group-logo.gif b/crawler/kiss/docs/content/xdocs/images/group-logo.gif deleted file mode 100644 index f017f324..00000000 Binary files a/crawler/kiss/docs/content/xdocs/images/group-logo.gif and /dev/null differ diff --git a/crawler/kiss/docs/content/xdocs/images/group.svg b/crawler/kiss/docs/content/xdocs/images/group.svg deleted file mode 100644 index 584cedb8..00000000 --- a/crawler/kiss/docs/content/xdocs/images/group.svg +++ /dev/null @@ -1,82 +0,0 @@ - - - - - - - - Anteater logo - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/crawler/kiss/docs/content/xdocs/images/icon.png 
b/crawler/kiss/docs/content/xdocs/images/icon.png deleted file mode 100644 index 3be8bbbe..00000000 Binary files a/crawler/kiss/docs/content/xdocs/images/icon.png and /dev/null differ diff --git a/crawler/kiss/docs/content/xdocs/images/project-logo.gif b/crawler/kiss/docs/content/xdocs/images/project-logo.gif deleted file mode 100644 index a60277a4..00000000 Binary files a/crawler/kiss/docs/content/xdocs/images/project-logo.gif and /dev/null differ diff --git a/crawler/kiss/docs/content/xdocs/images/project.svg b/crawler/kiss/docs/content/xdocs/images/project.svg deleted file mode 100644 index 01abcdbb..00000000 --- a/crawler/kiss/docs/content/xdocs/images/project.svg +++ /dev/null @@ -1,82 +0,0 @@ - - - - - - - - Anteater logo - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/crawler/kiss/docs/content/xdocs/images/usemap.gif b/crawler/kiss/docs/content/xdocs/images/usemap.gif deleted file mode 100644 index c10732ca..00000000 Binary files a/crawler/kiss/docs/content/xdocs/images/usemap.gif and /dev/null differ diff --git a/crawler/kiss/docs/content/xdocs/index.xml b/crawler/kiss/docs/content/xdocs/index.xml deleted file mode 100644 index 54dd1d23..00000000 --- a/crawler/kiss/docs/content/xdocs/index.xml +++ /dev/null @@ -1,313 +0,0 @@ - - - - -
- Automatic Recording for KiSS Hard Disk Recorders -
- - KiSS makes regular updates to their site that sometimes require adaptations to the - crawler. If it stops working, check out the most recent version here. -
- Changelog -
- 21 November 2006 -
    -
  • Corrected the config.xml again.
  • -
  • Corrected errors in the documentation for the web application. It starts running at 19:00 - and not at 5:00.
  • -
-
-
- 19 November 2006 -
    -
  • Corrected the config.xml file to deal with changes in the login procedure.
  • -
-
-
- 17 November 2006 -
    -
  • Corrected the packed distributions. The standalone distribution had an error in the - scripts and was missing libraries
  • - -
-
-
- 7 September 2006 -
    -
  • KiSS modified the login procedure. It is now working again.
  • -
  • Generalized the startup scripts. They should now be insensitive to the specific - libraries used.
  • -
-
-
- 31 August 2006 -
    -
  • Added windows bat file for running the crawler under windows. Very ad hoc, will be - generalized.
  • -
-
-
- 24 August 2006 -
    -
  • The crawler now uses desktop login for crawling. Also, it is much more efficient since - it no longer needs to crawl the individual programs. This is because the channel page - includes descriptions of programs in javascript popups which can be used by the crawler. - The result is a significant reduction of the load on the KiSS EPG site. Also, the delay - between requests has been increased to further reduce load on the KiSS EPG site.
  • -
  • The crawler now crawls programs for tomorrow instead of for today.
  • -
  • The web based crawler is configured to run only between 7pm and 12pm. It used to run - at 5am.
  • -
-
- -
- 13-20 August 2006 -

There were several changes to the login procedure, requiring modifications to the - crawler.

-
    -
  • The crawler now uses the 'Referer' header field correctly at login.
  • -
  • KiSS now uses hidden form fields in their login process which are now also handled - correctly by the crawler.
  • -
-
-
-
- Overview - -

In 2005, KiSS introduced the ability to schedule recordings - on KiSS hard disk recorder (such as the DP-558) through a web site on the internet. When a - new recording is scheduled through the web site, the KiSS recorder finds out about this new - recording by polling a server on the internet. This is a really cool feature since it - basically allows programming the recorder when away from home.

-

After using this feature for some time, I started noticing regular patterns. Often you - are looking for the same programs and for certain types of programs. So, wouldn't it be nice - to have a program do this work for you and automatically record programs and notify you of - possibly interesting ones?

-

This is where the KiSS crawler comes in. This is a simple crawler which logs on to the - KiSS electronic programme guide web site and gets programme information from there. Then - based on that it automatically records programs for you or sends notifications about - interesting ones.

-

In its current version, the crawler can be used in two ways:

-
    -
  • standalone program: - A standalone program run from the command-line or as a scheduled task.
  • -
  • web application: A web application running on a java application - server. With this type of use, the crawler also features an automatic retry mechanism in - case of failures, as well as a simple web interface.
  • -
-
- -
- Downloading - -

At this moment, no formal releases have been made and only the latest version can be - downloaded.

-

The easy way to start is the standalone program - binary version or using the web application.

-

The latest source can be obtained from subversion with the URL - https://wamblee.org/svn/public/utils. The subversion repository allows - read-only access to anyone.

-

The application was developed and tested on SuSE linux 10.1 with - JBoss 4.0.4 application - server. An application server or servlet container is only required for the - web application. The crawler requires at least a Java Virtual Machine - 1.5 or greater to run.

-
- -
- Configuring the crawler - -

The crawler comes with three configuration files:

-
    -
  • crawler.xml: basic crawler configuration tailored to the KiSS electronic - programme guide.
  • -
  • programs.xml: containing a description of which programs must be recorded - and which programs are interesting.
  • -
  • org.wamblee.crawler.properties: Containing a configuration
  • -
-

For the standalone program, all configuration files are in the conf - directory. For the web application, the properties file is located in the - WEB-INF/classes directory of the web application, and - crawler.xml and programs.xml are located outside of the web - application at a location configured in the properties file.

- - -
- Crawler configuration <code>crawler.xml</code> - -

First of all, copy the config.xml.example file to config.xml. - After that, edit the first entry of that file and replace user and - passwd with your personal user id and password for the KiSS Electronic - Programme Guide.

-
- -
- Program configuration -

Interesting TV shows are described using program elements. Each - program element contains one or more match elements that - describe a condition that the interesting program must match.

-

Matching can be done on the following properties of a program:

- - - - - - - - - - - - - - - - - - - - - -
Field nameDescription
nameProgram name
descriptionProgram description
channelChannel name
keywordsKeywords/classification of the program.
-

The field to match is specified using the field attribute of the - match element. If no field name is specified then the program name is - matched. Matching is done by converting the field value to lowercase and then doing a - perl-like regular expression match of the provided value. As a result, the content of the - match element should be specified in lower case otherwise the pattern will never match. If - multiple match elements are specified for a given program - element, then all matches must apply for a program to be interesting.

-

Example patterns:

- - - - - - - - - - - - - -
PatternExamples of matching field values
the.*x.*files"The X files", "The X-Files: the making of"
star trek"Star Trek Voyager", "Star Trek: The next generation"
- -

It is possible that different programs cannot be recorded since they overlap. To deal - with such conflicts, it is possible to specify a priority using the priority - element. Higher values of the priority value mean a higher priority. If two programs have - the same priority, then it is (more or less) unspecified which of the two will be - recorded, but it will at least record one program. If no priority is specified, then the - priority is 1 (one).

- -

Since it is not always desirable to try to record every program that matches the - criteria, it is also possible to generate notifications for interesting programs only - without recording them. This is done by specifying the action element with - the content notify. By default, the action is - record. To make the mail reports more readable it is possible to also assign - a category to a program for grouping interesting programs. This can be done using the - category element. Note that if the action is - notify, then the priority element is not used.

- -
- -
- Notification configuration -

Edit the configuration file org.wamblee.crawler.properties. The properties - file is self-explanatory.

-
-
- - - - -
- Installing and running the crawler - -
- Standalone application -

In the binary distribution, execute the run script for your operating - system (run.bat for windows, and run.sh for unix).

-
- -
- Web application -

After deploying the web application, navigate to the application in your browser (e.g. - http://localhost:8080/wamblee-crawler-kissweb). The screen should show an - overview of the last time it ran (if it ran before) as well as a button to run the crawler - immediately. Also, the result of the last run can be viewed. The crawler will run - automatically starting after 19:00, - and will retry at 1 hour intervals in case - of failure to retrieve programme information. -

- -

- Since the crawler checks the status at - 1 hour intervals it can run for the first time anytime between 19:00 and 20:00. This is done - on purpose since it means that crawlers run by different people will not all start running - simultaneously and is thus more friendly to the KiSS servers.

-
- -
- Source distribution -

With the source code, build everything with maven2 as follows:

- - mvn -Dmaven.test.skip=true install - cd crawler - mvn package assembly:assembly - -

- After this, locate the - binary distribution in the target subdirectory of the crawler - directory. Then - proceed as for the binary distribution.

- -
- -
- General usage -

When the crawler runs, it retrieves the programs for tomorrow. -

- If you deploy the web application today, it will run automatically on the next (!) - day. This even holds if you deploy the application before the normal scheduled time. -
- - -
- -
- Examples - -

The best example is in the distribution itself. It is my personal - programs.xml file.

-
- -
- Contributing - -

You are always welcome to contribute. If you find a problem just tell me about it and if - you have ideas, I am always interested to hear about them.

-

If you are a programmer and have a fix for a bug, just send me a patch and if you are - fanatic enough and have ideas, I can also give you write access to the repository.

-
- - - -
diff --git a/crawler/kiss/docs/content/xdocs/main.html b/crawler/kiss/docs/content/xdocs/main.html deleted file mode 100644 index e700881a..00000000 --- a/crawler/kiss/docs/content/xdocs/main.html +++ /dev/null @@ -1,79 +0,0 @@ - - - - - - - KiSS Crawler overview page - - - - - - - - - - - - -

KiSS Crawler Overview

- - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- Currently running: - - false -
- Last executed at: - - Sat May 06 05:18:54 CEST 2006 -
- Last result: - - true -
- Last message: - - -
- Last report: - - details -
- -
- - -
- - - diff --git a/crawler/kiss/docs/content/xdocs/samples/ascii-art.xml b/crawler/kiss/docs/content/xdocs/samples/ascii-art.xml deleted file mode 100644 index 4f984d07..00000000 --- a/crawler/kiss/docs/content/xdocs/samples/ascii-art.xml +++ /dev/null @@ -1,56 +0,0 @@ - - - - -
- Ascii Art sample -
- -
- Sample Ascii Art -

To create a .png image like the one below with ASCII art, just save - the text file with the .aart extension and then link from any page - as an image (<image src="asci-art-file.png"/>).

-

cocoon pyramid of management-(logic-content-style)

-

Here is the source file that has created the above image.

- - - +-------------------+ - | Management | - +-+-------+-------+-+ - | | | - | | | - +-------+ +----+----+ +-------+ - | logic +--+ content +--+ style | - +-------+ +---------+ +-------+ - - -

An ASCII art pad recognizes the following ASCII characters:

-
    -
  • '-' horizontal SVG line
  • -
  • '|' vertical SVG line
  • -
  • '+' corner
  • -
  • \ oblique line
  • -
  • String starting with letter, digit, or '_' is converted to a SVG text.
  • -
-
- -
- Copyright 2002-2004 The Apache Software Foundation or its licensors, as applicable. -
-
diff --git a/crawler/kiss/docs/content/xdocs/samples/cocoon-pyramid.aart b/crawler/kiss/docs/content/xdocs/samples/cocoon-pyramid.aart deleted file mode 100644 index 090a0e0e..00000000 --- a/crawler/kiss/docs/content/xdocs/samples/cocoon-pyramid.aart +++ /dev/null @@ -1,11 +0,0 @@ - - +-------------------+ - | Management | - +-+-------+-------+-+ - | | | - | | | - +-------+ +----+----+ +-------+ - | logic +--+ content +--+ style | - +-------+ +---------+ +-------+ - - diff --git a/crawler/kiss/docs/content/xdocs/samples/custom.xml b/crawler/kiss/docs/content/xdocs/samples/custom.xml deleted file mode 100644 index dff5776e..00000000 --- a/crawler/kiss/docs/content/xdocs/samples/custom.xml +++ /dev/null @@ -1,21 +0,0 @@ - - - - -Hello XML Custom World!! - diff --git a/crawler/kiss/docs/content/xdocs/samples/customSchema.xml b/crawler/kiss/docs/content/xdocs/samples/customSchema.xml deleted file mode 100644 index 81c7c1a9..00000000 --- a/crawler/kiss/docs/content/xdocs/samples/customSchema.xml +++ /dev/null @@ -1,39 +0,0 @@ - - - - -
- Custom Schema -
- -

Forrest comes with a set of schemas for common documents, however, if you have existing documents - that use a different schema you will want to tell Forrest how to work with them. The best way of doing - this is to build a plugin - so that you can easily reuse the functionality on different projects. Plugins also allow you to share - this new functionality with other users, and to benefit from their contributions to your work.

- -

If you don't want to build a plugin you can make Forrest process them within your project sitemap - (but this won't really save you any work since the process is almost the same). This sample site has - a demonstration of using a custom DTD. If you request <a href="custom.html"> - you can see the results. Take a look at the project sitemap.xmap to see how it is done.

- - Adding custom schemas with a plugin has the added benefit of being able to add the schema - definition to the catalog file rather than having to reference it directly from within the XML - document. - -
diff --git a/crawler/kiss/docs/content/xdocs/samples/document-v13.xml b/crawler/kiss/docs/content/xdocs/samples/document-v13.xml deleted file mode 100644 index 9ca3809f..00000000 --- a/crawler/kiss/docs/content/xdocs/samples/document-v13.xml +++ /dev/null @@ -1,381 +0,0 @@ - - - - -
- The Apache Forrest xdocs document-v1.3 DTD - The content of this document doesn't make any sense at all. - This is a demonstration document using all possible elements in - the current Apache Forrest xdocs document-v13.dtd - -
- - - This is a demonstration document using all possible elements in the - current Apache Forrest xdocs document-v13.dtd - (See the DTD changes section at the bottom.) - -
- Sample Content -

Hint: See the xml source to see how the various - elements are used and see the - - DTD reference documentation. -

-
- Block and inline elements -

This is a simple paragraph. Most documents contain a fair amount of - paragraphs. Paragraphs are called <p>.

-

With the <p xml:space="preserve"> attribute, you can declare - that whitespace should be preserved, without implying it is in any other - way special.

-

- This next paragraph has a class attribute of 'quote'. CSS can - be used to present this <p class='quote'> in - a different style than the other paragraphs. The handling of - this quoted paragraph is defined in the <extra-css> - element in the skinconf.xml. -

-

- Anyway, like I was sayin', shrimp is the fruit of the sea. You can - barbecue it, boil it, broil it, bake it, sautee it. Dey's uh, - shrimp-kabobs, shrimp creole, shrimp gumbo. Pan fried, deep fried, - stir-fried. There's pineapple shrimp, lemon shrimp, coconut shrimp, - pepper shrimp, shrimp soup, shrimp stew, shrimp salad, shrimp and - potatoes, shrimp burger, shrimp sandwich. That- that's about it. -

-

A number of in-line elements are available in the DTD, we will show them - inside an unordered list (<ul>):

-
    -
  • Here is a simple list item (<li>).
  • -
  • Have you seen the use of the <code> element in the - previous item?
  • -
  • Also, we have <sub> and <sup> - elements to show content above or below the text - baseline.
  • -
  • There is a facility to emphasize certain words using the - <em> <strong> - elements.
  • -
  • We can use - - <icon>s too.
  • -
  • Another possibility is the <img> element: - another feather, - which offers the ability to refer to an image map.
  • -
  • We have elements for hyperlinking: -
    -
    <link href="faq.html">
    -
    Use this to - link - to another document. As per normal, this will open the new document - in the same browser window.
    - -
    <link href="#section">
    -
    Use this to - link - to the named anchor in the current document. -
    - -
    <link href="faq.html#forrest">
    -
    Use this to - link - to another document and go to the named anchor. This will open - the new document in the same browser window. -
    - -
    <jump href="faq.html">
    -
    Use this to - jump - to another document and optionally go to a named - anchor - within that document. This will open the new document in the same - browser window. So what is the difference between link and jump? - The jump behaves differently, in that it will replace any frames - in the current window. - This is the equivalent of - <a ... target="_top"> -
    - -
    <fork href="faq.html">
    -
    Use this to - fork - your webbrowser to another document. This will open the document - in a new, unnamed browser window. - This is the equivalent of - <a ... target="_blank"> -
    -
  • - -
  • Oh, by the way, a definition list <dl> was used inside - the previous list item. We could put another -
      -
    • unordered list
    • -
    • inside the list item
    • -
    - - - - -
    A sample nested table
    Or even tables.. -
    inside tables..
    -
    or inside lists, but I believe this liberty gets quickly quite - hairy as you see.
    -
  • -
-

So far for the in-line elements, let's look at some paragraph-level - elements.

- The <fixme> element is used for stuff - which still needs work. Mind the author attribute! - Use the <note> element to draw attention to something, e.g. ...The <code> element is used when the author can't - express himself clearly using normal sentences ;-) - Sleep deprivation can be the result of being involved in an open - source project. (a.k.a. the <warning> element). - - If you want your own labels for notes and - warnings, specify them using the label attribute. - -

Apart from unordered lists, we have ordered lists too, of course.

-
    -
  1. Item 1
  2. -
  3. Item 2
  4. -
  5. This should be 3 if my math is still OK.
  6. -
-
- -
- Various presentation formats - -

This sample document, written in document-v13 XML can be presented - via Forrest in a number of different formats. The links in the - following list show this document in each of the currently available - formats.

- -

Each of the formats can be made available as a link near the top of - the page. Actual placement of those links depends on the skin - currently in use. Those links are enabled in the skinconf.xml via the - <disable-XXX-link> elements in the skinconf.xml

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Presentation FormatDescriptionskinconf.xml Element
HTMLThis document in HTML format. Always generated by default. Cannot be turned off.
XMLThis document in its raw XML format.<disable-xml-link>. By default, set to true, meaning - that this link will not be shown.
PDFThis document as Adobe PDF<disable-pdf-link>. By default, set to false, meaning - that this link will be shown.
Text

This document as straight text.

-

For additional information see the Forrest text-output - plugin.

<disable-txt-link>. By default, set to true, meaning - that this link will not be shown.
POD

This document as Perl POD (Plain Old Documentation). Text - with minimal formatting directives. If on a *nix system with perl - installed, see "man perlpod".

-

For additional information see the Forrest pod-output - plugin.

<disable-pod-link>. By default, set to true, meaning - that this link will not be shown.
-
-
- Using sections -

You can use sections to put some structure in your document. For some - strange historical reason, the section title is an attribute of the - <section> element.

-
-
- Sections, the sequel -

Just some second section.

-
- Section 2.1 -

Which contains a subsection (2.1).

-
-
- -
- Showing preformatted source code -

Enough about these sections. Let's have a look at more interesting - elements, <source> for instance:

- -// This example is from the book _Java in a Nutshell_ by David Flanagan. -// Written by David Flanagan. Copyright (c) 1996 O'Reilly & Associates. -// You may study, use, modify, and distribute this example for any purpose. -// This example is provided WITHOUT WARRANTY either expressed or implied. - -import java.applet.*; // Don't forget these import statements! -import java.awt.*; - -public class FirstApplet extends Applet { - // This method displays the applet. - // The Graphics class is how you do all drawing in Java. - public void paint(Graphics g) { - g.drawString("Hello World", 25, 50); - } -} -

CDATA sections are used within - <source> elements so that you can write pointy - brackets without needing to escape them with messy - &lt; entities ... -

- - easy - -]]> -

Please take care to still use a sensible line-length within your - source elements.

-
- -
- Using tables -

And now for a table:

- - - - - - - - - - - - - - - - -
Table caption
heading cell 1heading cell 2heading cell 3
data cellthis data cell spans two columns
- Tables can be nested: - - - - - - - - - - -
column 1column 2
cell Acell B
-
-
  • and can include most other elements
  • such as lists
-
-
- - -
- Using figures -

And a <figure> to end all of this. - Note that this can also be implemented with an - <img> element. -

-
-
-
- -
- DTD changes -

See the generated - - DTD reference documentation. -

-
- Changes since document-v12 -

- All v1.2 docs will work fine as v1.3 DTD. The main change is the - addition of a @class attribute to every element, which enables the - "extra-css" section in the skinconf to be put to good use. -

-
-
- Changes since document-v11 -

- doc-v12 enhances doc-v11 by relaxing various restrictions that were - found to be unnecessary. -

-
    -
  • - Links (link|jump|fork) and inline elements (br|img|icon|acronym) are - allowed inside title. -
  • -
  • - Paragraphs (p|source|note|warning|fixme), table and figure|anchor are - allowed inside li. -
  • -
  • - Paragraphs (p|source|note|warning|fixme), lists (ol|ul|dl), table, - figure|anchor are allowed inside definition lists (dd) and tables (td - and th). -
  • -
  • - Inline content - (strong|em|code|sub|sup|br|img|icon|acronym|link|jump|fork) is - allowed in strong and em. -
  • -
-
-
- -
- This is a legal notice, so it is important. -
-
diff --git a/crawler/kiss/docs/content/xdocs/samples/embedded_html.html b/crawler/kiss/docs/content/xdocs/samples/embedded_html.html deleted file mode 100644 index 29b2d6a5..00000000 --- a/crawler/kiss/docs/content/xdocs/samples/embedded_html.html +++ /dev/null @@ -1,111 +0,0 @@ - - - - - Embedded HTML demonstration page - - - -

Embedded HTML demonstration page

- -

An HTML document is used as the source for this page, and translated -to the intermediate Apache Forrest xdocs document structure. The sitemap then -does the normal aggregation with the navigation content and application of -the skin. -

- -

-The html is being interpreted by Forrest and transformed to the -intermediate Apache xdocs document structure. That stylesheet cannot deal -with every possibility in unstructured html, so it tries to guess how to -build <section> elements and such. -It needs <h1> (<h2> etc.) headings in the source html -in order to identify sections. Patches are welcome to enhance -that transformer. -

- -

-You can still take advantage of Forrest's -"site:" -method of linking, for example: -<a href="site:index"> -

- -
-XHTML can also be used, but it is just treated as interpreted -html. Future versions of Forrest will take much more advantage of XHTML. - -
- -

Some example uses of HTML

-

-There are situations when the Apache Forrest xdocs DTD is not sufficient. -The use of embedded HTML enables you to use HTML code in these situations. -

- -

Embedded applets and Javascript

- -

-See the -Javascript alert pop-up -

- -

HTML forms for user interaction

-

-Search the Forrest website via Google: - -

- - - - -Google Search - -  - -
- -

- -

-See a demonstration of "html" and "html forms" with our -Forrest download mirror -facility and the -explanation howto document. -

- -

Invalid HTML

-

-This paragraph has a missing closing tag for the <p> element. If you look -at the XML created by Forrest you'll notice that -Forrest has fixed this. - -

Potentially Invalid XDocs

- -However, it should also be noted that the resultant XML is not a valid document -since it contains the additional HTML elements. If you are intending to use -the intermediate XDocs for any purpose be aware of this fact. - -

Other non-standard html-type abilities

-

-Use other HTML delights (???) and tricks. -

- - diff --git a/crawler/kiss/docs/content/xdocs/samples/faq.xml b/crawler/kiss/docs/content/xdocs/samples/faq.xml deleted file mode 100644 index 62b41ca5..00000000 --- a/crawler/kiss/docs/content/xdocs/samples/faq.xml +++ /dev/null @@ -1,42 +0,0 @@ - - - - - - Frequently Asked Questions - - - Documentation - - - How can I help write documentation? - - -

- This project uses Apache Forrest to - generate documentation from XML. Please download a copy of Forrest, - which can be used to validate, develop and render a project site. -

- - - - - - diff --git a/crawler/kiss/docs/content/xdocs/samples/hello-v10.dtd b/crawler/kiss/docs/content/xdocs/samples/hello-v10.dtd deleted file mode 100644 index ee62644d..00000000 --- a/crawler/kiss/docs/content/xdocs/samples/hello-v10.dtd +++ /dev/null @@ -1,49 +0,0 @@ - - - - - - - - diff --git a/crawler/kiss/docs/content/xdocs/samples/helloAgain.pdf b/crawler/kiss/docs/content/xdocs/samples/helloAgain.pdf deleted file mode 100644 index 5ca4f313..00000000 --- a/crawler/kiss/docs/content/xdocs/samples/helloAgain.pdf +++ /dev/null @@ -1,70 +0,0 @@ -%PDF-1.3 -%ª«¬­ -4 0 obj -<< /Type /Info -/Producer (FOP 0.20.4) >> -endobj -5 0 obj -<< /Length 203 /Filter [ /ASCII85Decode /FlateDecode ] - >> -stream -Gar'!]afWZ&;9q-MRA)RFnblL2&]tQSZsjOOT[ck2SQkp(bfQ[R7ZPq=U24c0dqq_i?B[A.0s\)5f5 -endstream -endobj -6 0 obj -<< /Type /Page -/Parent 1 0 R -/MediaBox [ 0 0 595 842 ] -/Resources 3 0 R -/Contents 5 0 R ->> -endobj -7 0 obj -<< /Type /Font -/Subtype /Type1 -/Name /F1 -/BaseFont /Helvetica -/Encoding /WinAnsiEncoding >> -endobj -8 0 obj -<< /Type /Font -/Subtype /Type1 -/Name /F5 -/BaseFont /Times-Roman -/Encoding /WinAnsiEncoding >> -endobj -1 0 obj -<< /Type /Pages -/Count 1 -/Kids [6 0 R ] >> -endobj -2 0 obj -<< /Type /Catalog -/Pages 1 0 R - >> -endobj -3 0 obj -<< -/Font << /F1 7 0 R /F5 8 0 R >> -/ProcSet [ /PDF /ImageC /Text ] >> -endobj -xref -0 9 -0000000000 65535 f -0000000687 00000 n -0000000745 00000 n -0000000795 00000 n -0000000015 00000 n -0000000071 00000 n -0000000365 00000 n -0000000471 00000 n -0000000578 00000 n -trailer -<< -/Size 9 -/Root 2 0 R -/Info 4 0 R ->> -startxref -883 -%%EOF diff --git a/crawler/kiss/docs/content/xdocs/samples/index.xml b/crawler/kiss/docs/content/xdocs/samples/index.xml deleted file mode 100644 index 41f7ac5c..00000000 --- a/crawler/kiss/docs/content/xdocs/samples/index.xml +++ /dev/null @@ -1,29 +0,0 @@ - - - - -
- Samples -
- -
- If something goes wrong.. -

Patches are welcome: Forrest FAQ

-
- -
diff --git a/crawler/kiss/docs/content/xdocs/samples/linking.xml b/crawler/kiss/docs/content/xdocs/samples/linking.xml deleted file mode 100644 index 31b82af9..00000000 --- a/crawler/kiss/docs/content/xdocs/samples/linking.xml +++ /dev/null @@ -1,353 +0,0 @@ - - - - -
- Demonstration of linking -
- - -
- Overview -

Forrest has many powerful techniques for linking between documents - and for managing the site navigation. This document demonstrates those - techniques. - The document "Menus and Linking" - has the full details. -

-
- -
- Building and maintaining consistent URI space -

- When Forrest builds your site, it starts from the front page. Like - a robot, it traverses all of the links that it finds in the documents - and builds the corresponding pages. Any new links are further traversed. -

-

- Sometimes those links lead to documents that are generated directly - from xml source files, sometimes they are generated from other source - via an intermediate xml format. Other times the links lead to raw - un-processed content. -

-

- The site navigation configuration file "site.xml" provides - a way to manage this URI space. In the future, when documents are - re-arranged and renamed, the site.xml configuration will enable this - smoothly. -

-
- -
- Mapping the local resource space to the final URI space -

- For both generated and raw (un-processed) files, the top-level of the - URI space corresponds to the "content/xdocs/" directory, - i.e. the location of the "site.xml" configuration file. -

- - In versions prior to 0.7 raw un-processed content was stored in - the "content/" directory. In 0.7 onwards, raw - un-processed data is stored alongside the xdocs. In addition, - in 0.6 and earlier, HTML documents could be stored in the xdocs - directory and served without processing. If you - wish to emulate the behaviour of 0.6 and earlier see the - next section. - -

- A diagram will help. -

- The final URI space ------------------- ------------------- -Generated content ... - content/xdocs/index.xml index.html - content/xdocs/samples/index.xml samples/index.html - content/xdocs/samples/faq.xml samples/faq.html - content/xdocs/test1.html test1.html - content/xdocs/samples/test3.html samples/test3.html - content/xdocs/samples/subdir/test4.html samples/subdir/test4.html - -Raw un-processed content ... - content/xdocs/hello.pdf hello.pdf - content/xdocs/hello.sxw hello.sxw - content/xdocs/subdir/hello.sxw subdir/hello.sxw -]]> - -
- How Plugins May Affect The URI Space -

By using Forrest Input Plugins - you can process some file formats, such as - OpenOffice.org documents and produce processed content from them. For example, - the file content/xdocs/hello.sxw can be used to produce a - skinned version of the document with the name hello.html. - Similarly, you can use Forrest Output - Plugins to create different output formats such as PDF, in this - case content/xdocs/hello.sxw can produce - hello.pdf.

- -

However, this does not affect the handling of raw content. That is, you - can still retrieve the raw un-processed version with, for example, - hello.sxw. If you want to prevent the user retrieving the - un-processed version you will have to create matchers that intercept - these requests within your project sitemap.

-
- -
- -
- Basic link to internal generated pages -

- When this type of link is encountered, Forrest will look for a - corresponding xml file, relative to this document (i.e. in - content/xdocs/samples/). -

-

A generated document in the current directory, which corresponds to - content/xdocs/samples/sample.html ... -

- ]]>sample.html]]> -

In a sub-directory, which corresponds to - content/xdocs/samples/subdir/index.html ... -

- ]]>subdir/index.html]]> -
- -
- Basic link to raw un-processed content -

- Raw content files are not intended for any processing, they are just - linked to (e.g. pre-prepared PDFs, zip archives). - These files are placed alongside your normal content in the - "content/xdocs" directory. -

-

A raw document in the current directory, which corresponds to - content/xdocs/samples/helloAgain.pdf ... -

- ]]>helloAgain.pdf]]> -

A raw document in a sub-directory, which corresponds to - content/xdocs/samples/subdir/hello.zip ... -

- ]]>subdir/hello.zip]]> -

A raw document at the next level up, which corresponds to - content/hello.pdf ... -

- ]]>../hello.pdf]]> - -
- Serving (X)HTML content without Skinning - -

Prior to version 0.7, the raw un-processed content was stored in - the "content/" directory. In 0.7 onwards, raw - un-processed data is stored alongside the xdocs. In addition - in 0.6 and earlier, HTML files could be stored in the xdocs - directory and they would be served without further processing. - As described above, this is not the case in 0.7 where HTML files - are, by default, skinned by Forrest.

- -

If you - wish to emulate the behaviour of 0.6 and earlier then you - must add the following to your project sitemap.

- - -<map:match pattern="**.html"> - <map:select type="exists"> - <map:when test="{project:content}{0}"> - <map:read src="{project:content}/{0}" mime-type="text/html"/> - <!-- - Use this instead if you want JTidy to clean up your HTML - <map:generate type="html" src="{project:content}/{0}" /> - <map:serialize type="html"/> - --> - </map:when> - <map:when test="{project:content.xdocs}{0}"> - <map:read src="{project:content.xdocs}/{0}" mime-type="text/html"/> - <!-- - Use this instead if you want JTidy to clean up your HTML - <map:generate type="html" src="{project:content.xdocs}/{0}" /> - <map:serialize type="html"/> - --> - </map:when> - </map:select> -</map:match> - - -

The above allows us to create links to un-processed, un-skinned files stored - in the {project:content} or {project:content.xdocs} - directory. For example: - <a href="/test1.html">HTML content</a>. However, it will - break the 0.7 behaviour of skinning HTML content. For this reason the old - ".ehtml" extension can be used to embed HTML content in a Forrest skinned - site.

- -

Note that you can change the matchers above to selectively serve some - content as raw un-processed content, whilst still serving other content - as skinned documents. For example, the following snippet would allow - you to serve the content of an old, deprecated site without processing - from Forrest, whilst still allowing all other content to be processed - by Forrest in the normal way:

- - -<map:match pattern="old_site/**.html"> - <map:select type="exists"> - <map:when test="{project:content}{1}.html"> - <map:read src="{project:content}/{1}.html" mime-type="text/html"/> - <!-- - Use this instead if you want JTidy to clean up your HTML - <map:generate type="html" src="{project:content}/{0}" /> - <map:serialize type="html"/> - --> - </map:when> -</map:match> - - -

For example, HTML content.

-
-
- -
- Full URL to external documents -

A full URL ...

- ]]>http://forrest.apache.org/]]> -

A full URL with a fragment identifier ...

- ]]>http://forrest.apache.org/faq.html#link_raw]]> -

- Note that Forrest does not traverse external links to look for - other links. -

-
- -
- Using site.xml to manage the links -

As you will have discovered, using pathnames with ../../ etc. will - get very nasty. Real problems occur when you use a smart text editor - that tries to manage the links for you. For example, it will have - trouble linking to the raw content files which are not yet in their - final location. -

-

- Links and filenames are bound to change and re-arrange. It is - essential to only change those links in one central place, not in every - document. -

-

- The "site.xml" configuration file to the rescue. It maps - symbolic names to actual resources. -

- -
- Basic link to internal generated pages -

This single entry ...

- ]]> -

- enables a simple link to a generated document, which corresponds to - content/xdocs/index.xml ... -

- ]]>site:index]]> -
- -
- Group some items -

This compound entry ...

- - - ... - -]]> -

- enables a link to a generated document, which corresponds to - content/xdocs/samples/index.xml ... -

- ]]>site:samples]]> -

- and a link to a generated document, which corresponds to - content/xdocs/samples/faq.xml ... -

- -]]>site:faq]]> -which can also be a complete reference -]]>site:samples/faq]]> - -
- -
- Fragment identifiers -

This compound entry ...

- - - -
- - ... - -]]> -

- enables a link to a fragment identifier within the - samples/sample.html document ... -

- ]]>site:samples/sample/section]]> -
- -
- Define items for raw content -

This entry ...

- ]]> -

- enables a link to a raw document, which corresponds to - content/hello.pdf ... -

- ]]>site:hello_print]]> - -
- -
- External links -

This compound entry ...

- - - - - - -]]> -

- enables a link to an external URL ... -

- ]]>ext:forrest]]> -

- and a link to another external URL ... -

- -]]>ext:linking]]> -which can also be a complete reference -]]>ext:forrest/linking]]> - -

- and a link to another external URL with a fragment identifier ... -

- -]]>ext:webapp]]> -which can also be a complete reference -]]>ext:forrest/webapp]]> - -
-
- - diff --git a/crawler/kiss/docs/content/xdocs/samples/sample.xml b/crawler/kiss/docs/content/xdocs/samples/sample.xml deleted file mode 100644 index 52774f7d..00000000 --- a/crawler/kiss/docs/content/xdocs/samples/sample.xml +++ /dev/null @@ -1,407 +0,0 @@ - - - - -
- The Apache Forrest xdocs document-v2.0 DTD - The content of this document doesn't make any sense at all. - This is a demonstration document using all possible elements in - the current Apache Forrest xdocs document-v20.dtd - -
- - - This is a demonstration document using all possible elements in the - current Apache Forrest xdocs document-v20.dtd - (See the DTD changes section at the bottom.) - -
- Sample Content -

Hint: See the xml source to see how the various - elements are used and see the - - DTD reference documentation. -

-
- Block and inline elements -

This is a simple paragraph. Most documents contain a fair amount of - paragraphs. Paragraphs are called <p>.

-

With the <p xml:space="preserve"> attribute, you can declare - that whitespace should be preserved, without implying it is in any other - way special.

-

- This next paragraph has a class attribute of 'quote'. CSS can - be used to present this <p class='quote'> in - a different style than the other paragraphs. The handling of - this quoted paragraph is defined in the <extra-css> - element in the skinconf.xml. -

-

- Anyway, like I was sayin', shrimp is the fruit of the sea. You can - barbecue it, boil it, broil it, bake it, sautee it. Dey's uh, - shrimp-kabobs, shrimp creole, shrimp gumbo. Pan fried, deep fried, - stir-fried. There's pineapple shrimp, lemon shrimp, coconut shrimp, - pepper shrimp, shrimp soup, shrimp stew, shrimp salad, shrimp and - potatoes, shrimp burger, shrimp sandwich. That- that's about it. -

-

A number of in-line elements are available in the DTD, we will show them - inside an unordered list (<ul>):

-
    -
  • Here is a simple list item (<li>).
  • -
  • Have you seen the use of the <code> element in the - previous item?
  • -
  • Also, we have <sub> and <sup> - elements to show content above or below the text - baseline.
  • -
  • There is a facility to emphasize certain words using the - <em> <strong> - elements.
  • -
  • We can use - - <icon>s too.
  • -
  • Another possibility is the <img> element: - another feather, - which offers the ability to refer to an image map.
  • -
  • We have elements for hyperlinking: -
    -
    <a href="faq.html">
    -
    Use this to - link - to another document. As per normal, this will open the new document - in the same browser window.
    - -
    <a href="#section">
    -
    Use this to - link - to the named anchor in the current document. -
    - -
    <a href="faq.html#forrest">
    -
    Use this to - link - to another document and go to the named anchor. This will open - the new document in the same browser window. -
    -
    Targeted window control with jump and fork.
    -
    See demonstration - using class attribute on links. -
    -
  • - -
  • Oh, by the way, a definition list <dl> was used inside - the previous list item. We could put another -
      -
    • unordered list
    • -
    • inside the list item
    • -
    - - - - -
    A sample nested table
    Or even tables.. -
    inside tables..
    -
    or inside lists, but I believe this liberty gets quickly quite - hairy as you see.
    -
  • -
-

So much for the in-line elements; now let's look at some paragraph-level - elements.

- The <fixme> element is used for stuff - which still needs work. Mind the author attribute! - Use the <note> element to draw attention to something, e.g. ...The <code> element is used when the author can't - express himself clearly using normal sentences ;-) - Sleep deprivation can be the result of being involved in an open - source project. (a.k.a. the <warning> element). - - If you want your own labels for notes and - warnings, specify them using the label attribute. - -

Apart from unordered lists, we have ordered lists too, of course.

-
    -
  1. Item 1
  2. -
  3. Item 2
  4. -
  5. This should be 3 if my math is still OK.
  6. -
-
- -
- Various presentation formats - -

This sample document, written in document-v20 XML can be presented - via Forrest in a number of different formats. The links in the - following list show this document in each of the currently available - formats.

- -

Each of the formats can be made available as a link near the top of - the page. Actual placement of those links depends on the skin - currently in use. Those links are enabled via the - <disable-XXX-link> elements in the skinconf.xml.

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Presentation FormatDescriptionskinconf.xml Element
HTMLThis document in HTML format. Always generated by default. Cannot be turned off.
XMLThis document in its raw XML format.<disable-xml-link>. By default, set to true, meaning - that this link will not be shown.
PDFThis document as Adobe PDF<disable-pdf-link>. By default, set to false, meaning - that this link will be shown.
Text

This document as straight text.

-

For additional information see the Forrest text-output - plugin.

<disable-txt-link>. By default, set to true, meaning - that this link will not be shown.
POD

This document as Perl POD (Plain Old Documentation). Text - with minimal formatting directives. If on a *nix system with perl - installed, see "man perlpod".

-

For additional information see the Forrest pod-output - plugin.

<disable-pod-link>. By default, set to true, meaning - that this link will not be shown.
-
-
- Using sections -

You can use sections to put some structure in your document. For some - strange historical reason, the section title is an attribute of the - <section> element.

-
-
- Sections, the sequel -

Just some second section.

-
- Section 2.1 -

Which contains a subsection (2.1).

-
-
- -
- Showing preformatted source code -

Enough about these sections. Let's have a look at more interesting - elements, <source> for instance:

- -// This example is from the book _Java in a Nutshell_ by David Flanagan. -// Written by David Flanagan. Copyright (c) 1996 O'Reilly & Associates. -// You may study, use, modify, and distribute this example for any purpose. -// This example is provided WITHOUT WARRANTY either expressed or implied. - -import java.applet.*; // Don't forget these import statements! -import java.awt.*; - -public class FirstApplet extends Applet { - // This method displays the applet. - // The Graphics class is how you do all drawing in Java. - public void paint(Graphics g) { - g.drawString("Hello World", 25, 50); - } -} -

CDATA sections are used within - <source> elements so that you can write pointy - brackets without needing to escape them with messy - &lt; entities ... -

- - easy - -]]> -

Please take care to still use a sensible line-length within your - source elements.

-
- -
- Using tables -

And now for a table:

- - - - - - - - - - - - - - - - -
Table caption
heading cell 1heading cell 2heading cell 3
data cellthis data cell spans two columns
- Tables can be nested: - - - - - - - - - - -
column 1column 2
cell Acell B
-
-
  • and can include most other elements
  • such as lists
-
-
- - -
- Using figures -

And a <figure> to end all of this. - Note that this can also be implemented with an - <img> element. -

-
-
- -
- -
- DTD changes -

See the generated - - DTD reference documentation. -

-
- Changes between document-v13 and document-v20 - -
-
- Changes between document-v12 and document-v13 -

- All v1.2 docs will work fine as v1.3 DTD. The main change is the - addition of a @class attribute to every element, which enables the - "extra-css" section in the skinconf to be put to good use. -

-
-
- Changes between document-v11 and document-v12 -

- doc-v12 enhances doc-v11 by relaxing various restrictions that were - found to be unnecessary. -

-
    -
  • - Links (link|jump|fork) and inline elements (br|img|icon|acronym) are - allowed inside title. -
  • -
  • - Paragraphs (p|source|note|warning|fixme), table and figure|anchor are - allowed inside li. -
  • -
  • - Paragraphs (p|source|note|warning|fixme), lists (ol|ul|dl), table, - figure|anchor are allowed inside definition lists (dd) and tables (td - and th). -
  • -
  • - Inline content - (strong|em|code|sub|sup|br|img|icon|acronym|link|jump|fork) is - allowed in strong and em. -
  • -
-
-
- -
- This is a legal notice, so it is important. -
-
diff --git a/crawler/kiss/docs/content/xdocs/samples/static.xml b/crawler/kiss/docs/content/xdocs/samples/static.xml deleted file mode 100644 index 9553525c..00000000 --- a/crawler/kiss/docs/content/xdocs/samples/static.xml +++ /dev/null @@ -1,62 +0,0 @@ - - - - -
- Static content - including raw un-processed files and documents -
- -
- Linking to static content -

- You can place some types of raw content into the xdocs directory. For example, - you can place a PDF file in src/documentation/content/xdocs and link - to it normally, - <a href="../hello.pdf">hello.pdf</a> - However, note that if the file is one that Forrest is able to process, for example - an HTML file, these files will be processed accordingly.

- -

- It is also worth noting that files in the xdocs directory will only be copied - into your final site if there is a link to them somewhere in the site. See the next - section for details of how to include content that is not linked.

- -

- For more information see the - Linking demonstration.

-
- -
- Including Static Content that is Not Linked - -

- You can include raw HTML, PDFs, plain-text, and other files in your final site by - placing them in the src/documentation/content directory. Files in this - directory will be copied over automatically but will not be processed in any way by - Forrest, that is they will be linked to as raw files.

- -

- You can also have sub-directories such as - src/documentation/content/samples/subdir/ which - reflects your main - xdocs/ tree. The raw files will then end up - beside your documents. -

-
- -
diff --git a/crawler/kiss/docs/content/xdocs/samples/subdir/book-sample.xml b/crawler/kiss/docs/content/xdocs/samples/subdir/book-sample.xml deleted file mode 100644 index f5396c66..00000000 --- a/crawler/kiss/docs/content/xdocs/samples/subdir/book-sample.xml +++ /dev/null @@ -1,47 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/crawler/kiss/docs/content/xdocs/samples/subdir/hello.zip b/crawler/kiss/docs/content/xdocs/samples/subdir/hello.zip deleted file mode 100644 index b4fb6aae..00000000 Binary files a/crawler/kiss/docs/content/xdocs/samples/subdir/hello.zip and /dev/null differ diff --git a/crawler/kiss/docs/content/xdocs/samples/subdir/index.xml b/crawler/kiss/docs/content/xdocs/samples/subdir/index.xml deleted file mode 100644 index d060da5b..00000000 --- a/crawler/kiss/docs/content/xdocs/samples/subdir/index.xml +++ /dev/null @@ -1,38 +0,0 @@ - - - - -
- Page generated from a sub-directory - - - - -
- - -
- A sub-directory -

This was generated from a sub-directory.

-

When creating new subdirectories, remember that these must - be declared in site.xml or each directory must have a book.xml file. -

-
- -
- diff --git a/crawler/kiss/docs/content/xdocs/samples/usemap.xml b/crawler/kiss/docs/content/xdocs/samples/usemap.xml deleted file mode 100644 index 50bf2afa..00000000 --- a/crawler/kiss/docs/content/xdocs/samples/usemap.xml +++ /dev/null @@ -1,61 +0,0 @@ - - - - -
- Interactive client-side imagemaps - the usemap attribute -
- -
- Imagemap demo -

- usemap demo -

-

- - Rectangle - Circle - Default - -

-
-
- Source code - - usemap demo -

-

- - Rectangle - Circle - Default - -

-]]> -
- -
diff --git a/crawler/kiss/docs/content/xdocs/site.xml b/crawler/kiss/docs/content/xdocs/site.xml deleted file mode 100644 index 4c286509..00000000 --- a/crawler/kiss/docs/content/xdocs/site.xml +++ /dev/null @@ -1,109 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/crawler/kiss/docs/content/xdocs/tabs.xml b/crawler/kiss/docs/content/xdocs/tabs.xml deleted file mode 100644 index e56b9c56..00000000 --- a/crawler/kiss/docs/content/xdocs/tabs.xml +++ /dev/null @@ -1,53 +0,0 @@ - - - - - - - - - - - - - - - - - diff --git a/crawler/kiss/docs/resources/schema/catalog.xcat b/crawler/kiss/docs/resources/schema/catalog.xcat deleted file mode 100644 index 91da4103..00000000 --- a/crawler/kiss/docs/resources/schema/catalog.xcat +++ /dev/null @@ -1,29 +0,0 @@ - - - - - - - - - - - diff --git a/crawler/kiss/docs/resources/schema/hello-v10.dtd b/crawler/kiss/docs/resources/schema/hello-v10.dtd deleted file mode 100644 index 4fab270f..00000000 --- a/crawler/kiss/docs/resources/schema/hello-v10.dtd +++ /dev/null @@ -1,51 +0,0 @@ - - - - - - - - - - - - diff --git a/crawler/kiss/docs/resources/stylesheets/hello2document.xsl b/crawler/kiss/docs/resources/stylesheets/hello2document.xsl deleted file mode 100644 index 1a8e453b..00000000 --- a/crawler/kiss/docs/resources/stylesheets/hello2document.xsl +++ /dev/null @@ -1,39 +0,0 @@ - - - - - - - - - -
- - <xsl:value-of select="greeting"/> - -
- - - -
-
- -
diff --git a/crawler/kiss/docs/sitemap.xmap b/crawler/kiss/docs/sitemap.xmap deleted file mode 100644 index 76557f9b..00000000 --- a/crawler/kiss/docs/sitemap.xmap +++ /dev/null @@ -1,77 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/crawler/kiss/docs/skinconf.xml b/crawler/kiss/docs/skinconf.xml deleted file mode 100644 index a3127e6d..00000000 --- a/crawler/kiss/docs/skinconf.xml +++ /dev/null @@ -1,441 +0,0 @@ - - - - - - - - - - - - true - - false - - true - - true - - - true - - - true - - - false - - - true - .at. - - - true - - - KiSS Crawler - Automatic recording for KiSS harddisk recorders - http://kiss.wamblee.org - images/project.png - - - - wamblee.org - - http://wamblee.org - images/group.png - - - - - - - - - - - 2006 - wamblee.org - - - - - - - - - - - - - - - - - - Send feedback about the website to: - - - - - - - - - p.quote { - margin-left: 2em; - padding: .5em; - background-color: #f0f0f0; - font-family: monospace; - } - - #footer a { color: #0F3660; } - #footer a:visited { color: #009999; } - - - - - - - - - - - - - - - - - - - - - - - - Page 1 - - - - 1in - 1in - 1.25in - 1in - - - - false - - - false - - - - - - Built with Apache Forrest - http://forrest.apache.org/ - images/built-with-forrest-button.png - 88 - 31 - - - - - - diff --git a/crawler/kiss/docs/translations/langcode.xml b/crawler/kiss/docs/translations/langcode.xml deleted file mode 100644 index ed09f417..00000000 --- a/crawler/kiss/docs/translations/langcode.xml +++ /dev/null @@ -1,26 +0,0 @@ - - - - - English - Espanol - Italiano - diff --git a/crawler/kiss/docs/translations/languages_en.xml b/crawler/kiss/docs/translations/languages_en.xml deleted file mode 100644 index 1037e0e4..00000000 --- a/crawler/kiss/docs/translations/languages_en.xml +++ /dev/null @@ -1,22 +0,0 @@ - - - - English - Spanish - Dutch - diff --git a/crawler/kiss/docs/translations/languages_es.xml 
b/crawler/kiss/docs/translations/languages_es.xml deleted file mode 100644 index a5d17dd0..00000000 --- a/crawler/kiss/docs/translations/languages_es.xml +++ /dev/null @@ -1,22 +0,0 @@ - - - - Inglés - Español - Holandés - diff --git a/crawler/kiss/docs/translations/menu.xml b/crawler/kiss/docs/translations/menu.xml deleted file mode 100644 index 003da620..00000000 --- a/crawler/kiss/docs/translations/menu.xml +++ /dev/null @@ -1,33 +0,0 @@ - - - - About - Index - Changes - Todo - Samples - Apache document - Static content - Linking - Wiki page - Ihtml page - Ehtml page - FAQ - Simplifed Docbook - XSP page - diff --git a/crawler/kiss/docs/translations/menu_af.xml b/crawler/kiss/docs/translations/menu_af.xml deleted file mode 100644 index 108faa75..00000000 --- a/crawler/kiss/docs/translations/menu_af.xml +++ /dev/null @@ -1,33 +0,0 @@ - - - - Aangaande - Inhoud - Veranderinge - Om te doen - Voorbeelde - Apache dokument - Statise Inhoud - Linking - Wiki bladsy - Ihtml bladsy - Ehtml bladsy - FAQ - Vereenvoudigde Docbook - XSP bladsy - diff --git a/crawler/kiss/docs/translations/menu_de.xml b/crawler/kiss/docs/translations/menu_de.xml deleted file mode 100644 index 3cb39f49..00000000 --- a/crawler/kiss/docs/translations/menu_de.xml +++ /dev/null @@ -1,33 +0,0 @@ - - - - Über - Index - Änderungen - Todo - Beispiele - Apache Dokumentationsseite - Statischer Inhalt - Linking - Wiki Seite - ihtml Seite - ehtml Seite - FAQ - Vereinfachte Docbook - XSP Seite - diff --git a/crawler/kiss/docs/translations/menu_es.xml b/crawler/kiss/docs/translations/menu_es.xml deleted file mode 100644 index 690808dd..00000000 --- a/crawler/kiss/docs/translations/menu_es.xml +++ /dev/null @@ -1,33 +0,0 @@ - - - - Acerca de - Indice - Cambios - Tareas pendientes - Ejemplos - Documento Apache - Contenido Estático - Linking - Página Wiki - Página ihtml - Página ehtml - Preguntas Frecuentes - Página Simplifed Docbook - Página XSP - diff --git a/crawler/kiss/docs/translations/menu_it.xml 
b/crawler/kiss/docs/translations/menu_it.xml deleted file mode 100644 index fce8cccb..00000000 --- a/crawler/kiss/docs/translations/menu_it.xml +++ /dev/null @@ -1,33 +0,0 @@ - - - - Riguardo a - Indice - Cambiamenti - Cose da fare - Esempi - Apache document - Contenuto Statico - Linking - Pagina Wiki - Pagina ihtml - Pagina ehtml - Domande frequenti - Simplifed Docbook - Pagina XSP - diff --git a/crawler/kiss/docs/translations/menu_no.xml b/crawler/kiss/docs/translations/menu_no.xml deleted file mode 100644 index 7b094ed2..00000000 --- a/crawler/kiss/docs/translations/menu_no.xml +++ /dev/null @@ -1,33 +0,0 @@ - - - - Om - Indeks - Endringer - Oppgave liste - Eksempler - Apache Dokument - Statisk innhold - Linking - Wiki side - ihtml side - ehtml side - FAQ - Simplifed Docbook - XSP side - diff --git a/crawler/kiss/docs/translations/menu_ru.xml b/crawler/kiss/docs/translations/menu_ru.xml deleted file mode 100644 index 7454faca..00000000 --- a/crawler/kiss/docs/translations/menu_ru.xml +++ /dev/null @@ -1,33 +0,0 @@ - - - - О проекте - Содержание - Изменения - План - Примеры - Страница документа Apache - Статическое содержание - Linking - Страница Wiki - Страница ihtml - Страница ehtml - Вопросы/Ответы - Docbook страница - XSP страница - diff --git a/crawler/kiss/docs/translations/menu_sk.xml b/crawler/kiss/docs/translations/menu_sk.xml deleted file mode 100644 index 801c5e27..00000000 --- a/crawler/kiss/docs/translations/menu_sk.xml +++ /dev/null @@ -1,33 +0,0 @@ - - - - O programe - Zoznám - Zmeny - Úlohy - Príklady - Apache Document - Statický Obsah - Linking - Wiki stránka - ihtml stránka - ehtml stránka - Casté Otázky - Simplifed Docbook stránka - XSP stránka - diff --git a/crawler/kiss/docs/translations/tabs.xml b/crawler/kiss/docs/translations/tabs.xml deleted file mode 100644 index 1cfbca61..00000000 --- a/crawler/kiss/docs/translations/tabs.xml +++ /dev/null @@ -1,22 +0,0 @@ - - - - Home - Samples - Apache XML Projects - diff --git 
a/crawler/kiss/docs/translations/tabs_es.xml b/crawler/kiss/docs/translations/tabs_es.xml deleted file mode 100644 index adf5a65a..00000000 --- a/crawler/kiss/docs/translations/tabs_es.xml +++ /dev/null @@ -1,22 +0,0 @@ - - - - Inicio - Ejemplos - Projectos XML Apache - diff --git a/crawler/kiss/forrest.properties b/crawler/kiss/forrest.properties deleted file mode 100644 index ab7e802a..00000000 --- a/crawler/kiss/forrest.properties +++ /dev/null @@ -1,131 +0,0 @@ -# Copyright 2002-2005 The Apache Software Foundation or its licensors, -# as applicable. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -############## -# Properties used by forrest.build.xml for building the website -# These are the defaults, un-comment them only if you need to change them. 
-############## - -# Prints out a summary of Forrest settings for this project -#forrest.echo=true - -# Project name (used to name .war file) -#project.name=my-project - -# Specifies name of Forrest skin to use -# See list at http://forrest.apache.org/docs/skins.html -#project.skin=pelt - -# Descriptors for plugins and skins -# comma separated list, file:// is supported -#forrest.skins.descriptors=http://forrest.apache.org/skins/skins.xml,file:///c:/myskins/skins.xml -#forrest.plugins.descriptors=http://forrest.apache.org/plugins/plugins.xml,http://forrest.apache.org/plugins/whiteboard-plugins.xml - -############## -# behavioural properties -#project.menu-scheme=tab_attributes -#project.menu-scheme=directories - -############## -# layout properties - -# Properties that can be set to override the default locations -# -# Parent properties must be set. This usually means uncommenting -# project.content-dir if any other property using it is uncommented - -project.build-dir=${project.home}/target/forrest - -#project.status=status.xml -project.content-dir=docs -#project.raw-content-dir=${project.content-dir}/content -#project.conf-dir=${project.content-dir}/conf -#project.sitemap-dir=${project.content-dir} -#project.xdocs-dir=${project.content-dir}/content/xdocs -#project.resources-dir=${project.content-dir}/resources -#project.stylesheets-dir=${project.resources-dir}/stylesheets -#project.images-dir=${project.resources-dir}/images -#project.schema-dir=${project.resources-dir}/schema -#project.skins-dir=${project.content-dir}/skins -#project.skinconf=${project.content-dir}/skinconf.xml -#project.lib-dir=${project.content-dir}/lib -#project.classes-dir=${project.content-dir}/classes -#project.translations-dir=${project.content-dir}/translations - -############## -# validation properties - -# This set of properties determine if validation is performed -# Values are inherited unless overridden. -# e.g. if forrest.validate=false then all others are false unless set to true. 
-#forrest.validate=true -#forrest.validate.xdocs=${forrest.validate} -#forrest.validate.skinconf=${forrest.validate} -#forrest.validate.sitemap=${forrest.validate} -#forrest.validate.stylesheets=${forrest.validate} -#forrest.validate.skins=${forrest.validate} -#forrest.validate.skins.stylesheets=${forrest.validate.skins} - -# *.failonerror=(true|false) - stop when an XML file is invalid -#forrest.validate.failonerror=true - -# *.excludes=(pattern) - comma-separated list of path patterns to not validate -# e.g. -#forrest.validate.xdocs.excludes=samples/subdir/**, samples/faq.xml -#forrest.validate.xdocs.excludes= - - -############## -# General Forrest properties - -# The URL to start crawling from -#project.start-uri=linkmap.html - -# Set logging level for messages printed to the console -# (DEBUG, INFO, WARN, ERROR, FATAL_ERROR) -#project.debuglevel=ERROR - -# Max memory to allocate to Java -#forrest.maxmemory=64m - -# Any other arguments to pass to the JVM. For example, to run on an X-less -# server, set to -Djava.awt.headless=true -#forrest.jvmargs= - -# The bugtracking URL - the issue number will be appended -#project.bugtracking-url=http://issues.apache.org/bugzilla/show_bug.cgi?id= -#project.bugtracking-url=http://issues.apache.org/jira/browse/ - -# The issues list as rss -#project.issues-rss-url= - -#I18n Property. Based on the locale request for the browser. -#If you want to use it for static site then modify the JVM system.language -# and run once per language -#project.i18n=true - -# The names of plugins that are required to build the project -# comma separated list (no spaces) -# You can request a specific version by appending "-VERSION" to the end of -# the plugin name. If you exclude a version number the latest released version -# will be used, however, be aware that this may be a development version. In -# a production environment it is recomended that you specify a known working -# version. 
-# Run "forrest available-plugins" for a list of plug-ins currently available -project.required.plugins=org.apache.forrest.plugin.output.pdf - -# Proxy configuration -# proxy.host= -# proxy.port= diff --git a/crawler/kiss/pom.xml b/crawler/kiss/pom.xml deleted file mode 100644 index 9bfa02f8..00000000 --- a/crawler/kiss/pom.xml +++ /dev/null @@ -1,35 +0,0 @@ - - - - org.wamblee - wamblee-crawler - 0.2-SNAPSHOT - - - 4.0.0 - org.wamblee - wamblee-crawler-kiss - jar - /crawler/kiss - http://wamblee.org - - - - org.wamblee - wamblee-crawler-basic - - - org.wamblee - wamblee-support-spring - 0.2-SNAPSHOT - - - commons-email - commons-email - - - - - diff --git a/crawler/kiss/src/main/java/org/wamblee/crawler/kiss/guide/AbstractVisitor.java b/crawler/kiss/src/main/java/org/wamblee/crawler/kiss/guide/AbstractVisitor.java deleted file mode 100644 index eae55898..00000000 --- a/crawler/kiss/src/main/java/org/wamblee/crawler/kiss/guide/AbstractVisitor.java +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Copyright 2005 the original author or authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.wamblee.crawler.kiss.guide; - -/** - * Abstract visitor of the tv guide with default looping behavior. - * - * @author Erik Brakkee - */ -public abstract class AbstractVisitor implements Visitor { - - /** - * Constructs the visitor. - * - */ - protected AbstractVisitor() { - // Empty - } - - /** - * Visits the channel by visiting all programs of the channel. 
- * - * @param aChannel - * Channel to visit. - */ - public void visitChannel(Channel aChannel) { - for (Program program : aChannel.getPrograms()) { - program.accept(this); - } - } - - /** - * Visits the TV guide by visiting all channels of the guide. - * - * @param aGuide - * TV guide to visit. - */ - public void visitTvGuide(TVGuide aGuide) { - for (Channel channel : aGuide.getChannels()) { - channel.accept(this); - } - } - -} diff --git a/crawler/kiss/src/main/java/org/wamblee/crawler/kiss/guide/Channel.java b/crawler/kiss/src/main/java/org/wamblee/crawler/kiss/guide/Channel.java deleted file mode 100644 index c3d8061d..00000000 --- a/crawler/kiss/src/main/java/org/wamblee/crawler/kiss/guide/Channel.java +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Copyright 2005 the original author or authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.wamblee.crawler.kiss.guide; - -import java.util.Collections; -import java.util.List; - - -/** - * Represents the programme for a tv channel. - * - * @author Erik Brakkee - */ -public class Channel { - - /** - * TV channel name. - */ - private String _name; - - /** - * List of programs in chronological order. - */ - private List _programs; - - /** - * Constructs the channel. - * @param aName Channel name. - * @param aPrograms Programs. - */ - public Channel(String aName, List aPrograms) { - _name = aName; - _programs = aPrograms; - } - - /** - * Gets the channel name. - * @return channel name. 
- */ - public String getName() { - return _name; - } - - /** - * Gets the list of program. - * @return Programs. - */ - public List getPrograms() { - return Collections.unmodifiableList(_programs); - } - - /** - * Accepts a visitor. - * @param aVisitor Visitor. - */ - public void accept(Visitor aVisitor) { - aVisitor.visitChannel(this); - } -} diff --git a/crawler/kiss/src/main/java/org/wamblee/crawler/kiss/guide/MatchVisitor.java b/crawler/kiss/src/main/java/org/wamblee/crawler/kiss/guide/MatchVisitor.java deleted file mode 100644 index 53a84039..00000000 --- a/crawler/kiss/src/main/java/org/wamblee/crawler/kiss/guide/MatchVisitor.java +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Copyright 2005 the original author or authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.wamblee.crawler.kiss.guide; - -import java.util.ArrayList; -import java.util.List; - -import org.wamblee.conditions.Condition; - -/** - * Visitor which determines the interesting programs in the TV guide. - * - * @author Erik Brakkee - */ -public class MatchVisitor extends AbstractVisitor { - - /** - * Criterion that determines which programs are interesting. - */ - private Condition _matcher; - - /** - * List of interesting programs. - */ - private List _programs; - - /** - * Constructs the visitor. - * @param aMatcher Condition describing interesting programs. 
- */ - public MatchVisitor(Condition aMatcher) { - _matcher = aMatcher; - _programs = new ArrayList(); - } - - /* - * (non-Javadoc) - * - * @see org.wamblee.crawler.kiss.Visitor#visitProgram(org.wamblee.crawler.kiss.Program) - */ - public void visitProgram(Program aProgram) { - if (_matcher.matches(aProgram)) { - _programs.add(aProgram); - } - } - - /** - * Gets the list of interesting programs. To be called after applying - * the visitor on a tv guide. - * @return List of interesting programs. - */ - public List getMatches() { - return _programs; - } - -} diff --git a/crawler/kiss/src/main/java/org/wamblee/crawler/kiss/guide/PrintVisitor.java b/crawler/kiss/src/main/java/org/wamblee/crawler/kiss/guide/PrintVisitor.java deleted file mode 100644 index 5f054910..00000000 --- a/crawler/kiss/src/main/java/org/wamblee/crawler/kiss/guide/PrintVisitor.java +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright 2005 the original author or authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.wamblee.crawler.kiss.guide; - -import java.io.PrintStream; - - -/** - * Print visitor for pretty printing the TV guide. - * - * @author Erik Brakkee - */ -public class PrintVisitor extends AbstractVisitor { - - /** - * Stream to print the guide on. - */ - private PrintStream _stream; - - /** - * Constructs the print visitor. - * @param aStream Stream to print on. 
- */ - public PrintVisitor(PrintStream aStream) { - _stream = aStream; - } - - /* (non-Javadoc) - * @see org.wamblee.crawler.kiss.Visitor#visitProgram(org.wamblee.crawler.kiss.Program) - */ - public void visitProgram(Program aProgram) { - _stream.println(" " + aProgram.toString()); - } - - /* (non-Javadoc) - * @see org.wamblee.crawler.kiss.AbstractVisitor#visitChannel(org.wamblee.crawler.kiss.Channel) - */ - @Override - public void visitChannel(Channel aChannel) { - _stream.println(aChannel.getName()); - super.visitChannel(aChannel); - } -} diff --git a/crawler/kiss/src/main/java/org/wamblee/crawler/kiss/guide/Program.java b/crawler/kiss/src/main/java/org/wamblee/crawler/kiss/guide/Program.java deleted file mode 100644 index 0def7388..00000000 --- a/crawler/kiss/src/main/java/org/wamblee/crawler/kiss/guide/Program.java +++ /dev/null @@ -1,336 +0,0 @@ -/* - * Copyright 2005 the original author or authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.wamblee.crawler.kiss.guide; - -import java.util.Comparator; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.dom4j.DocumentFactory; -import org.dom4j.Element; -import org.wamblee.crawler.Action; -import org.wamblee.crawler.Page; -import org.wamblee.crawler.PageException; -import org.wamblee.crawler.kiss.main.SystemProperties; - -/** - * Represents a television program. 
- * - * @author Erik Brakkee - */ -public class Program { - - private static final String ELEM_PROGRAM = "program"; - - private static final String ELEM_NAME = "name"; - - private static final String ELEM_KEYWORDS = "keywords"; - - private static final String ELEM_DESCRIPTION = "description"; - - private static final String ELEM_CHANNEL = "channel"; - - private static final String ELEM_INTERVAL = "interval"; - - private static final String ELEM_END_TIME = "end"; - - private static final String ELEM_BEGIN_TIME = "begin"; - - /** - * Lexicographical comparison of programs based on (time, title, channel). - * - */ - public static class TimeComparator implements Comparator { - - /** - * Lexicographical comparison based on start time, program name, and - * channel. - * - * @param aProgram1 - * First program. - * @param aProgram2 - * Second program. - * @return See {@link Comparator#compare(T, T)} - */ - public int compare(Program aProgram1, Program aProgram2) { - int value = aProgram1.getInterval().getBegin().compareTo( - aProgram2.getInterval().getBegin()); - if (value != 0) { - return value; - } - value = aProgram1.getName().compareTo(aProgram2.getName()); - if (value != 0) { - return value; - } - return aProgram1.getChannel().compareTo(aProgram2.getChannel()); - } - } - - private static final Log LOG = LogFactory.getLog(Program.class); - - /** - * Name of the record action on the program details page. - */ - private static final String RECORD_ACTION = "record"; - - /** - * Result of recording a program. - * - */ - public enum RecordingResult { - /** - * Successfully recorded. - */ - OK, - - /** - * Already recorded program. - */ - DUPLICATE, - - /** - * Recording conflict with another program. - */ - CONFLICT, - - /** - * Program occurred in the past. - */ - OLDSHOW, - - /** - * Program could not be recorded for technical reasons. - */ - ERROR; - }; - - /** - * Indent string to use for pretty printing. 
- */ - private static final String INDENT = " "; - - /** - * Channel the program is on. - */ - private String _channel; - - /** - * Program name. - */ - private String _name; - - /** - * Program description. - */ - private String _description; - - /** - * Keywords or classification of the program. - */ - private String _keywords; - - /** - * Time interval for the program (from/to). - */ - private TimeInterval _interval; - - /** - * Action to execute to obtain program information and/or record the - * program. - */ - private Action _programInfo; - - /** - * Constructs the program. - * - * @param aChannel - * Channel name. - * @param aName - * Program name. - * @param aDescription - * Description. - * @param aKeywords - * Keywords/classification. - * @param aInterval - * Time interval. - * @param aProgramInfo - * Action to execute for detailed program information or for - * recording the page. - */ - public Program(String aChannel, String aName, String aDescription, - String aKeywords, TimeInterval aInterval, Action aProgramInfo) { - _channel = aChannel; - _name = aName; - _description = aDescription; - _keywords = aKeywords; - _interval = aInterval; - _programInfo = aProgramInfo; - } - - /** - * Gets the channel. - * - * @return Channel. - */ - public String getChannel() { - return _channel; - } - - /** - * Gets the program name. - * - * @return Name. - */ - public String getName() { - return _name; - } - - /** - * Gets the description. - * - * @return Description. - */ - public String getDescription() { - return _description; - } - - /** - * Gets the keywords/classification. - * - * @return Keywords/classification - */ - public String getKeywords() { - return _keywords; - } - - /** - * Gets the time interval. - * - * @return Time interval. - */ - public TimeInterval getInterval() { - return _interval; - } - - /** - * Checks if recording is possible. - * - * @return True iff recording is possible. 
- */ - public boolean isRecordingPossible() { - try { - return _programInfo.execute().getAction(RECORD_ACTION) != null; - } catch (PageException e) { - return false; - } - } - - /** - * Records the show. - * - * @return Status describing the result of recording. - */ - public RecordingResult record() { - LOG.info("Recording " + this); - if (SystemProperties.isRecordDisabled()) { - return RecordingResult.OK; - } - try { - Action record = _programInfo.execute().getAction(RECORD_ACTION); - if (record == null) { - LOG.info(" result: " + RecordingResult.OLDSHOW); - return RecordingResult.OLDSHOW; - } - Page result = record.execute(); - RecordingResult recordingResult = RecordingResult.valueOf(result - .getContent().getText()); - LOG.info(" result: " + recordingResult); - return recordingResult; - } catch (PageException e) { - LOG.warn("Technical problem recording program: '" + this + "'", e); - LOG.info(" result: " + RecordingResult.ERROR); - return RecordingResult.ERROR; - } - } - - /** - * Accepts the visitor. - * - * @param aVisitor - * Visitor. - */ - public void accept(Visitor aVisitor) { - aVisitor.visitProgram(this); - } - - /* - * (non-Javadoc) - * - * @see java.lang.Object#toString() - */ - @Override - public String toString() { - return _interval + " - " + _name + " (" + _channel + "/" + _keywords - + ")" + "\n" - + (INDENT + _description).replaceAll("\n", "\n" + INDENT); - } - - /* - * (non-Javadoc) - * - * @see java.lang.Object#equals(java.lang.Object) - */ - @Override - public boolean equals(Object aObject) { - if (!(aObject instanceof Program)) { - return false; - } - Program program = (Program) aObject; - return getName().equals(program.getName()) - && _programInfo.equals(program._programInfo); - } - - /* - * (non-Javadoc) - * - * @see java.lang.Object#hashCode() - */ - @Override - public int hashCode() { - return getName().hashCode(); - } - - /** - * Converts program information to XML. - * - * @return XML representation of program information. 
- */ - public Element asXml() { - DocumentFactory factory = DocumentFactory.getInstance(); - Element program = factory.createElement(ELEM_PROGRAM); - program.addElement(ELEM_NAME).setText(getName()); - program.addElement(ELEM_DESCRIPTION).setText(getDescription()); - program.addElement(ELEM_KEYWORDS).setText(getKeywords()); - program.addElement(ELEM_CHANNEL).setText(getChannel()); - Element interval = program.addElement(ELEM_INTERVAL); - interval.addElement(ELEM_BEGIN_TIME).setText( - getInterval().getBegin().toString()); - interval.addElement(ELEM_END_TIME).setText( - getInterval().getEnd().toString()); - return program; - } -} diff --git a/crawler/kiss/src/main/java/org/wamblee/crawler/kiss/guide/TVGuide.java b/crawler/kiss/src/main/java/org/wamblee/crawler/kiss/guide/TVGuide.java deleted file mode 100644 index dcb79d8e..00000000 --- a/crawler/kiss/src/main/java/org/wamblee/crawler/kiss/guide/TVGuide.java +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Copyright 2005 the original author or authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.wamblee.crawler.kiss.guide; - -import java.util.Collections; -import java.util.List; - - -/** - * The TV guide. - * - * @author Erik Brakkee - */ -public class TVGuide { - - /** - * List of channels. - */ - private List _channels; - - /** - * Constructs the guide. - * @param aChannels Channels of the guide. 
- */ - public TVGuide(List aChannels) { - _channels = aChannels; - } - - /** - * Gets the channels. - * @return Channels. - */ - public List getChannels() { - return Collections.unmodifiableList(_channels); - } - - /** - * Accepts the visitor. - * @param aVisitor Visitor. - */ - public void accept(Visitor aVisitor) { - aVisitor.visitTvGuide(this); - } - -} diff --git a/crawler/kiss/src/main/java/org/wamblee/crawler/kiss/guide/Time.java b/crawler/kiss/src/main/java/org/wamblee/crawler/kiss/guide/Time.java deleted file mode 100644 index df8f666b..00000000 --- a/crawler/kiss/src/main/java/org/wamblee/crawler/kiss/guide/Time.java +++ /dev/null @@ -1,149 +0,0 @@ -/* - * Copyright 2005 the original author or authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.wamblee.crawler.kiss.guide; - -import java.text.DecimalFormat; -import java.text.NumberFormat; - -/** - * TIme at which a program starts or ends. - * - * @author Erik Brakkee - */ -public class Time implements Comparable { - - /** - * - */ - private static final int HOURS_PER_DAY = 24; - - /** - * - */ - private static final int EARLY_HOUR = 3; - - /** - * Number of seconds per minute. - */ - private static final double SECONDS_PER_MINUTE = 60.0; - - /** - * Hour of the time. - */ - private int _hour; - - /** - * Minute of the hour. - */ - private int _minute; - - /** - * Constructs the time. - * - * @param aHour - * Hour. - * @param aMinute - * Minute. 
- */ - public Time(int aHour, int aMinute) { - _hour = aHour; - _minute = aMinute; - } - - /** - * Gets the hour. - * - * @return Hour. - */ - public int getHour() { - return _hour; - } - - /** - * Gets te minute. - * - * @return Minute. - */ - public int getMinute() { - return _minute; - } - - /* - * (non-Javadoc) - * - * @see java.lang.Object#toString() - */ - @Override - public String toString() { - NumberFormat format = new DecimalFormat("00"); - return format.format(_hour) + ":" + format.format(_minute); - } - - /** - * Convert time to floating point value. Useful for comparing two times. - * - * @return Converted value. - */ - float asFloat() { - int hour = _hour; - // Hack to make sure that programs appearing shortly after midnight are - // sorted - // after those running during the day. - if (hour <= EARLY_HOUR) { - hour += HOURS_PER_DAY; - } - return (float) hour + (float) _minute / (float) SECONDS_PER_MINUTE; - } - - /* - * (non-Javadoc) - * - * @see java.lang.Object#equals(java.lang.Object) - */ - @Override - public boolean equals(Object aObject) { - if (!(aObject instanceof Time)) { - return false; - } - return toString().equals(aObject.toString()); - } - - /** - * Compares based on time. - * - * @param aObject - * Time object to compare to. - * @return See {@link Comparable#compareTo(T)}. 
- */ - public int compareTo(Object aObject) { - if (!(aObject instanceof Time)) { - throw new IllegalArgumentException("object not an instance of Time"); - } - Time time = (Time) aObject; - return new Float(asFloat()).compareTo(new Float(time.asFloat())); - } - - /* - * (non-Javadoc) - * - * @see java.lang.Object#hashCode() - */ - @Override - public int hashCode() { - return toString().hashCode(); - } -} diff --git a/crawler/kiss/src/main/java/org/wamblee/crawler/kiss/guide/TimeInterval.java b/crawler/kiss/src/main/java/org/wamblee/crawler/kiss/guide/TimeInterval.java deleted file mode 100644 index 71a17802..00000000 --- a/crawler/kiss/src/main/java/org/wamblee/crawler/kiss/guide/TimeInterval.java +++ /dev/null @@ -1,134 +0,0 @@ -/* - * Copyright 2005 the original author or authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.wamblee.crawler.kiss.guide; - -/** - * Time interval. - * - * @author Erik Brakkee - */ -public class TimeInterval { - - /** - * Begin time. - */ - private Time _begin; - - /** - * End time. - */ - private Time _end; - - /** - * Construts the interval. - * - * @param aBegin - * Start time. - * @param aEnd - * End time. - */ - public TimeInterval(Time aBegin, Time aEnd) { - _begin = aBegin; - _end = aEnd; - } - - /** - * Gets the begin time. - * - * @return Begin time. - */ - public Time getBegin() { - return _begin; - } - - /** - * Gets the end time. - * - * @return End time. 
- */ - public Time getEnd() { - return _end; - } - - /* - * (non-Javadoc) - * - * @see java.lang.Object#toString() - */ - @Override - public String toString() { - return _begin + " - " + _end; - } - - /** - * Determines if there is an overlap between the current interval and given - * one. - * - * @param aInterval - * Interval to compare with. - * @return True iff there is overlap - */ - public boolean overlap(TimeInterval aInterval) { - - if (isUncertain() || aInterval.isUncertain()) { - // Optimistic assume there is no overlap if one of the intervals is - // uncertain. - return false; - } - - if (_end.asFloat() <= aInterval._begin.asFloat() - || aInterval._end.asFloat() <= _begin.asFloat()) { - return false; - } - - return true; - } - - /** - * Determines if the actual time that the program corresponds to is - * uncertain due to the representation of a period of more than 24 hours - * using a 24 hour clock. - * - * @return True iff the interval is uncertain. - */ - boolean isUncertain() { - return _begin.asFloat() > _end.asFloat(); - } - - /* - * (non-Javadoc) - * - * @see java.lang.Object#equals(java.lang.Object)j - */ - @Override - public boolean equals(Object aObject) { - if (!(aObject instanceof TimeInterval)) { - return false; - } - return aObject.toString().equals(aObject.toString()); - } - - /* - * (non-Javadoc) - * - * @see java.lang.Object#hashCode() - */ - @Override - public int hashCode() { - return _begin.hashCode(); - } -} diff --git a/crawler/kiss/src/main/java/org/wamblee/crawler/kiss/guide/Visitor.java b/crawler/kiss/src/main/java/org/wamblee/crawler/kiss/guide/Visitor.java deleted file mode 100644 index 1629f70d..00000000 --- a/crawler/kiss/src/main/java/org/wamblee/crawler/kiss/guide/Visitor.java +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Copyright 2005 the original author or authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.wamblee.crawler.kiss.guide; - - -/** - * Visitor of the TV guide. - * - * @author Erik Brakkee - */ -public interface Visitor { - - /** - * Visits a program. - * @param aProgram Program. - */ - void visitProgram(Program aProgram); - - /** - * Visits a channel. - * @param aChannel Channel. - */ - void visitChannel(Channel aChannel); - - /** - * Visits the guide. - * @param aGuide Guide. - */ - void visitTvGuide(TVGuide aGuide); -} diff --git a/crawler/kiss/src/main/java/org/wamblee/crawler/kiss/guide/package.html b/crawler/kiss/src/main/java/org/wamblee/crawler/kiss/guide/package.html deleted file mode 100644 index 5f9643fd..00000000 --- a/crawler/kiss/src/main/java/org/wamblee/crawler/kiss/guide/package.html +++ /dev/null @@ -1,16 +0,0 @@ - - - - wamblee.org - - - -This package contains the object model for the TV guide and the classes for -searching the TV guide for relevant programs. - - - -@since - - - - diff --git a/crawler/kiss/src/main/java/org/wamblee/crawler/kiss/main/InterestingProgramAction.java b/crawler/kiss/src/main/java/org/wamblee/crawler/kiss/main/InterestingProgramAction.java deleted file mode 100644 index 2f45460f..00000000 --- a/crawler/kiss/src/main/java/org/wamblee/crawler/kiss/main/InterestingProgramAction.java +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright 2005 the original author or authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.wamblee.crawler.kiss.main; - -import org.wamblee.crawler.kiss.guide.Program; - -/** - * Represents an action to execute for an interesting program. - * - * @author Erik Brakkee - */ -public class InterestingProgramAction implements ProgramAction { - - /** - * Category under which the interesting program is listed. - */ - private String _category; - - /** - * Constructs the action. - * - * @param aCategory - * Category of the program. Useful for structuring the output. - */ - public InterestingProgramAction(String aCategory) { - _category = aCategory; - } - - /* - * (non-Javadoc) - * - * @see org.wamblee.crawler.kiss.ProgramAction#execute(org.wamblee.crawler.kiss.Program, - * org.wamblee.crawler.kiss.Report) - */ - public void execute(Program aProgram, ProgramActionExecutor aReport) { - if (aProgram.isRecordingPossible()) { - aReport.interestingProgram(_category, aProgram); - } - } -} diff --git a/crawler/kiss/src/main/java/org/wamblee/crawler/kiss/main/KissCrawler.java b/crawler/kiss/src/main/java/org/wamblee/crawler/kiss/main/KissCrawler.java deleted file mode 100644 index 9fc9a777..00000000 --- a/crawler/kiss/src/main/java/org/wamblee/crawler/kiss/main/KissCrawler.java +++ /dev/null @@ -1,371 +0,0 @@ -/* - * Copyright 2005 the original author or authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.wamblee.crawler.kiss.main; - -import java.io.File; -import java.io.FileInputStream; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.io.InputStream; -import java.util.ArrayList; -import java.util.List; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - -import javax.mail.MessagingException; - -import org.apache.commons.httpclient.HttpClient; -import org.apache.commons.httpclient.NameValuePair; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.dom4j.Element; -import org.wamblee.crawler.Action; -import org.wamblee.crawler.Configuration; -import org.wamblee.crawler.Crawler; -import org.wamblee.crawler.Page; -import org.wamblee.crawler.PageException; -import org.wamblee.crawler.impl.ConfigurationParser; -import org.wamblee.crawler.impl.CrawlerImpl; -import org.wamblee.crawler.kiss.guide.Channel; -import org.wamblee.crawler.kiss.guide.PrintVisitor; -import org.wamblee.crawler.kiss.guide.Program; -import org.wamblee.crawler.kiss.guide.TVGuide; -import org.wamblee.crawler.kiss.guide.Time; -import org.wamblee.crawler.kiss.guide.TimeInterval; -import org.wamblee.crawler.kiss.notification.NotificationException; -import org.wamblee.crawler.kiss.notification.Notifier; -import org.wamblee.general.BeanFactory; -import org.wamblee.xml.ClasspathUriResolver; -import org.wamblee.xml.XslTransformer; - -/** - * The KiSS crawler for automatic recording of interesting TV shows. 
- * - * - * @author Erik Brakkee - */ -public class KissCrawler { - - private static final Log LOG = LogFactory.getLog(KissCrawler.class); - - /** - * Start URL of the electronic programme guide. - */ - private static final String START_URL = "http://epg.kml.kiss-technology.com/login.php"; - - /** - * Default socket timeout to use. - */ - private static final int SOCKET_TIMEOUT = 10000; - - /** - * Regular expression for matching time interval strings in the retrieved - * pages. - */ - private static final String TIME_REGEX = "[^0-9]*([0-9]{2}):([0-9]{2})[^0-9]*([0-9]{2}):([0-9]{2}).*"; - - /** - * Compiled pattern for the time regular expression. - */ - private Pattern _pattern; - - /** - * Runs the KiSS crawler. - * - * @param aArgs - * Arguments: First argument is the crawler configuration file, - * and second is the program configuration file. - * @throws Exception - * In case of problems. - */ - public static void main(String[] aArgs) throws Exception { - String crawlerConfig = new File(aArgs[0]).getCanonicalPath(); - String programConfig = new File(aArgs[1]).getCanonicalPath(); - - BeanFactory factory = new StandaloneCrawlerBeanFactory(); - Notifier notifier = factory.find(Notifier.class); - new KissCrawler(START_URL, SOCKET_TIMEOUT, crawlerConfig, - programConfig, notifier, new Report()); - } - - /** - * Constructs the crawler. This retrieves the TV guide by crawling the KiSS - * EPG guide, filters the guide for interesting programs, tries to record - * them, and sends a summary mail to the user. - * - * @param aCrawlerConfig - * Configuration file for the crawler. - * @param aProgramConfig - * Configuration file describing interesting shows. - * @param aNotifier - * Object used to send notifications of the results. - * @param aReport - * Report to use. - * @throws IOException - * In case of problems reading files. - * @throws NotificationException - * In case notification fails. - * @throws PageException - * In case of problems retrieving the TV guide. 
- */ - public KissCrawler(String aCrawlerConfig, String aProgramConfig, - Notifier aNotifier, Report aReport) throws IOException, - NotificationException, PageException { - this(START_URL, SOCKET_TIMEOUT, aCrawlerConfig, aProgramConfig, - aNotifier, aReport); - } - - /** - * Constructs the crawler. This retrieves the TV guide by crawling the KiSS - * EPG guide, filters the guide for interesting programs, tries to record - * them, and sends a summary mail to the user. - * - * @param aStartUrl - * Start URL of the electronic programme guide. - * @param aSocketTimeout - * Socket timeout to use. - * @param aCrawlerConfig - * Configuration file for the crawler. - * @param aProgramConfig - * Configuration file describing interesting shows. - * @param aNotifier - * Object used to send notifications of the results. - * @param aReport - * Report to use. - * @throws IOException - * In case of problems reading files. - * @throws NotificationException - * In case notification fails. - * @throws PageException - * In case of problems retrieving the TV guide. 
- */ - public KissCrawler(String aStartUrl, int aSocketTimeout, - String aCrawlerConfig, String aProgramConfig, Notifier aNotifier, - Report aReport) throws IOException, NotificationException, - PageException { - - _pattern = Pattern.compile(TIME_REGEX); - - try { - HttpClient client = new HttpClient(); - // client.getHostConfiguration().setProxy("127.0.0.1", 3128); - client.getParams().setParameter("http.socket.timeout", - SOCKET_TIMEOUT); - - XslTransformer transformer = new XslTransformer( - new ClasspathUriResolver()); - - Crawler crawler = createCrawler(aCrawlerConfig, client, transformer); - InputStream programConfigFile = new FileInputStream(new File( - aProgramConfig)); - ProgramConfigurationParser parser = new ProgramConfigurationParser(); - parser.parse(programConfigFile); - List programFilters = parser.getFilters(); - - try { - Page page = getStartPage(aStartUrl, crawler, aReport); - TVGuide guide = createGuide(page, aReport); - PrintVisitor printer = new PrintVisitor(System.out); - guide.accept(printer); - processResults(programFilters, guide, aNotifier, aReport); - } catch (PageException e) { - aReport.addMessage("Problem getting TV guide", e); - LOG.info("Problem getting TV guide", e); - throw e; - } - aNotifier.send(aReport.asXml()); - } finally { - System.out.println("Crawler finished"); - } - } - - /** - * Records interesting shows. - * - * @param aProgramCondition - * Condition determining which shows are interesting. - * @param aGuide - * Television guide. - * @throws MessagingException - * In case of problems sending a summary mail. 
- */ - private void processResults(List aProgramCondition, - TVGuide aGuide, Notifier aNotifier, Report aReport) { - ProgramActionExecutor executor = new ProgramActionExecutor(aReport); - for (ProgramFilter filter : aProgramCondition) { - List programs = filter.apply(aGuide); - ProgramAction action = filter.getAction(); - for (Program program : programs) { - action.execute(program, executor); - } - } - executor.commit(); - - } - - /** - * Creates the crawler. - * - * @param aCrawlerConfig - * Crawler configuration file. - * @param aOs - * Logging output stream for the crawler. - * @param aClient - * HTTP Client to use. - * @return Crawler. - * @throws FileNotFoundException - * In case configuration files cannot be found. - */ - private Crawler createCrawler(String aCrawlerConfig, HttpClient aClient, - XslTransformer aTransformer) throws FileNotFoundException { - ConfigurationParser parser = new ConfigurationParser(aTransformer); - InputStream crawlerConfigFile = new FileInputStream(new File( - aCrawlerConfig)); - Configuration config = parser.parse(crawlerConfigFile); - Crawler crawler = new CrawlerImpl(aClient, config); - return crawler; - } - - /** - * Gets the start page of the electronic programme guide. This involves - * login and navigation to a suitable start page after logging in. - * - * @param aStartUrl - * URL of the electronic programme guide. - * @param aCrawler - * Crawler to use. - * @param aReport - * Report to use. - * @return Starting page. 
- */ - private Page getStartPage(String aStartUrl, Crawler aCrawler, Report aReport) - throws PageException { - try { - Page page = aCrawler.getPage(aStartUrl, new NameValuePair[0]); - page = page.getAction("login").execute(); - Action favorites = page.getAction("channels-favorites"); - if (favorites == null) { - String msg = "Channels favorites action not found on start page"; - throw new PageException(msg); - } - return favorites.execute(); - } catch (PageException e) { - String msg = "Could not complete login to electronic programme guide."; - throw new PageException(msg, e); - } - } - - /** - * Creates the TV guide by web crawling. - * - * @param aPage - * Starting page. - * @param aReport - * Report to use. - * @return TV guide. - * @throws PageException - * In case of problem getting the tv guide. - */ - private TVGuide createGuide(Page aPage, Report aReport) - throws PageException { - LOG.info("Obtaining full TV guide"); - Action[] actions = aPage.getActions(); - if (actions.length == 0) { - LOG.error("No channels found"); - throw new PageException("No channels found"); - } - List channels = new ArrayList(); - for (Action action : actions) { - try { - LOG.info("Getting channel info for '" + action.getName() + "'"); - Action tomorrow = action.execute().getAction("tomorrow"); - if (tomorrow == null) { - throw new PageException("Channel summary page for '" - + action.getName() - + "' does not contain required information"); - } - Channel channel = createChannel(action.getName(), tomorrow - .execute(), aReport); - channels.add(channel); - if (SystemProperties.isDebugMode()) { - break; // Only one channel is crawled. - } - } catch (PageException e) { - aReport.addMessage("Could not create channel information for '" - + action.getName() + "'"); - LOG.error("Could not create channel information for '" - + action.getName() + "'", e); - } - } - return new TVGuide(channels); - } - - /** - * Create channel information for a specific channel. 
- * - * @param aChannel - * Channel name. - * @param aPage - * Starting page for the channel. - * @return Channel. - */ - private Channel createChannel(String aChannel, Page aPage, Report aReport) { - LOG.info("Obtaining program for " + aChannel); - Action[] programActions = aPage.getActions(); - List programs = new ArrayList(); - for (Action action : programActions) { - String time = action.getContent().element("time").getText().trim(); - Matcher matcher = _pattern.matcher(time); - if (matcher.matches()) { - Time begin = new Time(Integer.parseInt(matcher.group(1)), - Integer.parseInt(matcher.group(2))); - Time end = new Time(Integer.parseInt(matcher.group(3)), Integer - .parseInt(matcher.group(4))); - TimeInterval interval = new TimeInterval(begin, end); - String description = ""; - String keywords = ""; - - if (!SystemProperties.isNoProgramDetailsRequired()) { - Element descriptionElem = action.getContent().element( - "description"); - if (descriptionElem == null) { - try { - Page programInfo = action.execute(); - description = programInfo.getContent().element( - "description").getText().trim(); - keywords = programInfo.getContent().element( - "keywords").getText().trim(); - } catch (PageException e) { - String msg = "Program details could not be determined for '" - + action.getName() + "'"; - aReport.addMessage(msg, e); - LOG.warn(msg, e); - } - } else { - description = descriptionElem.getTextTrim(); - } - } - Program program = new Program(aChannel, action.getName(), - description, keywords, interval, action); - - LOG.info("Got program " + program); - programs.add(program); - } - } - return new Channel(aChannel, programs); - } -} diff --git a/crawler/kiss/src/main/java/org/wamblee/crawler/kiss/main/KissCrawlerBootstrapper.java b/crawler/kiss/src/main/java/org/wamblee/crawler/kiss/main/KissCrawlerBootstrapper.java deleted file mode 100644 index 0f9b52d3..00000000 --- a/crawler/kiss/src/main/java/org/wamblee/crawler/kiss/main/KissCrawlerBootstrapper.java +++ 
/dev/null @@ -1,41 +0,0 @@ -/* - * Copyright 2006 the original author or authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.wamblee.crawler.kiss.main; - -import java.io.File; - -import org.wamblee.general.ClassLoaderUtils; - -/** - * Bootstrapper for the kiss crawler which adds all files in the directory - * given by the first argument to the classpath. - */ -public class KissCrawlerBootstrapper { - - public static void main(String[] aArgs) throws Exception { - File libdir = new File(aArgs[0]); - if ( !libdir.isDirectory() ) { - throw new IllegalArgumentException("'" + aArgs[0] + "' is not a directory."); - } - ClassLoaderUtils.addJarsInDirectory(libdir); - String[] args = new String[2]; - args[0] = aArgs[1]; - args[1] = aArgs[2]; - KissCrawler.main(args); - } - -} diff --git a/crawler/kiss/src/main/java/org/wamblee/crawler/kiss/main/ProgramAction.java b/crawler/kiss/src/main/java/org/wamblee/crawler/kiss/main/ProgramAction.java deleted file mode 100644 index 73769c1d..00000000 --- a/crawler/kiss/src/main/java/org/wamblee/crawler/kiss/main/ProgramAction.java +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Copyright 2005 the original author or authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.wamblee.crawler.kiss.main; - -import org.wamblee.crawler.kiss.guide.Program; - -/** - * Represents an action configured for a program. - * - * @author Erik Brakkee - */ -public interface ProgramAction { - - /** - * Executes the action. - * - * @param aProgram - * Program to execute the action for. - * @param aExecutor - * Executor to use. - */ - void execute(Program aProgram, ProgramActionExecutor aExecutor); -} diff --git a/crawler/kiss/src/main/java/org/wamblee/crawler/kiss/main/ProgramActionExecutor.java b/crawler/kiss/src/main/java/org/wamblee/crawler/kiss/main/ProgramActionExecutor.java deleted file mode 100644 index 68a4c13d..00000000 --- a/crawler/kiss/src/main/java/org/wamblee/crawler/kiss/main/ProgramActionExecutor.java +++ /dev/null @@ -1,131 +0,0 @@ -/* - * Copyright 2005 the original author or authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.wamblee.crawler.kiss.main; - -import java.util.HashSet; -import java.util.Map; -import java.util.Set; -import java.util.TreeMap; -import java.util.TreeSet; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.wamblee.crawler.kiss.guide.Program; -import org.wamblee.crawler.kiss.guide.TimeInterval; -import org.wamblee.crawler.kiss.guide.Program.RecordingResult; - -/** - * Provides execution of actions for programs. Actions use this class to tell - * the executor what to do. The executor then decides on exactly what to do and - * in what order and makes decisions in case of conflicts. - */ -public class ProgramActionExecutor { - - private static final Log LOG = LogFactory - .getLog(ProgramActionExecutor.class); - - /** - * Map of priority to set of programs. - */ - private Map> _showsToRecord; - - /** - * Report to use. - */ - private Report _report; - - /** - * Constructs the program action executor. - * - * @param aReport Report to use. - */ - public ProgramActionExecutor(Report aReport) { - _showsToRecord = new TreeMap>(); - _report = aReport; - } - - /** - * Called by an action to indicate the desire to record a program. - * - * @param aPriority - * Priority of the program. Used to resolve conflicts. - * @param aProgram - * Program to record. - */ - public void recordProgram(int aPriority, Program aProgram) { - LOG.info("priority = " + aPriority + ", program: " + aProgram); - // Putting -priority into the set makes sure that iteration order - // over the priorities will go from higher priority to lower priority. - Set programs = _showsToRecord.get(-aPriority); - if (programs == null) { - programs = new TreeSet(new Program.TimeComparator()); - _showsToRecord.put(-aPriority, programs); - } - programs.add(aProgram); - } - - /** - * Called by an action to indicate that a program is interesting. - * - * @param aCategory - * Category of the program. - * @param aProgram - * Program. 
- */ - public void interestingProgram(String aCategory, Program aProgram) { - LOG.info("category = '" + aCategory + "', program: " + aProgram); - _report.interestingProgram(aCategory, aProgram); - } - - /** - * Makes sure that the actions are performed. - */ - public void commit() { - Set previouslyRecorded = new HashSet(); - for (Integer priority : _showsToRecord.keySet()) { - for (Program program : _showsToRecord.get(priority)) { - TimeInterval interval = program.getInterval(); - if (recordingConflictExists(previouslyRecorded, interval)) { - _report.setRecordingResult(RecordingResult.CONFLICT, program); - } else { - RecordingResult result = program.record(); - _report.setRecordingResult(result, program); - previouslyRecorded.add(interval); - } - } - } - } - - /** - * Checks an interval for overlap with a previously recorded program. - * - * @param aPreviouslyRecorded - * Previously recorded programs. - * @param aInterval - * Interval. - * @return True iff there is a recording conflict. - */ - private boolean recordingConflictExists( - Set aPreviouslyRecorded, TimeInterval aInterval) { - for (TimeInterval recordedInterval : aPreviouslyRecorded) { - if (aInterval.overlap(recordedInterval)) { - return true; - } - } - return false; - } -} diff --git a/crawler/kiss/src/main/java/org/wamblee/crawler/kiss/main/ProgramConfigurationParser.java b/crawler/kiss/src/main/java/org/wamblee/crawler/kiss/main/ProgramConfigurationParser.java deleted file mode 100644 index 1eb971c2..00000000 --- a/crawler/kiss/src/main/java/org/wamblee/crawler/kiss/main/ProgramConfigurationParser.java +++ /dev/null @@ -1,130 +0,0 @@ -/* - * Copyright 2005 the original author or authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.wamblee.crawler.kiss.main; - -import java.io.InputStream; -import java.util.ArrayList; -import java.util.Iterator; -import java.util.List; - -import org.dom4j.Attribute; -import org.dom4j.Document; -import org.dom4j.DocumentException; -import org.dom4j.Element; -import org.dom4j.io.SAXReader; -import org.wamblee.conditions.AndCondition; -import org.wamblee.conditions.Condition; -import org.wamblee.conditions.PropertyRegexCondition; -import org.wamblee.crawler.kiss.guide.Program; - -/** - * Parse the configuration of desired programs. - * - * @author Erik Brakkee - */ -class ProgramConfigurationParser { - private static final int DEFAULT_PRIORITY = 1; - - // Configuration of interesting programs. - - private static final String ELEM_PROGRAM = "program"; - - private static final String ELEM_PRIORITY = "priority"; - - private static final String ELEM_PATTERN = "match"; - - private static final String ELEM_ACTION = "action"; - - private static final String ELEM_CATEGORY = "category"; - - private static final String ACTION_NOTIFY = "notify"; - - private List _filters; - - ProgramConfigurationParser() { - _filters = null; - } - - /** - * Parses the condition used to match the desired programs. - * - * @param aStream - * Input stream to parse from. - * @return Condition. 
- */ - void parse(InputStream aStream) { - List filters = new ArrayList(); - try { - SAXReader reader = new SAXReader(); - Document document = reader.read(aStream); - - Element root = document.getRootElement(); - - for (Iterator i = root.elementIterator(ELEM_PROGRAM); i.hasNext();) { - Element program = (Element) i.next(); - - Element categoryElem = program.element(ELEM_CATEGORY); - String category = ""; - if (categoryElem != null) { - category = categoryElem.getText().trim(); - } - - Element actionElem = program.element(ELEM_ACTION); - int priority = DEFAULT_PRIORITY; - String priorityString = program.elementTextTrim(ELEM_PRIORITY); - if ( priorityString != null ) { - priority = Integer.valueOf(priorityString); - } - ProgramAction action = new RecordProgramAction(priority); - if (actionElem != null) { - if (actionElem.getText().equals(ACTION_NOTIFY)) { - action = new InterestingProgramAction(category); - } - } - - List> regexConditions = new ArrayList>(); - for (Iterator j = program.elementIterator(ELEM_PATTERN); j - .hasNext();) { - Element patternElem = (Element) j.next(); - String fieldName = "name"; - Attribute fieldAttribute = patternElem.attribute("field"); - if (fieldAttribute != null) { - fieldName = fieldAttribute.getText(); - } - String pattern = ".*(" + patternElem.getText() + ").*"; - regexConditions.add(new PropertyRegexCondition( - fieldName, pattern, true)); - } - Condition condition = new AndCondition( - regexConditions); - filters.add(new ProgramFilter(condition, action)); - } - _filters = filters; - } catch (DocumentException e) { - throw new RuntimeException("Error parsing program configuraiton", e); - } - } - - /** - * Returns the list of program filters. - * - * @return Filter list. 
- */ - public List getFilters() { - return _filters; - } -} diff --git a/crawler/kiss/src/main/java/org/wamblee/crawler/kiss/main/ProgramFilter.java b/crawler/kiss/src/main/java/org/wamblee/crawler/kiss/main/ProgramFilter.java deleted file mode 100644 index 31f58629..00000000 --- a/crawler/kiss/src/main/java/org/wamblee/crawler/kiss/main/ProgramFilter.java +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Copyright 2005 the original author or authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.wamblee.crawler.kiss.main; - -import java.util.List; - -import org.wamblee.conditions.Condition; -import org.wamblee.crawler.kiss.guide.MatchVisitor; -import org.wamblee.crawler.kiss.guide.Program; -import org.wamblee.crawler.kiss.guide.TVGuide; - -/** - * Obtains a list of interesting programs from a TV guide and decides what to do - * with them. - * - * @author Erik Brakkee - */ -public class ProgramFilter { - - private Condition _condition; - - private ProgramAction _action; - - /** - * Constructs the program filter. - * @param aCondition Condition used to find interesting programs. - * @param aAction Corresponding action to execute for matching programs. - */ - public ProgramFilter(Condition aCondition, ProgramAction aAction) { - _condition = aCondition; - _action = aAction; - } - - /** - * Gets the action. - * @return Action. - */ - public ProgramAction getAction() { - return _action; - } - - /** - * Applies the filter to a TV guide. 
- * @param aGuide TV guide. - * @return List of matching programs. - */ - public List apply(TVGuide aGuide) { - MatchVisitor matcher = new MatchVisitor(_condition); - aGuide.accept(matcher); - return matcher.getMatches(); - } -} diff --git a/crawler/kiss/src/main/java/org/wamblee/crawler/kiss/main/RecordProgramAction.java b/crawler/kiss/src/main/java/org/wamblee/crawler/kiss/main/RecordProgramAction.java deleted file mode 100644 index d4a41894..00000000 --- a/crawler/kiss/src/main/java/org/wamblee/crawler/kiss/main/RecordProgramAction.java +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright 2005 the original author or authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.wamblee.crawler.kiss.main; - -import org.wamblee.crawler.kiss.guide.Program; - -/** - * Represents an action to record a program. - * - * @author Erik Brakkee - */ -public class RecordProgramAction implements ProgramAction { - - private int _priority; - - /** - * Constructs the action. - * @param aPriority Priority of the recording action. Higher values have higher - * priority. 
- */ - public RecordProgramAction(int aPriority) { - _priority = aPriority; - } - - /* - * (non-Javadoc) - * - * @see org.wamblee.crawler.kiss.ProgramAction#execute(org.wamblee.crawler.kiss.Program, - * org.wamblee.crawler.kiss.Report) - */ - public void execute(Program aProgram, ProgramActionExecutor aReport) { - aReport.recordProgram(_priority, aProgram); - } - -} diff --git a/crawler/kiss/src/main/java/org/wamblee/crawler/kiss/main/Report.java b/crawler/kiss/src/main/java/org/wamblee/crawler/kiss/main/Report.java deleted file mode 100644 index 78c532b2..00000000 --- a/crawler/kiss/src/main/java/org/wamblee/crawler/kiss/main/Report.java +++ /dev/null @@ -1,189 +0,0 @@ -/* - * - * Copyright 2005 the original author or authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.wamblee.crawler.kiss.main; - -import java.util.ArrayList; -import java.util.EnumMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.TreeMap; -import java.util.TreeSet; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.dom4j.DocumentFactory; -import org.dom4j.Element; -import org.wamblee.crawler.kiss.guide.Program; -import org.wamblee.crawler.kiss.guide.Program.RecordingResult; - -/** - * Represents a report on the actions of the crawler. 
- * - * @author Erik Brakkee - */ -public class Report { - - private static final Log LOG = LogFactory - .getLog(Report.class); - - /** - * A map of category name to a set of program. Useful for displaying the - * output of possibly interesting programs on a per category basis. - */ - private Map> _interestingShows; - - /** - * Map or recording result to a set of programs. - */ - private EnumMap> _recordings; - - /** - * Messages generated while doing all the work. - */ - private List _messages; - - /** - * Constructs the report. - * - */ - public Report() { - _interestingShows = new TreeMap>(); - _recordings = new EnumMap>( - RecordingResult.class); - for (RecordingResult result : RecordingResult.values()) { - _recordings.put(result, new TreeSet( - new Program.TimeComparator())); - } - _messages = new ArrayList(); - } - - /** - * Adds a message. - * - * @param aMessage - * Message to add. - */ - public void addMessage(String aMessage) { - _messages.add(aMessage); - } - - /** - * Adds a message. - * - * @param aMessage - * Message to add. - * @param aException Exception that caused the problem. - */ - public void addMessage(String aMessage, Exception aException) { - String msg = aMessage; - for (Throwable e = aException; e != null; e = e.getCause()) { - msg += ": " + e.getMessage(); - } - addMessage(msg); - } - - /** - * Called to indicate that a program is interesting. - * - * @param aCategory - * Category of the program. - * @param aProgram - * Program. - */ - public void interestingProgram(String aCategory, Program aProgram) { - LOG.info("category = '" + aCategory + "', program: " + aProgram); - Set programs = _interestingShows.get(aCategory); - if (programs == null) { - programs = new TreeSet(new Program.TimeComparator()); - _interestingShows.put(aCategory, programs); - } - programs.add(aProgram); - } - - /** - * Called to specify the result of recording a program. - * @param aResult Result. - * @param aProgram Program. 
- */ - public void setRecordingResult(RecordingResult aResult, Program aProgram) { - _recordings.get(aResult).add(aProgram); - } - - - /** - * Get report as XML. - * - * @return XML report - */ - public Element asXml() { - DocumentFactory factory = DocumentFactory.getInstance(); - Element report = factory.createElement("report"); - - if (_messages.size() > 0) { - Element messages = report.addElement("messages"); - for (String message : _messages) { - messages.addElement("message").setText(message); - } - } - - Set reportedPrograms = new HashSet(); - - for (RecordingResult result : RecordingResult.values()) { - if (_recordings.get(result).size() > 0) { - Element recordingResult = report.addElement("recorded") - .addAttribute("result", result.toString()); - - for (Program program : _recordings.get(result)) { - recordingResult.add(program.asXml()); - reportedPrograms.add(program); - } - } - } - - if (_interestingShows.size() > 0) { - Element interesting = report.addElement("interesting"); - for (String category : _interestingShows.keySet()) { - Element categoryElem = interesting; - if (category.length() > 0) { - categoryElem = interesting.addElement("category"); - categoryElem.addAttribute("name", category); - } - for (Program program : _interestingShows.get(category)) { - if (!reportedPrograms.contains(program)) { - categoryElem.add(program.asXml()); - } else { - LOG.info("Category '" + category + "', program " - + program + " already reported"); - } - } - if (categoryElem.elements().size() == 0) { - // Remove empty category element. 
- LOG - .info("Removing element for category '" + category - + "'"); - interesting.remove(categoryElem); - } - } - - } - - return report; - } -} diff --git a/crawler/kiss/src/main/java/org/wamblee/crawler/kiss/main/StandaloneCrawlerBeanFactory.java b/crawler/kiss/src/main/java/org/wamblee/crawler/kiss/main/StandaloneCrawlerBeanFactory.java deleted file mode 100644 index cb0e8481..00000000 --- a/crawler/kiss/src/main/java/org/wamblee/crawler/kiss/main/StandaloneCrawlerBeanFactory.java +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Copyright 2006 the original author or authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.wamblee.crawler.kiss.main; - -import org.wamblee.general.spring.SpringBeanFactory; - -/** - * Bean factory used for the standalone crawler application. - * - * @author Erik Brakkee - */ -public class StandaloneCrawlerBeanFactory extends SpringBeanFactory { - - private static final String LOCATOR = "crawler-standalone.xml"; - private static final String FACTORY_NAME = "crawlerStandalone"; - - /** - * Constructs the factory. 
- * - */ - public StandaloneCrawlerBeanFactory() { - super(LOCATOR, FACTORY_NAME); - } -} diff --git a/crawler/kiss/src/main/java/org/wamblee/crawler/kiss/main/SystemProperties.java b/crawler/kiss/src/main/java/org/wamblee/crawler/kiss/main/SystemProperties.java deleted file mode 100644 index 8d412601..00000000 --- a/crawler/kiss/src/main/java/org/wamblee/crawler/kiss/main/SystemProperties.java +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Copyright 2005 the original author or authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.wamblee.crawler.kiss.main; - -/** - * Access to system properties for the crawler. - * - * @author Erik Brakkee - */ -public final class SystemProperties { - - private static final String DEBUG_PROPERTY = "kiss.debug"; - - private static final String NO_PROGRAM_DETAILS = "kiss.nodetails"; - - private static final String DISABLE_RECORD = "kiss.norecord"; - - /** - * Disabled constructor. - * - */ - private SystemProperties() { - // Empty. - } - - /** - * Determines if the system is run in debug mode. When in debug mode, less - * extensive crawling is done. - * - * @return True iff we are running in debug mode. - */ - public static boolean isDebugMode() { - return System.getProperties().getProperty(DEBUG_PROPERTY) != null; - } - - /** - * Determines if no program details are required. - * - * @return True iff no program details are required. 
- */ - public static boolean isNoProgramDetailsRequired() { - return System.getProperties().getProperty(NO_PROGRAM_DETAILS) != null; - } - - /** - * Determines if recording is disabled. - * - * @return True iff no recording should be done. - */ - public static boolean isRecordDisabled() { - return System.getProperties().getProperty(DISABLE_RECORD) != null; - } - -} diff --git a/crawler/kiss/src/main/java/org/wamblee/crawler/kiss/main/package.html b/crawler/kiss/src/main/java/org/wamblee/crawler/kiss/main/package.html deleted file mode 100644 index 79b98c3e..00000000 --- a/crawler/kiss/src/main/java/org/wamblee/crawler/kiss/main/package.html +++ /dev/null @@ -1,16 +0,0 @@ - - - - wamblee.org - - - -This package contains the crawling logic of the KiSS EPG site as well -as the configuration classes. - - - -@since - - - - diff --git a/crawler/kiss/src/main/java/org/wamblee/crawler/kiss/notification/MailNotifier.java b/crawler/kiss/src/main/java/org/wamblee/crawler/kiss/notification/MailNotifier.java deleted file mode 100644 index 3f1a93bc..00000000 --- a/crawler/kiss/src/main/java/org/wamblee/crawler/kiss/notification/MailNotifier.java +++ /dev/null @@ -1,147 +0,0 @@ -/* - * Copyright 2006 the original author or authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.wamblee.crawler.kiss.notification; - -import java.io.IOException; -import java.util.Arrays; -import java.util.Date; - -import javax.mail.internet.AddressException; -import javax.mail.internet.InternetAddress; -import javax.xml.transform.TransformerException; - -import org.apache.commons.mail.EmailException; -import org.apache.commons.mail.HtmlEmail; -import org.dom4j.Element; -import org.wamblee.xml.XslTransformer; - -/** - * A notifier that uses SMTP to notify users by mail. - * - * - * @author Erik Brakkee - */ -public class MailNotifier implements Notifier { - - private String _from; - - private String _to; - - private String _subject; - - private String _htmlXslt; - - private String _textXslt; - - private MailServer _server; - - private XslTransformer _transformer; - - /** - * Constructs the notifier. - * - * @param aFrom - * Sender mail address to use. - * @param aTo - * Recipient mail address to use. - * @param aSubject - * Subject to use in the email. - * @param aHtmlXslt - * XSLT file to transform the report into HTML. - * @param aTextXslt - * XSLT file to transform the report into text. - * @param aServer - * Mail server to use. - * @param aTransformer Transformer to use. 
- */ - public MailNotifier(String aFrom, String aTo, String aSubject, - String aHtmlXslt, String aTextXslt, MailServer aServer, XslTransformer aTransformer) { - _from = aFrom; - _to = aTo; - _subject = aSubject; - _htmlXslt = aHtmlXslt; - _textXslt = aTextXslt; - _server = aServer; - _transformer = aTransformer; - } - - /* - * (non-Javadoc) - * - * @see org.wamblee.crawler.kiss.Notifier#send(org.dom4j.Element) - */ - public void send(Element aReport) throws NotificationException { - HtmlEmail mail = new HtmlEmail(); - try { - mail.setFrom(_from); - mail - .setTo(Arrays - .asList(new InternetAddress[] { new InternetAddress( - _to) })); - mail.setSentDate(new Date()); - mail.setSubject(_subject); - - String htmlText = transformReport(aReport, _htmlXslt); - String plainText = transformReport(aReport, _textXslt); - - mail.setHtmlMsg(htmlText); - mail.setTextMsg(plainText); - - _server.send(mail); - } catch (EmailException e) { - throw new NotificationException(e.getMessage(), e); - } catch (TransformerException e) { - throw new NotificationException(e.getMessage(), e); - } catch (IOException e) { - throw new NotificationException(e.getMessage(), e); - } catch (AddressException e) { - throw new NotificationException(e.getMessage(), e); - } - } - - /** - * Transforms a report into a destination format. - * - * @param aReport - * Report to transform - * @param aXslt - * XSLT to use. - * @return Transformed result. - * @throws IOException - * In case of IO problems. - * @throws TransformerException - * In case of problems transforming. 
- */ - private String transformReport(Element aReport, String aXslt) - throws IOException, TransformerException { - String reportXmlText = aReport.asXML(); - return _transformer.textTransform(reportXmlText.getBytes(), _transformer.resolve(aXslt)); - } - - /* (non-Javadoc) - * @see org.wamblee.crawler.kiss.notification.Notifier#asHtml(org.dom4j.Element) - */ - public String asHtml(Element aReport) throws IOException, TransformerException { - return transformReport(aReport, _htmlXslt); - } - - /* (non-Javadoc) - * @see org.wamblee.crawler.kiss.notification.Notifier#asText(org.dom4j.Element) - */ - public String asText(Element aReport) throws IOException, TransformerException { - return transformReport(aReport, _textXslt); - } -} diff --git a/crawler/kiss/src/main/java/org/wamblee/crawler/kiss/notification/MailServer.java b/crawler/kiss/src/main/java/org/wamblee/crawler/kiss/notification/MailServer.java deleted file mode 100644 index 50292529..00000000 --- a/crawler/kiss/src/main/java/org/wamblee/crawler/kiss/notification/MailServer.java +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Copyright 2006 the original author or authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.wamblee.crawler.kiss.notification; - -import java.util.Properties; - -import javax.mail.Session; - -import org.apache.commons.mail.Email; -import org.apache.commons.mail.EmailException; - -/** - * Mail server. 
- * - * @author Erik Brakkee - */ -public class MailServer { - - private String _host; - - private int _port; - - private String _username; - - private String _password; - - /** - * Constructs the mail server. - * - * @param aHost - * Host name of the SMTP server. - * @param aPort - * Port name of the SMTP server. - * @param aUsername - * Username to use for authentication or null if no - * authentication is required. - * @param aPassword - * Password to use for authentication or null if no authenticatio - * is required. - */ - public MailServer(String aHost, int aPort, String aUsername, - String aPassword) { - _host = aHost; - _port = aPort; - _username = aUsername; - _password = aPassword; - } - - /** - * Sends an e-mail. - * - * @param aMail - * Mail to send. - * @throws EmailException - * In case of problems sending the mail. - */ - public void send(Email aMail) throws EmailException { - Properties props = new Properties(); - props.put("mail.transport.protocol", "smtp"); - props.put("mail.smtp.host", _host); - props.put("mail.smtp.port", "" + _port); - - Session mailSession = Session.getInstance(props, - new UsernamePasswordAuthenticator(_username, _password)); - aMail.setMailSession(mailSession); - aMail.send(); - } -} diff --git a/crawler/kiss/src/main/java/org/wamblee/crawler/kiss/notification/NotificationException.java b/crawler/kiss/src/main/java/org/wamblee/crawler/kiss/notification/NotificationException.java deleted file mode 100644 index 51f17911..00000000 --- a/crawler/kiss/src/main/java/org/wamblee/crawler/kiss/notification/NotificationException.java +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Copyright 2006 the original author or authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.wamblee.crawler.kiss.notification; - -/** - * Notification exception thrown in case of problems sending a notification to a - * user. - * - * - * @author Erik Brakkee - */ -public class NotificationException extends Exception { - - /** - * Constructs the notification. - * - * @param aMsg - * Message. - */ - public NotificationException(String aMsg) { - super(aMsg); - } - - /** - * Constructs the notification. - * - * @param aMsg - * Message. - * @param aCause - * Cause. - */ - public NotificationException(String aMsg, Throwable aCause) { - super(aMsg, aCause); - } -} diff --git a/crawler/kiss/src/main/java/org/wamblee/crawler/kiss/notification/Notifier.java b/crawler/kiss/src/main/java/org/wamblee/crawler/kiss/notification/Notifier.java deleted file mode 100644 index fcdc14a7..00000000 --- a/crawler/kiss/src/main/java/org/wamblee/crawler/kiss/notification/Notifier.java +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright 2006 the original author or authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.wamblee.crawler.kiss.notification; - -import java.io.IOException; - -import javax.xml.transform.TransformerException; - -import org.dom4j.Element; - -/** - * Object used to send notifications about the actions of the crawler. - * - * - * @author Erik Brakkee - */ -public interface Notifier { - - /** - * Sends a notification. - * - * @param aReport - * Report to send. - */ - void send(Element aReport) throws NotificationException; - - /** - * Converts the report to html. - * @param aReport Report to convert. - * @return - */ - String asHtml(Element aReport) throws IOException, TransformerException; - - /** - * Converts the report to text. - * @param aReport Report to convert. - * @return - */ - String asText(Element aReport) throws IOException, TransformerException; -} diff --git a/crawler/kiss/src/main/java/org/wamblee/crawler/kiss/notification/UsernamePasswordAuthenticator.java b/crawler/kiss/src/main/java/org/wamblee/crawler/kiss/notification/UsernamePasswordAuthenticator.java deleted file mode 100644 index 58fd5b3c..00000000 --- a/crawler/kiss/src/main/java/org/wamblee/crawler/kiss/notification/UsernamePasswordAuthenticator.java +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Copyright 2006 the original author or authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.wamblee.crawler.kiss.notification; - -import javax.mail.Authenticator; -import javax.mail.PasswordAuthentication; - -/** - * Authenticator to supply username and password to the mail server (if needed). - * - * - * @author Erik Brakkee - */ -public class UsernamePasswordAuthenticator extends Authenticator { - - private String _username; - - private String _password; - - /** - * Constructs the authenticator. - * - * @param aUsername - * User name. - * @param aPassword - * Password. - */ - public UsernamePasswordAuthenticator(String aUsername, String aPassword) { - _username = aUsername; - _password = aPassword; - } - - /* - * (non-Javadoc) - * - * @see javax.mail.Authenticator#getPasswordAuthentication() - */ - @Override - protected PasswordAuthentication getPasswordAuthentication() { - if (_username == null) { - return null; - } - return new PasswordAuthentication(_username, _password); - } -} diff --git a/crawler/kiss/src/main/java/org/wamblee/crawler/kiss/notification/package.html b/crawler/kiss/src/main/java/org/wamblee/crawler/kiss/notification/package.html deleted file mode 100644 index e685c6a1..00000000 --- a/crawler/kiss/src/main/java/org/wamblee/crawler/kiss/notification/package.html +++ /dev/null @@ -1,15 +0,0 @@ - - - - wamblee.org - - - -Contains the classes for notifying users of the results of crawling. - - - -@since - - - - diff --git a/crawler/kiss/src/main/java/org/wamblee/crawler/kiss/package.html b/crawler/kiss/src/main/java/org/wamblee/crawler/kiss/package.html deleted file mode 100644 index 91bbeabd..00000000 --- a/crawler/kiss/src/main/java/org/wamblee/crawler/kiss/package.html +++ /dev/null @@ -1,30 +0,0 @@ - - - - wamblee.org - - - -This package provides a crawler for the KiSS electronic program guide. -It provides automatic recording of programs that satisfy criteria specified -by the user. - -The following packages are defined: -
    -
  • {@link org.wamblee.crawler.kiss.main}: Contains the crawling functionality and - configuration classes. -
  • -
  • {@link org.wamblee.crawler.kiss.guide}: Contains the TV guide object model and the - classes for searching relevant programs in the guide. -
  • -
  • {@link org.wamblee.crawler.kiss.notification}: Contains the classes for - notification. -
  • -
- - - -@since - - - - diff --git a/crawler/kiss/src/main/resources/channel-overview.xsl b/crawler/kiss/src/main/resources/channel-overview.xsl deleted file mode 100644 index 59c807b1..00000000 --- a/crawler/kiss/src/main/resources/channel-overview.xsl +++ /dev/null @@ -1,74 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - right-now - - - evening - - - afternoon - - - noon - - - morning - - - tomorrow - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/crawler/kiss/src/main/resources/channel-right-now-graphic.xsl b/crawler/kiss/src/main/resources/channel-right-now-graphic.xsl deleted file mode 100644 index 6b5070c4..00000000 --- a/crawler/kiss/src/main/resources/channel-right-now-graphic.xsl +++ /dev/null @@ -1,69 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - program-info - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/crawler/kiss/src/main/resources/channel-right-now-mobile.xsl b/crawler/kiss/src/main/resources/channel-right-now-mobile.xsl deleted file mode 100644 index ea5ba680..00000000 --- a/crawler/kiss/src/main/resources/channel-right-now-mobile.xsl +++ /dev/null @@ -1,55 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - program-info - - - - - - - - - - - - - - - - - - - diff --git a/crawler/kiss/src/main/resources/channel-right-now-output.xml b/crawler/kiss/src/main/resources/channel-right-now-output.xml deleted file mode 100644 index 6597e6ad..00000000 --- a/crawler/kiss/src/main/resources/channel-right-now-output.xml +++ /dev/null @@ -1,112 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -BackHomeLogout diff --git a/crawler/kiss/src/main/resources/channel-right-now.xsl b/crawler/kiss/src/main/resources/channel-right-now.xsl deleted file mode 100644 index 5885f71e..00000000 --- 
a/crawler/kiss/src/main/resources/channel-right-now.xsl +++ /dev/null @@ -1,51 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - program-info - - - - - - - - - - - - - - - diff --git a/crawler/kiss/src/main/resources/channels-favorites-graphic.xsl b/crawler/kiss/src/main/resources/channels-favorites-graphic.xsl deleted file mode 100644 index 75865029..00000000 --- a/crawler/kiss/src/main/resources/channels-favorites-graphic.xsl +++ /dev/null @@ -1,31 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/crawler/kiss/src/main/resources/channels-favorites.xsl b/crawler/kiss/src/main/resources/channels-favorites.xsl deleted file mode 100644 index ec9b1d6d..00000000 --- a/crawler/kiss/src/main/resources/channels-favorites.xsl +++ /dev/null @@ -1,43 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - channel-overview - - - - - diff --git a/crawler/kiss/src/main/resources/crawler-standalone.xml b/crawler/kiss/src/main/resources/crawler-standalone.xml deleted file mode 100644 index 860240bb..00000000 --- a/crawler/kiss/src/main/resources/crawler-standalone.xml +++ /dev/null @@ -1,16 +0,0 @@ - - - - - - - - - org.wamblee.crawler.properties.xml - org.wamblee.crawler.notification.xml - - - - - \ No newline at end of file diff --git a/crawler/kiss/src/main/resources/identity.xsl b/crawler/kiss/src/main/resources/identity.xsl deleted file mode 100644 index 822e8e83..00000000 --- a/crawler/kiss/src/main/resources/identity.xsl +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - - - - - diff --git a/crawler/kiss/src/main/resources/login-graphic.xsl b/crawler/kiss/src/main/resources/login-graphic.xsl deleted file mode 100644 index 692d196c..00000000 --- a/crawler/kiss/src/main/resources/login-graphic.xsl +++ /dev/null @@ -1,128 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - channels-whats-on-now - - - channels-whats-on - - - channels-favorites - - - - - - - - - - 
shows-whats-on - - - shows-search - - - shows-favorites - - - shows-add-favorite - - - - - - - movies-whats-on - - - - - - sports-whats-on - - - - logout - - - manual-recording - - - view-recordings - - - unknown - - - - - - - - - - - - diff --git a/crawler/kiss/src/main/resources/login-mobile.xsl b/crawler/kiss/src/main/resources/login-mobile.xsl deleted file mode 100644 index 70f4a5b6..00000000 --- a/crawler/kiss/src/main/resources/login-mobile.xsl +++ /dev/null @@ -1,105 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - channels-whats-on-now - - - channels-whats-on - - - channels-favorites - - - - - - - - - shows-whats-on - - - shows-search - - - shows-favorites - - - shows-add-favorite - - - - - - - movies-whats-on - - - - - - sports-whats-on - - - - logout - - - manual-recording - - - view-recordings - - - unknown - - - - - - - - - - - - diff --git a/crawler/kiss/src/main/resources/login.xsl b/crawler/kiss/src/main/resources/login.xsl deleted file mode 100644 index 069cb939..00000000 --- a/crawler/kiss/src/main/resources/login.xsl +++ /dev/null @@ -1,81 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - channels-whats-on-now - - - channels-whats-on - - - channels-favorites - - - - shows-whats-on - - - shows-search - - - shows-favorites - - - shows-add-favorite - - - - movies-whats-on - - - - - sports-whats-on - - - - logout - - - manual-recording - - - view-recordings - - - unknown - - - - - - - - - - - - diff --git a/crawler/kiss/src/main/resources/mainpage.xsl b/crawler/kiss/src/main/resources/mainpage.xsl deleted file mode 100644 index 11c81fcd..00000000 --- a/crawler/kiss/src/main/resources/mainpage.xsl +++ /dev/null @@ -1,44 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - login - - login - - - - - - - - - - - - - - - - diff --git a/crawler/kiss/src/main/resources/org.wamblee.crawler.notification.xml b/crawler/kiss/src/main/resources/org.wamblee.crawler.notification.xml deleted file mode 100644 index 3b8d58a3..00000000 --- 
a/crawler/kiss/src/main/resources/org.wamblee.crawler.notification.xml +++ /dev/null @@ -1,34 +0,0 @@ - - - - - - - - ${org.wamblee.crawler.smtp.host} - ${org.wamblee.crawler.smtp.port} - ${org.wamblee.crawler.smtp.username} - ${org.wamblee.crawler.smtp.password} - - - - - - - - - - - ${org.wamblee.crawler.notification.from} - ${org.wamblee.crawler.notification.to} - ${org.wamblee.crawler.notification.subject} - reportToHtml.xsl - reportToText.xsl - - - - \ No newline at end of file diff --git a/crawler/kiss/src/main/resources/org.wamblee.crawler.properties.xml b/crawler/kiss/src/main/resources/org.wamblee.crawler.properties.xml deleted file mode 100644 index e92b028d..00000000 --- a/crawler/kiss/src/main/resources/org.wamblee.crawler.properties.xml +++ /dev/null @@ -1,14 +0,0 @@ - - - - - - - - - org.wamblee.crawler.properties - - - - \ No newline at end of file diff --git a/crawler/kiss/src/main/resources/program-info-mobile.xsl b/crawler/kiss/src/main/resources/program-info-mobile.xsl deleted file mode 100644 index d92391f4..00000000 --- a/crawler/kiss/src/main/resources/program-info-mobile.xsl +++ /dev/null @@ -1,63 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - record - - - - - - recorded - - - - - - - - - diff --git a/crawler/kiss/src/main/resources/program-info.xsl b/crawler/kiss/src/main/resources/program-info.xsl deleted file mode 100644 index ec0f9797..00000000 --- a/crawler/kiss/src/main/resources/program-info.xsl +++ /dev/null @@ -1,56 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - record - - - - - - recorded - - - - - - - - - diff --git a/crawler/kiss/src/main/resources/recorded.xsl b/crawler/kiss/src/main/resources/recorded.xsl deleted file mode 100644 index 866c51bb..00000000 --- a/crawler/kiss/src/main/resources/recorded.xsl +++ /dev/null @@ -1,41 +0,0 @@ - - - - - - - - - - DUPLICATE - - - CONFLICT - - - ERROR - - - - - OK - - - - - - - - - - - - TEXT NODE - - - diff 
--git a/crawler/kiss/src/main/resources/reportToHtml.xsl b/crawler/kiss/src/main/resources/reportToHtml.xsl deleted file mode 100644 index 23bc1730..00000000 --- a/crawler/kiss/src/main/resources/reportToHtml.xsl +++ /dev/null @@ -1,111 +0,0 @@ - - - - - - - KiSS crawler report - - -

KiSS crawler report

- - -

Possibly interesting programs

- -
- - No suitable programs found - - - - - -
- - -

- - - -
-
-

-
- - -

- - - Successfully recorded programs - - - - Already recorded programs - - - - Conflicts with other recorded programs - - - - Programs that could not be recorded for - technical reasons. - - - -

-
- - - - - : - - (/) - - - - -
- - - -
- - - - -
- - - - - - - - - - - -

Category:

- -
- - -

Messages

-
    - -
  • - - -
  • -
    -
-
-
- diff --git a/crawler/kiss/src/main/resources/reportToText.xsl b/crawler/kiss/src/main/resources/reportToText.xsl deleted file mode 100644 index 741a678d..00000000 --- a/crawler/kiss/src/main/resources/reportToText.xsl +++ /dev/null @@ -1,121 +0,0 @@ - - - - - - - KiSS crawler report - - - - - - Possibly interesting programs - - - - - - - - - - - - - - - - Successfully recorded programs - - - - - - Already recorded programs - - - - - - Conflicts with other recorded programs - - - - - - Programs that could not be recorded for technical reasons. - - - - - - - - - - * - - - : - - ( - - / - ) - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Category: - - - - - - - Messages - - - - * - - - - - - diff --git a/crawler/kiss/src/main/resources/utilities.xsl b/crawler/kiss/src/main/resources/utilities.xsl deleted file mode 100644 index c98c1139..00000000 --- a/crawler/kiss/src/main/resources/utilities.xsl +++ /dev/null @@ -1,143 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/crawler/kissweb/pom.xml b/crawler/kissweb/pom.xml deleted file mode 100644 index 5f3af106..00000000 --- a/crawler/kissweb/pom.xml +++ /dev/null @@ -1,56 +0,0 @@ - - - - org.wamblee - wamblee-crawler - 0.2-SNAPSHOT - - - 4.0.0 - org.wamblee - wamblee-crawler-kissweb - war - /crawler/kissweb - http://wamblee.org - - - - org.wamblee - wamblee-crawler-kiss - - - quartz - quartz - - - javax.servlet - servlet-api - - - jstl - jstl - - - taglibs - standard - - - - - - - - org.apache.maven.plugins - maven-war-plugin - - ${basedir}/src/webapp/WEB-INF/web.xml - wamblee-crawler-kissweb - src/webapp - - - - - - diff --git a/crawler/kissweb/src/main/java/org/wamblee/crawler/kiss/scheduling/CrawlerExecutor.java 
b/crawler/kissweb/src/main/java/org/wamblee/crawler/kiss/scheduling/CrawlerExecutor.java deleted file mode 100644 index c5dba6fd..00000000 --- a/crawler/kissweb/src/main/java/org/wamblee/crawler/kiss/scheduling/CrawlerExecutor.java +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Copyright 2006 the original author or authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.wamblee.crawler.kiss.scheduling; - -import java.util.Date; - -import org.wamblee.crawler.kiss.main.Report; - -/** - * Encapsulates the actual execution of the crawler. - * This interface makes it possible to test the scheduling logic - * in isolation. - * - */ -public interface CrawlerExecutor { - - /** - * Executes the crawler. - * @param aDate Date the crawler is being triggered. - * @param The report from the crawler. - * @throws Exception - */ - void execute(Date aDate, Report aReport) throws Exception; -} diff --git a/crawler/kissweb/src/main/java/org/wamblee/crawler/kiss/scheduling/CrawlerExecutorImpl.java b/crawler/kissweb/src/main/java/org/wamblee/crawler/kiss/scheduling/CrawlerExecutorImpl.java deleted file mode 100644 index d79e7149..00000000 --- a/crawler/kissweb/src/main/java/org/wamblee/crawler/kiss/scheduling/CrawlerExecutorImpl.java +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright 2006 the original author or authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.wamblee.crawler.kiss.scheduling; - -import java.util.Date; - -import org.wamblee.crawler.kiss.main.KissCrawler; -import org.wamblee.crawler.kiss.main.Report; -import org.wamblee.crawler.kiss.notification.Notifier; - -/** - * Implementation which executes the KiSS crawler for retrieving web content. - * - * @author Erik Brakkee - */ -public class CrawlerExecutorImpl implements CrawlerExecutor { - - private String _crawlerConfig; - private String _programConfig; - private Notifier _notifier; - - /** - * Constructs the crawler executor. - * @param aCrawlerConfig Crawler configuration file. - * @param aProgramConfig Program configuration file. - * @param aNotifier Object used to send notifications. 
- */ - public CrawlerExecutorImpl(String aCrawlerConfig, String aProgramConfig, Notifier aNotifier) { - _crawlerConfig = aCrawlerConfig; - _programConfig = aProgramConfig; - _notifier = aNotifier; - } - - /* (non-Javadoc) - * @see org.wamblee.crawler.kiss.scheduling.CrawlerScheduler.CrawlerExecutor#execute(java.util.Date) - */ - public void execute(Date aDate, Report aReport) throws Exception { - new KissCrawler(_crawlerConfig, _programConfig, _notifier, aReport); - } -} diff --git a/crawler/kissweb/src/main/java/org/wamblee/crawler/kiss/scheduling/CrawlerScheduler.java b/crawler/kissweb/src/main/java/org/wamblee/crawler/kiss/scheduling/CrawlerScheduler.java deleted file mode 100644 index 8774dcde..00000000 --- a/crawler/kissweb/src/main/java/org/wamblee/crawler/kiss/scheduling/CrawlerScheduler.java +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Copyright 2006 the original author or authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.wamblee.crawler.kiss.scheduling; - -/** - * Interface to the scheduler specific for working with the crawler. - * - * @author Erik Brakkee - */ -public interface CrawlerScheduler { - - /** - * Initializes the scheduler. - * @throws Exception In case of problems. - */ - void initialize() throws Exception; - - /** - * Checks if the crawler is running. - * @return True iff the crawler is running. - * @throws Exception In case of problems. 
- */ - boolean isCrawlerRunning() throws Exception; - - /** - * Schedules the crawler for immediate execution. - * @throws Exception In case of problems. - */ - void scheduleNow() throws Exception; - - /** - * Shuts down the scheduler. - * @throws Exception In case of problems. - */ - void shutdown() throws Exception; -} diff --git a/crawler/kissweb/src/main/java/org/wamblee/crawler/kiss/scheduling/CrawlerStatus.java b/crawler/kissweb/src/main/java/org/wamblee/crawler/kiss/scheduling/CrawlerStatus.java deleted file mode 100644 index 065a2294..00000000 --- a/crawler/kissweb/src/main/java/org/wamblee/crawler/kiss/scheduling/CrawlerStatus.java +++ /dev/null @@ -1,193 +0,0 @@ -/* - * Copyright 2006 the original author or authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.wamblee.crawler.kiss.scheduling; - -import java.io.Serializable; -import java.util.Calendar; -import java.util.Date; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.wamblee.crawler.kiss.main.Report; - -/** - * This class encapsulates the logic for deciding whether to - * run the crawler. This provides the mechanism to keep the - * scheduler simple (e.g. scheduling every hour) and providing - * more complex logic for determining whether to run the - * crawler. 
- */ -public class CrawlerStatus implements Serializable { - - private static final Log LOG = LogFactory.getLog(CrawlerStatus.class); - - private CrawlerExecutor _crawler; - private Date _lastExecuted; - private boolean _lastResult; - private Exception _lastException; - private Report _lastReport; - private int _hourMin; - private int _hourMax; - private boolean _mustExecute; - - /** - * Constructs the scheduler. - * The crawler will run if it is triggered in the range between the minimum (included) - * and maximum (included) hour of the day if either - *
    - *
  • it is triggered for the first time on the current day.
  • - *
  • an earlier crawling attempt on the same day failed.
  • - *
- * @param aCrawler The interface through which the crawler is executed. - * @param aHourMin The crawler may only run if hour >= aHourMin - * @param aHourMax The crawler may only run if hour <= aHourMax - */ - public CrawlerStatus(CrawlerExecutor aCrawler, int aHourMin, int aHourMax) { - _crawler = aCrawler; - _lastExecuted = new Date(); - _lastResult = true; // the crawler will automatically run the next day. - _lastException = null; - _lastReport = null; - _hourMin = aHourMin; - _hourMax = aHourMax; - _mustExecute = false; - } - - /** - * Determines whether or not the crawler must be run the next time it is triggered. - * @param aMustExecute If true then the crawler will run the next time it is triggered - * by the scheduler. - */ - public void setMustExecute(boolean aMustExecute) { - _mustExecute = aMustExecute; - } - - /** - * Called by a scheduled job. This determines whether the crawler must be run or - * not. This encapsulates the rukes for retrying and scheduling the crawler. - * @param aDate Time at which we are executing now. - */ - public void execute(Date aDate) { - - if (mustExecute(aDate)) { - LOG.info("Executing crawler at " + aDate); - Report report = new Report(); - try { - _crawler.execute(aDate, report); - _lastResult = true; - _lastException = null; - } catch (Exception e) { - _lastResult = false; - _lastException = e; - } finally { - _lastExecuted = aDate; - _lastReport = report; - } - } - } - - /** - * Gets the time the crawler was last executed. - * @return Time of last execution. - */ - public Date getLastExecuted() { - return _lastExecuted; - } - - /** - * Gets the result of the last execution. - * @return True iff last execution was a success. - */ - public boolean getLastResult() { - return _lastResult; - } - - /** - * Gets the exception thrown by the last execution. - * @return null if the last execution was successful or an exception - * otherwise. 
- */ - public Exception getLastException() { - return _lastException; - } - - /** - * Gets the last report from the scheduler. - * @return Report. - */ - public Report getLastReport() { - return _lastReport; - } - - /** - * Determines whether or not the crawler must be run. - * @param aDate Current time. - * @return True iff the crawler must be run. - */ - private boolean mustExecute(Date aDate) { - if (_mustExecute) { - _mustExecute = false; - return true; - } - if ( _lastExecuted == null ) { - return false; // crawler must be started manually at least once after deployment. - } - Calendar calendar = Calendar.getInstance(); - calendar.setTime(aDate); - int hour = calendar.get(Calendar.HOUR_OF_DAY); - if ( hour < _hourMin ) { - return false; - } - if (hour > _hourMax ) { - return false; - } - - if ( !lastExecutionWasOnSameDay(aDate)) { - return true; // First execution of today. - } - // last execution was on the same day. - if ( !_lastResult ) { - return true; // last execution of today was unsuccessful, retry. - } - return false; // already run successfully today. - } - - /** - * Determines if the last execution was on the same day. - * @param aDate Current time. - * @return True iff last execution was on the same day. - */ - private boolean lastExecutionWasOnSameDay(Date aDate) { - if ( _lastExecuted == null ) { - return false; - } - int curDay = getDayOfYear(aDate); - int lastDay = getDayOfYear(_lastExecuted); - return curDay == lastDay; // check can be invalid only if scheduling interval is one year, - // which is ridiculous. - } - - /** - * Gets the day of the year - * @param aDate Date to compute day for. 
- */ - private int getDayOfYear(Date aDate) { - Calendar calendar = Calendar.getInstance(); - calendar.setTime(aDate); - return calendar.get(Calendar.DAY_OF_YEAR); - } -} diff --git a/crawler/kissweb/src/main/java/org/wamblee/crawler/kiss/scheduling/quartz/CrawlerJob.java b/crawler/kissweb/src/main/java/org/wamblee/crawler/kiss/scheduling/quartz/CrawlerJob.java deleted file mode 100644 index 27c005ab..00000000 --- a/crawler/kissweb/src/main/java/org/wamblee/crawler/kiss/scheduling/quartz/CrawlerJob.java +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Copyright 2006 the original author or authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.wamblee.crawler.kiss.scheduling.quartz; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.quartz.JobExecutionContext; -import org.quartz.JobExecutionException; -import org.quartz.StatefulJob; -import org.wamblee.crawler.kiss.scheduling.CrawlerStatus; -import org.wamblee.general.BeanKernel; - -/** - * Quartz job to execute the crawler. - * - * @author Erik Brakkee - */ -public class CrawlerJob implements StatefulJob { - - private static final Log LOG = LogFactory.getLog(CrawlerJob.class); - - /** - * Constructs the job. - * - */ - public CrawlerJob() { - // Empty. 
- } - - /* - * (non-Javadoc) - * - * @see org.quartz.Job#execute(org.quartz.JobExecutionContext) - */ - public void execute(JobExecutionContext aContext) - throws JobExecutionException { - LOG.info("Job triggered"); - try { - CrawlerStatus schedule = BeanKernel.getBeanFactory().find( - CrawlerStatus.class); - schedule.execute(aContext.getFireTime()); - } catch (Exception e) { - throw new JobExecutionException("Error executing crawler", e, false); - } - } -} diff --git a/crawler/kissweb/src/main/java/org/wamblee/crawler/kiss/scheduling/quartz/QuartzCrawlerScheduler.java b/crawler/kissweb/src/main/java/org/wamblee/crawler/kiss/scheduling/quartz/QuartzCrawlerScheduler.java deleted file mode 100644 index 45c29717..00000000 --- a/crawler/kissweb/src/main/java/org/wamblee/crawler/kiss/scheduling/quartz/QuartzCrawlerScheduler.java +++ /dev/null @@ -1,104 +0,0 @@ -/* - * Copyright 2006 the original author or authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.wamblee.crawler.kiss.scheduling.quartz; - -import java.util.Date; -import java.util.List; - -import org.quartz.JobDetail; -import org.quartz.Scheduler; -import org.quartz.SchedulerException; -import org.quartz.SchedulerFactory; -import org.quartz.SimpleTrigger; -import org.quartz.Trigger; -import org.quartz.TriggerUtils; -import org.quartz.impl.StdSchedulerFactory; -import org.wamblee.crawler.kiss.scheduling.CrawlerScheduler; - -/** - * Interface to the Quartz scheduler. 
- * - * @author Erik Brakkee - */ -public class QuartzCrawlerScheduler implements CrawlerScheduler { - - /** - * - */ - private static final String TRIGGER_NAME = "interval"; - - /** - * - */ - private static final String JOB_NAME = "kisscrawler"; - - private Scheduler _scheduler; - - private int _intervalInSeconds; - - /** - * Constructs the quartz interface. - * @param aIntervalInSeconds Scheduling interval in seconds. - * @throws SchedulerException - */ - public QuartzCrawlerScheduler(int aIntervalInSeconds) throws SchedulerException { - SchedulerFactory schedulerFactory = new StdSchedulerFactory(); - _scheduler = schedulerFactory.getScheduler(); - _intervalInSeconds = aIntervalInSeconds; - } - - /** - * Initializes the scheduler. - * @throws SchedulerException - */ - public void initialize() throws SchedulerException { - _scheduler.start(); - - JobDetail jobDetail = new JobDetail(JOB_NAME, null, CrawlerJob.class); - Trigger trigger = TriggerUtils.makeSecondlyTrigger(_intervalInSeconds); - //trigger.setStartTime(TriggerUtils.getEvenHourDate(new Date())); - trigger.setStartTime(new Date()); - trigger.setName(TRIGGER_NAME); - - _scheduler.scheduleJob(jobDetail, trigger); - } - - /* (non-Javadoc) - * @see org.wamblee.crawler.kiss.scheduling.CrawlerScheduler#isCrawlerRunning() - */ - public boolean isCrawlerRunning() throws Exception { - List jobs = _scheduler.getCurrentlyExecutingJobs(); - return jobs.size() > 0; - } - - /* (non-Javadoc) - * @see org.wamblee.crawler.kiss.scheduling.CrawlerScheduler#scheduleNow() - */ - public void scheduleNow() throws Exception { - Trigger trigger = new SimpleTrigger("immediate", null); - trigger.setJobName(JOB_NAME); - _scheduler.scheduleJob(trigger); - } - - /** - * Shuts down the scheduler. 
- * @throws SchedulerException - */ - public void shutdown() throws SchedulerException { - _scheduler.shutdown(); - } -} diff --git a/crawler/kissweb/src/main/java/org/wamblee/crawler/kiss/servlet/Application.java b/crawler/kissweb/src/main/java/org/wamblee/crawler/kiss/servlet/Application.java deleted file mode 100644 index 85004098..00000000 --- a/crawler/kissweb/src/main/java/org/wamblee/crawler/kiss/servlet/Application.java +++ /dev/null @@ -1,84 +0,0 @@ -/* - * Copyright 2006 the original author or authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.wamblee.crawler.kiss.servlet; - -import javax.servlet.ServletContextEvent; -import javax.servlet.ServletContextListener; - -import org.wamblee.crawler.kiss.scheduling.CrawlerScheduler; -import org.wamblee.general.BeanKernel; - -/** - * The mechanism for kick starting the scheduling of the KiSS crawler. - * - * @author Erik Brakkee - */ -public class Application implements ServletContextListener { - - /** - * Constructs the listener. - * - */ - public Application() { - // Empty. 
- } - - /* - * (non-Javadoc) - * - * @see javax.servlet.ServletContextListener#contextInitialized(javax.servlet.ServletContextEvent) - */ - public void contextInitialized(ServletContextEvent aEvent) { - aEvent.getServletContext().log("KiSS Crawler initializing"); - try { - getScheduler().initialize(); - } catch (Exception e) { - aEvent.getServletContext().log("Error scheduling job", e); - return; - } - aEvent.getServletContext().log("KiSS Crawler initialized"); - } - - /* - * (non-Javadoc) - * - * @see javax.servlet.ServletContextListener#contextDestroyed(javax.servlet.ServletContextEvent) - */ - public void contextDestroyed(ServletContextEvent aEvent) { - aEvent.getServletContext().log("KiSS Crawler shutting down"); - try { - getScheduler().shutdown(); - } catch (Exception e) { - aEvent.getServletContext().log("Error scheduling job", e); - return; - } - aEvent.getServletContext().log("KiSS Crawler shut down complete"); - } - - /** - * Gets the scheduler from Spring. - * @return Scheduler. - */ - private CrawlerScheduler getScheduler() { - return BeanKernel.getBeanFactory().find(CrawlerScheduler.class); - } - - public static void main(String[] aArgs) throws Exception { - Application application = new Application(); - application.getScheduler().initialize(); - } -} diff --git a/crawler/kissweb/src/main/java/org/wamblee/crawler/kiss/servlet/CrawlerServlet.java b/crawler/kissweb/src/main/java/org/wamblee/crawler/kiss/servlet/CrawlerServlet.java deleted file mode 100644 index 72209398..00000000 --- a/crawler/kissweb/src/main/java/org/wamblee/crawler/kiss/servlet/CrawlerServlet.java +++ /dev/null @@ -1,103 +0,0 @@ -/* - * Copyright 2006 the original author or authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.wamblee.crawler.kiss.servlet; - -import java.io.IOException; -import java.io.OutputStream; - -import javax.servlet.ServletException; -import javax.servlet.http.HttpServlet; -import javax.servlet.http.HttpServletRequest; -import javax.servlet.http.HttpServletResponse; - -import org.wamblee.crawler.kiss.main.Report; -import org.wamblee.crawler.kiss.notification.Notifier; -import org.wamblee.crawler.kiss.scheduling.CrawlerScheduler; -import org.wamblee.crawler.kiss.scheduling.CrawlerStatus; -import org.wamblee.general.BeanKernel; - -/** - * - * - * @author Erik Brakkee - */ -public class CrawlerServlet extends HttpServlet { - - /* - * (non-Javadoc) - * - * @see javax.servlet.http.HttpServlet#doPost(javax.servlet.http.HttpServletRequest, - * javax.servlet.http.HttpServletResponse) - */ - @Override - protected void doPost(HttpServletRequest aRequest, - HttpServletResponse aResponse) throws ServletException, IOException { - - CrawlerScheduler scheduler = BeanKernel.getBeanFactory().find( - CrawlerScheduler.class); - CrawlerStatus status = BeanKernel.getBeanFactory().find( - CrawlerStatus.class); - - try { - if (aRequest.getParameter("details") != null) { - Report report = status.getLastReport(); - if (report != null) { - Notifier notifier = BeanKernel.getBeanFactory().find(Notifier.class); - aResponse.setContentType("text/html"); - OutputStream os = aResponse.getOutputStream(); - os.write(notifier.asHtml(report.asXml()).getBytes()); - return; - } - } - if (aRequest.getParameter("runnow") != null) { - status.setMustExecute(true); - 
scheduler.scheduleNow(); - aResponse.sendRedirect(""); - return; - } - aRequest.setAttribute("running", scheduler.isCrawlerRunning()); - aRequest.setAttribute("lastExecuted", status.getLastExecuted()); - aRequest.setAttribute("lastResult", status.getLastResult()); - aRequest.setAttribute("lastException", status.getLastException()); - aRequest.setAttribute("lastReport", status.getLastReport()); - String msg = ""; - Throwable e = status.getLastException(); - while (e != null) { - msg = msg + e.getClass().getName() + ": " + e.getMessage() - + "
"; - e = e.getCause(); - } - aRequest.setAttribute("lastMessage", msg); - } catch (Exception e) { - throw new ServletException("Error getting status", e); - } - aRequest.getRequestDispatcher("WEB-INF/overview.jsp").forward(aRequest, - aResponse); - } - - /* - * (non-Javadoc) - * - * @see javax.servlet.http.HttpServlet#doGet(javax.servlet.http.HttpServletRequest, - * javax.servlet.http.HttpServletResponse) - */ - @Override - protected void doGet(HttpServletRequest aRequest, - HttpServletResponse aResponse) throws ServletException, IOException { - doPost(aRequest, aResponse); - } -} diff --git a/crawler/kissweb/src/main/java/org/wamblee/crawler/kiss/spring/CrawlerBeanFactory.java b/crawler/kissweb/src/main/java/org/wamblee/crawler/kiss/spring/CrawlerBeanFactory.java deleted file mode 100644 index d4e2379c..00000000 --- a/crawler/kissweb/src/main/java/org/wamblee/crawler/kiss/spring/CrawlerBeanFactory.java +++ /dev/null @@ -1,36 +0,0 @@ -/* - * Copyright 2005 the original author or authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.wamblee.crawler.kiss.spring; -import org.wamblee.general.spring.SpringBeanFactory; - - -/** - * Bean factory for the crawler application. - * - * @author Erik Brakkee - */ -public class CrawlerBeanFactory extends SpringBeanFactory { - private static final String SELECTOR_NAME = "beanRefContext.xml"; - private static final String FACTORY_NAME = "crawler"; - - /** - * Constructs the bean factory. 
- * - */ - public CrawlerBeanFactory() { - super(SELECTOR_NAME, FACTORY_NAME); - } -} diff --git a/crawler/kissweb/src/main/resources/beanRefContext.xml b/crawler/kissweb/src/main/resources/beanRefContext.xml deleted file mode 100644 index 2e139dbc..00000000 --- a/crawler/kissweb/src/main/resources/beanRefContext.xml +++ /dev/null @@ -1,17 +0,0 @@ - - - - - - - - - org.wamblee.crawler.properties.xml - org.wamblee.crawler.notification.xml - org.wamblee.crawler.kiss.xml - - - - - \ No newline at end of file diff --git a/crawler/kissweb/src/main/resources/org.wamblee.beanfactory.properties b/crawler/kissweb/src/main/resources/org.wamblee.beanfactory.properties deleted file mode 100644 index 73239e65..00000000 --- a/crawler/kissweb/src/main/resources/org.wamblee.beanfactory.properties +++ /dev/null @@ -1,7 +0,0 @@ - -############################################################################## -# Class name of the beanfactory used by the crawler application -############################################################################## - -org.wamblee.beanfactory.class=org.wamblee.crawler.kiss.spring.CrawlerBeanFactory - diff --git a/crawler/kissweb/src/main/resources/org.wamblee.crawler.kiss.xml b/crawler/kissweb/src/main/resources/org.wamblee.crawler.kiss.xml deleted file mode 100644 index 022b4aa0..00000000 --- a/crawler/kissweb/src/main/resources/org.wamblee.crawler.kiss.xml +++ /dev/null @@ -1,31 +0,0 @@ - - - - - - - - 3600 - - - - - ${org.wamblee.crawler.config.epg} - ${org.wamblee.crawler.config.programs} - - - - - - - - 19 - 24 - - - diff --git a/crawler/kissweb/src/main/resources/org.wamblee.crawler.properties b/crawler/kissweb/src/main/resources/org.wamblee.crawler.properties deleted file mode 100644 index 0b98d7c3..00000000 --- a/crawler/kissweb/src/main/resources/org.wamblee.crawler.properties +++ /dev/null @@ -1,23 +0,0 @@ - - -############################################################################ -# Mail server configuration 
-############################################################################ -org.wamblee.crawler.smtp.host=shikra -org.wamblee.crawler.smtp.port=25 -org.wamblee.crawler.smtp.username= -org.wamblee.crawler.smtp.password= - -############################################################################ -# Mail notification configuration -############################################################################ -org.wamblee.crawler.notification.from=kiss@wamblee.org -org.wamblee.crawler.notification.to=erik@brakkee.org -org.wamblee.crawler.notification.subject=Recording summary for tomorrow - -############################################################################ -# Configuration of the crawler -############################################################################ -org.wamblee.crawler.config.epg=/home/erik/crawler/config.xml -org.wamblee.crawler.config.programs=/home/erik/crawler/programs.xml - diff --git a/crawler/kissweb/src/webapp/META-INF/MANIFEST.MF b/crawler/kissweb/src/webapp/META-INF/MANIFEST.MF deleted file mode 100644 index 5e949512..00000000 --- a/crawler/kissweb/src/webapp/META-INF/MANIFEST.MF +++ /dev/null @@ -1,3 +0,0 @@ -Manifest-Version: 1.0 -Class-Path: - diff --git a/crawler/kissweb/src/webapp/WEB-INF/overview.jsp b/crawler/kissweb/src/webapp/WEB-INF/overview.jsp deleted file mode 100644 index ce749492..00000000 --- a/crawler/kissweb/src/webapp/WEB-INF/overview.jsp +++ /dev/null @@ -1,74 +0,0 @@ - -<%@ page language="java" import="java.util.*" pageEncoding="UTF-8"%> -<%@ taglib prefix="c" uri="http://java.sun.com/jsp/jstl/core" %> - - - - - KiSS Crawler overview page - - - - - - - - - - - -

KiSS Crawler Overview

- - - - - - - - - - - - - - - - - - - - - - - - -
- Currently running: - - -
- Last executed at: - - -
- Last result: - - -
- Last message: - - -
- Last report: - - details -
- -
- -
-
- - diff --git a/crawler/kissweb/src/webapp/WEB-INF/web.xml b/crawler/kissweb/src/webapp/WEB-INF/web.xml deleted file mode 100644 index 2ff8779f..00000000 --- a/crawler/kissweb/src/webapp/WEB-INF/web.xml +++ /dev/null @@ -1,21 +0,0 @@ - - - - - org.wamblee.crawler.kiss.servlet.Application - - - - CrawlerServlet - org.wamblee.crawler.kiss.servlet.CrawlerServlet - - - - CrawlerServlet - / - - diff --git a/crawler/pom.xml b/crawler/pom.xml deleted file mode 100644 index b8bd38a5..00000000 --- a/crawler/pom.xml +++ /dev/null @@ -1,64 +0,0 @@ - - - org.wamblee - wamblee-utils - 0.2-SNAPSHOT - - 4.0.0 - org.wamblee - wamblee-crawler - pom - 0.2-SNAPSHOT - /crawler - http://wamblee.org - - basic - kiss - kissweb - - - - - - - - - org.wamblee - wamblee-crawler-basic - ${project.version} - - - org.wamblee - wamblee-crawler-kiss - ${project.version} - - - - - - - - - maven-assembly-plugin - 2.1 - - - src/assembly/kiss-application.xml - - - - - binpackage - package - - - - - - - - - - diff --git a/crawler/src/assembly/kiss-application.xml b/crawler/src/assembly/kiss-application.xml deleted file mode 100644 index 881e868d..00000000 --- a/crawler/src/assembly/kiss-application.xml +++ /dev/null @@ -1,60 +0,0 @@ - - kissbin - - zip - - false - - - - - org.wamblee:wamblee-crawler-kiss - - - true - lib - false - - - - - - - - kiss/conf/kiss - - conf - - config.xml.example - programs.xml - org.wamblee.crawler.properties - - - - - kiss/conf/kiss - - bin - - run.* - - - - - kiss/target - - lib - - *.jar - - - - - - - runtime - - - - - diff --git a/pom.xml b/pom.xml index 06a60969..b4270ec1 100644 --- a/pom.xml +++ b/pom.xml @@ -388,7 +388,6 @@ - org/wamblee/crawler/kiss/**/*.class org/wamblee/mythtv/**/*.class @@ -512,7 +511,6 @@ socketproxy - crawler mythtv