* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* @see org.wamblee.crawler.Crawler#getPage(java.lang.String)
*/
public Page getPage(String aUrl) throws PageException {
LOG.info("Getting page: url = '" + aUrl + "'");
PageRequest request = _config.getRequest(aUrl);
Document content = request.execute(aUrl, _client);
* @see org.wamblee.crawler.Crawler#getPage(java.lang.String)
*/
public Page getPage(String aUrl) throws PageException {
LOG.info("Getting page: url = '" + aUrl + "'");
PageRequest request = _config.getRequest(aUrl);
Document content = request.execute(aUrl, _client);
*/
public Page getPage(String aUrl, PageType aType) throws PageException {
LOG.info("Getting page: url = '" + aUrl + "', type = '" + aType + "'");
PageRequest request = _config.getRequest(aType);
Document content = request.execute(aUrl, _client);
*/
public Page getPage(String aUrl, PageType aType) throws PageException {
LOG.info("Getting page: url = '" + aUrl + "', type = '" + aType + "'");
PageRequest request = _config.getRequest(aType);
Document content = request.execute(aUrl, _client);
DOMReader reader = new DOMReader();
org.dom4j.Document dom4jDoc = reader.read(content);
Element root = dom4jDoc.getRootElement();
dom4jDoc.remove(root);
DOMReader reader = new DOMReader();
org.dom4j.Document dom4jDoc = reader.read(content);
Element root = dom4jDoc.getRootElement();
dom4jDoc.remove(root);
- * Perform crawling. Find references in the retrieved content and replace them
- * by the content they refer to by retrieving the appropriate pages as well.
- * @param content Content which must be made complete.
- * @return Fully processed content.
+ * Perform crawling. Find references in the retrieved content and replace
+ * them by the content they refer to by retrieving the appropriate pages as
+ * well.
+ *
+ * @param content
+ * Content which must be made complete.
+ * @return Fully processed content.
- private Element replaceReferencesWithContent(Element content) {
- return content; // TODO implement.
+ private Element replaceReferencesWithContent(Element content) {
+ return content; // TODO implement.