2 * Copyright 2005 the original author or authors.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 package org.wamblee.crawler.impl;
19 import java.util.ArrayList;
20 import java.util.List;
22 import org.dom4j.DocumentHelper;
23 import org.dom4j.Element;
24 import org.dom4j.XPath;
25 import org.wamblee.crawler.Action;
26 import org.wamblee.crawler.Crawler;
27 import org.wamblee.crawler.Page;
28 import org.wamblee.crawler.PageType;
31 * Page implementation that derives its actions from the "action" elements of its content element.
33 public class PageImpl implements Page {
// XPath expression evaluated against the page content in computeActions();
// it selects the elements that describe actions.
35 private static final String ELEM_NAME = "action";
// Attribute of an action element that holds the action name.
37 private static final String ATT_NAME = "name";
// Attribute of an action element that holds the reference (href) of the action.
39 private static final String ATT_HREF = "reference";
// Attribute of an action element that holds the page type; its value is
// wrapped in a PageType when the action is created.
41 private static final String ATT_TYPE = "type";
// Crawler handed to every ActionImpl created by computeActions().
43 private Crawler _crawler;
// Element the ELEM_NAME XPath is evaluated against.
// NOTE(review): presumably also what getContent() returns - confirm.
45 private Element _content;
// Actions of this page; computed once in the constructor via
// computeActions() and searched by getAction(String).
47 private Action[] _actions;
// Creates a page for the given crawler and content element and computes the
// page's actions immediately.
// NOTE(review): presumably aCrawler and aContent are stored in _crawler and
// _content before the call below - confirm; computeActions() reads both fields.
54 public PageImpl(Crawler aCrawler, Element aContent) {
// The action array is built once here and never recomputed in the visible code.
57 _actions = computeActions();
63 * Computes the actions of this page from the "action" elements selected in the page content.
// Builds the actions of this page: evaluates the ELEM_NAME XPath against
// _content, reads the name, reference and type attributes of every selected
// element and creates one ActionImpl per element.
// Returns the created actions as an array, in the order the elements were
// selected.
65 private Action[] computeActions() {
66 XPath xpath = DocumentHelper.createXPath(ELEM_NAME);
// Unchecked cast: the selected nodes are treated as dom4j elements.
67 List<Element> results = (List<Element>) xpath.selectNodes(_content);
68 List<Action> names = new ArrayList<Action>();
69 for (Element elem : results) {
70 String name = elem.attributeValue(ATT_NAME);
71 String href = elem.attributeValue(ATT_HREF);
72 String type = elem.attributeValue(ATT_TYPE);
// Two ActionImpl constructors are used: one without a page type and one with
// a PageType built from the "type" attribute.
// NOTE(review): presumably the choice depends on whether the type attribute
// is present (null check on 'type') - confirm.
74 names.add(new ActionImpl(_crawler, elem, name, href));
77 names.add(new ActionImpl(_crawler, elem, name, href, new PageType(type)));
// The zero-length array only conveys the element type; toArray allocates an
// array of the right size.
80 return names.toArray(new Action[0]);
86 * @see org.wamblee.crawler.Page#getContent()
// Accessor for the page content, see Page#getContent().
// NOTE(review): presumably returns the _content element itself - confirm.
88 public Element getContent() {
93 * @see org.wamblee.crawler.Page#getActions()
// Accessor for the actions of this page, see Page#getActions().
// NOTE(review): presumably returns the internal _actions array without
// copying, so callers would share it - confirm.
95 public Action[] getActions() {
101 * @see org.wamblee.crawler.Page#getAction(java.lang.String)
// Looks up the action with the given name among the actions of this page.
// aName: name to match against Action#getName() using equals().
// Returns the single matching action.
// Throws RuntimeException when more than one action carries the name.
103 public Action getAction(String aName) {
// All matches are collected first so that duplicates can be detected.
104 List<Action> results = new ArrayList<Action>();
105 for (Action action: _actions) {
// equals() is invoked on the action's name, so an action whose getName()
// returns null would cause a NullPointerException here.
// NOTE(review): presumably a matching action is added to 'results' inside
// this branch - confirm.
106 if ( action.getName().equals(aName)) {
// NOTE(review): what happens when no action matches is decided inside this
// branch (e.g. returning null versus throwing) - confirm and document it
// for callers.
110 if (results.size() == 0) {
113 if (results.size() > 1) {
114 throw new RuntimeException("Duplicate link '" + aName + "'");
// Exactly one match remains at this point.
116 return results.get(0);