From: Erik Brakkee
Date: Sat, 1 Apr 2006 19:22:53 +0000 (+0000)
Subject: Initial version of forrest site for kiss crawler.
X-Git-Tag: wamblee-utils-0.7~1035
X-Git-Url: http://wamblee.org/gitweb/?a=commitdiff_plain;h=8e2ae53a674e61a70dfbb4d33328ac5a5686a41d;p=utils
Initial version of forrest site for kiss crawler.
---
diff --git a/crawler/kiss/docs/README.txt b/crawler/kiss/docs/README.txt
new file mode 100644
index 00000000..9bc261b2
--- /dev/null
+++ b/crawler/kiss/docs/README.txt
@@ -0,0 +1,7 @@
+This is the base documentation directory.
+
+skinconf.xml # This file customizes Forrest for your project. In it, you
+ # tell forrest the project name, logo, copyright info, etc
+
+sitemap.xmap # Optional. This sitemap is consulted before all core sitemaps.
+ # See http://forrest.apache.org/docs/project-sitemap.html
diff --git a/crawler/kiss/docs/classes/CatalogManager.properties b/crawler/kiss/docs/classes/CatalogManager.properties
new file mode 100644
index 00000000..af7b5ab3
--- /dev/null
+++ b/crawler/kiss/docs/classes/CatalogManager.properties
@@ -0,0 +1,57 @@
+# Copyright 2002-2005 The Apache Software Foundation or its licensors,
+# as applicable.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#=======================================================================
+# CatalogManager.properties for Catalog Entity Resolver.
+#
+# This is the default properties file for your project.
+# This facilitates local configuration of application-specific catalogs.
+# If you have defined any local catalogs, then they will be loaded
+# before Forrest's core catalogs.
+#
+# See the Apache Forrest documentation:
+# http://forrest.apache.org/docs/your-project.html
+# http://forrest.apache.org/docs/validation.html
+
+# verbosity:
+# The level of messages for status/debug (messages go to standard output).
+# The setting here is for your own local catalogs.
+# The verbosity of Forrest's core catalogs is controlled via
+# main/webapp/WEB-INF/cocoon.xconf
+#
+# The following messages are provided ...
+# 0 = none
+# 1 = ? (... not sure yet)
+# 2 = 1+, Loading catalog, Resolved public, Resolved system
+# 3 = 2+, Catalog does not exist, resolvePublic, resolveSystem
+# 10 = 3+, List all catalog entries when loading a catalog
+# (Cocoon also logs the "Resolved public" messages.)
+verbosity=1
+
+# catalogs ... list of additional catalogs to load
+# (Note that Apache Forrest will automatically load its own default catalog
+# from main/webapp/resources/schema/catalog.xcat)
+# Use either full pathnames or relative pathnames.
+# pathname separator is always semi-colon (;) regardless of operating system
+# directory separator is always slash (/) regardless of operating system
+catalogs=../resources/schema/catalog.xcat
+
+# relative-catalogs
+# If false, relative catalog URIs are made absolute with respect to the
+# base URI of the CatalogManager.properties file. This setting only
+# applies to catalog URIs obtained from the catalogs property in the
+# CatalogManager.properties file
+# Example: relative-catalogs=[yes|no]
+relative-catalogs=no
diff --git a/crawler/kiss/docs/content/test1.html b/crawler/kiss/docs/content/test1.html
new file mode 100644
index 00000000..1a174a8b
--- /dev/null
+++ b/crawler/kiss/docs/content/test1.html
@@ -0,0 +1,37 @@
+
+
+
+ Raw un-processed HTML page (test1)
+
+
+
raw un-processed HTML page (test1)
+
+ This raw HTML page is linked to from xdocs/samples/static.xml
+ and from xdocs/samples/linking.xml
+
+
+
diff --git a/crawler/kiss/docs/content/xdocs/hello.pdf b/crawler/kiss/docs/content/xdocs/hello.pdf
new file mode 100644
index 00000000..5ca4f313
--- /dev/null
+++ b/crawler/kiss/docs/content/xdocs/hello.pdf
@@ -0,0 +1,70 @@
+%PDF-1.3
+%ª«¬
+4 0 obj
+<< /Type /Info
+/Producer (FOP 0.20.4) >>
+endobj
+5 0 obj
+<< /Length 203 /Filter [ /ASCII85Decode /FlateDecode ]
+ >>
+stream
+Gar'!]afWZ&;9q-MRA)RFnblL2&]tQSZsjOOT[ck2SQkp(bfQ[R7ZPq=U24c0dqq_i?B[A.0s\)5f5
+endstream
+endobj
+6 0 obj
+<< /Type /Page
+/Parent 1 0 R
+/MediaBox [ 0 0 595 842 ]
+/Resources 3 0 R
+/Contents 5 0 R
+>>
+endobj
+7 0 obj
+<< /Type /Font
+/Subtype /Type1
+/Name /F1
+/BaseFont /Helvetica
+/Encoding /WinAnsiEncoding >>
+endobj
+8 0 obj
+<< /Type /Font
+/Subtype /Type1
+/Name /F5
+/BaseFont /Times-Roman
+/Encoding /WinAnsiEncoding >>
+endobj
+1 0 obj
+<< /Type /Pages
+/Count 1
+/Kids [6 0 R ] >>
+endobj
+2 0 obj
+<< /Type /Catalog
+/Pages 1 0 R
+ >>
+endobj
+3 0 obj
+<<
+/Font << /F1 7 0 R /F5 8 0 R >>
+/ProcSet [ /PDF /ImageC /Text ] >>
+endobj
+xref
+0 9
+0000000000 65535 f
+0000000687 00000 n
+0000000745 00000 n
+0000000795 00000 n
+0000000015 00000 n
+0000000071 00000 n
+0000000365 00000 n
+0000000471 00000 n
+0000000578 00000 n
+trailer
+<<
+/Size 9
+/Root 2 0 R
+/Info 4 0 R
+>>
+startxref
+883
+%%EOF
diff --git a/crawler/kiss/docs/content/xdocs/images/group-logo.gif b/crawler/kiss/docs/content/xdocs/images/group-logo.gif
new file mode 100644
index 00000000..f017f324
Binary files /dev/null and b/crawler/kiss/docs/content/xdocs/images/group-logo.gif differ
diff --git a/crawler/kiss/docs/content/xdocs/images/group.svg b/crawler/kiss/docs/content/xdocs/images/group.svg
new file mode 100644
index 00000000..584cedb8
--- /dev/null
+++ b/crawler/kiss/docs/content/xdocs/images/group.svg
@@ -0,0 +1,82 @@
+
+
+
+
+
+
+
diff --git a/crawler/kiss/docs/content/xdocs/images/icon.png b/crawler/kiss/docs/content/xdocs/images/icon.png
new file mode 100644
index 00000000..3be8bbbe
Binary files /dev/null and b/crawler/kiss/docs/content/xdocs/images/icon.png differ
diff --git a/crawler/kiss/docs/content/xdocs/images/project-logo.gif b/crawler/kiss/docs/content/xdocs/images/project-logo.gif
new file mode 100644
index 00000000..a60277a4
Binary files /dev/null and b/crawler/kiss/docs/content/xdocs/images/project-logo.gif differ
diff --git a/crawler/kiss/docs/content/xdocs/images/project.svg b/crawler/kiss/docs/content/xdocs/images/project.svg
new file mode 100644
index 00000000..01abcdbb
--- /dev/null
+++ b/crawler/kiss/docs/content/xdocs/images/project.svg
@@ -0,0 +1,82 @@
+
+
+
+
+
+
+
diff --git a/crawler/kiss/docs/content/xdocs/images/usemap.gif b/crawler/kiss/docs/content/xdocs/images/usemap.gif
new file mode 100644
index 00000000..c10732ca
Binary files /dev/null and b/crawler/kiss/docs/content/xdocs/images/usemap.gif differ
diff --git a/crawler/kiss/docs/content/xdocs/index.xml b/crawler/kiss/docs/content/xdocs/index.xml
new file mode 100644
index 00000000..26bdd02a
--- /dev/null
+++ b/crawler/kiss/docs/content/xdocs/index.xml
@@ -0,0 +1,49 @@
+
+
+
+
+
+ Welcome to MyProj
+
+
+
+ Congratulations
+
You have successfully generated and rendered an Apache Forrest site.
+ This page is from the site template. It is found in
+ src/documentation/content/xdocs/index.xml
+ Please edit it and replace this text with content of your own.
+
+
+
+ Using examples as templates
+
+ This demo site has many examples. See the menu at the left.
+ The sources for these examples are in the directory
+ src/documentation/content/xdocs/
+
+
+ The sources for the Apache Forrest website are also included
+ in your distribution at $FORREST_HOME/site-author/
+
+
You can also extend the functionality of Forrest via
+ plugins,
+ these will often come with more samples for you to try out.
+
+
+
diff --git a/crawler/kiss/docs/content/xdocs/samples/ascii-art.xml b/crawler/kiss/docs/content/xdocs/samples/ascii-art.xml
new file mode 100644
index 00000000..4f984d07
--- /dev/null
+++ b/crawler/kiss/docs/content/xdocs/samples/ascii-art.xml
@@ -0,0 +1,56 @@
+
+
+
+
+
+ Ascii Art sample
+
+
+
+ Sample Ascii Art
+
To create a .png image like the one below with ASCII art, just save
+ the text file with the .aart extension and then link from any page
+ as an image (<image src="asci-art-file.png"/>).
+
+
Here is the source file that has created the above image.
+
+
The ascii art pad recognizes the following ascii characters:
+
+
'-' horizontal SVG line
+
'|' vertical SVG line
+
'+' corner
+
\ oblique line
+
A string starting with a letter, digit, or '_' is converted to SVG text.
Forrest comes with a set of schemas for common documents, however, if you have existing documents
+ that use a different schema you will want to tell Forrest how to work with them. The best way of doing
+ this is to build a plugin
+ so that you can easily reuse the functionality on different projects. Plugins also allow you to share
+ this new functionality with other users, and to benefit from their contributions to your work.
+
+
If you don't want to build a plugin you can make Forrest process them within your project sitemap
+ (but this won't really save you any work since the process is almost the same). This sample site has
+ a demonstration of using a custom DTD. If you request <a href="custom.html">
+ you can see the results. Take a look at the project sitemap.xmap to see how it is done.
+
+ Adding custom schemas with a plugin has the added benefit of being able to add the schema
+ definition to the catalog file rather than having to reference it directly from within the XML
+ document.
+
+
diff --git a/crawler/kiss/docs/content/xdocs/samples/document-v13.xml b/crawler/kiss/docs/content/xdocs/samples/document-v13.xml
new file mode 100644
index 00000000..9ca3809f
--- /dev/null
+++ b/crawler/kiss/docs/content/xdocs/samples/document-v13.xml
@@ -0,0 +1,381 @@
+
+
+
+
+
+ The Apache Forrest xdocs document-v1.3 DTD
+ The content of this document doesn't make any sense at all.
+ This is a demonstration document using all possible elements in
+ the current Apache Forrest xdocs document-v13.dtd
+
+
+
+
+ This is a demonstration document using all possible elements in the
+ current Apache Forrest xdocs document-v13.dtd
+ (See the DTD changes section at the bottom.)
+
+
+ Sample Content
+
Hint: See the xml source to see how the various
+ elements are used and see the
+
+ DTD reference documentation.
+
+
+ Block and inline elements
+
This is a simple paragraph. Most documents contain a fair amount of
+ paragraphs. Paragraphs are called <p>.
+
With the <p xml:space="preserve"> attribute, you can declare
+ that whitespace should be preserved, without implying it is in any other
+ way special.
+
+ This next paragraph has a class attribute of 'quote'. CSS can
+ be used to present this <p class='quote'> in
+ a different style than the other paragraphs. The handling of
+ this quoted paragraph is defined in the <extra-css>
+ element in the skinconf.xml.
+
+
+ Anyway, like I was sayin', shrimp is the fruit of the sea. You can
+ barbecue it, boil it, broil it, bake it, sautee it. Dey's uh,
+ shrimp-kabobs, shrimp creole, shrimp gumbo. Pan fried, deep fried,
+ stir-fried. There's pineapple shrimp, lemon shrimp, coconut shrimp,
+ pepper shrimp, shrimp soup, shrimp stew, shrimp salad, shrimp and
+ potatoes, shrimp burger, shrimp sandwich. That- that's about it.
+
+
A number of in-line elements are available in the DTD, we will show them
+ inside an unordered list (<ul>):
+
+
Here is a simple list item (<li>).
+
Have you seen the use of the <code> element in the
+ previous item?
+
Also, we have <sub> and <sup>
+ elements to show content above or below the text
+ baseline.
+
There is a facility to emphasize certain words using the
+ <em><strong>
+ elements.
+
We can use
+
+ <icon>s too.
+
Another possibility is the <img> element:
+ ,
+ which offers the ability to refer to an image map.
+
We have elements for hyperlinking:
+
+
<link href="faq.html">
+
Use this to
+ link
+ to another document. As per normal, this will open the new document
+ in the same browser window.
+
+
<link href="#section">
+
Use this to
+ link
+ to the named anchor in the current document.
+
+
+
<link href="faq.html#forrest">
+
Use this to
+ link
+ to another document and go to the named anchor. This will open
+ the new document in the same browser window.
+
+
+
<jump href="faq.html">
+
Use this to
+ jump
+ to another document and optionally go to a named
+ anchor
+ within that document. This will open the new document in the same
+ browser window. So what is the difference between link and jump?
+ The jump behaves differently, in that it will replace any frames
+ in the current window.
+ This is the equivalent of
+ <a ... target="_top">
+
+
+
<fork href="faq.html">
+
Use this to
+ fork
+ your webbrowser to another document. This will open the document
+ in a new, unnamed browser window.
+ This is the equivalent of
+ <a ... target="_blank">
+
+
+
+
Oh, by the way, a definition list <dl> was used inside
+ the previous list item. We could put another
+
+
unordered list
+
inside the list item
+
+
+
A sample nested table
+
Or even tables..
+
inside tables..
+
+
or inside lists, but I believe this liberty gets quickly quite
+ hairy as you see.
+
+
+
+
So far for the in-line elements, let's look at some paragraph-level
+ elements.
+ The <fixme> element is used for stuff
+ which still needs work. Mind the author attribute!
+ Use the <note> element to draw attention to something, e.g. ...The <code> element is used when the author can't
+ express himself clearly using normal sentences ;-)
+ Sleep deprivation can be the result of being involved in an open
+ source project. (a.k.a. the <warning> element).
+
+ If you want your own labels for notes and
+ warnings, specify them using the label attribute.
+
+
Apart from unordered lists, we have ordered lists too, of course.
+
+
Item 1
+
Item 2
+
This should be 3 if my math is still OK.
+
+
+
+
+ Various presentation formats
+
+
This sample document, written in document-v13 XML can be presented
+ via Forrest in a number of different formats. The links in the
+ following list show this document in each of the currently available
+ formats.
+
+
Each of the formats can be made available as a link near the top of
+ the page. Actual placement of those links depends on the skin
+ currently in use. Those links are controlled via the
+ <disable-XXX-link> elements in the skinconf.xml
+
+
+
+
Presentation Format
+
+
Description
+
+
skinconf.xml Element
+
+
+
+
HTML
+
+
This document in HTML format.
+
+
Always generated by default. Cannot be turned off.
+
+
+
+
XML
+
+
This document in its raw XML format.
+
+
<disable-xml-link>. By default, set to true, meaning
+ that this link will not be shown.
+
+
+
+
PDF
+
+
This document as Adobe PDF
+
+
<disable-pdf-link>. By default, set to false, meaning
+ that this link will be shown.
+
+
+
+
Text
+
+
This document as straight text.
+
For additional information see the Forrest text-output
+ plugin.
+
+
<disable-txt-link>. By default, set to true, meaning
+ that this link will not be shown.
+
+
+
+
POD
+
+
This document as Perl POD (Plain Old Documentation). Text
+ with minimal formatting directives. If on a *nix system with perl
+ installed, see "man perlpod".
+
For additional information see the Forrest pod-output
+ plugin.
+
+
<disable-pod-link>. By default, set to true, meaning
+ that this link will not be shown.
+
+
+
+
+ Using sections
+
You can use sections to put some structure in your document. For some
+ strange historical reason, the section title is an attribute of the
+ <section> element.
+
+
+ Sections, the sequel
+
Just some second section.
+
+ Section 2.1
+
Which contains a subsection (2.1).
+
+
+
+
+ Showing preformatted source code
+
Enough about these sections. Let's have a look at more interesting
+ elements, <source> for instance:
+
+
CDATA sections are used within
+ <source> elements so that you can write pointy
+ brackets without needing to escape them with messy
+ < entities ...
+
+
+
Please take care to still use a sensible line-length within your
+ source elements.
+
+
+
+ Using tables
+
And now for a table:
+
+
Table caption
+
+
heading cell 1
+
heading cell 2
+
heading cell 3
+
+
+
data cell
+
this data cell spans two columns
+
+
+
+ Tables can be nested:
+
+
+
+
+
column 1
+
column 2
+
+
+
cell A
+
cell B
+
+
+
+
+
and can include most other elements
such as lists
+
+
+
+
+
+
+
+ Using figures
+
And a <figure> to end all of this.
+ Note that this can also be implemented with an
+ <img> element.
+
+
+
+
+
+
+ DTD changes
+
See the generated
+
+ DTD reference documentation.
+
+
+ Changes since document-v12
+
+ All v1.2 docs will work fine as v1.3 DTD. The main change is the
+ addition of a @class attribute to every element, which enables the
+ "extra-css" section in the skinconf to be put to good use.
+
+
+
+ Changes since document-v11
+
+ doc-v12 enhances doc-v11 by relaxing various restrictions that were
+ found to be unnecessary.
+
+
+
+ Links (link|jump|fork) and inline elements (br|img|icon|acronym) are
+ allowed inside title.
+
+
+ Paragraphs (p|source|note|warning|fixme), table and figure|anchor are
+ allowed inside li.
+
+
+ Paragraphs (p|source|note|warning|fixme), lists (ol|ul|dl), table,
+ figure|anchor are allowed inside definition lists (dd) and tables (td
+ and th).
+
+
+ Inline content
+ (strong|em|code|sub|sup|br|img|icon|acronym|link|jump|fork) is
+ allowed in strong and em.
+
An HTML document is used as the source for this page, and translated
+to the intermediate Apache Forrest xdocs document structure. The sitemap then
+does the normal aggregation with the navigation content and application of
+the skin.
+
+
+
+The html is being interpreted by Forrest and transformed to the
+intermediate Apache xdocs document structure. That stylesheet cannot deal
+with every possibility in unstructured html, so it tries to guess how to
+build <section> elements and such.
+It needs <h1> (<h2> etc.) headings in the source html
+in order to identify sections. Patches are welcome to enhance
+that transformer.
+
+
+
+XHTML can also be used, but it is just treated as interpreted
+html. Future versions of Forrest will take much more advantage of XHTML.
+
+
+
+
Some example uses of HTML
+
+There are situations when the Apache Forrest xdocs DTD is not sufficient.
+The use of embedded HTML enables you to use HTML code in these situations.
+
+This paragraph has a missing closing tag for the <p> element. If you look
+at the XML created by Forrest you'll notice that
+Forrest has fixed this.
+
+
Potentially Invalid XDocs
+
+However, it should also be noted that the resultant XML is not a valid document
+since it contains the additional HTML elements. If you are intending to use
+the intermediate XDocs for any purpose be aware of this fact.
+
+
Other non-standard html-type abilities
+
+Use other HTML .
+
+
+
diff --git a/crawler/kiss/docs/content/xdocs/samples/faq.xml b/crawler/kiss/docs/content/xdocs/samples/faq.xml
new file mode 100644
index 00000000..62b41ca5
--- /dev/null
+++ b/crawler/kiss/docs/content/xdocs/samples/faq.xml
@@ -0,0 +1,42 @@
+
+
+
+
+
+ Frequently Asked Questions
+
+
+ Documentation
+
+
+ How can I help write documentation?
+
+
+
+ This project uses Apache Forrest to
+ generate documentation from XML. Please download a copy of Forrest,
+ which can be used to validate, develop and render a project site.
+
Forrest has many powerful techniques for linking between documents
+ and for managing the site navigation. This document demonstrates those
+ techniques.
+ The document "Menus and Linking"
+ has the full details.
+
+
+
+
+ Building and maintaining consistent URI space
+
+ When Forrest builds your site, it starts from the front page. Like
+ a robot, it traverses all of the links that it finds in the documents
+ and builds the corresponding pages. Any new links are further traversed.
+
+
+ Sometimes those links lead to documents that are generated directly
+ from xml source files, sometimes they are generated from other source
+ via an intermediate xml format. Other times the links lead to raw
+ un-processed content.
+
+
+ The site navigation configuration file "site.xml" provides
+ a way to manage this URI space. In the future, when documents are
+ re-arranged and renamed, the site.xml configuration will enable this
+ smoothly.
+
+
+
+
+ Mapping the local resource space to the final URI space
+
+ For both generated and raw (un-processed) files, the top-level of the
+ URI space corresponds to the "content/xdocs/" directory,
+ i.e. the location of the "site.xml" configuration file.
+
+
+ In versions prior to 0.7 raw un-processed content was stored in
+ the "content/" directory. In 0.7 onwards, raw
+ un-processed data is stored alongside the xdocs. In addition,
+ in 0.6 and earlier, HTML documents could be stored in the xdocs
+ directory and served without processing. If you
+ wish to emulate the behaviour of 0.6 and earlier see the
+ next section.
+
+
+ A diagram will help.
+
+
+
+
+ How Plugins May Affect The URI Space
+
By using Forrest Input Plugins
+ you can process some file formats, such as
+ OpenOffice.org documents and produce processed content from them. For example,
+ the file content/xdocs/hello.sxw can be used to produce a
+ skinned version of the document with the name hello.html.
+ Similarly, you can use Forrest Output
+ Plugins to create different output formats such as PDF, in this
+ case content/xdocs/hello.sxw can produce
+ hello.pdf.
+
+
However, this does not affect the handling of raw content. That is, you
+ can still retrieve the raw un-processed version with, for example,
+ hello.sxw. If you want to prevent the user retrieving the
+ un-processed version you will have to create matchers that intercept
+ these requests within your project sitemap.
+
+
+
+
+
+ Basic link to internal generated pages
+
+ When this type of link is encountered, Forrest will look for a
+ corresponding xml file, relative to this document (i.e. in
+ content/xdocs/samples/).
+
+
A generated document in the current directory, which corresponds to
+ content/xdocs/samples/sample.html ...
+
+
+
In a sub-directory, which corresponds to
+ content/xdocs/samples/subdir/index.html ...
+
+
+
+
+
+ Basic link to raw un-processed content
+
+ Raw content files are not intended for any processing, they are just
+ linked to (e.g. pre-prepared PDFs, zip archives).
+ These files are placed alongside your normal content in the
+ "content/xdocs" directory.
+
+
A raw document in the current directory, which corresponds to
+ content/xdocs/samples/helloAgain.pdf ...
+
+
+
A raw document in a sub-directory, which corresponds to
+ content/xdocs/samples/subdir/hello.zip ...
+
+
+
A raw document at the next level up, which corresponds to
+ content/hello.pdf ...
+
Prior to version 0.7, the raw un-processed content was stored in
+ the "content/" directory. In 0.7 onwards, raw
+ un-processed data is stored alongside the xdocs. In addition
+ in 0.6 and earlier, HTML files could be stored in the xdocs
+ directory and they would be served without further processing.
+ As described above, this is not the case in 0.7 where HTML files
+ are, by default, skinned by Forrest.
+
+
If you
+ wish to emulate the behaviour of 0.6 and earlier then you
+ must add the following to your project sitemap.
+
+
+
+
The above allows us to create links to un-processed skinned files stored
+ in the {project:content} or {project:content.xdocs}
+ directory. For example:
+ <a href="/test1.html">HTML content</a>. However, it will
+ break the 0.7 behaviour of skinning HTML content. For this reason the old
+ ".ehtml" extension can be used to embed HTML content in a Forrest skinned
+ site
+
+
Note that you can change the matchers above to selectively serve some
+ content as raw un-processed content, whilst still serving other content
+ as skinned documents. For example, the following snippet would allow
+ you to serve the content of an old, deprecated site without processing
+ from Forrest, whilst still allowing all other content to be processed
+ by Forrest in the normal way:
+ Note that Forrest does not traverse external links to look for
+ other links.
+
+
+
+
+ Using site.xml to manage the links
+
As you will have discovered, using pathnames with ../../ etc. will
+ get very nasty. Real problems occur when you use a smart text editor
+ that tries to manage the links for you. For example, it will have
+ trouble linking to the raw content files which are not yet in their
+ final location.
+
+
+ Links and filenames are bound to change and re-arrange. It is
+ essential to only change those links in one central place, not in every
+ document.
+
+
+ The "site.xml" configuration file to the rescue. It maps
+ symbolic names to actual resources.
+
+
+
+ Basic link to internal generated pages
+
This single entry ...
+
+
+ enables a simple link to a generated document, which corresponds to
+ content/xdocs/index.xml ...
+
+
+
+
+
+ Group some items
+
This compound entry ...
+
+
+ enables a link to a generated document, which corresponds to
+ content/xdocs/samples/index.xml ...
+
+
+
+ and a link to a generated document, which corresponds to
+ content/xdocs/samples/faq.xml ...
+
+
+
+
+
+ Fragment identifiers
+
This compound entry ...
+
+
+ enables a link to a fragment identifier within the
+ samples/sample.html document ...
+
+
+
+
+
+ Define items for raw content
+
This entry ...
+
+
+ enables a link to a raw document, which corresponds to
+ content/hello.pdf ...
+
+
+
+
+
+
+ External links
+
This compound entry ...
+
+
+ enables a link to an external URL ...
+
+
+
+ and a link to another external URL ...
+
+
+
+ and a link to another external URL with a fragment identifier ...
+
+
+
+
+
+
diff --git a/crawler/kiss/docs/content/xdocs/samples/sample.xml b/crawler/kiss/docs/content/xdocs/samples/sample.xml
new file mode 100644
index 00000000..52774f7d
--- /dev/null
+++ b/crawler/kiss/docs/content/xdocs/samples/sample.xml
@@ -0,0 +1,407 @@
+
+
+
+
+
+ The Apache Forrest xdocs document-v2.0 DTD
+ The content of this document doesn't make any sense at all.
+ This is a demonstration document using all possible elements in
+ the current Apache Forrest xdocs document-v20.dtd
+
+
+
+
+ This is a demonstration document using all possible elements in the
+ current Apache Forrest xdocs document-v20.dtd
+ (See the DTD changes section at the bottom.)
+
+
+ Sample Content
+
Hint: See the xml source to see how the various
+ elements are used and see the
+
+ DTD reference documentation.
+
+
+ Block and inline elements
+
This is a simple paragraph. Most documents contain a fair amount of
+ paragraphs. Paragraphs are called <p>.
+
With the <p xml:space="preserve"> attribute, you can declare
+ that whitespace should be preserved, without implying it is in any other
+ way special.
+
+ This next paragraph has a class attribute of 'quote'. CSS can
+ be used to present this <p class='quote'> in
+ a different style than the other paragraphs. The handling of
+ this quoted paragraph is defined in the <extra-css>
+ element in the skinconf.xml.
+
+
+ Anyway, like I was sayin', shrimp is the fruit of the sea. You can
+ barbecue it, boil it, broil it, bake it, sautee it. Dey's uh,
+ shrimp-kabobs, shrimp creole, shrimp gumbo. Pan fried, deep fried,
+ stir-fried. There's pineapple shrimp, lemon shrimp, coconut shrimp,
+ pepper shrimp, shrimp soup, shrimp stew, shrimp salad, shrimp and
+ potatoes, shrimp burger, shrimp sandwich. That- that's about it.
+
+
A number of in-line elements are available in the DTD, we will show them
+ inside an unordered list (<ul>):
+
+
Here is a simple list item (<li>).
+
Have you seen the use of the <code> element in the
+ previous item?
+
Also, we have <sub> and <sup>
+ elements to show content above or below the text
+ baseline.
+
There is a facility to emphasize certain words using the
+ <em><strong>
+ elements.
+
We can use
+
+ <icon>s too.
+
Another possibility is the <img> element:
+ ,
+ which offers the ability to refer to an image map.
+
We have elements for hyperlinking:
+
+
<a href="faq.html">
+
Use this to
+ link
+ to another document. As per normal, this will open the new document
+ in the same browser window.
+
+
<a href="#section">
+
Use this to
+ link
+ to the named anchor in the current document.
+
+
+
<a href="faq.html#forrest">
+
Use this to
+ link
+ to another document and go to the named anchor. This will open
+ the new document in the same browser window.
+
Oh, by the way, a definition list <dl> was used inside
+ the previous list item. We could put another
+
+
unordered list
+
inside the list item
+
+
+
A sample nested table
+
Or even tables..
+
inside tables..
+
+
or inside lists, but I believe this liberty gets quickly quite
+ hairy as you see.
+
+
+
+
So far for the in-line elements, let's look at some paragraph-level
+ elements.
+ The <fixme> element is used for stuff
+ which still needs work. Mind the author attribute!
+ Use the <note> element to draw attention to something, e.g. ...The <code> element is used when the author can't
+ express himself clearly using normal sentences ;-)
+ Sleep deprivation can be the result of being involved in an open
+ source project. (a.k.a. the <warning> element).
+
+ If you want your own labels for notes and
+ warnings, specify them using the label attribute.
+
+
Apart from unordered lists, we have ordered lists too, of course.
+
+
Item 1
+
Item 2
+
This should be 3 if my math is still OK.
+
+
+
+
+ Various presentation formats
+
+
This sample document, written in document-v20 XML can be presented
+ via Forrest in a number of different formats. The links in the
+ following list show this document in each of the currently available
+ formats.
+
+
Each of the formats can be made available as a link near the top of
+ the page. Actual placement of those links depends on the skin
+ currently in use. Those links are controlled via the
+ <disable-XXX-link> elements in the skinconf.xml
<disable-pdf-link>. By default, set to false, meaning
+ that this link will be shown.
+
+
+
+
Text
+
+
This document as straight text.
+
For additional information see the Forrest text-output
+ plugin.
+
+
<disable-txt-link>. By default, set to true, meaning
+ that this link will not be shown.
+
+
+
+
POD
+
+
This document as Perl POD (Plain Old Documentation). Text
+ with minimal formatting directives. If on a *nix system with perl
+ installed, see "man perlpod".
+
For additional information see the Forrest pod-output
+ plugin.
+
+
<disable-pod-link>. By default, set to true, meaning
+ that this link will not be shown.
+
+
+
+
+ Using sections
+
You can use sections to put some structure in your document. For some
+ strange historical reason, the section title is an attribute of the
+ <section> element.
+
+
+ Sections, the sequel
+
Just some second section.
+
+ Section 2.1
+
Which contains a subsection (2.1).
+
+
+
+
+ Showing preformatted source code
+
Enough about these sections. Let's have a look at more interesting
+ elements, <source> for instance:
+
+
CDATA sections are used within
+ <source> elements so that you can write pointy
+ brackets without needing to escape them with messy
+ < entities ...
+
+
+
Please take care to still use a sensible line-length within your
+ source elements.
+
+
+
+ Using tables
+
And now for a table:
+
+
Table caption
+
+
heading cell 1
+
heading cell 2
+
heading cell 3
+
+
+
data cell
+
this data cell spans two columns
+
+
+
+ Tables can be nested:
+
+
+
+
+
column 1
+
column 2
+
+
+
cell A
+
cell B
+
+
+
+
+
and can include most other elements
such as lists
+
+
+
+
+
+
+
+ Using figures
+
And a <figure> to end all of this.
+ Note that this can also be implemented with an
+ <img> element.
+
+
+
+
+ Using class attribute on links
+
+
The document-v13 had elements <fork> and <jump>. In
+ document-v20, those elements no longer exist but the functionality can
+ be duplicated by using the @class attribute.
+ Even though the opening of separate windows should be under the
+ control of the user, these techniques can still be employed.
+
+
+ Changes between document-v12 and document-v13
+
+ All v1.2 docs will work fine as v1.3 DTD. The main change is the
+ addition of a @class attribute to every element, which enables the
+ "extra-css" section in the skinconf to be put to good use.
+
+
+
+ Changes between document-v11 and document-v12
+
+ doc-v12 enhances doc-v11 by relaxing various restrictions that were
+ found to be unnecessary.
+
+
+
+ Links (link|jump|fork) and inline elements (br|img|icon|acronym) are
+ allowed inside title.
+
+
+ Paragraphs (p|source|note|warning|fixme), table and figure|anchor are
+ allowed inside li.
+
+
+ Paragraphs (p|source|note|warning|fixme), lists (ol|ul|dl), table,
+ figure|anchor are allowed inside definition lists (dd) and tables (td
+ and th).
+
+
+ Inline content
+ (strong|em|code|sub|sup|br|img|icon|acronym|link|jump|fork) is
+ allowed in strong and em.
+
+
+
+
+
+
+
diff --git a/crawler/kiss/docs/content/xdocs/samples/static.xml b/crawler/kiss/docs/content/xdocs/samples/static.xml
new file mode 100644
index 00000000..9553525c
--- /dev/null
+++ b/crawler/kiss/docs/content/xdocs/samples/static.xml
@@ -0,0 +1,62 @@
+
+
+
+
+
+ Static content - including raw un-processed files and documents
+
+
+
+ Linking to static content
+
+ You can place some types of raw content into the xdocs directory. For example,
+ you can place a PDF file in src/documentation/content/xdocs and link
+ to it normally,
+ <a href="../hello.pdf">hello.pdf</a>
+ However, note that if the file is one that Forrest is able to process, for example
+ an HTML file, these files will be processed accordingly.
+
+
+ It is also worth noting that files in the xdocs directory will only be copied
+ into your final site if there is a link to them somewhere in the site. See the next
+ section for details of how to include content that is not linked.
+
+
+
+ Including Static Content that is Not Linked
+
+
+ You can include raw HTML, PDFs, plain-text, and other files in your final site by
+ placing them in the src/documentation/content directory. Files in this
+ directory will be copied over automatically but will not be processed in any way by
+ Forrest, that is they will be linked to as raw files.
+
+
+ You can also have sub-directories such as
+ src/documentation/content/samples/subdir/ which
+ reflects your main
+ xdocs/ tree. The raw files will then end up
+ beside your documents.
+
+
+
+
diff --git a/crawler/kiss/docs/content/xdocs/samples/subdir/book-sample.xml b/crawler/kiss/docs/content/xdocs/samples/subdir/book-sample.xml
new file mode 100644
index 00000000..f5396c66
--- /dev/null
+++ b/crawler/kiss/docs/content/xdocs/samples/subdir/book-sample.xml
@@ -0,0 +1,47 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/crawler/kiss/docs/content/xdocs/samples/subdir/hello.zip b/crawler/kiss/docs/content/xdocs/samples/subdir/hello.zip
new file mode 100644
index 00000000..b4fb6aae
Binary files /dev/null and b/crawler/kiss/docs/content/xdocs/samples/subdir/hello.zip differ
diff --git a/crawler/kiss/docs/content/xdocs/samples/subdir/index.xml b/crawler/kiss/docs/content/xdocs/samples/subdir/index.xml
new file mode 100644
index 00000000..d060da5b
--- /dev/null
+++ b/crawler/kiss/docs/content/xdocs/samples/subdir/index.xml
@@ -0,0 +1,38 @@
+
+
+
+
+
+ Page generated from a sub-directory
+
+
+
+
+
+
+
+
+ A sub-directory
+
This was generated from a sub-directory.
+
When creating new subdirectories, remember that these must
+ be declared in site.xml or each directory must have a book.xml file.
+