X-Git-Url: http://wamblee.org/gitweb/?a=blobdiff_plain;f=support%2Fsrc%2Forg%2Fwamblee%2Fxml%2FDomUtils.java;h=5651e34137f8caeec1c419a630a6840a947b1a82;hb=8f2d78e446f48a1ed156b252998ae17cd6f0ba2b;hp=4fdb52e752b6517440ed68538c36c1a86b2a9be9;hpb=d072c523829f9be6522b983962b0e1ea15788dad;p=utils diff --git a/support/src/org/wamblee/xml/DomUtils.java b/support/src/org/wamblee/xml/DomUtils.java index 4fdb52e7..5651e341 100644 --- a/support/src/org/wamblee/xml/DomUtils.java +++ b/support/src/org/wamblee/xml/DomUtils.java @@ -12,88 +12,240 @@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. - */ + */ package org.wamblee.xml; +import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.TreeMap; +import javax.xml.XMLConstants; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; +import javax.xml.transform.stream.StreamSource; +import javax.xml.validation.Schema; +import javax.xml.validation.SchemaFactory; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.xml.serialize.OutputFormat; import org.apache.xml.serialize.XMLSerializer; import org.dom4j.DocumentException; import org.dom4j.io.DOMReader; import org.dom4j.io.DOMWriter; +import org.w3c.dom.Attr; import org.w3c.dom.Document; +import org.w3c.dom.Element; +import org.w3c.dom.NamedNodeMap; +import org.w3c.dom.Node; +import org.w3c.dom.NodeList; import org.xml.sax.SAXException; +import com.sun.org.apache.xerces.internal.jaxp.DocumentBuilderFactoryImpl; +import com.sun.org.apache.xerces.internal.jaxp.validation.xs.SchemaFactoryImpl; + /** - * Some basic XML utilities for common reoccuring tasks for - * DOM documents. + * Some basic XML utilities for common reoccuring tasks for DOM documents. */ public final class DomUtils { - + + private static final Log LOG = LogFactory.getLog(DomUtils.class); + + /** + * Disabled default constructor. + * + */ + private DomUtils() { + // Empty. + } + /** - * Disabled default constructor. - * + * Parses an XML document from a string. + * + * @param aDocument + * document. + * @return */ - private DomUtils() { - // Empty. + public static Document read(String aDocument) throws XMLException { + ByteArrayInputStream is = new ByteArrayInputStream(aDocument.getBytes()); + return read(is); } - - + /** - * Parses an XML document from a stream. - * @param aIs Input stream. + * Parses an XML document from a stream. + * + * @param aIs + * Input stream. * @return */ - public static Document read(InputStream aIs) throws SAXException, ParserConfigurationException, IOException { - DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder(); - return builder.parse(aIs); + public static Document read(InputStream aIs) throws XMLException { + try { + DocumentBuilder builder = DocumentBuilderFactory.newInstance() + .newDocumentBuilder(); + return builder.parse(aIs); + } catch (SAXException e) { + throw new XMLException(e.getMessage(), e); + } catch (IOException e) { + throw new XMLException(e.getMessage(), e); + } catch (ParserConfigurationException e) { + throw new XMLException(e.getMessage(), e); + } finally { + try { + aIs.close(); + } catch (Exception e) { + LOG.warn("Error closing XML file", e); + } + } + } + + /** + * Reads and validates a document against a schema. + * + * @param aIs + * Input stream. + * @param aSchema + * Schema. + * @return Parsed and validated document. + */ + public static Document readAndValidate(InputStream aIs, InputStream aSchema) + throws XMLException { + + try { + final Schema schema = SchemaFactory.newInstance( + XMLConstants.W3C_XML_SCHEMA_NS_URI).newSchema( + new StreamSource(aSchema)); + + final DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); + factory.setValidating(true); + factory.setNamespaceAware(true); + factory.setSchema(schema); + + return factory.newDocumentBuilder().parse(aIs); + } catch (SAXException e) { + throw new XMLException(e.getMessage(), e); + } catch (IOException e) { + throw new XMLException(e.getMessage(), e); + } catch (ParserConfigurationException e) { + throw new XMLException(e.getMessage(), e); + } finally { + try { + aSchema.close(); + } catch (Exception e) { + LOG.warn("Error closing schema", e); + } + try { + aIs.close(); + } catch (Exception e) { + LOG.warn("Error closing XML file", e); + } + } + } /** - * Serializes an XML document to a stream. - * @param aDocument Document to serialize. - * @param aOs Output stream. + * Serializes an XML document to a stream. + * + * @param aDocument + * Document to serialize. + * @param aOs + * Output stream. */ - public static void serialize(Document aDocument, OutputStream aOs) throws IOException { + public static void serialize(Document aDocument, OutputStream aOs) + throws IOException { XMLSerializer serializer = new XMLSerializer(aOs, new OutputFormat()); serializer.serialize(aDocument); } - + /** - * Serializes an XML document. - * @param aDocument Document to serialize. - * @return Serialized document. + * Serializes an XML document. + * + * @param aDocument + * Document to serialize. + * @return Serialized document. */ public static String serialize(Document aDocument) throws IOException { ByteArrayOutputStream os = new ByteArrayOutputStream(); - serialize(aDocument, os); - return os.toString(); + serialize(aDocument, os); + return os.toString(); } - + /** - * Converts a dom4j document into a w3c DOM document. - * @param aDocument Document to convert. - * @return W3C DOM document. + * Converts a dom4j document into a w3c DOM document. + * + * @param aDocument + * Document to convert. + * @return W3C DOM document. */ - public static Document convert(org.dom4j.Document aDocument) throws DocumentException { + public static Document convert(org.dom4j.Document aDocument) + throws DocumentException { return new DOMWriter().write(aDocument); } /** - * Converts a W3C DOM document into a dom4j document. - * @param aDocument Document to convert. + * Converts a W3C DOM document into a dom4j document. + * + * @param aDocument + * Document to convert. * @return Dom4j document. */ - public static org.dom4j.Document convert(Document aDocument) { - return new DOMReader().read(aDocument); + public static org.dom4j.Document convert(Document aDocument) { + return new DOMReader().read(aDocument); + } + + /** + * Removes duplicate attributes from a DOM tree.This is useful for + * postprocessing the output of JTidy as a workaround for a bug in JTidy. + * + * @param aNode + * Node to remove duplicate attributes from (recursively). + * Attributes of the node itself are not dealt with. Only the + * child nodes are dealt with. + */ + public static void removeDuplicateAttributes(Node aNode) { + NodeList list = aNode.getChildNodes(); + for (int i = 0; i < list.getLength(); i++) { + Node node = list.item(i); + if (node instanceof Element) { + removeDuplicateAttributes((Element) node); + removeDuplicateAttributes(node); + } + } + } + + /** + * Removes duplicate attributes from an element. + * + * @param aElement + * Element. + */ + private static void removeDuplicateAttributes(Element aElement) { + NamedNodeMap attributes = aElement.getAttributes(); + Map uniqueAttributes = new TreeMap(); + List attlist = new ArrayList(); + for (int i = 0; i < attributes.getLength(); i++) { + Attr attribute = (Attr) attributes.item(i); + if (uniqueAttributes.containsKey(attribute.getNodeName())) { + LOG.info("Detected duplicate attribute (will be removed)'" + + attribute.getNodeName() + "'"); + } + uniqueAttributes.put(attribute.getNodeName(), attribute); + attlist.add(attribute); + } + // Remove all attributes from the element. + for (Attr att : attlist) { + aElement.removeAttributeNode(att); + } + // Add the unique attributes back to the element. + for (Attr att : uniqueAttributes.values()) { + aElement.setAttributeNode(att); + } } }