001: /*
002: * Copyright 2004 Outerthought bvba and Schaubroeck nv
003: *
004: * Licensed under the Apache License, Version 2.0 (the "License"); you may not
005: * use this file except in compliance with the License. You may obtain a copy of
006: * the License at
007: *
008: * http://www.apache.org/licenses/LICENSE-2.0
009: *
010: * Unless required by applicable law or agreed to in writing, software
011: * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
012: * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
013: * License for the specific language governing permissions and limitations under
014: * the License.
015: */
016: package org.outerj.daisy.frontend.editor;
017:
018: import org.apache.cocoon.forms.datatype.convertor.Convertor;
019: import org.apache.cocoon.forms.datatype.convertor.ConversionResult;
020: import org.apache.cocoon.forms.validation.ValidationError;
021: import org.apache.commons.lang.exception.ExceptionUtils;
022: import org.xml.sax.ContentHandler;
023: import org.xml.sax.SAXException;
024: import org.xml.sax.InputSource;
025: import org.cyberneko.html.parsers.SAXParser;
026:
027: import javax.xml.transform.OutputKeys;
028: import javax.xml.transform.sax.SAXTransformerFactory;
029: import javax.xml.transform.sax.TransformerHandler;
030: import javax.xml.transform.stream.StreamResult;
031: import java.util.Locale;
032: import java.io.StringReader;
033: import java.io.StringWriter;
034:
035: /**
036: * Cleans HTML to well-formed XML using NekoHTML.
037: */
038: public class LiteralHtmlCleaningConvertor implements Convertor {
039:
040: public ConversionResult convertFromString(String text,
041: Locale locale, FormatCache formatCache) {
042: try {
043: InputSource is = new InputSource();
044: is.setCharacterStream(new StringReader(text));
045:
046: SAXParser parser = new SAXParser();
047: parser.setFeature("http://xml.org/sax/features/namespaces",
048: true);
049: parser
050: .setFeature(
051: "http://cyberneko.org/html/features/override-namespaces",
052: false);
053: parser
054: .setFeature(
055: "http://cyberneko.org/html/features/insert-namespaces",
056: false);
057: parser
058: .setFeature(
059: "http://cyberneko.org/html/features/scanner/ignore-specified-charset",
060: true);
061: parser
062: .setProperty(
063: "http://cyberneko.org/html/properties/default-encoding",
064: "UTF-8");
065: parser.setProperty(
066: "http://cyberneko.org/html/properties/names/elems",
067: "lower");
068: parser.setProperty(
069: "http://cyberneko.org/html/properties/names/attrs",
070: "lower");
071:
072: // TODO creating a sax transformer factory is probably expensive?
073: SAXTransformerFactory transformerFactory = (SAXTransformerFactory) SAXTransformerFactory
074: .newInstance();
075: TransformerHandler serializer = transformerFactory
076: .newTransformerHandler();
077:
078: serializer.getTransformer().setOutputProperty(
079: OutputKeys.METHOD, "xml");
080: serializer.getTransformer().setOutputProperty(
081: OutputKeys.OMIT_XML_DECLARATION, "yes");
082:
083: StringWriter writer = new StringWriter();
084: serializer.setResult(new StreamResult(writer));
085:
086: parser.setContentHandler(serializer);
087: parser.setProperty(
088: "http://xml.org/sax/properties/lexical-handler",
089: serializer);
090: parser.parse(is);
091:
092: return new ConversionResult(writer.toString());
093: } catch (Throwable e) {
094: Throwable t = ExceptionUtils.getRootCause(e);
095: if (t == null)
096: t = e;
097: String message = t.getMessage();
098: if (message == null)
099: message = t.toString();
100: ValidationError validationError = new ValidationError(
101: message, false);
102: return new ConversionResult(validationError);
103:
104: }
105: }
106:
107: public String convertToString(Object object, Locale locale,
108: FormatCache formatCache) {
109: return (String) object;
110: }
111:
112: public Class getTypeClass() {
113: return java.lang.String.class;
114: }
115:
116: public void generateSaxFragment(ContentHandler contentHandler,
117: Locale locale) throws SAXException {
118: // nothing to say about me
119: }
120: }
|