001: package net.sf.saxon.event;
002:
003: import net.sf.saxon.charcode.CharacterSet;
004: import net.sf.saxon.charcode.CharacterSetFactory;
005: import net.sf.saxon.om.FastStringBuffer;
006: import net.sf.saxon.om.XMLChar;
007: import net.sf.saxon.tinytree.CharSlice;
008: import net.sf.saxon.trans.XPathException;
009:
010: import javax.xml.transform.OutputKeys;
011: import java.util.Properties;
012: import java.util.Stack;
013: import java.util.StringTokenizer;
014:
015: /**
016: * CDATAFilter: This ProxyEmitter converts character data to CDATA sections,
017: * if the character data belongs to one of a set of element types to be handled this way.
018: *
019: * @author Michael Kay
020: */
021:
022: public class CDATAFilter extends ProxyReceiver {
023:
024: private FastStringBuffer buffer = new FastStringBuffer(256);
025: private Stack stack = new Stack();
026: private int[] nameList; // fingerprints of cdata elements
027: private CharacterSet characterSet;
028:
029: /**
030: * Set the properties for this CDATA filter
031: */
032:
033: public void setOutputProperties(Properties details)
034: throws XPathException {
035: nameList = getCdataElements(details);
036: characterSet = CharacterSetFactory.getCharacterSet(details,
037: getPipelineConfiguration().getController());
038: }
039:
040: /**
041: * Output element start tag
042: */
043:
044: public void startElement(int nameCode, int typeCode,
045: int locationId, int properties) throws XPathException {
046: flush(buffer);
047: stack.push(new Integer(nameCode & 0xfffff));
048: super .startElement(nameCode, typeCode, locationId, properties);
049: }
050:
051: /**
052: * Output element end tag
053: */
054:
055: public void endElement() throws XPathException {
056: flush(buffer);
057: stack.pop();
058: super .endElement();
059: }
060:
061: /**
062: * Output a processing instruction
063: */
064:
065: public void processingInstruction(String target, CharSequence data,
066: int locationId, int properties) throws XPathException {
067: flush(buffer);
068: super .processingInstruction(target, data, locationId,
069: properties);
070: }
071:
072: /**
073: * Output character data
074: */
075:
076: public void characters(CharSequence chars, int locationId,
077: int properties) throws XPathException {
078:
079: if ((properties & ReceiverOptions.DISABLE_ESCAPING) == 0) {
080: buffer.append(chars.toString());
081: } else {
082: // if the user requests disable-output-escaping, this overrides the CDATA request. We end
083: // the CDATA section and output the characters as supplied.
084: flush(buffer);
085: super .characters(chars, locationId, properties);
086: }
087: }
088:
089: /**
090: * Output a comment
091: */
092:
093: public void comment(CharSequence chars, int locationId,
094: int properties) throws XPathException {
095: flush(buffer);
096: super .comment(chars, locationId, properties);
097: }
098:
099: /**
100: * Flush the buffer containing accumulated character data,
101: * generating it as CDATA where appropriate
102: */
103:
104: public void flush(FastStringBuffer buffer) throws XPathException {
105: boolean cdata;
106: int end = buffer.length();
107: if (end == 0)
108: return;
109:
110: if (stack.isEmpty()) {
111: cdata = false; // text is not part of any element
112: } else {
113: int fprint = ((Integer) stack.peek()).intValue();
114: cdata = isCDATA(fprint);
115: }
116:
117: if (cdata) {
118:
119: // Check that the buffer doesn't include a character not available in the current
120: // encoding
121:
122: int start = 0;
123: int k = 0;
124: while (k < end) {
125: int next = buffer.charAt(k);
126: int skip = 1;
127: if (XMLChar.isHighSurrogate((char) next)) {
128: next = XMLChar.supplemental((char) next, buffer
129: .charAt(k + 1));
130: skip = 2;
131: }
132: if (characterSet.inCharset(next)) {
133: k++;
134: } else {
135:
136: // flush out the preceding characters as CDATA
137:
138: char[] array = new char[k - start];
139: buffer.getChars(start, k, array, 0);
140: flushCDATA(array, k - start);
141:
142: while (k < end) {
143: // output consecutive non-encodable characters
144: // before restarting the CDATA section
145: //super.characters(CharBuffer.wrap(buffer, k, k+skip), 0, 0);
146: super .characters(buffer
147: .subSequence(k, k + skip), 0, 0);
148: // was: (..., ReceiverOptions.DISABLE_ESCAPING);
149: k += skip;
150: if (k >= end) {
151: break;
152: }
153: next = buffer.charAt(k);
154: skip = 1;
155: if (XMLChar.isHighSurrogate((char) next)) {
156: next = XMLChar.supplemental((char) next,
157: buffer.charAt(k + 1));
158: skip = 2;
159: }
160: if (characterSet.inCharset(next)) {
161: break;
162: }
163: }
164: start = k;
165: }
166: }
167: char[] rest = new char[end - start];
168: buffer.getChars(start, end, rest, 0);
169: flushCDATA(rest, end - start);
170:
171: } else {
172: // char[] array = new char[end];
173: // buffer.getChars(0, end, array, 0);
174: // super.characters(CharBuffer.wrap(array, 0, end), 0, 0);
175: super .characters(buffer, 0, 0);
176: }
177:
178: buffer.setLength(0);
179:
180: }
181:
182: /**
183: * Output an array as a CDATA section. At this stage we have checked that all the characters
184: * are OK, but we haven't checked that there is no "]]>" sequence in the data
185: */
186:
187: private void flushCDATA(char[] array, int len)
188: throws XPathException {
189: if (len == 0) {
190: return;
191: }
192: super .characters("<![CDATA[", 0,
193: ReceiverOptions.DISABLE_ESCAPING);
194:
195: // Check that the character data doesn't include the substring "]]>"
196:
197: int i = 0;
198: int doneto = 0;
199: while (i < len - 2) {
200: if (array[i] == ']' && array[i + 1] == ']'
201: && array[i + 2] == '>') {
202: super .characters(new CharSlice(array, doneto, i + 2
203: - doneto), 0, ReceiverOptions.DISABLE_ESCAPING);
204: super .characters("]]><![CDATA[", 0,
205: ReceiverOptions.DISABLE_ESCAPING);
206: doneto = i + 2;
207: }
208: i++;
209: }
210: super .characters(new CharSlice(array, doneto, len - doneto), 0,
211: ReceiverOptions.DISABLE_ESCAPING);
212: super .characters("]]>", 0, ReceiverOptions.DISABLE_ESCAPING);
213: }
214:
215: /**
216: * See if a particular element is a CDATA element
217: */
218:
219: private boolean isCDATA(int fingerprint) {
220: for (int i = 0; i < nameList.length; i++) {
221: if (nameList[i] == fingerprint)
222: return true;
223: }
224: return false;
225: }
226:
227: /**
228: * Extract the list of CDATA elements from the output properties
229: */
230:
231: private int[] getCdataElements(Properties details) {
232: String cdata = details
233: .getProperty(OutputKeys.CDATA_SECTION_ELEMENTS);
234: if (cdata == null) {
235: // this doesn't happen, but there's no harm allowing for it
236: return new int[0];
237: }
238: // first count the number of names in the list
239: int count = 0;
240: StringTokenizer st1 = new StringTokenizer(cdata);
241: while (st1.hasMoreTokens()) {
242: st1.nextToken();
243: count++;
244: }
245: int[] array = new int[count];
246: count = 0;
247: StringTokenizer st2 = new StringTokenizer(cdata);
248: while (st2.hasMoreTokens()) {
249: String expandedName = st2.nextToken();
250: array[count++] = getNamePool()
251: .getFingerprintForExpandedName(expandedName);
252: }
253: return array;
254: }
255:
256: };
257:
258: //
259: // The contents of this file are subject to the Mozilla Public License Version 1.0 (the "License");
260: // you may not use this file except in compliance with the License. You may obtain a copy of the
261: // License at http://www.mozilla.org/MPL/
262: //
263: // Software distributed under the License is distributed on an "AS IS" basis,
264: // WITHOUT WARRANTY OF ANY KIND, either express or implied.
265: // See the License for the specific language governing rights and limitations under the License.
266: //
267: // The Original Code is: all this file.
268: //
269: // The Initial Developer of the Original Code is Michael H. Kay.
270: //
271: // Portions created by (your name) are Copyright (C) (your legal entity). All Rights Reserved.
272: //
273: // Contributor(s): none.
274: //
|