001: /******************************************************************************
002: * JBoss, a division of Red Hat *
003: * Copyright 2006, Red Hat Middleware, LLC, and individual *
004: * contributors as indicated by the @authors tag. See the *
005: * copyright.txt in the distribution for a full listing of *
006: * individual contributors. *
007: * *
008: * This is free software; you can redistribute it and/or modify it *
009: * under the terms of the GNU Lesser General Public License as *
010: * published by the Free Software Foundation; either version 2.1 of *
011: * the License, or (at your option) any later version. *
012: * *
013: * This software is distributed in the hope that it will be useful, *
014: * but WITHOUT ANY WARRANTY; without even the implied warranty of *
015: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU *
016: * Lesser General Public License for more details. *
017: * *
018: * You should have received a copy of the GNU Lesser General Public *
019: * License along with this software; if not, write to the Free *
020: * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA *
021: * 02110-1301 USA, or see the FSF site: http://www.fsf.org. *
022: ******************************************************************************/package org.jboss.portal.format.util;
023:
/**
 * Lookup table that maps a character to the name of its HTML entity, for example {@code '<'} to
 * {@code "lt"}.
 *
 * <p>Two shared instances are provided: {@link #FULL} and {@link #BASIC}.
 */
public class EntityTable {

    /** All HTML entities. */
    public static final EntityTable FULL = new EntityTable();

    /** All HTML entities except the HTML chars which are used to do HTML itself. */
    public static final EntityTable BASIC = new BasicEntityTable();

    /** The full table minus the entries for {@code <}, {@code >}, {@code "} and {@code &}. */
    private static final class BasicEntityTable extends EntityTable {
        BasicEntityTable() {
            remove('<');
            remove('>');
            remove('"');
            remove('&');
        }
    }

    /** Entity name indexed by char code (one slot per possible char value); null means no entity. */
    private final String[] charToEntity = new String[65536];

    /** Creates a table populated with every entity mapping listed below. */
    protected EntityTable() {
        // Char codes 160-255
        put(160, "nbsp");
        put(161, "iexcl");
        put(162, "cent");
        put(163, "pound");
        put(164, "curren");
        put(165, "yen");
        put(166, "brvbar");
        put(167, "sect");
        put(168, "uml");
        put(169, "copy");
        put(170, "ordf");
        put(171, "laquo");
        put(172, "not");
        put(173, "shy");
        put(174, "reg");
        put(175, "macr");
        put(176, "deg");
        put(177, "plusmn");
        put(178, "sup2");
        put(179, "sup3");
        put(180, "acute");
        put(181, "micro");
        put(182, "para");
        put(183, "middot");
        put(184, "cedil");
        put(185, "sup1");
        put(186, "ordm");
        put(187, "raquo");
        put(188, "frac14");
        put(189, "frac12");
        put(190, "frac34");
        put(191, "iquest");
        put(192, "Agrave");
        put(193, "Aacute");
        put(194, "Acirc");
        put(195, "Atilde");
        put(196, "Auml");
        put(197, "Aring");
        put(198, "AElig");
        put(199, "Ccedil");
        put(200, "Egrave");
        put(201, "Eacute");
        put(202, "Ecirc");
        put(203, "Euml");
        put(204, "Igrave");
        put(205, "Iacute");
        put(206, "Icirc");
        put(207, "Iuml");
        put(208, "ETH");
        put(209, "Ntilde");
        put(210, "Ograve");
        put(211, "Oacute");
        put(212, "Ocirc");
        put(213, "Otilde");
        put(214, "Ouml");
        put(215, "times");
        put(216, "Oslash");
        put(217, "Ugrave");
        put(218, "Uacute");
        put(219, "Ucirc");
        put(220, "Uuml");
        put(221, "Yacute");
        put(222, "THORN");
        put(223, "szlig");
        put(224, "agrave");
        put(225, "aacute");
        put(226, "acirc");
        put(227, "atilde");
        put(228, "auml");
        put(229, "aring");
        put(230, "aelig");
        put(231, "ccedil");
        put(232, "egrave");
        put(233, "eacute");
        put(234, "ecirc");
        put(235, "euml");
        put(236, "igrave");
        put(237, "iacute");
        put(238, "icirc");
        put(239, "iuml");
        put(240, "eth");
        put(241, "ntilde");
        put(242, "ograve");
        put(243, "oacute");
        put(244, "ocirc");
        put(245, "otilde");
        put(246, "ouml");
        put(247, "divide");
        put(248, "oslash");
        put(249, "ugrave");
        put(250, "uacute");
        put(251, "ucirc");
        put(252, "uuml");
        put(253, "yacute");
        put(254, "thorn");
        put(255, "yuml");

        // Greek letters and symbols
        put(402, "fnof");
        put(913, "Alpha");
        put(914, "Beta");
        put(915, "Gamma");
        put(916, "Delta");
        put(917, "Epsilon");
        put(918, "Zeta");
        put(919, "Eta");
        put(920, "Theta");
        put(921, "Iota");
        put(922, "Kappa");
        put(923, "Lambda");
        put(924, "Mu");
        put(925, "Nu");
        put(926, "Xi");
        put(927, "Omicron");
        put(928, "Pi");
        put(929, "Rho");
        put(931, "Sigma");
        put(932, "Tau");
        put(933, "Upsilon");
        put(934, "Phi");
        put(935, "Chi");
        put(936, "Psi");
        put(937, "Omega");
        put(945, "alpha");
        put(946, "beta");
        put(947, "gamma");
        put(948, "delta");
        put(949, "epsilon");
        put(950, "zeta");
        put(951, "eta");
        put(952, "theta");
        put(953, "iota");
        put(954, "kappa");
        put(955, "lambda");
        put(956, "mu");
        put(957, "nu");
        put(958, "xi");
        put(959, "omicron");
        put(960, "pi");
        put(961, "rho");
        put(962, "sigmaf");
        put(963, "sigma");
        put(964, "tau");
        put(965, "upsilon");
        put(966, "phi");
        put(967, "chi");
        put(968, "psi");
        put(969, "omega");
        put(977, "thetasym");
        put(978, "upsih");
        put(982, "piv");
        put(8226, "bull");
        put(8230, "hellip");
        put(8242, "prime");
        put(8243, "Prime");
        put(8254, "oline");
        put(8260, "frasl");
        put(8472, "weierp");
        put(8465, "image");
        put(8476, "real");
        put(8482, "trade");
        put(8501, "alefsym");
        put(8592, "larr");
        put(8593, "uarr");
        put(8594, "rarr");
        put(8595, "darr");
        put(8596, "harr");
        put(8629, "crarr");
        put(8656, "lArr");
        put(8657, "uArr");
        put(8658, "rArr");
        put(8659, "dArr");
        put(8660, "hArr");
        put(8704, "forall");
        put(8706, "part");
        put(8707, "exist");
        put(8709, "empty");
        put(8711, "nabla");
        put(8712, "isin");
        put(8713, "notin");
        put(8715, "ni");
        put(8719, "prod");
        put(8721, "sum");
        put(8722, "minus");
        put(8727, "lowast");
        put(8730, "radic");
        put(8733, "prop");
        put(8734, "infin");
        put(8736, "ang");
        put(8743, "and");
        put(8744, "or");
        put(8745, "cap");
        put(8746, "cup");
        put(8747, "int");
        put(8756, "there4");
        put(8764, "sim");
        put(8773, "cong");
        put(8776, "asymp");
        put(8800, "ne");
        put(8801, "equiv");
        put(8804, "le");
        put(8805, "ge");
        put(8834, "sub");
        put(8835, "sup");
        put(8836, "nsub");
        put(8838, "sube");
        put(8839, "supe");
        put(8853, "oplus");
        put(8855, "otimes");
        put(8869, "perp");
        put(8901, "sdot");
        put(8968, "lceil");
        put(8969, "rceil");
        put(8970, "lfloor");
        put(8971, "rfloor");
        put(9001, "lang");
        put(9002, "rang");
        put(9674, "loz");
        put(9824, "spades");
        put(9827, "clubs");
        put(9829, "hearts");
        put(9830, "diams");

        // The four chars that BASIC removes again, followed by the remaining entries
        put(34, "quot");
        put(38, "amp");
        put(60, "lt");
        put(62, "gt");
        put(338, "OElig");
        put(339, "oelig");
        put(352, "Scaron");
        put(353, "scaron");
        put(376, "Yuml");
        put(710, "circ");
        put(732, "tilde");
        put(8194, "ensp");
        put(8195, "emsp");
        put(8201, "thinsp");
        put(8204, "zwnj");
        put(8205, "zwj");
        put(8206, "lrm");
        put(8207, "rlm");
        put(8211, "ndash");
        put(8212, "mdash");
        put(8216, "lsquo");
        put(8217, "rsquo");
        put(8218, "sbquo");
        put(8220, "ldquo");
        put(8221, "rdquo");
        put(8222, "bdquo");
        put(8224, "dagger");
        put(8225, "Dagger");
        put(8240, "permil");
        put(8249, "lsaquo");
        put(8250, "rsaquo");
        put(8364, "euro");
    }

    /**
     * Registers (or overwrites) the entity name for a char code.
     *
     * @param c the char code; it indexes the backing array directly, so it must lie in
     *     {@code 0..65535} or the array access throws {@link ArrayIndexOutOfBoundsException}
     * @param entity the entity name without the surrounding {@code &} and {@code ;}
     */
    protected final void put(int c, String entity) {
        charToEntity[c] = entity;
    }

    /**
     * Removes the entity registered for a char code, if any.
     *
     * @param c the char code; same range constraint as for {@link #put(int, String)}
     */
    protected final void remove(int c) {
        charToEntity[c] = null;
    }

    /**
     * Looks up the entity name for a char.
     *
     * @param c the char to look up
     * @return the entity name (without {@code &} and {@code ;}), or null if this table has no
     *     entity for {@code c}
     */
    public final String lookup(char c) {
        return charToEntity[c];
    }

    /**
     * Replaces every char of the text that has an entity in this table by {@code &name;}.
     *
     * @param txt the text to convert, may be null
     * @return the converted text; the very same {@code txt} instance when no char needed a
     *     replacement; null when {@code txt} is null
     */
    public final String convertEntities(String txt) {
        // Nothing to convert; hand null back instead of failing on toCharArray()
        if (txt == null) {
            return null;
        }

        // Get the chars it's faster
        char[] chars = txt.toCharArray();

        // The new result if any, created lazily so that unchanged text costs no allocation
        StringBuffer result = null;

        // The index of the first char not yet copied to the result
        int previous = 0;

        // Perform lookup char by char
        for (int current = 0; current < chars.length; current++) {
            String replacement = lookup(chars[current]);
            if (replacement == null) {
                continue;
            }

            if (result == null) {
                // Allocate 1/2 more than the current txt size. The estimate is computed in long
                // because length * 3 overflows int for very large inputs and a negative capacity
                // would make the StringBuffer constructor throw.
                long estimate = chars.length * 3L / 2;
                int capacity = estimate > Integer.MAX_VALUE ? chars.length : (int) estimate;
                result = new StringBuffer(capacity);
            }

            // Append the untouched chars since the last replacement, if any
            result.append(chars, previous, current - previous);
            // Append the replaced entity
            result.append('&').append(replacement).append(';');
            // Skip the replaced char
            previous = current + 1;
        }

        // No replacement happened: return the original instance
        if (result == null) {
            return txt;
        }

        // Complete the result with the trailing untouched chars
        result.append(chars, previous, chars.length - previous);
        return result.toString();
    }
}
|