001: package ca.flop.jpublish.wiki;
002:
003: /*
004:
005: This is Textile
006: A Humane Web Text Generator
007:
008: Original PHP Version
009: Version 1.0
010: 21 Feb, 2003
011:
012: Copyright (c) 2003, Dean Allen, www.textism.com
013: All rights reserved.
014:
015: This java version by Gareth Simpson
016: 1.0 April 2003
017: 1.1 mid 2004
018: 1.2 March 2006
019: _______
020: LICENSE
021:
022: Redistribution and use in source and binary forms, with or without
023: modification, are permitted provided that the following conditions are met:
024:
025: * Redistributions of source code must retain the above copyright notice,
026: this list of conditions and the following disclaimer.
027:
028: * Redistributions in binary form must reproduce the above copyright notice,
029: this list of conditions and the following disclaimer in the documentation
030: and/or other materials provided with the distribution.
031:
032: * Neither the name Textile nor the names of its contributors may be used to
033: endorse or promote products derived from this software without specific
034: prior written permission.
035:
036: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
037: AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
038: IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
039: ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
040: LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
041: CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
042: SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
043: INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
044: CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
045: ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
046: POSSIBILITY OF SUCH DAMAGE.
047:
048: _____________
049: USING TEXTILE
050:
051: Block modifier syntax:
052:
053: Header: hn.
054: Paragraphs beginning with 'hn. ' (where n is 1-6) are wrapped in header tags.
055: Example: <h1>Text</h1>
056:
057: Header with CSS class: hn(class).
058: Paragraphs beginning with 'hn(class). ' receive a CSS class attribute.
059: Example: <h1 class="class">Text</h1>
060:
061: Paragraph: p. (applied by default)
062: Paragraphs beginning with 'p. ' are wrapped in paragraph tags.
063: Example: <p>Text</p>
064:
065: Paragraph with CSS class: p(class).
066: Paragraphs beginning with 'p(class). ' receive a CSS class attribute.
067: Example: <p class="class">Text</p>
068:
069: Blockquote: bq.
070: Paragraphs beginning with 'bq. ' are wrapped in block quote tags.
071: Example: <blockquote>Text</blockquote>
072:
073: Blockquote with citation: bq(citeurl).
074: Paragraphs beginning with 'bq(citeurl). ' receive a citation attribute.
075: Example: <blockquote cite="citeurl">Text</blockquote>
076:
077: Numeric list: #
078: Consecutive paragraphs beginning with # are wrapped in ordered list tags.
079: Example: <ol><li>ordered list</li></ol>
080:
081: Bulleted list: *
082: Consecutive paragraphs beginning with * are wrapped in unordered list tags.
083: Example: <ul><li>unordered list</li></ul>
084:
085:
086: Phrase modifier syntax:
087:
088: _emphasis_ <em>emphasis</em>
089: __italic__ <i>italic</i>
090: *strong* <strong>strong</strong>
091: **bold** <b>bold</b>
092: ??citation?? <cite>citation</cite>
093: -deleted text- <del>deleted</del>
094: +inserted text+ <ins>inserted</ins>
095: ^superscript^ <sup>superscript</sup>
096: ~subscript~ <sub>subscript</sub>
097: @code@ <code>computer code</code>
098:
099: ==notextile== leave text alone (do not format)
100:
101: "linktext":url <a href="url">linktext</a>
102: "linktext(title)":url <a href="url" title="title">linktext</a>
103:
104: !imageurl! <img src="imageurl" alt="" />
105: !imageurl(alt text)! <img src="imageurl" alt="alt text" />
106: !imageurl!:linkurl <a href="linkurl"><img src="imageurl" /></a>
107:
108: ABC(Always Be Closing) <acronym title="Always Be Closing">ABC</acronym>
109:
110: */
111:
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
113:
/**
 * Converts Textile markup into HTML.
 *
 * <p>The conversion runs in four passes over the whole text: input
 * normalisation, inline "quick tag" replacement, typographic glyph
 * replacement (skipped inside code-like elements), and line-by-line block
 * formatting. All regular expressions are compiled once, when the class is
 * loaded, using {@code java.util.regex}.
 */
public class JTextile {
    // Quote-handling modes for htmlspecialchars(), named after the PHP constants.
    private static final int ENT_COMPAT = 0;
    private static final int ENT_NOQUOTES = 2;
    private static final int ENT_QUOTES = 3;

    /** Stand-in for a bare ampersand; swapped for {@code &amp;} at the very end. */
    private static final String AMPERSAND_PLACEHOLDER = "x%x%";

    /** Stand-in for a newline inside a notextile span; restored at the very end. */
    private static final String PROTECTED_NEWLINE = "({)(})";

    // UNIX_LINES: only '\n' counts as a line terminator for '.', '^' and '$'.
    // UNICODE_CHARACTER_CLASS: \w, \s, \d and \b use Unicode definitions.
    // NOTE(review): the Unicode flag is a choice made for non-ASCII wiki text - confirm.
    private static final int REGEX_FLAGS =
            Pattern.UNIX_LINES | Pattern.UNICODE_CHARACTER_CLASS;

    // ---- input normalisation ------------------------------------------------

    /** An ampersand that does not start an HTML entity such as {@code &copy;}. */
    private static final Pattern BARE_AMPERSAND = compile("&(?![#a-zA-Z0-9]+;)");

    // ---- quick tags ---------------------------------------------------------

    private static final Pattern NOTEXTILE_SPAN =
            compile("(^|\\s)==(.*?)==([^\\w]{0,2})");
    private static final Pattern IMAGE =
            compile("!([^!\\s\\(=]+?)\\s?(\\(([^\\)]+?)\\))?!");
    private static final Pattern IMAGE_LINK = compile("(<img.+ \\/>):(\\S+)");
    private static final Pattern HYPERLINK = compile(
            "\"([^\"\\(]+)\\s?(\\(([^\\)]+)\\))?\":(\\S+?)([^\\w\\s\\/;]|[1-9]*?)(\\s|$)");

    // Delimiters (already regex-escaped) and the HTML element each one maps to.
    // "**" must be tried before "*".
    private static final String[] QTAG_DELIMITERS =
            { "\\*\\*", "\\*", "\\?\\?", "-", "\\+", "~", "@" };
    private static final String[] QTAG_ELEMENTS =
            { "b", "strong", "cite", "del", "ins", "sub", "code" };
    private static final Pattern[] QTAG_PATTERNS = buildQtagPatterns();
    private static final String[] QTAG_REPLACEMENTS = buildQtagReplacements();

    // Underscore and caret tags use their own, simpler patterns.
    private static final Pattern ITALIC = compile("(^|\\s)__(.*?)__([^\\w\\s]{0,2})");
    private static final Pattern EMPHASIS = compile("(^|\\s)_(.*?)_([^\\w\\s]{0,2})");
    private static final Pattern SUPERSCRIPT = compile("\\^(.*?)\\^");

    // ---- glyphs -------------------------------------------------------------

    // Applied in order; GLYPH_REPLACEMENTS is the parallel array.
    // '[' inside a character class is escaped because java.util.regex would
    // otherwise read it as the start of a nested class.
    private static final Pattern[] GLYPH_PATTERNS = compileAll(new String[] {
        "([^\\s\\[{<])?'([dmst]\\b|ll\\b|ve\\b|\\s|$)",   // single closing
        "'",                                              // single opening
        "([^\\s\\[{])?\"(\\s|$)",                          // double closing
        "\"",                                             // double opening
        "\\b( )?\\.{3}",                                  // ellipsis
        "\\b([A-Z][A-Z0-9]{2,})\\b(\\(([^\\)]+)\\))",     // 3+ uppercase acronym
        "\\s?--\\s?",                                     // em dash
        "\\s-\\s",                                        // en dash
        "(\\d+)-(\\d+)",                                  // en dash in a range
        "(\\d+) ?x ?(\\d+)",                              // dimension sign
        "\\b ?(\\((tm|TM)\\))",                           // trademark
        "\\b ?(\\([rR]\\))",                              // registered
        "\\b ?(\\([cC]\\))"                               // copyright
    });

    // Non-ASCII glyphs are written as Unicode escapes so that the result does
    // not depend on the encoding this file is compiled with.
    private static final String[] GLYPH_REPLACEMENTS = {
        "$1\u2019$2",                          // single closing
        "\u2018",                              // single opening
        "$1\u201D$2",                          // double closing
        "\u201C",                              // double opening
        "$1\u2026",                            // ellipsis
        "<acronym title=\"$3\">$1</acronym>",  // $3 = title without the parentheses
        "\u2014",                              // em dash
        " \u2013 ",                            // en dash
        "$1\u2013$2",                          // en dash in a range
        "$1\u00D7$2",                          // dimension sign
        "\u2122",                              // trademark
        "\u00AE",                              // registered
        "\u00A9"                               // copyright
    };

    /** Anything that looks like an HTML tag; used to split text from markup. */
    private static final Pattern HTML_TAG = compile("<.[^<]*>");

    private static final Pattern CODE_OPEN = compileIgnoreCase("<(code|pre|kbd)>");
    private static final Pattern CODE_CLOSE = compileIgnoreCase("</(code|pre|kbd)>");
    private static final Pattern NOTEXTILE_OPEN = compileIgnoreCase("<notextile>");
    private static final Pattern NOTEXTILE_CLOSE = compileIgnoreCase("</notextile>");

    /** An opening tag that htmlspecialchars() escaped and that must be put back. */
    private static final Pattern ESCAPED_OPEN_TAG =
            compileIgnoreCase("&lt;(pre|code|kbd|notextile)&gt;");

    // ---- block formatting ---------------------------------------------------

    // NOTE: "[:punct:]" here is a plain set of the characters ':', 'p', 'u',
    // 'n', 'c', 't' (not a POSIX class); the expression is kept as it was.
    private static final Pattern FORCED_BREAK =
            compile("(\\S)(_*)([:punct:]*) *\\n([^#*\\s])");

    private static final Pattern PRE_OPEN = compileIgnoreCase("<pre>");
    private static final Pattern PRE_CLOSE = compileIgnoreCase("</pre>");

    // Applied to each line in order; BLOCK_REPLACEMENTS is the parallel array.
    // Every replacement starts with a tab, which keeps the later patterns
    // (notably the catch-all paragraph) from matching the same line again.
    private static final Pattern[] BLOCK_PATTERNS = compileAll(new String[] {
        "^\\s?\\*\\s(.*)",                    // bulleted list *
        "^\\s?#\\s(.*)",                      // numeric list #
        "^bq\\. (.*)",                        // blockquote bq.
        "^bq\\((\\S+?)\\). (.*)",             // blockquote bq(cite-url).
        "^h(\\d)\\((\\w+)\\)\\.\\s(.*)",      // header hn(class).
        "^h(\\d)\\. (.*)",                    // plain header hn.
        "^p\\((\\w+)\\)\\.\\s(.*)",           // paragraph p(class).
        "^p\\. (.*)",                         // plain paragraph p.
        "^([^\\t ]+.*)"                       // any remaining text
    });

    private static final String[] BLOCK_REPLACEMENTS = {
        "\t<liu>$1</liu>",
        "\t<lio>$1</lio>",
        "\t<blockquote>$1</blockquote>",
        "\t<blockquote cite=\"$1\">$2</blockquote>",
        "\t<h$1 class=\"$2\">$3</h$1>",
        "\t<h$1>$2</h$1>",
        "\t<p class=\"$1\">$2</p>",
        "\t<p>$1</p>",
        "\t<p>$1</p>"
    };

    /** A line produced by one of the two list patterns; group 1 is "u" or "o". */
    private static final Pattern LIST_ITEM = compile("\\t<li(o|u)");

    // ---- final clean-up -----------------------------------------------------

    private static final Pattern NOTEXTILE_TAG = compile("<\\/?notextile>");
    private static final Pattern LIST_ITEM_TAG = compile("<(\\/?)li(u|o)>");

    public JTextile() {
    }

    /**
     * Converts Textile markup to HTML.
     *
     * @param text the Textile source; must not be {@code null}
     * @return the generated HTML
     * @throws Exception never thrown by this implementation; the clause is
     *         kept so that existing callers compile unchanged
     */
    public static String textile(String text) throws Exception {
        text = normalizeInput(text);
        text = applyQuickTags(text);
        text = applyGlyphs(text);
        text = applyBlockFormatting(text);
        return cleanUp(text);
    }

    /**
     * Protects bare ampersands, decodes the three basic entities, removes
     * carriage returns and tabs, and trims every line. The result ends with a
     * newline after each line.
     */
    private static String normalizeInput(String text) {
        // Bare ampersands become a placeholder so later passes cannot mistake
        // them for (or turn them into) entities.
        text = BARE_AMPERSAND.matcher(text).replaceAll(AMPERSAND_PLACEHOLDER);

        // Un-entify angle brackets and ampersands.
        text = replace(text, "&gt;", ">");
        text = replace(text, "&lt;", "<");
        text = replace(text, "&amp;", "&");

        text = replace(text, "\r\n", "\n");
        text = replace(text, "\t", "");

        String[] lines = text.split("\n");
        StringBuilder trimmed = new StringBuilder(text.length() + 1);
        for (int i = 0; i < lines.length; i++) {
            trimmed.append(lines[i].trim()).append('\n');
        }
        return trimmed.toString();
    }

    /** Replaces the inline markers (images, links, emphasis and so on) with HTML. */
    private static String applyQuickTags(String text) {
        text = NOTEXTILE_SPAN.matcher(text)
                .replaceAll("$1<notextile>$2</notextile>$3");
        text = IMAGE.matcher(text).replaceAll("<img src=\"$1\" alt=\"$3\" />");
        text = IMAGE_LINK.matcher(text).replaceAll("<a href=\"$2\">$1</a>");
        text = HYPERLINK.matcher(text)
                .replaceAll("<a href=\"$4\" title=\"$3\">$1</a>$5$6");

        text = replaceEach(QTAG_PATTERNS, QTAG_REPLACEMENTS, text);

        text = ITALIC.matcher(text).replaceAll("$1<i>$2</i>$3");
        text = EMPHASIS.matcher(text).replaceAll("$1<em>$2</em>$3");
        text = SUPERSCRIPT.matcher(text).replaceAll("<sup>$1</sup>");
        return text;
    }

    /**
     * Replaces typographic characters. When the text contains markup, tags are
     * left alone, text inside code/pre/kbd is HTML-escaped instead, and text
     * inside notextile is passed through with its newlines protected.
     */
    private static String applyGlyphs(String text) {
        // No markup at all: a plain search and replace is enough.
        // (The text always ends in a newline here, so a trailing double quote
        // is already followed by whitespace and needs no extra padding.)
        if (!HTML_TAG.matcher(text).find()) {
            return replaceEach(GLYPH_PATTERNS, GLYPH_REPLACEMENTS, text);
        }

        boolean codepre = false;
        boolean notextile = false;
        StringBuilder out = new StringBuilder(text.length() + 64);

        // Tokens alternate: even indexes are text, odd indexes are tags.
        List<String> tokens = splitAroundTags(text);
        for (int i = 0; i < tokens.size(); i++) {
            String token = tokens.get(i);
            boolean isTag = (i % 2) == 1;

            if (isTag) {
                if (CODE_OPEN.matcher(token).find()) {
                    codepre = true;
                }
                if (NOTEXTILE_OPEN.matcher(token).find()) {
                    codepre = true;
                    notextile = true;
                } else if (CODE_CLOSE.matcher(token).find()) {
                    codepre = false;
                } else if (NOTEXTILE_CLOSE.matcher(token).find()) {
                    codepre = false;
                    notextile = false;
                }
            }

            if (!isTag && !codepre) {
                token = replaceEach(GLYPH_PATTERNS, GLYPH_REPLACEMENTS, token);
            }

            if (codepre && !notextile) {
                token = htmlspecialchars(token, ENT_NOQUOTES);
                // The opening tag itself was escaped too; put it back.
                token = ESCAPED_OPEN_TAG.matcher(token).replaceAll("<$1>");
            }

            if (notextile) {
                token = replace(token, "\n", PROTECTED_NEWLINE);
            }

            out.append(token);
        }
        return out.toString();
    }

    /**
     * Inserts forced line breaks, wraps each line in its block element and
     * opens and closes the surrounding list elements.
     */
    private static String applyBlockFormatting(String text) {
        // A single newline between two text lines becomes a <br />.
        text = FORCED_BREAK.matcher(text).replaceAll("$1$2$3<br />$4");

        // ...but not directly after a tag ending in "l>" (for example </ul>).
        text = replace(text, "l><br />", "l>\n");

        // Trailing filler lines guarantee that an open list gets closed.
        text += " \n";
        String[] lines = text.split("\n");

        StringBuilder out = new StringBuilder(text.length() + 64);
        boolean pre = false;
        String openList = "";   // "", "u" or "o"

        for (int i = 0; i <= lines.length; i++) {
            String line = (i < lines.length) ? lines[i] : " ";

            if (PRE_OPEN.matcher(line).find()) {
                pre = true;
            }

            if (pre) {
                // Undo any forced breaks that slipped in between <pre> tags.
                line = replace(line, "<br />", "\n");
            } else {
                line = replaceEach(BLOCK_PATTERNS, BLOCK_REPLACEMENTS, line);
            }

            if (PRE_CLOSE.matcher(line).find()) {
                pre = false;
            }

            // Close the current list and/or open a new one when the kind of
            // line changes (text <-> bullet <-> numbered).
            Matcher item = LIST_ITEM.matcher(line);
            String kind = item.lookingAt() ? item.group(1) : "";
            if (!kind.equals(openList)) {
                StringBuilder prefix = new StringBuilder();
                if (openList.length() > 0) {
                    prefix.append("</").append(openList).append("l>\n");
                }
                if (kind.length() > 0) {
                    prefix.append("\n<").append(kind).append("l>\n");
                }
                line = prefix + line;
                openList = kind;
            }

            out.append(line).append('\n');
        }
        return out.toString();
    }

    /** Removes the temporary markers that the earlier passes left behind. */
    private static String cleanUp(String text) {
        text = NOTEXTILE_TAG.matcher(text).replaceAll("");
        text = replace(text, PROTECTED_NEWLINE, "\n");

        // <liu>/<lio> only existed to tell the two list kinds apart.
        text = LIST_ITEM_TAG.matcher(text).replaceAll("<$1li>");

        text = replace(text, AMPERSAND_PLACEHOLDER, "&amp;");

        // One line per <br />, just for markup tidiness.
        text = replace(text, "<br />", "<br />\n");
        return text;
    }

    /**
     * Replaces every occurrence of a literal string.
     *
     * @param source The string to start with
     * @param searchFor The string we are looking for
     * @param replaceWith The replacement
     * @return The reformatted string; {@code source} itself when it is
     *         {@code null} or empty, when {@code replaceWith} is {@code null},
     *         or when {@code searchFor} is empty
     */
    private static String replace(String source, String searchFor,
            String replaceWith) {
        if (source == null || "".equals(source)) {
            return source;
        }
        if (replaceWith == null || "".equals(searchFor)) {
            return source;
        }

        StringBuilder result = new StringBuilder(source.length());
        int from = 0;
        int hit;
        while ((hit = source.indexOf(searchFor, from)) >= 0) {
            result.append(source, from, hit);
            result.append(replaceWith);
            from = hit + searchFor.length();
        }
        result.append(source, from, source.length());
        return result.toString();
    }

    /**
     * Escapes the characters that are special in HTML.
     *
     * @param text the text to escape
     * @param mode {@code ENT_NOQUOTES} leaves both quote characters alone,
     *        {@code ENT_QUOTES} escapes both, any other value escapes only
     *        double quotes
     * @return the escaped text
     */
    private static String htmlspecialchars(String text, int mode) {
        // The ampersand goes first so the entities added below are not re-escaped.
        text = replace(text, "&", "&amp;");
        if (mode != ENT_NOQUOTES) {
            text = replace(text, "\"", "&quot;");
        }
        if (mode == ENT_QUOTES) {
            text = replace(text, "'", "&#039;");
        }
        text = replace(text, "<", "&lt;");
        text = replace(text, ">", "&gt;");
        return text;
    }

    /** Applies each pattern/replacement pair in turn to the running result. */
    private static String replaceEach(Pattern[] patterns, String[] replacements,
            String text) {
        for (int i = 0; i < patterns.length; i++) {
            text = patterns[i].matcher(text).replaceAll(replacements[i]);
        }
        return text;
    }

    /**
     * Splits text at HTML tags and keeps the tags. The returned list always
     * has an odd number of elements: text, tag, text, tag, ..., text (text
     * elements may be empty).
     */
    private static List<String> splitAroundTags(String text) {
        List<String> tokens = new ArrayList<String>();
        Matcher tag = HTML_TAG.matcher(text);
        int start = 0;
        while (tag.find()) {
            tokens.add(text.substring(start, tag.start()));
            tokens.add(tag.group());
            start = tag.end();
        }
        tokens.add(text.substring(start));
        return tokens;
    }

    private static Pattern compile(String regex) {
        return Pattern.compile(regex, REGEX_FLAGS);
    }

    private static Pattern compileIgnoreCase(String regex) {
        return Pattern.compile(regex, Pattern.CASE_INSENSITIVE);
    }

    private static Pattern[] compileAll(String[] regexes) {
        Pattern[] compiled = new Pattern[regexes.length];
        for (int i = 0; i < regexes.length; i++) {
            compiled[i] = compile(regexes[i]);
        }
        return compiled;
    }

    /** Builds one pattern per quick-tag delimiter; all share the same shape. */
    private static Pattern[] buildQtagPatterns() {
        Pattern[] patterns = new Pattern[QTAG_DELIMITERS.length];
        for (int i = 0; i < QTAG_DELIMITERS.length; i++) {
            String delimiter = QTAG_DELIMITERS[i];
            patterns[i] = compile("(^|\\s|>)" + delimiter
                    + "([^ ])(.+?)?([^\\w\\s]*?)([^ ])" + delimiter
                    + "([^\\w\\s]{0,2})(\\s|$)");
        }
        return patterns;
    }

    /** Builds the replacement that goes with each quick-tag pattern. */
    private static String[] buildQtagReplacements() {
        String[] replacements = new String[QTAG_ELEMENTS.length];
        for (int i = 0; i < QTAG_ELEMENTS.length; i++) {
            String element = QTAG_ELEMENTS[i];
            replacements[i] = "$1<" + element + ">$2$3$4$5</" + element
                    + ">$6$7";
        }
        return replacements;
    }
}
|