001: /*******************************************************************************
002: * Copyright (c) 2000, 2006 IBM Corporation and others.
003: * All rights reserved. This program and the accompanying materials
004: * are made available under the terms of the Eclipse Public License v1.0
005: * which accompanies this distribution, and is available at
006: * http://www.eclipse.org/legal/epl-v10.html
007: *
008: * Contributors:
009: * IBM Corporation - initial API and implementation
010: *******************************************************************************/package org.eclipse.ui.internal.navigator;
011:
012: import java.util.Vector;
013:
014: /**
015: * A string pattern matcher, suppporting "*" and "?" wildcards.
016: */
017: public class StringMatcher {
018: protected String fPattern;
019:
020: protected int fLength; // pattern length
021:
022: protected boolean fIgnoreWildCards;
023:
024: protected boolean fIgnoreCase;
025:
026: protected boolean fHasLeadingStar;
027:
028: protected boolean fHasTrailingStar;
029:
030: protected String fSegments[]; // the given pattern is split into *
031:
032: // separated segments
033:
034: /* boundary value beyond which we don't need to search in the text */
035: protected int fBound = 0;
036:
037: protected static final char fSingleWildCard = '\u0000';
038:
039: /**
040: *
041: */
042: static class Position {
043: int start; // inclusive
044:
045: int end; // exclusive
046:
047: Position(int start, int end) {
048: this .start = start;
049: this .end = end;
050: }
051:
052: int getStart() {
053: return start;
054: }
055:
056: int getEnd() {
057: return end;
058: }
059: }
060:
061: /**
062: * StringMatcher constructor takes in a String object that is a simple
063: * pattern which may contain '*' for 0 and many characters and '?' for
064: * exactly one character.
065: *
066: * Literal '*' and '?' characters must be escaped in the pattern e.g., "\*"
067: * means literal "*", etc.
068: *
069: * Escaping any other character (including the escape character itself),
070: * just results in that character in the pattern. e.g., "\a" means "a" and
071: * "\\" means "\"
072: *
073: * If invoking the StringMatcher with string literals in Java, don't forget
074: * escape characters are represented by "\\".
075: *
076: * @param pattern
077: * the pattern to match text against
078: * @param ignoreCase
079: * if true, case is ignored
080: * @param ignoreWildCards
081: * if true, wild cards and their escape sequences are ignored
082: * (everything is taken literally).
083: */
084: public StringMatcher(String pattern, boolean ignoreCase,
085: boolean ignoreWildCards) {
086: if (pattern == null) {
087: throw new IllegalArgumentException();
088: }
089: fIgnoreCase = ignoreCase;
090: fIgnoreWildCards = ignoreWildCards;
091: fPattern = pattern;
092: fLength = pattern.length();
093:
094: if (fIgnoreWildCards) {
095: parseNoWildCards();
096: } else {
097: parseWildCards();
098: }
099: }
100:
101: /**
102: * Find the first occurrence of the pattern between
103: * <code>start</code)(inclusive)
104: * and <code>end</code>(exclusive).
105: * @param text the String object to search in
106: * @param start the starting index of the search range, inclusive
107: * @param end the ending index of the search range, exclusive
108: * @return an <code>StringMatcher.Position</code> object that keeps the starting
109: * (inclusive) and ending positions (exclusive) of the first occurrence of the
110: * pattern in the specified range of the text; return null if not found or subtext
111: * is empty (start==end). A pair of zeros is returned if pattern is empty string
112: * Note that for pattern like "*abc*" with leading and trailing stars, position of "abc"
113: * is returned. For a pattern like"*??*" in text "abcdf", (1,3) is returned
114: */
115: public StringMatcher.Position find(String text, int start, int end) {
116: if (text == null) {
117: throw new IllegalArgumentException();
118: }
119:
120: int tlen = text.length();
121: if (start < 0) {
122: start = 0;
123: }
124: if (end > tlen) {
125: end = tlen;
126: }
127: if (end < 0 || start >= end) {
128: return null;
129: }
130: if (fLength == 0) {
131: return new Position(start, start);
132: }
133: if (fIgnoreWildCards) {
134: int x = posIn(text, start, end);
135: if (x < 0) {
136: return null;
137: }
138: return new Position(x, x + fLength);
139: }
140:
141: int segCount = fSegments.length;
142: if (segCount == 0) {
143: return new Position(start, end);
144: }
145:
146: int curPos = start;
147: int matchStart = -1;
148: int i;
149: for (i = 0; i < segCount && curPos < end; ++i) {
150: String current = fSegments[i];
151: int nextMatch = regExpPosIn(text, curPos, end, current);
152: if (nextMatch < 0) {
153: return null;
154: }
155: if (i == 0) {
156: matchStart = nextMatch;
157: }
158: curPos = nextMatch + current.length();
159: }
160: if (i < segCount) {
161: return null;
162: }
163: return new Position(matchStart, curPos);
164: }
165:
166: /**
167: * match the given <code>text</code> with the pattern
168: *
169: * @return true if matched eitherwise false
170: * @param text
171: * a String object
172: */
173: public boolean match(String text) {
174: if (text == null) {
175: return false;
176: }
177: return match(text, 0, text.length());
178: }
179:
180: /**
181: * Given the starting (inclusive) and the ending (exclusive) positions in
182: * the <code>text</code>, determine if the given substring matches with
183: * aPattern
184: *
185: * @return true if the specified portion of the text matches the pattern
186: * @param text
187: * a String object that contains the substring to match
188: * @param start
189: * marks the starting position (inclusive) of the substring
190: * @param end
191: * marks the ending index (exclusive) of the substring
192: */
193: public boolean match(String text, int start, int end) {
194: if (null == text) {
195: throw new IllegalArgumentException();
196: }
197:
198: if (start > end) {
199: return false;
200: }
201:
202: if (fIgnoreWildCards) {
203: return (end - start == fLength)
204: && fPattern.regionMatches(fIgnoreCase, 0, text,
205: start, fLength);
206: }
207: int segCount = fSegments.length;
208: if (segCount == 0 && (fHasLeadingStar || fHasTrailingStar)) {
209: // contains
210: // only
211: // '*'(s)
212: return true;
213: }
214: if (start == end) {
215: return fLength == 0;
216: }
217: if (fLength == 0) {
218: return start == end;
219: }
220:
221: int tlen = text.length();
222: if (start < 0) {
223: start = 0;
224: }
225: if (end > tlen) {
226: end = tlen;
227: }
228:
229: int tCurPos = start;
230: int bound = end - fBound;
231: if (bound < 0) {
232: return false;
233: }
234: int i = 0;
235: String current = fSegments[i];
236: int segLength = current.length();
237:
238: /* process first segment */
239: if (!fHasLeadingStar) {
240: if (!regExpRegionMatches(text, start, current, 0, segLength)) {
241: return false;
242: }
243: ++i;
244: tCurPos = tCurPos + segLength;
245:
246: }
247: if ((fSegments.length == 1) && (!fHasLeadingStar)
248: && (!fHasTrailingStar)) {
249: // only one segment to match, no wildcards specified
250: return tCurPos == end;
251: }
252: /* process middle segments */
253: while (i < segCount) {
254: current = fSegments[i];
255: int currentMatch;
256: int k = current.indexOf(fSingleWildCard);
257: if (k < 0) {
258: currentMatch = textPosIn(text, tCurPos, end, current);
259: if (currentMatch < 0) {
260: return false;
261: }
262: } else {
263: currentMatch = regExpPosIn(text, tCurPos, end, current);
264: if (currentMatch < 0) {
265: return false;
266: }
267: }
268: tCurPos = currentMatch + current.length();
269: i++;
270: }
271:
272: /* process final segment */
273: if (!fHasTrailingStar && tCurPos != end) {
274: int clen = current.length();
275: return regExpRegionMatches(text, end - clen, current, 0,
276: clen);
277: }
278: return i == segCount;
279: }
280:
281: /**
282: * This method parses the given pattern into segments seperated by wildcard
283: * '*' characters. Since wildcards are not being used in this case, the
284: * pattern consists of a single segment.
285: */
286: private void parseNoWildCards() {
287: fSegments = new String[1];
288: fSegments[0] = fPattern;
289: fBound = fLength;
290: }
291:
292: /**
293: * Parses the given pattern into segments seperated by wildcard '*'
294: * characters.
295: *
296: */
297: private void parseWildCards() {
298: if (fPattern.startsWith("*")) { //$NON-NLS-1$
299: fHasLeadingStar = true;
300: }
301: if (fPattern.endsWith("*")) {//$NON-NLS-1$
302: /* make sure it's not an escaped wildcard */
303: if (fLength > 1 && fPattern.charAt(fLength - 2) != '\\') {
304: fHasTrailingStar = true;
305: }
306: }
307:
308: Vector temp = new Vector();
309:
310: int pos = 0;
311: StringBuffer buf = new StringBuffer();
312: while (pos < fLength) {
313: char c = fPattern.charAt(pos++);
314: switch (c) {
315: case '\\':
316: if (pos >= fLength) {
317: buf.append(c);
318: } else {
319: char next = fPattern.charAt(pos++);
320: /* if it's an escape sequence */
321: if (next == '*' || next == '?' || next == '\\') {
322: buf.append(next);
323: } else {
324: /* not an escape sequence, just insert literally */
325: buf.append(c);
326: buf.append(next);
327: }
328: }
329: break;
330: case '*':
331: if (buf.length() > 0) {
332: /* new segment */
333: temp.addElement(buf.toString());
334: fBound += buf.length();
335: buf.setLength(0);
336: }
337: break;
338: case '?':
339: /* append special character representing single match wildcard */
340: buf.append(fSingleWildCard);
341: break;
342: default:
343: buf.append(c);
344: }
345: }
346:
347: /* add last buffer to segment list */
348: if (buf.length() > 0) {
349: temp.addElement(buf.toString());
350: fBound += buf.length();
351: }
352:
353: fSegments = new String[temp.size()];
354: temp.copyInto(fSegments);
355: }
356:
357: /**
358: * @param text
359: * a string which contains no wildcard
360: * @param start
361: * the starting index in the text for search, inclusive
362: * @param end
363: * the stopping point of search, exclusive
364: * @return the starting index in the text of the pattern , or -1 if not
365: * found
366: */
367: protected int posIn(String text, int start, int end) {// no wild card in
368: // pattern
369: int max = end - fLength;
370:
371: if (!fIgnoreCase) {
372: int i = text.indexOf(fPattern, start);
373: if (i == -1 || i > max) {
374: return -1;
375: }
376: return i;
377: }
378:
379: for (int i = start; i <= max; ++i) {
380: if (text.regionMatches(true, i, fPattern, 0, fLength)) {
381: return i;
382: }
383: }
384:
385: return -1;
386: }
387:
388: /**
389: * @param text
390: * a simple regular expression that may only contain '?'(s)
391: * @param start
392: * the starting index in the text for search, inclusive
393: * @param end
394: * the stopping point of search, exclusive
395: * @param p
396: * a simple regular expression that may contains '?'
397: * @return the starting index in the text of the pattern , or -1 if not
398: * found
399: */
400: protected int regExpPosIn(String text, int start, int end, String p) {
401: int plen = p.length();
402:
403: int max = end - plen;
404: for (int i = start; i <= max; ++i) {
405: if (regExpRegionMatches(text, i, p, 0, plen)) {
406: return i;
407: }
408: }
409: return -1;
410: }
411:
412: /**
413: *
414: * @return boolean
415: * @param text
416: * a String to match
417: * @param tStart
418: * indicates the starting index of match, inclusive
419: * @param p
420: * a simple regular expression that may contain '?'
421: * @param pStart
422: * @param plen
423: */
424: protected boolean regExpRegionMatches(String text, int tStart,
425: String p, int pStart, int plen) {
426: while (plen-- > 0) {
427: char tchar = text.charAt(tStart++);
428: char pchar = p.charAt(pStart++);
429:
430: /* process wild cards */
431: if (!fIgnoreWildCards) {
432: /* skip single wild cards */
433: if (pchar == fSingleWildCard) {
434: continue;
435: }
436: }
437: if (pchar == tchar) {
438: continue;
439: }
440: if (fIgnoreCase) {
441: if (Character.toUpperCase(tchar) == Character
442: .toUpperCase(pchar)) {
443: continue;
444: }
445: // comparing after converting to upper case doesn't handle all
446: // cases;
447: // also compare after converting to lower case
448: if (Character.toLowerCase(tchar) == Character
449: .toLowerCase(pchar)) {
450: continue;
451: }
452: }
453: return false;
454: }
455: return true;
456: }
457:
458: /**
459: * @param text
460: * the string to match
461: * @param start
462: * the starting index in the text for search, inclusive
463: * @param end
464: * the stopping point of search, exclusive
465: * @param p
466: * a string that has no wildcard
467: * @return the starting index in the text of the pattern , or -1 if not
468: * found
469: */
470: protected int textPosIn(String text, int start, int end, String p) {
471:
472: int plen = p.length();
473: int max = end - plen;
474:
475: if (!fIgnoreCase) {
476: int i = text.indexOf(p, start);
477: if (i == -1 || i > max) {
478: return -1;
479: }
480: return i;
481: }
482:
483: for (int i = start; i <= max; ++i) {
484: if (text.regionMatches(true, i, p, 0, plen)) {
485: return i;
486: }
487: }
488:
489: return -1;
490: }
491: }
|