001: /*
002: * Copyright 2001-2006 C:1 Financial Services GmbH
003: *
004: * This software is free software; you can redistribute it and/or
005: * modify it under the terms of the GNU Lesser General Public
006: * License Version 2.1, as published by the Free Software Foundation.
007: *
008: * This software is distributed in the hope that it will be useful,
009: * but WITHOUT ANY WARRANTY; without even the implied warranty of
010: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
011: * Lesser General Public License for more details.
012: *
013: * You should have received a copy of the GNU Lesser General Public
014: * License along with this library; if not, write to the Free Software
015: * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA
016: */
017:
018: package de.finix.contelligent.search;
019:
020: import gnu.regexp.RE;
021:
022: import java.util.StringTokenizer;
023:
024: /**
025: * This is the implementation of a simple query language:
026: * <ul>
027: * <li>only words are allowed, all special Lucene characters are interpreted as
028: * whitespace</li>
029: * <li>+/- are allowed to force a word to be included/not included in the
030: * document</li>
031: * <li>wildcard-search is supported by appending * to a term</li>
032: * </ul>
033: */
034: public class SimpleSearchQueryBuilder implements SearchQueryBuilder {
035:
036: RE regularExpression;
037:
038: public String buildQuery(String queryString) {
039: StringBuffer buffer = new StringBuffer();
040: StringTokenizer tokenizer = new StringTokenizer(queryString);
041:
042: while (tokenizer.hasMoreTokens()) {
043: String token = tokenizer.nextToken();
044: char first = token.charAt(0);
045: char last = token.charAt(token.length() - 1);
046: boolean keepFirst = false;
047: boolean keepLast = false;
048:
049: if (first == '-' || first == '+') {
050: keepFirst = true;
051: }
052: if (last == '*') {
053: keepLast = true;
054: }
055: buffer
056: .append(
057: replaceSpecialCharacters(token, keepFirst,
058: keepLast)).append(' ');
059: }
060: return checkQuotes(buffer.toString().trim());
061: }
062:
063: private String checkQuotes(String query) {
064: int quoteCount = 0;
065:
066: for (int i = 0; i < query.length(); i++) {
067: if (query.charAt(i) == '"') {
068: quoteCount++;
069: }
070: }
071: if (quoteCount % 2 != 0) {
072: return query + '"';
073: }
074: return query;
075: }
076:
077: private String replaceSpecialCharacters(String token,
078: boolean keepFirst, boolean keepLast) {
079: StringBuffer buffer = new StringBuffer(token.length());
080: int start = 0;
081: int end = token.length();
082:
083: if (keepFirst) {
084: start = 1;
085: buffer.append(token.charAt(0));
086: }
087: if (keepLast) {
088: end = end - 1;
089: }
090:
091: for (int i = start; i < end; i++) {
092: char c = token.charAt(i);
093: if (Character.isLetterOrDigit(c) || c == '"') {
094: buffer.append(Character.toLowerCase(c));
095: }
096: }
097: if (keepLast) {
098: buffer.append(token.charAt(token.length() - 1));
099: }
100: return buffer.toString();
101: }
102: }
|