Source Code Cross Referenced for KeywordSearchUtil.java in  » ERP-CRM-Financial » ofbiz » org » ofbiz » common » Java Source Code / Java Documentation | Java Source Code and Java Documentation

Java Source Code / Java Documentation
1. 6.0 JDK Core
2. 6.0 JDK Modules
3. 6.0 JDK Modules com.sun
4. 6.0 JDK Modules com.sun.java
5. 6.0 JDK Modules sun
6. 6.0 JDK Platform
7. Ajax
8. Apache Harmony Java SE
9. Aspect oriented
10. Authentication Authorization
11. Blogger System
12. Build
13. Byte Code
14. Cache
15. Chart
16. Chat
17. Code Analyzer
18. Collaboration
19. Content Management System
20. Database Client
21. Database DBMS
22. Database JDBC Connection Pool
23. Database ORM
24. Development
25. EJB Server geronimo
26. EJB Server GlassFish
27. EJB Server JBoss 4.2.1
28. EJB Server resin 3.1.5
29. ERP CRM Financial
30. ESB
31. Forum
32. GIS
33. Graphic Library
34. Groupware
35. HTML Parser
36. IDE
37. IDE Eclipse
38. IDE Netbeans
39. Installer
40. Internationalization Localization
41. Inversion of Control
42. Issue Tracking
43. J2EE
44. JBoss
45. JMS
46. JMX
47. Library
48. Mail Clients
49. Net
50. Parser
51. PDF
52. Portal
53. Profiler
54. Project Management
55. Report
56. RSS RDF
57. Rule Engine
58. Science
59. Scripting
60. Search Engine
61. Security
62. Servlet Container
63. Source Control
64. Swing Library
65. Template Engine
66. Test Coverage
67. Testing
68. UML
69. Web Crawler
70. Web Framework
71. Web Mail
72. Web Server
73. Web Services
74. Web Services apache cxf 2.0.1
75. Web Services AXIS2
76. Wiki Engine
77. Workflow Engines
78. XML
79. XML UI
Java
Java Tutorial
Java Open Source
Jar File Download
Java Articles
Java Products
Java by API
Photoshop Tutorials
Maya Tutorials
Flash Tutorials
3ds-Max Tutorials
Illustrator Tutorials
GIMP Tutorials
C# / C Sharp
C# / CSharp Tutorial
C# / CSharp Open Source
ASP.Net
ASP.NET Tutorial
JavaScript DHTML
JavaScript Tutorial
JavaScript Reference
HTML / CSS
HTML CSS Reference
C / ANSI-C
C Tutorial
C++
C++ Tutorial
Ruby
PHP
Python
Python Tutorial
Python Open Source
SQL Server / T-SQL
SQL Server / T-SQL Tutorial
Oracle PL / SQL
Oracle PL/SQL Tutorial
PostgreSQL
SQL / MySQL
MySQL Tutorial
VB.Net
VB.Net Tutorial
Flash / Flex / ActionScript
VBA / Excel / Access / Word
XML
XML Tutorial
Microsoft Office PowerPoint 2007 Tutorial
Microsoft Office Excel 2007 Tutorial
Microsoft Office Word 2007 Tutorial
Java Source Code / Java Documentation » ERP CRM Financial » ofbiz » org.ofbiz.common 
Source Cross Referenced  Class Diagram Java Document (Java Doc) 


001:        /*******************************************************************************
002:         * Licensed to the Apache Software Foundation (ASF) under one
003:         * or more contributor license agreements.  See the NOTICE file
004:         * distributed with this work for additional information
005:         * regarding copyright ownership.  The ASF licenses this file
006:         * to you under the Apache License, Version 2.0 (the
007:         * "License"); you may not use this file except in compliance
008:         * with the License.  You may obtain a copy of the License at
009:         * 
010:         * http://www.apache.org/licenses/LICENSE-2.0
011:         * 
012:         * Unless required by applicable law or agreed to in writing,
013:         * software distributed under the License is distributed on an
014:         * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015:         * KIND, either express or implied.  See the License for the
016:         * specific language governing permissions and limitations
017:         * under the License.
018:         *******************************************************************************/package org.ofbiz.common;
019:
020:        import java.util.HashMap;
021:        import java.util.HashSet;
022:        import java.util.Iterator;
023:        import java.util.List;
024:        import java.util.Map;
025:        import java.util.Set;
026:        import java.util.StringTokenizer;
027:        import java.util.TreeSet;
028:
029:        import org.ofbiz.base.util.Debug;
030:        import org.ofbiz.base.util.UtilMisc;
031:        import org.ofbiz.base.util.UtilProperties;
032:        import org.ofbiz.base.util.UtilValidate;
033:        import org.ofbiz.entity.GenericDelegator;
034:        import org.ofbiz.entity.GenericEntityException;
035:        import org.ofbiz.entity.GenericValue;
036:
037:        /**
038:         * A few utility methods related to Keyword Search.
039:         */
040:        public class KeywordSearchUtil {
041:
042:            public static final String module = KeywordSearchUtil.class
043:                    .getName();
044:
045:            public static Set thesaurusRelsToInclude = new HashSet();
046:            public static Set thesaurusRelsForReplace = new HashSet();
047:
048:            static {
049:                thesaurusRelsToInclude.add("KWTR_UF");
050:                thesaurusRelsToInclude.add("KWTR_USE");
051:                thesaurusRelsToInclude.add("KWTR_CS");
052:                thesaurusRelsToInclude.add("KWTR_NT");
053:                thesaurusRelsToInclude.add("KWTR_BT");
054:                thesaurusRelsToInclude.add("KWTR_RT");
055:
056:                thesaurusRelsForReplace.add("KWTR_USE");
057:                thesaurusRelsForReplace.add("KWTR_CS");
058:            }
059:
060:            public static String getSeparators() {
061:                // String separators = ";: ,.!?\t\"\'\r\n\\/()[]{}*%<>-+_";
062:                String seps = UtilProperties.getPropertyValue("keywordsearch",
063:                        "index.keyword.separators",
064:                        ";: ,.!?\t\"\'\r\n\\/()[]{}*%<>-+_");
065:                return seps;
066:            }
067:
068:            public static String getStopWordBagOr() {
069:                return UtilProperties.getPropertyValue("keywordsearch",
070:                        "stop.word.bag.or");
071:            }
072:
073:            public static String getStopWordBagAnd() {
074:                return UtilProperties.getPropertyValue("keywordsearch",
075:                        "stop.word.bag.and");
076:            }
077:
078:            public static boolean getRemoveStems() {
079:                String removeStemsStr = UtilProperties.getPropertyValue(
080:                        "keywordsearch", "remove.stems");
081:                return "true".equals(removeStemsStr);
082:            }
083:
084:            public static Set getStemSet() {
085:                String stemBag = UtilProperties.getPropertyValue(
086:                        "keywordsearch", "stem.bag");
087:                Set stemSet = new TreeSet();
088:                if (UtilValidate.isNotEmpty(stemBag)) {
089:                    String curToken;
090:                    StringTokenizer tokenizer = new StringTokenizer(stemBag,
091:                            ": ");
092:                    while (tokenizer.hasMoreTokens()) {
093:                        curToken = tokenizer.nextToken();
094:                        stemSet.add(curToken);
095:                    }
096:                }
097:                return stemSet;
098:            }
099:
100:            public static void processForKeywords(String str, Map keywords,
101:                    boolean forSearch, boolean anyPrefix, boolean anySuffix,
102:                    boolean isAnd) {
103:                String separators = getSeparators();
104:                String stopWordBagOr = getStopWordBagOr();
105:                String stopWordBagAnd = getStopWordBagAnd();
106:
107:                boolean removeStems = getRemoveStems();
108:                Set stemSet = getStemSet();
109:
110:                processForKeywords(str, keywords, separators, stopWordBagAnd,
111:                        stopWordBagOr, removeStems, stemSet, forSearch,
112:                        anyPrefix, anySuffix, isAnd);
113:            }
114:
115:            public static void processKeywordsForIndex(String str,
116:                    Map keywords, String separators, String stopWordBagAnd,
117:                    String stopWordBagOr, boolean removeStems, Set stemSet) {
118:                processForKeywords(str, keywords, separators, stopWordBagAnd,
119:                        stopWordBagOr, removeStems, stemSet, false, false,
120:                        false, false);
121:            }
122:
123:            public static void processForKeywords(String str, Map keywords,
124:                    String separators, String stopWordBagAnd,
125:                    String stopWordBagOr, boolean removeStems, Set stemSet,
126:                    boolean forSearch, boolean anyPrefix, boolean anySuffix,
127:                    boolean isAnd) {
128:                Set keywordSet = makeKeywordSet(str, separators, forSearch);
129:                fixupKeywordSet(keywordSet, keywords, stopWordBagAnd,
130:                        stopWordBagOr, removeStems, stemSet, forSearch,
131:                        anyPrefix, anySuffix, isAnd);
132:            }
133:
134:            public static void fixupKeywordSet(Set keywordSet, Map keywords,
135:                    String stopWordBagAnd, String stopWordBagOr,
136:                    boolean removeStems, Set stemSet, boolean forSearch,
137:                    boolean anyPrefix, boolean anySuffix, boolean isAnd) {
138:                if (keywordSet == null) {
139:                    return;
140:                }
141:
142:                Iterator keywordIter = keywordSet.iterator();
143:                while (keywordIter.hasNext()) {
144:                    String token = (String) keywordIter.next();
145:
146:                    // when cleaning up the tokens the ordering is inportant: check stop words, remove stems, then get rid of 1 character tokens (1 digit okay)
147:
148:                    // check stop words
149:                    String colonToken = ":" + token + ":";
150:                    if (forSearch) {
151:                        if ((isAnd && stopWordBagAnd.indexOf(colonToken) >= 0)
152:                                || (!isAnd && stopWordBagOr.indexOf(colonToken) >= 0)) {
153:                            continue;
154:                        }
155:                    } else {
156:                        if (stopWordBagOr.indexOf(colonToken) >= 0
157:                                && stopWordBagAnd.indexOf(colonToken) >= 0) {
158:                            continue;
159:                        }
160:                    }
161:
162:                    // remove stems
163:                    if (removeStems) {
164:                        Iterator stemIter = stemSet.iterator();
165:                        while (stemIter.hasNext()) {
166:                            String stem = (String) stemIter.next();
167:                            if (token.endsWith(stem)) {
168:                                token = token.substring(0, token.length()
169:                                        - stem.length());
170:                            }
171:                        }
172:                    }
173:
174:                    // get rid of all length 0 tokens now
175:                    if (token.length() == 0) {
176:                        continue;
177:                    }
178:
179:                    // get rid of all length 1 character only tokens, pretty much useless
180:                    if (token.length() == 1
181:                            && Character.isLetter(token.charAt(0))) {
182:                        continue;
183:                    }
184:
185:                    if (forSearch) {
186:                        StringBuffer strSb = new StringBuffer();
187:                        if (anyPrefix)
188:                            strSb.append('%');
189:                        strSb.append(token);
190:                        if (anySuffix)
191:                            strSb.append('%');
192:                        // replace all %% with %
193:                        int dblPercIdx = -1;
194:                        while ((dblPercIdx = strSb.indexOf("%%")) >= 0) {
195:                            //Debug.logInfo("before strSb: " + strSb, module);
196:                            strSb.replace(dblPercIdx, dblPercIdx + 2, "%");
197:                            //Debug.logInfo("after strSb: " + strSb, module);
198:                        }
199:                        token = strSb.toString();
200:                    }
201:
202:                    // group by word, add up weight
203:                    Long curWeight = (Long) keywords.get(token);
204:                    if (curWeight == null) {
205:                        keywords.put(token, new Long(1));
206:                    } else {
207:                        keywords
208:                                .put(token, new Long(curWeight.longValue() + 1));
209:                    }
210:                }
211:            }
212:
213:            public static Set makeKeywordSet(String str, String separators,
214:                    boolean forSearch) {
215:                if (separators == null)
216:                    separators = getSeparators();
217:
218:                Set keywords = new TreeSet();
219:                if (str.length() > 0) {
220:                    // strip off weird characters
221:                    str = str.replaceAll("\\\302\\\240|\\\240", " ");
222:
223:                    if (forSearch) {
224:                        // remove %_*? from separators if is for a search
225:                        StringBuffer sb = new StringBuffer(separators);
226:                        if (sb.indexOf("%") >= 0)
227:                            sb.deleteCharAt(sb.indexOf("%"));
228:                        if (sb.indexOf("_") >= 0)
229:                            sb.deleteCharAt(sb.indexOf("_"));
230:                        if (sb.indexOf("*") >= 0)
231:                            sb.deleteCharAt(sb.indexOf("*"));
232:                        if (sb.indexOf("?") >= 0)
233:                            sb.deleteCharAt(sb.indexOf("?"));
234:                        separators = sb.toString();
235:                    }
236:
237:                    StringTokenizer tokener = new StringTokenizer(str,
238:                            separators, false);
239:                    while (tokener.hasMoreTokens()) {
240:                        // make sure it is lower case before doing anything else
241:                        String token = tokener.nextToken().toLowerCase();
242:
243:                        if (forSearch) {
244:                            // these characters will only be present if it is for a search, ie not for indexing
245:                            token = token.replace('*', '%');
246:                            token = token.replace('?', '_');
247:                        }
248:
249:                        keywords.add(token);
250:                    }
251:                }
252:                return keywords;
253:            }
254:
255:            public static Set fixKeywordsForSearch(Set keywordSet,
256:                    boolean anyPrefix, boolean anySuffix, boolean removeStems,
257:                    boolean isAnd) {
258:                Map keywords = new HashMap();
259:                fixupKeywordSet(keywordSet, keywords, getStopWordBagAnd(),
260:                        getStopWordBagOr(), removeStems, getStemSet(), true,
261:                        anyPrefix, anySuffix, isAnd);
262:                return keywords.keySet();
263:            }
264:
265:            public static boolean expandKeywordForSearch(String enteredKeyword,
266:                    Set addToSet, GenericDelegator delegator) {
267:                boolean replaceEnteredKeyword = false;
268:
269:                try {
270:                    List thesaurusList = delegator.findByAndCache(
271:                            "KeywordThesaurus", UtilMisc.toMap(
272:                                    "enteredKeyword", enteredKeyword));
273:                    Iterator thesaurusIter = thesaurusList.iterator();
274:                    while (thesaurusIter.hasNext()) {
275:                        GenericValue keywordThesaurus = (GenericValue) thesaurusIter
276:                                .next();
277:                        String relationshipEnumId = (String) keywordThesaurus
278:                                .get("relationshipEnumId");
279:                        if (thesaurusRelsToInclude.contains(relationshipEnumId)) {
280:                            addToSet
281:                                    .addAll(makeKeywordSet(keywordThesaurus
282:                                            .getString("alternateKeyword"),
283:                                            null, true));
284:                            if (thesaurusRelsForReplace
285:                                    .contains(relationshipEnumId)) {
286:                                replaceEnteredKeyword = true;
287:                            }
288:                        }
289:                    }
290:                } catch (GenericEntityException e) {
291:                    Debug
292:                            .logError(e, "Error expanding entered keyword",
293:                                    module);
294:                }
295:
296:                Debug.logInfo("Expanded keyword [" + enteredKeyword
297:                        + "], got set: " + addToSet, module);
298:                return replaceEnteredKeyword;
299:            }
300:        }
www.java2java.com | Contact Us
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.