Source Code Cross Referenced for Index.java in  » Content-Management-System » hippo » org » apache » slide » index » lucene » Java Source Code / Java Documentation | Java Source Code and Java Documentation

Java Source Code / Java Documentation
1. 6.0 JDK Core
2. 6.0 JDK Modules
3. 6.0 JDK Modules com.sun
4. 6.0 JDK Modules com.sun.java
5. 6.0 JDK Modules sun
6. 6.0 JDK Platform
7. Ajax
8. Apache Harmony Java SE
9. Aspect oriented
10. Authentication Authorization
11. Blogger System
12. Build
13. Byte Code
14. Cache
15. Chart
16. Chat
17. Code Analyzer
18. Collaboration
19. Content Management System
20. Database Client
21. Database DBMS
22. Database JDBC Connection Pool
23. Database ORM
24. Development
25. EJB Server geronimo
26. EJB Server GlassFish
27. EJB Server JBoss 4.2.1
28. EJB Server resin 3.1.5
29. ERP CRM Financial
30. ESB
31. Forum
32. GIS
33. Graphic Library
34. Groupware
35. HTML Parser
36. IDE
37. IDE Eclipse
38. IDE Netbeans
39. Installer
40. Internationalization Localization
41. Inversion of Control
42. Issue Tracking
43. J2EE
44. JBoss
45. JMS
46. JMX
47. Library
48. Mail Clients
49. Net
50. Parser
51. PDF
52. Portal
53. Profiler
54. Project Management
55. Report
56. RSS RDF
57. Rule Engine
58. Science
59. Scripting
60. Search Engine
61. Security
62. Servlet Container
63. Source Control
64. Swing Library
65. Template Engine
66. Test Coverage
67. Testing
68. UML
69. Web Crawler
70. Web Framework
71. Web Mail
72. Web Server
73. Web Services
74. Web Services apache cxf 2.0.1
75. Web Services AXIS2
76. Wiki Engine
77. Workflow Engines
78. XML
79. XML UI
Java
Java Tutorial
Java Open Source
Jar File Download
Java Articles
Java Products
Java by API
Photoshop Tutorials
Maya Tutorials
Flash Tutorials
3ds-Max Tutorials
Illustrator Tutorials
GIMP Tutorials
C# / C Sharp
C# / CSharp Tutorial
C# / CSharp Open Source
ASP.Net
ASP.NET Tutorial
JavaScript DHTML
JavaScript Tutorial
JavaScript Reference
HTML / CSS
HTML CSS Reference
C / ANSI-C
C Tutorial
C++
C++ Tutorial
Ruby
PHP
Python
Python Tutorial
Python Open Source
SQL Server / T-SQL
SQL Server / T-SQL Tutorial
Oracle PL / SQL
Oracle PL/SQL Tutorial
PostgreSQL
SQL / MySQL
MySQL Tutorial
VB.Net
VB.Net Tutorial
Flash / Flex / ActionScript
VBA / Excel / Access / Word
XML
XML Tutorial
Microsoft Office PowerPoint 2007 Tutorial
Microsoft Office Excel 2007 Tutorial
Microsoft Office Word 2007 Tutorial
Java Source Code / Java Documentation » Content Management System » hippo » org.apache.slide.index.lucene 
Source Cross Referenced  Class Diagram Java Document (Java Doc) 


001:        /*
002:         * $Header$
003:         * $Revision: 7966 $
004:         * $Date: 2007-08-23 05:23:06 -0700 $
005:         *
006:         * ====================================================================
007:         *
008:         * Copyright 1999-2004 The Apache Software Foundation
009:         *
010:         * Licensed under the Apache License, Version 2.0 (the "License");
011:         * you may not use this file except in compliance with the License.
012:         * You may obtain a copy of the License at
013:         *
014:         *     http://www.apache.org/licenses/LICENSE-2.0
015:         *
016:         * Unless required by applicable law or agreed to in writing, software
017:         * distributed under the License is distributed on an "AS IS" BASIS,
018:         * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
019:         * See the License for the specific language governing permissions and
020:         * limitations under the License.
021:         *
022:         */
023:        package org.apache.slide.index.lucene;
024:
025:        import java.io.ByteArrayInputStream;
026:        import java.io.File;
027:        import java.io.IOException;
028:        import java.io.Reader;
029:        import java.text.DecimalFormat;
030:        import java.text.SimpleDateFormat;
031:        import java.util.Date;
032:        import java.util.Enumeration;
033:        import java.util.Iterator;
034:        import java.util.LinkedList;
035:        import java.util.Locale;
036:        import java.util.StringTokenizer;
037:
038:        import nl.hippo.slide.extractor.LanguageSpecificContentExtractor;
039:        import nl.hippo.slide.index.analysis.SimpleStandardAnalyzer;
040:
041:        import org.apache.avalon.framework.logger.Logger;
042:        import org.apache.lucene.document.Document;
043:        import org.apache.lucene.document.Field;
044:        import org.apache.lucene.index.IndexReader;
045:        import org.apache.lucene.index.IndexWriter;
046:        import org.apache.lucene.search.BooleanQuery;
047:        import org.apache.lucene.search.IndexSearcher;
048:        import org.apache.lucene.store.Directory;
049:        import org.apache.lucene.store.FSDirectory;
050:        import org.apache.slide.content.NodeProperty;
051:        import org.apache.slide.content.NodeRevisionDescriptor;
052:        import org.apache.slide.content.RevisionNotFoundException;
053:        import org.apache.slide.extractor.ContentExtractor;
054:        import org.apache.slide.extractor.ExtractorException;
055:        import org.apache.slide.search.IndexException;
056:
057:        /**
058:         * Wrapper for Lucene index.
059:         */
060:        public class Index {
061:            public static final String KEY_FIELD_NAME = "SLIDE_KEY";
062:            public static final String URI_FIELD_NAME = "SLIDE_URI";
063:            public static final String SCOPE_FIELD_NAME = "SLIDE_SCOPE";
064:            public static final String DEPTH_FIELD_NAME = "SLIDE_DEPTH";
065:            public static final String VERSION_FIELD_NAME = "SLIDE_VERSION";
066:            public static final String IS_DEFINED_FIELD_NAME = "SLIDE_ISDEFINED";
067:            public static final String CONTENT_FIELD_NAME = "SLIDE_CONTENT";
068:            public static final String NULL_FIELD_NAME = "SLIDE_NULL";
069:            public static final String STRING_INDEX_DATE_FORMATE = "yyyy-MM-dd HH:mm:ss";
070:
071:            public static final SimpleDateFormat DATE_INDEX_FORMAT = new SimpleDateFormat(
072:                    STRING_INDEX_DATE_FORMATE, Locale.UK);
073:
074:            public static final DecimalFormat INT_INDEX_FORMAT = new DecimalFormat(
075:                    "b0000000000000000000;a0000000000000000000");
076:
077:            public static final String DATE_LOWER_BOUND = new SimpleDateFormat(
078:                    STRING_INDEX_DATE_FORMATE, Locale.UK).format(new Date(0));
079:            public static final String DATE_UPPER_BOUND = new SimpleDateFormat(
080:                    STRING_INDEX_DATE_FORMATE, Locale.UK).format(new Date(
081:                    Long.MAX_VALUE));
082:            public static final String INT_LOWER_BOUND = INT_INDEX_FORMAT
083:                    .format(Long.MIN_VALUE);
084:            public static final String INT_UPPER_BOUND = INT_INDEX_FORMAT
085:                    .format(Long.MAX_VALUE);
086:            public static final String STRING_UPPER_BOUND = "\uffff\uffff";
087:            public static final String STRING_LOWER_BOUND = "";
088:
089:            protected static final String LOG_CHANNEL = Index.class.getName();
090:
091:            protected IndexConfiguration configuration;
092:            protected String indexName;
093:            protected Logger logger;
094:
095:            protected LinkedList txnQueue = new LinkedList();
096:
097:            /**
098:             * Counter for recently executed index jobs (insertions, deletions).
099:             * Will be reseted after optimization.
100:             */
101:            public Index(IndexConfiguration configuration, Logger logger,
102:                    String name) throws IndexException {
103:                this .logger = logger;
104:                this .configuration = configuration;
105:                this .indexName = name;
106:
107:                File file = new File(this .configuration.getIndexPath());
108:                if (!file.exists() && !file.mkdirs()) {
109:                    throw new IndexException(
110:                            "Error can't find or create index directory: "
111:                                    + this .configuration.getIndexPath());
112:                }
113:
114:                try {
115:                    Directory directory = getDirectory();
116:                    if (IndexReader.indexExists(directory)) {
117:                        if (IndexReader.isLocked(directory)) {
118:                            IndexReader.unlock(directory);
119:                        }
120:                    } else {
121:                        IndexWriter writer = new IndexWriter(directory,
122:                                configuration.getAnalyzer(), true);
123:                        writer.close();
124:                    }
125:                } catch (IOException e) {
126:                    throw new IndexException("Error while creating index: "
127:                            + this .configuration.getIndexPath(), e);
128:                }
129:
130:                // TODO make configurable
131:                BooleanQuery.setMaxClauseCount(Integer.MAX_VALUE);
132:            }
133:
134:            public IndexConfiguration getConfiguration() {
135:                return this .configuration;
136:            }
137:
138:            public Logger getLogger() {
139:                return this .logger;
140:            }
141:
142:            public IndexSearcher getSearcher() throws IOException {
143:                // TODO can this be reused?
144:                return new IndexSearcher(this .configuration.getIndexPath());
145:            }
146:
147:            public void releaseSearcher(IndexSearcher s) throws IOException {
148:                s.close();
149:            }
150:
151:            public IndexReader getReader() throws IOException {
152:                return IndexReader.open(this .configuration.getIndexPath());
153:            }
154:
155:            public void releaseReader(IndexReader r) throws IOException {
156:                r.close();
157:            }
158:
159:            private Directory getDirectory() throws IOException {
160:                // file system based directory
161:                return FSDirectory.getDirectory(this .configuration
162:                        .getIndexPath(), false);
163:            }
164:
165:            private Field indexString(String fieldName, String value,
166:                    boolean storeAll) {
167:                // if storeAll : store field
168:                if (storeAll) {
169:                    return storedString(fieldName, value);
170:                } else {
171:                    return unstoredString(fieldName, value);
172:                }
173:            }
174:
175:            private Field indexTextField(String fieldName, String value,
176:                    boolean storeAll) {
177:                // if storeAll : store field
178:                if (storeAll) {
179:                    return storedTextField(fieldName, value);
180:                } else {
181:                    return textField(fieldName, value);
182:                }
183:            }
184:
185:            private Field unstoredString(String fieldName, String value) {
186:                if (!configuration.isCaseSensitive())
187:                    value = value.toLowerCase();
188:                // don't store, index, don't tokenize
189:                return new Field(fieldName, value, Field.Store.NO,
190:                        Field.Index.UN_TOKENIZED);
191:            }
192:
193:            private Field storedString(String fieldName, String value) {
194:                if (!configuration.isCaseSensitive())
195:                    value = value.toLowerCase();
196:                // store, index, don't tokenize
197:                return new Field(fieldName, value, Field.Store.YES,
198:                        Field.Index.UN_TOKENIZED);
199:            }
200:
201:            private Field unstoredNonContentString(String fieldName,
202:                    String value) {
203:                // store book keeping info in the original case
204:                // don't store, index, don't tokenize
205:                return new Field(fieldName, value, Field.Store.NO,
206:                        Field.Index.UN_TOKENIZED);
207:            }
208:
209:            private Field storedNonContentString(String fieldName, String value) {
210:                // store book keeping info in the original case
211:                // store, index, don't tokenize
212:                return new Field(fieldName, value, Field.Store.YES,
213:                        Field.Index.UN_TOKENIZED);
214:            }
215:
216:            // dont need to lowercase the text fields cause the analyzers will take care of that
217:            private Field textField(String fieldName, String value) {
218:                // don't store, index, tokenize
219:                return new Field(fieldName, value, Field.Store.NO,
220:                        Field.Index.TOKENIZED);
221:            }
222:
223:            private Field storedTextField(String fieldName, String value) {
224:                // don't store, index, tokenize
225:                return new Field(fieldName, value, Field.Store.YES,
226:                        Field.Index.TOKENIZED);
227:            }
228:
229:            private Field textField(String fieldName, Reader value) {
230:                // default: don't store, index, tokenize
231:                return new Field(fieldName, value);
232:            }
233:
234:            /**
235:             * Creates a lucene index document for a properties indexer.
236:             * @param uri resource 
237:             * @param descriptor properties to be indexed
238:             */
239:            public Document createLuceneDocument(String uri,
240:                    NodeRevisionDescriptor descriptor, byte[] contentBuffer,
241:                    ContentExtractor[] extractors)
242:                    throws RevisionNotFoundException, ExtractorException {
243:
244:                this .logger.debug(uri + ": creating doc!");
245:
246:                Document doc = new Document();
247:
248:                doc.add(unstoredNonContentString(Index.KEY_FIELD_NAME,
249:                        configuration.generateKey(uri, descriptor
250:                                .getRevisionNumber())));
251:                doc.add(storedNonContentString(Index.URI_FIELD_NAME, uri));
252:
253:                // scopes
254:                StringTokenizer tokenizer = new StringTokenizer(uri, "/");
255:                StringBuffer buffer = new StringBuffer(uri.length());
256:
257:                doc.add(unstoredNonContentString(Index.SCOPE_FIELD_NAME, "/"));
258:                int depth = 0;
259:                for (; tokenizer.hasMoreTokens();) {
260:                    buffer.append("/").append(tokenizer.nextToken());
261:                    doc.add(unstoredNonContentString(Index.SCOPE_FIELD_NAME,
262:                            buffer.toString()));
263:                    depth++;
264:                }
265:                doc.add(unstoredNonContentString(Index.DEPTH_FIELD_NAME,
266:                        configuration.intToIndexString(depth)));
267:
268:                // resource type
269:                String rtype = descriptor.getResourceType();
270:                for (Iterator i = configuration.knownResourceTypes(); i
271:                        .hasNext();) {
272:                    String name = (String) i.next();
273:                    if (rtype.indexOf(name) != -1) {
274:                        doc.add(unstoredNonContentString(IndexConfiguration
275:                                .generateFieldName(
276:                                        NodeProperty.DEFAULT_NAMESPACE,
277:                                        "resourcetype"), name));
278:                    }
279:                }
280:
281:                // all other properties
282:                for (Enumeration e = descriptor.enumerateProperties(); e
283:                        .hasMoreElements();) {
284:                    NodeProperty property = (NodeProperty) e.nextElement();
285:
286:                    String p_namespace = property.getNamespace();
287:                    String p_name = property.getName();
288:
289:                    Object value = property.getValue();
290:
291:                    if (value == null)
292:                        continue;
293:                    if (!configuration.isIndexedProperty(p_namespace, p_name))
294:                        continue;
295:
296:                    if (configuration.isDateProperty(p_namespace, p_name)) {
297:                        Date date = IndexConfiguration.getDateValue(value);
298:                        if (date != null) {
299:                            doc.add(indexString(IndexConfiguration
300:                                    .generateFieldName(property.getNamespace(),
301:                                            property.getName()), configuration
302:                                    .dateToIndexString(date),
303:                                    configuration.storeAll));
304:                        }
305:                        this .logger.debug(IndexConfiguration.generateFieldName(
306:                                property.getNamespace(), property.getName())
307:                                + " is a date type!");
308:                    } else if (configuration.isIntProperty(p_namespace, p_name)) {
309:                        try {
310:                            doc.add(indexString(IndexConfiguration
311:                                    .generateFieldName(property.getNamespace(),
312:                                            property.getName()), configuration
313:                                    .intToIndexString(Long.parseLong(value
314:                                            .toString())),
315:                                    configuration.storeAll));
316:                            this .logger.debug(IndexConfiguration
317:                                    .generateFieldName(property.getNamespace(),
318:                                            property.getName())
319:                                    + " is an int type!");
320:                        } catch (NumberFormatException ex) {
321:                            // TODO log warning
322:                        }
323:                    } else if (configuration
324:                            .isTextProperty(p_namespace, p_name)) {
325:                        doc.add(indexTextField(IndexConfiguration
326:                                .generateFieldName(property.getNamespace(),
327:                                        property.getName()), value.toString(),
328:                                configuration.storeAll));
329:                        this .logger.debug(IndexConfiguration.generateFieldName(
330:                                property.getNamespace(), property.getName())
331:                                + " is a text type!");
332:                    } else if (configuration.isStringProperty(p_namespace,
333:                            p_name)) {
334:                        doc.add(indexString(IndexConfiguration
335:                                .generateFieldName(property.getNamespace(),
336:                                        property.getName()), value.toString(),
337:                                configuration.storeAll));
338:
339:                        // also add default text tokenized property for String properties for seaching
340:                        String fieldName = IndexConfiguration
341:                                .generateFieldName(property.getNamespace(),
342:                                        IndexConfiguration.TOKENIZED_PREFIX
343:                                                + property.getName());
344:                        if (configuration.getAnalyzerForField(fieldName) == null) {
345:                            configuration.addTextProperty(property
346:                                    .getNamespace(),
347:                                    IndexConfiguration.TOKENIZED_PREFIX
348:                                            + property.getName(),
349:                                    new SimpleStandardAnalyzer());
350:                        }
351:                        doc.add(textField(fieldName, value.toString()));
352:
353:                        this .logger.debug(IndexConfiguration.generateFieldName(
354:                                property.getNamespace(), property.getName())
355:                                + " is a string type!");
356:                    }
357:                    if (configuration.supportsIsDefined(p_namespace, p_name)) {
358:                        doc.add(unstoredNonContentString(
359:                                Index.IS_DEFINED_FIELD_NAME, IndexConfiguration
360:                                        .generateFieldName(property
361:                                                .getNamespace(), property
362:                                                .getName())));
363:                        this .logger.debug(IndexConfiguration.generateFieldName(
364:                                property.getNamespace(), property.getName())
365:                                + " supports is-defined!");
366:                    }
367:                }
368:
369:                if (extractors != null && extractors.length > 0
370:                        && contentBuffer != null) {
371:                    for (int i = 0; i < extractors.length; i++) {
372:                        ByteArrayInputStream content = new ByteArrayInputStream(
373:                                contentBuffer);
374:                        try {
375:                            if (extractors[i] instanceof  LanguageSpecificContentExtractor) {
376:                                LanguageSpecificContentExtractor lsce = (LanguageSpecificContentExtractor) extractors[i];
377:                                String locale = lsce.getLocale();
378:                                if (locale != null && !locale.equals("")) {
379:                                    doc.add(textField(Index.CONTENT_FIELD_NAME
380:                                            .concat(locale), extractors[i]
381:                                            .extract(content)));
382:                                } else {
383:                                    doc.add(textField(Index.CONTENT_FIELD_NAME,
384:                                            extractors[i].extract(content)));
385:                                }
386:                            } else {
387:                                doc.add(textField(Index.CONTENT_FIELD_NAME,
388:                                        extractors[i].extract(content)));
389:                            }
390:                        } catch (Exception e) {
391:                            this .logger
392:                                    .warn(
393:                                            uri
394:                                                    + " error extracting content, skipping (extractor = '"
395:                                                    + extractors[i] + "'):", e);
396:                        }
397:                    }
398:
399:                    this .logger
400:                            .debug(uri + " has content which was extracted!");
401:                }
402:
403:                return doc;
404:            }
405:
406:        }
www.java2java.com | Contact Us
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.