Source Code Cross Referenced for RegainToolkit.java in » Search-Engine » regain » net » sf » regain » Java Source Code / Java Documentation | Java Source Code and Java Documentation

Java Source Code / Java Documentation
1.	6.0 JDK Core
2.	6.0 JDK Modules
3.	6.0 JDK Modules com.sun
4.	6.0 JDK Modules com.sun.java
5.	6.0 JDK Modules sun
6.	6.0 JDK Platform
7.	Ajax
8.	Apache Harmony Java SE
9.	Aspect oriented
10.	Authentication Authorization
11.	Blogger System
12.	Build
13.	Byte Code
14.	Cache
15.	Chart
16.	Chat
17.	Code Analyzer
18.	Collaboration
19.	Content Management System
20.	Database Client
21.	Database DBMS
22.	Database JDBC Connection Pool
23.	Database ORM
24.	Development
25.	EJB Server geronimo
26.	EJB Server GlassFish
27.	EJB Server JBoss 4.2.1
28.	EJB Server resin 3.1.5
29.	ERP CRM Financial
30.	ESB
31.	Forum
32.	GIS
33.	Graphic Library
34.	Groupware
35.	HTML Parser
36.	IDE
37.	IDE Eclipse
38.	IDE Netbeans
39.	Installer
40.	Internationalization Localization
41.	Inversion of Control
42.	Issue Tracking
43.	J2EE
44.	JBoss
45.	JMS
46.	JMX
47.	Library
48.	Mail Clients
49.	Net
50.	Parser
51.	PDF
52.	Portal
53.	Profiler
54.	Project Management
55.	Report
56.	RSS RDF
57.	Rule Engine
58.	Science
59.	Scripting
60.	Search Engine
61.	Security
62.	Servlet Container
63.	Source Control
64.	Swing Library
65.	Template Engine
66.	Test Coverage
67.	Testing
68.	UML
69.	Web Crawler
70.	Web Framework
71.	Web Mail
72.	Web Server
73.	Web Services
74.	Web Services apache cxf 2.0.1
75.	Web Services AXIS2
76.	Wiki Engine
77.	Workflow Engines
78.	XML
79.	XML UI
Java
Java Tutorial
Illustrator Tutorials
GIMP Tutorials
C# / C Sharp
C# / CSharp Tutorial
C# / CSharp Open Source
SQL Server / T-SQL Tutorial
Oracle PL / SQL
Oracle PL/SQL Tutorial
Flash / Flex / ActionScript
VBA / Excel / Access / Word
XML
XML Tutorial
Microsoft Office PowerPoint 2007 Tutorial
Microsoft Office Excel 2007 Tutorial
Microsoft Office Word 2007 Tutorial
Java Source Code / Java Documentation » Search Engine » regain » net.sf.regain
Source Cross Referenced Class Diagram Java Document (Java Doc)
0001:        /*
0002:         * regain - A file search engine providing plenty of formats
0003:         * Copyright (C) 2004  Til Schneider
0004:         *
0005:         * This library is free software; you can redistribute it and/or
0006:         * modify it under the terms of the GNU Lesser General Public
0007:         * License as published by the Free Software Foundation; either
0008:         * version 2.1 of the License, or (at your option) any later version.
0009:         *
0010:         * This library is distributed in the hope that it will be useful,
0011:         * but WITHOUT ANY WARRANTY; without even the implied warranty of
0012:         * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
0013:         * Lesser General Public License for more details.
0014:         *
0015:         * You should have received a copy of the GNU Lesser General Public
0016:         * License along with this library; if not, write to the Free Software
0017:         * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
0018:         *
0019:         * Contact: Til Schneider, info@murfman.de
0020:         *
0021:         * CVS information:
0022:         *  $RCSfile$
0023:         *   $Source$
0024:         *     $Date: 2007-11-01 13:53:31 +0100 (Do, 01 Nov 2007) $
0025:         *   $Author: til132 $
0026:         * $Revision: 260 $
0027:         */
0028:        package net.sf.regain;
0029:
0030:        import java.io.BufferedReader;
0031:        import java.io.File;
0032:        import java.io.FileInputStream;
0033:        import java.io.FileOutputStream;
0034:        import java.io.FileReader;
0035:        import java.io.IOException;
0036:        import java.io.InputStream;
0037:        import java.io.InputStreamReader;
0038:        import java.io.OutputStream;
0039:        import java.io.PrintStream;
0040:        import java.io.Reader;
0041:        import java.io.StringReader;
0042:        import java.io.StringWriter;
0043:        import java.io.UnsupportedEncodingException;
0044:        import java.io.Writer;
0045:        import java.lang.reflect.Constructor;
0046:        import java.lang.reflect.Method;
0047:        import java.net.MalformedURLException;
0048:        import java.net.URL;
0049:        import java.net.URLClassLoader;
0050:        import java.net.URLDecoder;
0051:        import java.net.URLEncoder;
0052:        import java.text.NumberFormat;
0053:        import java.util.ArrayList;
0054:        import java.util.Arrays;
0055:        import java.util.Calendar;
0056:        import java.util.Date;
0057:        import java.util.HashMap;
0058:        import java.util.HashSet;
0059:        import java.util.Iterator;
0060:        import java.util.Locale;
0061:        import java.util.StringTokenizer;
0062:
0063:        import org.apache.lucene.analysis.Analyzer;
0064:        import org.apache.lucene.analysis.Token;
0065:        import org.apache.lucene.analysis.TokenStream;
0066:        import org.apache.lucene.analysis.WhitespaceAnalyzer;
0067:        import org.apache.lucene.analysis.de.GermanAnalyzer;
0068:        import org.apache.lucene.analysis.standard.StandardAnalyzer;
0069:        import org.apache.lucene.index.IndexReader;
0070:        import org.apache.lucene.index.Term;
0071:        import org.apache.lucene.index.TermEnum;
0072:
0073:        /**
0074:         * Contains helper methods that are used by both the crawler and the search
0075:         * mask.
0076:         *
0077:         * @author Til Schneider, www.murfman.de
0078:         */
0079:        public class RegainToolkit {
0080:
0081:            /** The encoding used for storing URLs in the index */
0082:            public static final String INDEX_ENCODING = "UTF-8";
0083:
0084:            /**
0085:             * Specifies whether the words identified by the analyzer should be
0086:             * printed out.
0087:             */
0088:            private static final boolean ANALYSE_ANALYZER = false;
0089:
0090:            /** The number of bytes in a kB (kilo byte). */
0091:            private static final int SIZE_KB = 1024;
0092:
0093:            /** The number of bytes in a MB (mega byte). */
0094:            private static final int SIZE_MB = 1024 * 1024;
0095:
0096:            /** The number of bytes in a GB (giga byte). */
0097:            private static final int SIZE_GB = 1024 * 1024 * 1024;
0098:
0099:            /** The cached system's default encoding. */
0100:            private static String mSystemDefaultEncoding;
0101:
0102:            /** The cached, system-specific line separator. */
0103:            private static String mLineSeparator;
0104:
0105:            /**
0106:             * L�scht ein Verzeichnis mit allen Unterverzeichnissen und -dateien.
0107:             *
0108:             * @param dir Das zu l�schende Verzeichnis.
0109:             *
0110:             * @throws RegainException Wenn das L�schen fehl schlug.
0111:             */
0112:            public static void deleteDirectory(File dir) throws RegainException {
0113:                if (!dir.exists()) {
0114:                    return; // Nothing to do
0115:                }
0116:
0117:                // Delete all children
0118:                File[] children = dir.listFiles();
0119:                if (children != null) {
0120:                    for (int i = 0; i < children.length; i++) {
0121:                        if (children[i].isDirectory()) {
0122:                            deleteDirectory(children[i]);
0123:                        } else {
0124:                            if (!children[i].delete()) {
0125:                                throw new RegainException("Deleting "
0126:                                        + children[i].getAbsolutePath()
0127:                                        + " failed!");
0128:                            }
0129:                        }
0130:                    }
0131:                }
0132:
0133:                // Delete self
0134:                if (!dir.delete()) {
0135:                    throw new RegainException("Deleting "
0136:                            + dir.getAbsolutePath() + " failed!");
0137:                }
0138:            }
0139:
0140:            /**
0141:             * Writes all data from the reader to the writer.
0142:             * <p>
0143:             * Neither the reader nor the writer will be closed. This has to be done by
0144:             * the caller!
0145:             *
0146:             * @param reader The reader that provides the data.
0147:             * @param writer The writer where to write the data.
0148:             *
0149:             * @throws IOException If reading or writing failed.
0150:             */
0151:            public static void pipe(Reader reader, Writer writer)
0152:                    throws IOException {
0153:                char[] buffer = new char[10240]; // 10 kB (or kChars ;-) )
0154:
0155:                int len;
0156:                while ((len = reader.read(buffer)) != -1) {
0157:                    writer.write(buffer, 0, len);
0158:                }
0159:            }
0160:
0161:            /**
0162:             * Schreibt alle Daten, die der InputStream liefert in den OutputStream.
0163:             * <p>
0164:             * Weder der InputStream noch der OutputStream werden dabei geschlossen. Dies
0165:             * muss die aufrufende Methode �bernehmen!
0166:             *
0167:             * @param in Der InputStream, der die Daten liefert.
0168:             * @param out Der OutputStream auf den die Daten geschrieben werden sollen.
0169:             *
0170:             * @throws IOException Wenn Lesen oder Schreiben fehl schlug.
0171:             */
0172:            public static void pipe(InputStream in, OutputStream out)
0173:                    throws IOException {
0174:                byte[] buffer = new byte[10240]; // 10 kB
0175:
0176:                int len;
0177:                while ((len = in.read(buffer)) != -1) {
0178:                    out.write(buffer, 0, len);
0179:                }
0180:            }
0181:
0182:            /**
0183:             * Copies a file.
0184:             *
0185:             * @param from The source file.
0186:             * @param to The target file.
0187:             * @throws RegainException If copying failed.
0188:             */
0189:            public static void copyFile(File from, File to)
0190:                    throws RegainException {
0191:                FileInputStream in = null;
0192:                FileOutputStream out = null;
0193:                try {
0194:                    in = new FileInputStream(from);
0195:                    out = new FileOutputStream(to);
0196:
0197:                    RegainToolkit.pipe(in, out);
0198:                } catch (IOException exc) {
0199:                    throw new RegainException("Copying file from "
0200:                            + from.getAbsolutePath() + " to "
0201:                            + to.getAbsolutePath() + " failed", exc);
0202:                } finally {
0203:                    if (out != null) {
0204:                        try {
0205:                            out.close();
0206:                        } catch (IOException exc) {
0207:                        }
0208:                    }
0209:                    if (in != null) {
0210:                        try {
0211:                            in.close();
0212:                        } catch (IOException exc) {
0213:                        }
0214:                    }
0215:                }
0216:            }
0217:
0218:            /**
0219:             * Copies a directory.
0220:             * 
0221:             * @param fromDir The source directory.
0222:             * @param toDir The target directory.
0223:             * @param copySubDirs Specifies whether to copy sub directories.
0224:             * @param excludeExtension The file extension to exclude.
0225:             * @throws RegainException If copying the index failed.
0226:             */
0227:            public static void copyDirectory(File fromDir, File toDir,
0228:                    boolean copySubDirs, String excludeExtension)
0229:                    throws RegainException {
0230:                File[] indexFiles = fromDir.listFiles();
0231:                for (int i = 0; i < indexFiles.length; i++) {
0232:                    String fileName = indexFiles[i].getName();
0233:                    File targetFile = new File(toDir, fileName);
0234:                    if (indexFiles[i].isDirectory()) {
0235:                        if (copySubDirs) {
0236:                            targetFile.mkdir();
0237:                            copyDirectory(indexFiles[i], targetFile,
0238:                                    copySubDirs, excludeExtension);
0239:                        }
0240:                    } else if ((excludeExtension == null)
0241:                            || (!fileName.endsWith(excludeExtension))) {
0242:                        RegainToolkit.copyFile(indexFiles[i], targetFile);
0243:                    }
0244:                }
0245:            }
0246:
0247:            /**
0248:             * Copies a directory.
0249:             * 
0250:             * @param fromDir The source directory.
0251:             * @param toDir The target directory.
0252:             * @param copySubDirs Specifies whether to copy sub directories.
0253:             * @throws RegainException If copying the index failed.
0254:             */
0255:            public static void copyDirectory(File fromDir, File toDir,
0256:                    boolean copySubDirs) throws RegainException {
0257:                copyDirectory(fromDir, toDir, copySubDirs, null);
0258:            }
0259:
0260:            /**
0261:             * Reads a String from a stream.
0262:             * 
0263:             * @param stream The stream to read the String from
0264:             * @param charsetName The name of the charset to use.
0265:             * @return The stream content as String.
0266:             * @throws RegainException If reading the String failed.
0267:             */
0268:            public static String readStringFromStream(InputStream stream,
0269:                    String charsetName) throws RegainException {
0270:                InputStreamReader reader = null;
0271:                try {
0272:                    if (charsetName == null) {
0273:                        reader = new InputStreamReader(stream);
0274:                    } else {
0275:                        reader = new InputStreamReader(stream, charsetName);
0276:                    }
0277:                    StringWriter writer = new StringWriter();
0278:
0279:                    RegainToolkit.pipe(reader, writer);
0280:
0281:                    reader.close();
0282:                    writer.close();
0283:
0284:                    return writer.toString();
0285:                } catch (IOException exc) {
0286:                    throw new RegainException(
0287:                            "Reading String from stream failed", exc);
0288:                } finally {
0289:                    if (reader != null) {
0290:                        try {
0291:                            reader.close();
0292:                        } catch (IOException exc) {
0293:                        }
0294:                    }
0295:                }
0296:            }
0297:
0298:            /**
0299:             * Reads a String from a stream.
0300:             * 
0301:             * @param stream The stream to read the String from
0302:             * @return The stream content as String.
0303:             * @throws RegainException If reading the String failed.
0304:             */
0305:            public static String readStringFromStream(InputStream stream)
0306:                    throws RegainException {
0307:                return readStringFromStream(stream, null);
0308:            }
0309:
0310:            /**
0311:             * Liest einen String aus einer Datei.
0312:             *
0313:             * @param file Die Datei aus der der String gelesen werden soll.
0314:             *
0315:             * @return Der Inhalt der Datei als String oder <code>null</code>, wenn die
0316:             *         Datei nicht existiert.
0317:             * @throws RegainException Wenn das Lesen fehl schlug.
0318:             */
0319:            public static String readStringFromFile(File file)
0320:                    throws RegainException {
0321:                if (!file.exists()) {
0322:                    return null;
0323:                }
0324:
0325:                FileInputStream stream = null;
0326:                try {
0327:                    stream = new FileInputStream(file);
0328:                    return readStringFromStream(stream);
0329:                } catch (IOException exc) {
0330:                    throw new RegainException("Reading String from "
0331:                            + file.getAbsolutePath() + "failed", exc);
0332:                } finally {
0333:                    if (stream != null) {
0334:                        try {
0335:                            stream.close();
0336:                        } catch (IOException exc) {
0337:                        }
0338:                    }
0339:                }
0340:            }
0341:
0342:            /**
0343:             * Reads a word list from a file.
0344:             *
0345:             * @param file The file to read the list from.
0346:             *
0347:             * @return The lines of the file.
0348:             * @throws RegainException If reading failed.
0349:             */
0350:            public static String[] readListFromFile(File file)
0351:                    throws RegainException {
0352:                if (!file.exists()) {
0353:                    return null;
0354:                }
0355:
0356:                FileReader reader = null;
0357:                BufferedReader buffReader = null;
0358:                try {
0359:                    reader = new FileReader(file);
0360:                    buffReader = new BufferedReader(reader);
0361:
0362:                    ArrayList list = new ArrayList();
0363:                    String line;
0364:                    while ((line = buffReader.readLine()) != null) {
0365:                        list.add(line);
0366:                    }
0367:
0368:                    String[] asArr = new String[list.size()];
0369:                    list.toArray(asArr);
0370:
0371:                    return asArr;
0372:                } catch (IOException exc) {
0373:                    throw new RegainException("Reading word list from "
0374:                            + file.getAbsolutePath() + "failed", exc);
0375:                } finally {
0376:                    if (buffReader != null) {
0377:                        try {
0378:                            buffReader.close();
0379:                        } catch (IOException exc) {
0380:                        }
0381:                    }
0382:                    if (reader != null) {
0383:                        try {
0384:                            reader.close();
0385:                        } catch (IOException exc) {
0386:                        }
0387:                    }
0388:                }
0389:            }
0390:
0391:            /**
0392:             * Writes data to a file
0393:             *
0394:             * @param data The data
0395:             * @param file The file to write to
0396:             *
0397:             * @throws RegainException When writing failed
0398:             */
0399:            public static void writeToFile(byte[] data, File file)
0400:                    throws RegainException {
0401:                FileOutputStream stream = null;
0402:                try {
0403:                    stream = new FileOutputStream(file);
0404:                    stream.write(data);
0405:                    stream.close();
0406:                } catch (IOException exc) {
0407:                    throw new RegainException("Writing file failed: "
0408:                            + file.getAbsolutePath(), exc);
0409:                } finally {
0410:                    if (stream != null) {
0411:                        try {
0412:                            stream.close();
0413:                        } catch (IOException exc) {
0414:                        }
0415:                    }
0416:                }
0417:            }
0418:
0419:            /**
0420:             * Writes a String into a file.
0421:             *
0422:             * @param text The string.
0423:             * @param file The file to write to.
0424:             *
0425:             * @throws RegainException If writing failed.
0426:             */
0427:            public static void writeToFile(String text, File file)
0428:                    throws RegainException {
0429:                writeListToFile(new String[] { text }, file);
0430:            }
0431:
0432:            /**
0433:             * Writes a word list in a file. Each item of the list will be written in a
0434:             * line.
0435:             *
0436:             * @param wordList The word list.
0437:             * @param file The file to write to.
0438:             *
0439:             * @throws RegainException If writing failed.
0440:             */
0441:            public static void writeListToFile(String[] wordList, File file)
0442:                    throws RegainException {
0443:                if ((wordList == null) || (wordList.length == 0)) {
0444:                    // Nothing to do
0445:                    return;
0446:                }
0447:
0448:                FileOutputStream stream = null;
0449:                PrintStream printer = null;
0450:                try {
0451:                    stream = new FileOutputStream(file);
0452:                    printer = new PrintStream(stream);
0453:
0454:                    for (int i = 0; i < wordList.length; i++) {
0455:                        printer.println(wordList[i]);
0456:                    }
0457:                } catch (IOException exc) {
0458:                    throw new RegainException("Writing word list to "
0459:                            + file.getAbsolutePath() + " failed", exc);
0460:                } finally {
0461:                    if (printer != null) {
0462:                        printer.close();
0463:                    }
0464:                    if (stream != null) {
0465:                        try {
0466:                            stream.close();
0467:                        } catch (IOException exc) {
0468:                        }
0469:                    }
0470:                }
0471:            }
0472:
0473:            /**
0474:             * Gets the size of a directory with all files.
0475:             * 
0476:             * @param dir The directory to get the size for.
0477:             * @return The size of the directory.
0478:             */
0479:            public static long getDirectorySize(File dir) {
0480:                File[] childArr = dir.listFiles();
0481:                long size = 0;
0482:                if (childArr != null) {
0483:                    for (int i = 0; i < childArr.length; i++) {
0484:                        if (childArr[i].isDirectory()) {
0485:                            size += getDirectorySize(childArr[i]);
0486:                        } else {
0487:                            size += childArr[i].length();
0488:                        }
0489:                    }
0490:                }
0491:                return size;
0492:            }
0493:
0494:            /**
0495:             * Returns the destinct values of one or more fields.
0496:             * <p>
0497:             * If an index directory is provided, then the values will be read from there.
0498:             * They will be extracted from the search index if there are no matching
0499:             * cache files. After extracting the cache files will be created, so the next
0500:             * call will be faster.
0501:             *
0502:             * @param indexReader The index reader to use for reading the field values.
0503:             * @param fieldNameArr The names of the fields to read the destinct values for.
0504:             * @param indexDir The index directory where to read or write the cached
0505:             *        destinct values. May be null.
0506:             * @return A hashmap containing for a field name (key, String) the sorted
0507:             *         array of destinct values (value, String[]).
0508:             * @throws RegainException If reading from the index failed. Or if reading or
0509:             *         writing a cache file failed.
0510:             */
0511:            public static HashMap readFieldValues(IndexReader indexReader,
0512:                    String[] fieldNameArr, File indexDir)
0513:                    throws RegainException {
0514:                // Create the result map
0515:                HashMap resultMap = new HashMap();
0516:
0517:                // Try to read the field values from the cache files
0518:                // and remember the failed field names in a set
0519:                HashSet fieldsToReadSet = new HashSet();
0520:                for (int i = 0; i < fieldNameArr.length; i++) {
0521:                    String field = fieldNameArr[i];
0522:
0523:                    String[] fieldValueArr = null;
0524:                    if (indexDir != null) {
0525:                        File fieldFile = new File(indexDir, "field_values_"
0526:                                + field + ".txt");
0527:
0528:                        // NOTE: fieldValueArr stays null if the file does not exist
0529:                        fieldValueArr = readListFromFile(fieldFile);
0530:                    }
0531:
0532:                    if (fieldValueArr != null) {
0533:                        resultMap.put(field, fieldValueArr);
0534:                    } else {
0535:                        // There is no cache file -> We have to read the values from the index
0536:                        fieldsToReadSet.add(field);
0537:
0538:                        // Add an empty ArrayList that can hold the values
0539:                        resultMap.put(field, new ArrayList());
0540:                    }
0541:                }
0542:
0543:                // For bug-prevention: Enforce the usage of the fieldsToReadSet
0544:                // (There may be some field names removed)
0545:                fieldNameArr = null;
0546:
0547:                // Read the terms
0548:                if (!fieldsToReadSet.isEmpty()) {
0549:                    try {
0550:                        TermEnum termEnum = indexReader.terms();
0551:                        while (termEnum.next()) {
0552:                            Term term = termEnum.term();
0553:                            String field = term.field();
0554:                            if (fieldsToReadSet.contains(field)) {
0555:                                // This is a value of a wanted field
0556:                                ArrayList valueList = (ArrayList) resultMap
0557:                                        .get(field);
0558:                                valueList.add(term.text());
0559:                            }
0560:                        }
0561:                    } catch (IOException exc) {
0562:                        throw new RegainException(
0563:                                "Reading terms from index failed", exc);
0564:                    }
0565:                }
0566:
0567:                // Convert the lists into arrays.
0568:                Iterator readFieldIter = fieldsToReadSet.iterator();
0569:                while (readFieldIter.hasNext()) {
0570:                    String field = (String) readFieldIter.next();
0571:
0572:                    ArrayList valueList = (ArrayList) resultMap.get(field);
0573:                    String[] valueArr = new String[valueList.size()];
0574:                    valueList.toArray(valueArr);
0575:
0576:                    // Sort the array
0577:                    Arrays.sort(valueArr);
0578:
0579:                    // Overwrite the list with the array
0580:                    resultMap.put(field, valueArr);
0581:
0582:                    // Write the results to a file
0583:                    if (indexDir != null) {
0584:                        File fieldFile = new File(indexDir, "field_values_"
0585:                                + field + ".txt");
0586:                        writeListToFile(valueArr, fieldFile);
0587:                    }
0588:                }
0589:
0590:                return resultMap;
0591:            }
0592:
0593:            /**
0594:             * Creates an analyzer that is used both from the crawler and the search mask.
0595:             * It is important that both use the same analyzer which is the reason for
0596:             * this method.
0597:             *
0598:             * @param analyzerType The type of the analyzer to create. Either a classname
0599:             *        or "english" or "german".
0600:             * @param stopWordList All words that should not be indexed.
0601:             * @param exclusionList All words that shouldn't be changed by the analyzer.
0602:             * @param untokenizedFieldNames The names of the fields that should not be
0603:             *        tokenized.
0604:             * @return The analyzer.
0605:             * @throws RegainException If the creation failed.
0606:             */
0607:            public static Analyzer createAnalyzer(String analyzerType,
0608:                    String[] stopWordList, String[] exclusionList,
0609:                    String[] untokenizedFieldNames) throws RegainException {
0610:                if (analyzerType == null) {
0611:                    throw new RegainException("No analyzer type specified!");
0612:                }
0613:
0614:                // Get the analyzer class name
0615:                analyzerType = analyzerType.trim();
0616:                String analyzerClassName = analyzerType;
0617:                if (analyzerType.equalsIgnoreCase("english")) {
0618:                    analyzerClassName = StandardAnalyzer.class.getName();
0619:                } else if (analyzerType.equalsIgnoreCase("german")) {
0620:                    analyzerClassName = GermanAnalyzer.class.getName();
0621:                }
0622:
0623:                // Get the analyzer class
0624:                Class analyzerClass;
0625:                try {
0626:                    analyzerClass = Class.forName(analyzerClassName);
0627:                } catch (ClassNotFoundException exc) {
0628:                    throw new RegainException("Analyzer class not found: "
0629:                            + analyzerClassName, exc);
0630:                }
0631:
0632:                // Create an instance
0633:                Analyzer analyzer;
0634:                if ((stopWordList != null) && (stopWordList.length != 0)) {
0635:                    Constructor ctor;
0636:                    try {
0637:                        ctor = analyzerClass
0638:                                .getConstructor(new Class[] { stopWordList
0639:                                        .getClass() });
0640:                    } catch (Throwable thr) {
0641:                        throw new RegainException("Analyzer " + analyzerType
0642:                                + " does not support stop words");
0643:                    }
0644:                    try {
0645:                        analyzer = (Analyzer) ctor
0646:                                .newInstance(new Object[] { stopWordList });
0647:                    } catch (Throwable thr) {
0648:                        throw new RegainException(
0649:                                "Creating analyzer instance failed", thr);
0650:                    }
0651:                } else {
0652:                    try {
0653:                        analyzer = (Analyzer) analyzerClass.newInstance();
0654:                    } catch (Throwable thr) {
0655:                        throw new RegainException(
0656:                                "Creating analyzer instance failed", thr);
0657:                    }
0658:                }
0659:
0660:                // Try to apply the exclusion list
0661:                if ((exclusionList != null) && (exclusionList.length != 0)) {
0662:                    // NOTE: This is supported by the GermanAnalyzer for instance
0663:                    Method setter;
0664:                    try {
0665:                        setter = analyzerClass.getMethod(
0666:                                "setStemExclusionTable",
0667:                                new Class[] { exclusionList.getClass() });
0668:                    } catch (Throwable thr) {
0669:                        throw new RegainException("Analyzer " + analyzerType
0670:                                + " does not support exclusion lists");
0671:                    }
0672:
0673:                    try {
0674:                        setter.invoke(analyzer, new Object[] { exclusionList });
0675:                    } catch (Throwable thr) {
0676:                        throw new RegainException(
0677:                                "Applying exclusion list failed.", thr);
0678:                    }
0679:                }
0680:
0681:                analyzer = new WrapperAnalyzer(analyzer, untokenizedFieldNames);
0682:
0683:                if (ANALYSE_ANALYZER) {
0684:                    return createAnalysingAnalyzer(analyzer);
0685:                }
0686:                return analyzer;
0687:            }
0688:
0689:            /**
0690:             * Erzeugt einen Analyzer, der die Aufrufe an einen eingebetteten Analyzer
0691:             * analysiert.
0692:             * <p>
0693:             * Dies ist beim Debugging hilfreich, wenn man pr�fen will, was ein Analyzer
0694:             * bei bestimmten Anfragen ausgibt.
0695:             *
0696:             * @param nestedAnalyzer The nested Analyzer that should
0697:             * @return Ein Analyzer, der die Aufrufe an einen eingebetteten Analyzer
0698:             *         analysiert.
0699:             */
0700:            private static Analyzer createAnalysingAnalyzer(
0701:                    final Analyzer nestedAnalyzer) {
0702:                return new Analyzer() {
0703:                    public TokenStream tokenStream(String fieldName,
0704:                            Reader reader) {
0705:                        // NOTE: For Analyzation we have to read the reader twice:
0706:                        //       Once for the analyzation and second for the returned TokenStream
0707:                        //       -> We save the content of the Reader in a String and read this
0708:                        //          String twice.
0709:                        try {
0710:                            // Save the content of the reader in a String
0711:                            StringWriter writer = new java.io.StringWriter();
0712:                            pipe(reader, writer);
0713:                            String asString = writer.toString();
0714:
0715:                            // Anaylize the call
0716:                            TokenStream stream = nestedAnalyzer.tokenStream(
0717:                                    fieldName, new StringReader(asString));
0718:                            System.out
0719:                                    .println("Tokens for '" + asString + "':");
0720:                            Token token;
0721:                            while ((token = stream.next()) != null) {
0722:                                System.out.println("  '" + token.termText()
0723:                                        + "'");
0724:                            }
0725:
0726:                            // Do the call a second time and return the result this time
0727:                            return nestedAnalyzer.tokenStream(fieldName,
0728:                                    new StringReader(asString));
0729:                        } catch (IOException exc) {
0730:                            System.out.println("exc: " + exc);
0731:
0732:                            return null;
0733:                        }
0734:                    }
0735:                };
0736:            }
0737:
0738:            /**
0739:             * Replaces in a string all occurences of <code>pattern</code> with
0740:             * <code>replacement</code>.
0741:             * <p>
0742:             * Note: <code>pattern</code> may be a substring of <code>replacement</code>.
0743:             *
0744:             * @param source The string to search in
0745:             * @param pattern The pattern to be replaced
0746:             * @param replacement The replacement for each occurence of the pattern.
0747:             *
0748:             * @return A string where all occurences of <code>pattern</code> are replaced
0749:             *         by <code>replacement</code>.
0750:             */
0751:            public static String replace(String source, String pattern,
0752:                    String replacement) {
0753:                // Check whether the pattern occurs in the source at all
0754:                int pos = source.indexOf(pattern);
0755:                if (pos == -1) {
0756:                    // The pattern does not occur in the source -> return the source
0757:                    return source;
0758:                }
0759:
0760:                // Build a new String where pattern is replaced by the replacement
0761:                StringBuffer target = new StringBuffer(source.length());
0762:                int start = 0; // The start of a part without the pattern
0763:                do {
0764:                    target.append(source.substring(start, pos));
0765:                    target.append(replacement);
0766:                    start = pos + pattern.length();
0767:                } while ((pos = source.indexOf(pattern, start)) != -1);
0768:                target.append(source.substring(start, source.length()));
0769:
0770:                // return the String
0771:                return target.toString();
0772:            }
0773:
0774:            /**
0775:             * Replaces in a string all occurences of a list of patterns with replacements.
0776:             * <p>
0777:             * Note: The string is searched left to right. So any pattern matching earlier
0778:             * in the string will be replaced.
0779:             * Example: replace("abcd", { "bc", "ab", "cd" }, { "x", "1", "2" }) will
0780:             * return "12" (the pattern "bc" won't be applied, since "ab" matches before).
0781:             * <p>
0782:             * Note: If two patterns match at the same position, then the first one
0783:             * defined will be applied.
0784:             * Example: replace("abcd", { "ab", "abc" }, { "1", "2" }) will return "1cd".
0785:             *
0786:             * @param source The string to search in
0787:             * @param patternArr The pattern to be replaced
0788:             * @param replacementArr The replacement for each occurence of the pattern.
0789:             *
0790:             * @return A string where all occurences of <code>pattern</code> are replaced
0791:             *         by <code>replacement</code>.
0792:             */
0793:            public static String replace(String source, String[] patternArr,
0794:                    String[] replacementArr) {
0795:                if (patternArr.length != replacementArr.length) {
0796:                    throw new IllegalArgumentException(
0797:                            "patternArr and replacementArr must "
0798:                                    + "have the same length: "
0799:                                    + patternArr.length + " != "
0800:                                    + replacementArr.length);
0801:                }
0802:
0803:                // Check whether the patterns occurs in the source at all
0804:                int[] posArr = new int[patternArr.length];
0805:                int minPos = Integer.MAX_VALUE;
0806:                int minPosIdx = -1;
0807:                for (int i = 0; i < posArr.length; i++) {
0808:                    posArr[i] = source.indexOf(patternArr[i]);
0809:                    if (posArr[i] != -1 && posArr[i] < minPos) {
0810:                        minPos = posArr[i];
0811:                        minPosIdx = i;
0812:                    }
0813:                }
0814:                if (minPosIdx == -1) {
0815:                    // The patterns do not occur in the source -> return the source
0816:                    return source;
0817:                }
0818:
0819:                // Build a new String where patterns are replaced by the replacements
0820:                StringBuffer target = new StringBuffer(source.length());
0821:                int start = 0; // The start of a part without the pattern
0822:                do {
0823:                    target.append(source.substring(start, minPos));
0824:                    target.append(replacementArr[minPosIdx]);
0825:                    start = minPos + patternArr[minPosIdx].length();
0826:
0827:                    // Find the next matching pattern
0828:                    minPos = Integer.MAX_VALUE;
0829:                    minPosIdx = -1;
0830:                    for (int i = 0; i < posArr.length; i++) {
0831:                        if (posArr[i] < start) {
0832:                            // The last match was before the current position
0833:                            // -> Find the next match for that pattern
0834:                            posArr[i] = source.indexOf(patternArr[i], start);
0835:                        }
0836:                        if (posArr[i] != -1 && posArr[i] < minPos) {
0837:                            minPos = posArr[i];
0838:                            minPosIdx = i;
0839:                        }
0840:                    }
0841:                } while (minPosIdx != -1);
0842:                target.append(source.substring(start, source.length()));
0843:
0844:                // return the String
0845:                return target.toString();
0846:            }
0847:
0848:            /**
0849:             * Gibt einen Wert in Prozent mit zwei Nachkommastellen zur�ck.
0850:             *
0851:             * @param value Der Wert. (Zwischen 0 und 1)
0852:             * @return Der Wert in Prozent.
0853:             */
0854:            public static String toPercentString(double value) {
0855:                NumberFormat format = NumberFormat.getPercentInstance();
0856:                format.setMinimumFractionDigits(2);
0857:                format.setMaximumFractionDigits(2);
0858:                return format.format(value);
0859:            }
0860:
0861:            /**
0862:             * Gibt einen f�r den Menschen gut lesbaren String f�r eine Anzahl Bytes
0863:             * zur�ck.
0864:             *
0865:             * @param bytes Die Anzahl Bytes
0866:             * @return Ein String, der sie Anzahl Bytes wiedergibt
0867:             */
0868:            public static String bytesToString(long bytes) {
0869:                return bytesToString(bytes, Locale.ENGLISH);
0870:            }
0871:
0872:            /**
0873:             * Gibt einen f�r den Menschen gut lesbaren String f�r eine Anzahl Bytes
0874:             * zur�ck.
0875:             *
0876:             * @param bytes Die Anzahl Bytes
0877:             * @param locale The locale to use for formatting the numbers.
0878:             * @return Ein String, der sie Anzahl Bytes wiedergibt
0879:             */
0880:            public static String bytesToString(long bytes, Locale locale) {
0881:                return bytesToString(bytes, 2, locale);
0882:            }
0883:
0884:            /**
0885:             * Gibt einen f�r den Menschen gut lesbaren String f�r eine Anzahl Bytes
0886:             * zur�ck.
0887:             *
0888:             * @param bytes Die Anzahl Bytes
0889:             * @param fractionDigits Die Anzahl der Nachkommastellen
0890:             * @return Ein String, der sie Anzahl Bytes wiedergibt
0891:             */
0892:            public static String bytesToString(long bytes, int fractionDigits) {
0893:                return bytesToString(bytes, fractionDigits, Locale.ENGLISH);
0894:            }
0895:
0896:            /**
0897:             * Gibt einen f�r den Menschen gut lesbaren String f�r eine Anzahl Bytes
0898:             * zur�ck.
0899:             *
0900:             * @param bytes Die Anzahl Bytes
0901:             * @param fractionDigits Die Anzahl der Nachkommastellen
0902:             * @param locale The locale to use for formatting the numbers.
0903:             * @return Ein String, der sie Anzahl Bytes wiedergibt
0904:             */
0905:            public static String bytesToString(long bytes, int fractionDigits,
0906:                    Locale locale) {
0907:                int factor;
0908:                String unit;
0909:
0910:                if (bytes > SIZE_GB) {
0911:                    factor = SIZE_GB;
0912:                    unit = "GB";
0913:                } else if (bytes > SIZE_MB) {
0914:                    factor = SIZE_MB;
0915:                    unit = "MB";
0916:                } else if (bytes > SIZE_KB) {
0917:                    factor = SIZE_KB;
0918:                    unit = "kB";
0919:                } else {
0920:                    return bytes + " Byte";
0921:                }
0922:
0923:                NumberFormat format = NumberFormat.getInstance(locale);
0924:                format.setMinimumFractionDigits(fractionDigits);
0925:                format.setMaximumFractionDigits(fractionDigits);
0926:
0927:                String asString = format.format((double) bytes
0928:                        / (double) factor);
0929:
0930:                return asString + " " + unit;
0931:            }
0932:
0933:            /**
0934:             * Gets a human readable String for a time.
0935:             *
0936:             * @param time The time in milliseconds.
0937:             * @return The time as String.
0938:             */
0939:            public static String toTimeString(long time) {
0940:                if (time == -1) {
0941:                    // This is no time
0942:                    return "?";
0943:                }
0944:
0945:                long millis = time % 1000;
0946:                time /= 1000;
0947:                long secs = time % 60;
0948:                time /= 60;
0949:                long mins = time % 60;
0950:                time /= 60;
0951:                long hours = time;
0952:
0953:                if (hours != 0) {
0954:                    return hours + ":" + ((mins > 9) ? "" : "0") + mins + ":"
0955:                            + ((secs > 9) ? "" : "0") + secs + " h";
0956:                } else if (mins != 0) {
0957:                    return mins + ":" + ((secs > 9) ? "" : "0") + secs + " min";
0958:                } else if (secs != 0) {
0959:                    NumberFormat format = NumberFormat.getInstance();
0960:                    format.setMinimumFractionDigits(2);
0961:                    format.setMaximumFractionDigits(2);
0962:
0963:                    String asString = format.format(secs + millis / 1000.0);
0964:
0965:                    return asString + " sec";
0966:                } else {
0967:                    return millis + " millis";
0968:                }
0969:            }
0970:
0971:            /**
0972:             * Konvertiert ein Date-Objekt in einen String mit dem Format
0973:             * "YYYY-MM-DD HH:MM". Das ist n�tig, um ein eindeutiges und vom Menschen
0974:             * lesbares Format zu haben.
0975:             * <p>
0976:             * Dieses Format ist mit Absicht nicht lokalisiert, um die Eindeutigkeit zu
0977:             * gew�hrleisten. Die Lokalisierung muss die Suchmaske �bernehmen.
0978:             *
0979:             * @param lastModified Das zu konvertiernende Date-Objekt
0980:             * @return Ein String mit dem Format "YYYY-MM-DD HH:MM"
0981:             * @see #stringToLastModified(String)
0982:             */
0983:            public static String lastModifiedToString(Date lastModified) {
0984:                Calendar cal = Calendar.getInstance();
0985:                cal.setTime(lastModified);
0986:
0987:                int year = cal.get(Calendar.YEAR);
0988:                int month = cal.get(Calendar.MONTH) + 1; // +1: In the Date class january is 0
0989:                int day = cal.get(Calendar.DAY_OF_MONTH);
0990:
0991:                int hour = cal.get(Calendar.HOUR_OF_DAY);
0992:                int minute = cal.get(Calendar.MINUTE);
0993:
0994:                StringBuffer buffer = new StringBuffer(16);
0995:
0996:                // "YYYY-"
0997:                buffer.append(year);
0998:                buffer.append('-');
0999:
1000:                // "MM-"
1001:                if (month < 10) {
1002:                    buffer.append('0');
1003:                }
1004:                buffer.append(month);
1005:                buffer.append('-');
1006:
1007:                // "DD "
1008:                if (day < 10) {
1009:                    buffer.append('0');
1010:                }
1011:                buffer.append(day);
1012:                buffer.append(' ');
1013:
1014:                // "HH:"
1015:                if (hour < 10) {
1016:                    buffer.append('0');
1017:                }
1018:                buffer.append(hour);
1019:                buffer.append(':');
1020:
1021:                // "MM"
1022:                if (minute < 10) {
1023:                    buffer.append('0');
1024:                }
1025:                buffer.append(minute);
1026:
1027:                return buffer.toString();
1028:            }
1029:
1030:            /**
1031:             * Konvertiert einen String mit dem Format "YYYY-MM-DD HH:MM" in ein
1032:             * Date-Objekt.
1033:             *
1034:             * @param asString Der zu konvertierende String
1035:             * @return Das konvertierte Date-Objekt.
1036:             * @throws RegainException Wenn der String ein falsches Format hat.
1037:             * @see #lastModifiedToString(Date)
1038:             */
1039:            public static Date stringToLastModified(String asString)
1040:                    throws RegainException {
1041:                Calendar cal = Calendar.getInstance();
1042:
1043:                try {
1044:                    // Format: "YYYY-MM-DD HH:MM"
1045:
1046:                    int year = Integer.parseInt(asString.substring(0, 4));
1047:                    cal.set(Calendar.YEAR, year);
1048:                    int month = Integer.parseInt(asString.substring(5, 7));
1049:                    cal.set(Calendar.MONTH, month - 1); // -1: In the Date class january is 0
1050:                    int day = Integer.parseInt(asString.substring(8, 10));
1051:                    cal.set(Calendar.DAY_OF_MONTH, day);
1052:
1053:                    int hour = Integer.parseInt(asString.substring(11, 13));
1054:                    cal.set(Calendar.HOUR_OF_DAY, hour);
1055:                    int minute = Integer.parseInt(asString.substring(14, 16));
1056:                    cal.set(Calendar.MINUTE, minute);
1057:                    cal.set(Calendar.SECOND, 0);
1058:                } catch (Throwable thr) {
1059:                    throw new RegainException(
1060:                            "Last-modified-string has not the format"
1061:                                    + "'YYYY-MM-DD HH:MM': " + asString, thr);
1062:                }
1063:
1064:                return cal.getTime();
1065:            }
1066:
1067:            /**
1068:             * Splits a String into a string array.
1069:             *
1070:             * @param str The String to split.
1071:             * @param delim The String that separates the items to split
1072:             * @return An array the items.
1073:             */
1074:            public static String[] splitString(String str, String delim) {
1075:                return splitString(str, delim, false);
1076:            }
1077:
1078:            /**
1079:             * Splits a String into a string array.
1080:             *
1081:             * @param str The String to split.
1082:             * @param delim The String that separates the items to split
1083:             * @param trimSplits Specifies whether {@link String#trim()} should be called
1084:             *        for every split.
1085:             * @return An array the items.
1086:             */
1087:            public static String[] splitString(String str, String delim,
1088:                    boolean trimSplits) {
1089:                StringTokenizer tokenizer = new StringTokenizer(str, delim);
1090:                String[] searchFieldArr = new String[tokenizer.countTokens()];
1091:                for (int i = 0; i < searchFieldArr.length; i++) {
1092:                    searchFieldArr[i] = tokenizer.nextToken();
1093:                    if (trimSplits) {
1094:                        searchFieldArr[i] = searchFieldArr[i].trim();
1095:                    }
1096:                }
1097:                return searchFieldArr;
1098:            }
1099:
1100:            /**
1101:             * Gibt den systemspeziefischen Zeilenumbruch zur�ck.
1102:             *
1103:             * @return Der Zeilenumbruch.
1104:             */
1105:            public static String getLineSeparator() {
1106:                if (mLineSeparator == null) {
1107:                    mLineSeparator = System.getProperty("line.separator");
1108:                }
1109:
1110:                return mLineSeparator;
1111:            }
1112:
1113:            /**
1114:             * Returns the system's default encoding.
1115:             *
1116:             * @return the system's default encoding.
1117:             */
1118:            public static String getSystemDefaultEncoding() {
1119:                if (mSystemDefaultEncoding == null) {
1120:                    mSystemDefaultEncoding = new InputStreamReader(System.in)
1121:                            .getEncoding();
1122:                }
1123:
1124:                return mSystemDefaultEncoding;
1125:            }
1126:
1127:            /**
1128:             * Checks whether the given String contains whitespace.
1129:             * 
1130:             * @param str The String to check.
1131:             * @return Whether the given String contains whitespace.
1132:             */
1133:            public static boolean containsWhitespace(String str) {
1134:                for (int i = 0; i < str.length(); i++) {
1135:                    if (Character.isWhitespace(str.charAt(i))) {
1136:                        return true;
1137:                    }
1138:                }
1139:
1140:                return false;
1141:            }
1142:
1143:            /**
1144:             * Checks an array of group names.
1145:             * 
1146:             * @param accessController The access controller that returned the array of
1147:             *        group names.
1148:             * @param groupArr The array of group names to check.
1149:             * @throws RegainException If the array of group names is not valid.
1150:             */
1151:            public static void checkGroupArray(Object accessController,
1152:                    String[] groupArr) throws RegainException {
1153:                if (groupArr == null) {
1154:                    // Check for null
1155:                    throw new RegainException("Access controller "
1156:                            + accessController.getClass().getName()
1157:                            + " returned illegal " + "group array: null");
1158:                } else {
1159:                    // Check for whitespace
1160:                    for (int i = 0; i < groupArr.length; i++) {
1161:                        if (RegainToolkit.containsWhitespace(groupArr[i])) {
1162:                            throw new RegainException("Access controller "
1163:                                    + accessController.getClass().getName()
1164:                                    + " returned illegal "
1165:                                    + "group name containing whitespace: '"
1166:                                    + groupArr[i] + "'");
1167:                        }
1168:                    }
1169:                }
1170:            }
1171:
1172:            /**
1173:             * Loads a class and creates an instance.
1174:             * 
1175:             * @param className The name of the class to load and create an instance of.
1176:             * @param superClass The super class the class must extend.
1177:             * @param classLoader The class loader to use for loading the class. May be
1178:             *        <code>null</code>
1179:             * @return An object of the class.
1180:             * @throws RegainException If loading the class or creating the instance
1181:             *         failed or if the class is no instance of the given super class. 
1182:             */
1183:            public static Object createClassInstance(String className,
1184:                    Class super Class, ClassLoader classLoader)
1185:                    throws RegainException {
1186:                // Load the class
1187:                Class clazz;
1188:                try {
1189:                    if (classLoader == null) {
1190:                        clazz = Class.forName(className);
1191:                    } else {
1192:                        clazz = classLoader.loadClass(className);
1193:                    }
1194:                } catch (ClassNotFoundException exc) {
1195:                    throw new RegainException("The class '" + className
1196:                            + "' does not exist", exc);
1197:                }
1198:
1199:                // Create the instance
1200:                Object obj;
1201:                try {
1202:                    obj = clazz.newInstance();
1203:                } catch (Exception exc) {
1204:                    throw new RegainException(
1205:                            "Error creating instance of class " + className,
1206:                            exc);
1207:                }
1208:
1209:                // Check the instance
1210:                if (!super Class.isInstance(obj)) {
1211:                    throw new RegainException("The class " + className
1212:                            + " does not " + "implement "
1213:                            + super Class.getName());
1214:                }
1215:
1216:                return obj;
1217:            }
1218:
1219:            /**
1220:             * Loads a class and creates an instance.
1221:             * 
1222:             * @param className The name of the class to load and create an instance of.
1223:             * @param superClass The super class the class must extend.
1224:             * @param jarFileName The name of the jar file to load the class from.
1225:             *        May be <code>null</code>.
1226:             * @return An object of the class.
1227:             * @throws RegainException If loading the class or creating the instance
1228:             *         failed or if the class is no instance of the given super class. 
1229:             */
1230:            public static Object createClassInstance(String className,
1231:                    Class super Class, String jarFileName)
1232:                    throws RegainException {
1233:                // Create a class loader for the jar file
1234:                ClassLoader classLoader = null;
1235:                if (jarFileName != null) {
1236:                    File jarFile = new File(jarFileName);
1237:                    if (!jarFile.exists()) {
1238:                        throw new RegainException("Jar file does not exist: "
1239:                                + jarFile.getAbsolutePath());
1240:                    }
1241:
1242:                    try {
1243:                        classLoader = new URLClassLoader(new URL[] { jarFile
1244:                                .toURL() }, super Class.getClassLoader());
1245:                    } catch (MalformedURLException exc) {
1246:                        throw new RegainException("Creating class loader for "
1247:                                + "jar file failed: "
1248:                                + jarFile.getAbsolutePath(), exc);
1249:                    }
1250:                }
1251:
1252:                // Create the instance
1253:                return createClassInstance(className, super Class, classLoader);
1254:            }
1255:
1256:            /**
1257:             * Gets the file name that is described by a URL with the <code>file://</code>
1258:             * protocol.
1259:             *
1260:             * @param url The URL to get the file name for.
1261:             * @return The file name that matches the URL.
1262:             * @throws RegainException If the URL's protocol isn't <code>file://</code>.
1263:             */
1264:            public static String urlToFileName(String url)
1265:                    throws RegainException {
1266:                if (!url.startsWith("file://")) {
1267:                    throw new RegainException(
1268:                            "URL must have the file:// protocol to get a "
1269:                                    + "File for it");
1270:                }
1271:
1272:                // Cut the file://
1273:                String fileName = url.substring(7);
1274:
1275:                // Replace URL-encoded special characters
1276:                return urlDecode(fileName, INDEX_ENCODING);
1277:            }
1278:
1279:            /**
1280:             * Gets the file that is described by a URL with the <code>file://</code>
1281:             * protocol.
1282:             *
1283:             * @param url The URL to get the file for.
1284:             * @return The file that matches the URL.
1285:             * @throws RegainException If the URL's protocol isn't <code>file://</code>.
1286:             */
1287:            public static File urlToFile(String url) throws RegainException {
1288:                return new File(urlToFileName(url));
1289:            }
1290:
1291:            /**
1292:             * Returns the URL of a file name.
1293:             *
1294:             * @param fileName The file name to get the URL for
1295:             * @return The URL of the file.
1296:             * @throws RegainException If URL-encoding failed. 
1297:             */
1298:            public static String fileNameToUrl(String fileName)
1299:                    throws RegainException {
1300:                // Replace special characters
1301:                fileName = urlEncode(fileName, INDEX_ENCODING);
1302:
1303:                // Replace file separators by /
1304:                // NOTE: "/" is "%2F", "\" is "%5C"
1305:                fileName = replace(fileName, "%2F", "/");
1306:                fileName = replace(fileName, "%5C", "/"); // Yes: "\" should become "/"
1307:
1308:                return "file://" + fileName;
1309:            }
1310:
1311:            /**
1312:             * Returns the URL of a file.
1313:             *
1314:             * @param file The file to get the URL for
1315:             * @return The URL of the file.
1316:             * @throws RegainException If URL-encoding failed. 
1317:             */
1318:            public static String fileToUrl(File file) throws RegainException {
1319:                return fileNameToUrl(file.getAbsolutePath());
1320:            }
1321:
1322:            /**
1323:             * Gets the canonical URL of a file (no symbolic links, normalised names etc).
1324:             * Symbolic link detection may fail in certain situations, like for NFS file systems
1325:             *
1326:             * @param file The file to get the canonical URL for
1327:             * @return The URL of the file.
1328:             * @throws RegainException If URL-encoding failed. 
1329:             */
1330:            public static String fileToCanonicalUrl(File file)
1331:                    throws RegainException {
1332:                String canUrl = null;
1333:                try {
1334:                    //This may throw SecurityException
1335:                    canUrl = file.getCanonicalPath();
1336:                } catch (Exception e) {
1337:                    return null;
1338:                }
1339:                //Canonical url returns "current dir:parh"
1340:                int pos = canUrl.indexOf(':') + 1;
1341:                if (pos > 0 && pos < canUrl.length()) {
1342:                    canUrl = canUrl.substring(pos);
1343:                }
1344:
1345:                return fileNameToUrl(canUrl);
1346:            }
1347:
1348:            /**
1349:             * URL-encodes a String. 
1350:             * 
1351:             * @param text The String to URL-encode.
1352:             * @param encoding The encoding to use. 
1353:             * @return The URL-encoded String.
1354:             * @throws RegainException If URL-encoding failed.
1355:             */
1356:            public static String urlEncode(String text, String encoding)
1357:                    throws RegainException {
1358:                try {
1359:                    // For Java 1.2.2
1360:                    //return URLEncoder.encode(text);
1361:
1362:                    // Since Java 1.3
1363:                    return URLEncoder.encode(text, encoding);
1364:                } catch (UnsupportedEncodingException exc) {
1365:                    throw new RegainException("URL-encoding failed: '" + text
1366:                            + "'", exc);
1367:                }
1368:            }
1369:
1370:            /**
1371:             * URL-decodes a String. 
1372:             * 
1373:             * @param text The String to URL-decode.
1374:             * @param encoding The encoding to use. 
1375:             * @return The URL-decoded String.
1376:             * @throws RegainException If URL-decoding failed.
1377:             */
1378:            public static String urlDecode(String text, String encoding)
1379:                    throws RegainException {
1380:                try {
1381:                    return URLDecoder.decode(text, encoding);
1382:                } catch (UnsupportedEncodingException exc) {
1383:                    throw new RegainException("URL-decoding failed: '" + text
1384:                            + "'", exc);
1385:                }
1386:            }
1387:
1388:            // inner class WrapperAnalyzer
1389:
1390:            /**
1391:             * An analyzer that changes a document in lowercase before delivering
1392:             * it to a nested analyzer. For the field "groups" an analyzer is used that
1393:             * only tokenizes the input without stemming the tokens.
1394:             */
1395:            private static class WrapperAnalyzer extends Analyzer {
1396:
1397:                /** The analyzer to use for a field that shouldn't be stemmed. */
1398:                private Analyzer mNoStemmingAnalyzer;
1399:                /** The nested analyzer. */
1400:                private Analyzer mNestedAnalyzer;
1401:                /** The names of the fields that should not be tokenized. */
1402:                private HashSet mUntokenizedFieldNames;
1403:
1404:                /**
1405:                 * Creates a new instance of WrapperAnalyzer.
1406:                 * 
1407:                 * @param nestedAnalyzer The nested analyzer.
1408:                 * @param untokenizedFieldNames The names of the fields that should not be
1409:                 *        tokenized.
1410:                 */
1411:                public WrapperAnalyzer(Analyzer nestedAnalyzer,
1412:                        String[] untokenizedFieldNames) {
1413:                    mNoStemmingAnalyzer = new WhitespaceAnalyzer();
1414:                    mNestedAnalyzer = nestedAnalyzer;
1415:
1416:                    mUntokenizedFieldNames = new HashSet();
1417:                    for (int i = 0; i < untokenizedFieldNames.length; i++) {
1418:                        mUntokenizedFieldNames.add(untokenizedFieldNames[i]);
1419:                    }
1420:                }
1421:
1422:                /**
1423:                 * Creates a TokenStream which tokenizes all the text in the provided
1424:                 * Reader.
1425:                 */
1426:                public TokenStream tokenStream(String fieldName, Reader reader) {
1427:                    boolean useStemming = true;
1428:                    // NOTE: For security reasons we explicitely check for the groups field
1429:                    //       and don't use the mUntokenizedFieldNames for this implicitely
1430:                    if (fieldName.equals("groups")
1431:                            || mUntokenizedFieldNames.contains(fieldName)) {
1432:                        useStemming = false;
1433:                    }
1434:
1435:                    if (useStemming) {
1436:                        Reader lowercasingReader = new LowercasingReader(reader);
1437:                        return mNestedAnalyzer.tokenStream(fieldName,
1438:                                lowercasingReader);
1439:                    } else {
1440:                        return mNoStemmingAnalyzer.tokenStream(fieldName,
1441:                                reader);
1442:                    }
1443:                }
1444:
1445:            } // inner class WrapperAnalyzer
1446:
1447:            // inner class LowercasingReader
1448:
1449:            /**
1450:             * Liest alle Zeichen von einem eingebetteten Reader in Kleinschreibung.
1451:             *
1452:             * @author Til Schneider, www.murfman.de
1453:             */
1454:            private static class LowercasingReader extends Reader {
1455:
1456:                /** Der eingebettete Reader. */
1457:                private Reader mNestedReader;
1458:
1459:                /**
1460:                 * Erzeugt eine neue LowercasingReader-Instanz.
1461:                 *
1462:                 * @param nestedReader Der Reader, von dem die Daten kommen, die in
1463:                 *        Kleinschreibung gewandelt werden sollen.
1464:                 */
1465:                public LowercasingReader(Reader nestedReader) {
1466:                    mNestedReader = nestedReader;
1467:                }
1468:
1469:                /**
1470:                 * Schlie�t den eingebetteten Reader.
1471:                 *
1472:                 * @throws IOException Wenn der eingebettete Reader nicht geschlossen werden
1473:                 *         konnte.
1474:                 */
1475:                public void close() throws IOException {
1476:                    mNestedReader.close();
1477:                }
1478:
1479:                /**
1480:                 * Liest Daten vom eingebetteten Reader und wandelt sie in Kleinschreibung.
1481:                 *
1482:                 * @param cbuf Der Puffer, in den die gelesenen Daten geschrieben werden
1483:                 *        sollen
1484:                 * @param off Der Offset im Puffer, ab dem geschreiben werden soll.
1485:                 * @param len Die max. Anzahl von Zeichen, die geschrieben werden soll.
1486:                 * @return Die Anzahl von Zeichen, die tats�chlich geschrieben wurde, bzw.
1487:                 *         <code>-1</code>, wenn keine Daten mehr verf�gbar sind.
1488:                 * @throws IOException Wenn nicht vom eingebetteten Reader gelesen werden
1489:                 *         konnte.
1490:                 */
1491:                public int read(char[] cbuf, int off, int len)
1492:                        throws IOException {
1493:                    // Read the data
1494:                    int charCount = mNestedReader.read(cbuf, off, len);
1495:
1496:                    // Make it lowercase
1497:                    if (charCount != -1) {
1498:                        for (int i = off; i < off + charCount; i++) {
1499:                            cbuf[i] = Character.toLowerCase(cbuf[i]);
1500:                        }
1501:                    }
1502:
1503:                    // Return the number of chars read
1504:                    return charCount;
1505:                }
1506:
1507:            } // inner class LowercasingReader
1508:
1509:        }
www.java2java.com | Contact Us
All other trademarks are property of their respective owners.