001: /*
002: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
003: *
004: * Copyright 1997-2007 Sun Microsystems, Inc. All rights reserved.
005: *
006: * The contents of this file are subject to the terms of either the GNU
007: * General Public License Version 2 only ("GPL") or the Common
008: * Development and Distribution License("CDDL") (collectively, the
009: * "License"). You may not use this file except in compliance with the
010: * License. You can obtain a copy of the License at
011: * http://www.netbeans.org/cddl-gplv2.html
012: * or nbbuild/licenses/CDDL-GPL-2-CP. See the License for the
013: * specific language governing permissions and limitations under the
014: * License. When distributing the software, include this License Header
015: * Notice in each file and include the License file at
016: * nbbuild/licenses/CDDL-GPL-2-CP. Sun designates this
017: * particular file as subject to the "Classpath" exception as provided
018: * by Sun in the GPL Version 2 section of the License file that
019: * accompanied this code. If applicable, add the following below the
020: * License Header, with the fields enclosed by brackets [] replaced by
021: * your own identifying information:
022: * "Portions Copyrighted [year] [name of copyright owner]"
023: *
024: * Contributor(s):
025: *
026: * The Original Software is NetBeans. The Initial Developer of the Original
027: * Software is Sun Microsystems, Inc. Portions Copyright 1997-2007 Sun
028: * Microsystems, Inc. All Rights Reserved.
029: *
030: * If you wish your version of this file to be governed by only the CDDL
031: * or only the GPL Version 2, indicate your decision by adding
032: * "[Contributor] elects to include this software in this distribution
033: * under the [CDDL or GPL Version 2] license." If you do not indicate a
034: * single choice of license, a recipient has the option to distribute
035: * your version of this file under either the CDDL, the GPL Version 2 or
036: * to extend the choice of license to its licensees as provided above.
037: * However, if you add GPL Version 2 code and therefore, elected the GPL
038: * Version 2 license, then the option applies only if the new code is
039: * made subject to such option by the copyright holder.
040: */
041:
042: package org.netbeans.modules.javadoc.search;
043:
044: import java.io.*;
045: import java.lang.ref.*;
046: import java.text.Collator;
047: import java.util.*;
048: import javax.swing.event.ChangeEvent;
049: import javax.swing.event.ChangeListener;
050:
051: import javax.swing.text.html.parser.*;
052:
053: import org.openide.ErrorManager;
054: import org.openide.filesystems.*;
055: import org.openide.util.NbBundle;
056:
057: import org.openide.util.RequestProcessor;
058:
059: /**
060: * Builds index of Javadoc filesystems.
061: * @author Svata Dedic, Jesse Glick
062: */
063: public class IndexBuilder implements Runnable, ChangeListener {
064:
065: private static final String[] INDEX_FILE_NAMES = {
066: "overview-summary.html", // NOI18N
067: "index.html", // NOI18N
068: "index.htm", // NOI18N
069: };
070:
071: private static IndexBuilder INSTANCE;
072:
073: private static RequestProcessor.Task task;
074:
075: private static final ErrorManager err = ErrorManager.getDefault()
076: .getInstance(
077: "org.netbeans.modules.javadoc.search.IndexBuilder"); // NOI18N;
078:
079: private Reference cachedData;
080:
081: private JavadocRegistry jdocRegs;
082:
083: /**
084: * WeakMap<FileSystem : info> of information extracted from filesystems.
085: */
086: Map filesystemInfo = Collections.EMPTY_MAP;
087:
088: private static class Info {
089: /**
090: * Display name / title of the helpset
091: */
092: String title;
093:
094: /**
095: * Name of the index/overview file
096: */
097: String indexFileName;
098: }
099:
100: private IndexBuilder() {
101: this .jdocRegs = JavadocRegistry.getDefault();
102: this .jdocRegs.addChangeListener(this );
103: if (err.isLoggable(ErrorManager.INFORMATIONAL)) {
104: err.log("new IndexBuilder");
105: }
106: }
107:
108: /**
109: * Get the default index builder instance.
110: * It will start parsing asynch.
111: */
112: public synchronized static IndexBuilder getDefault() {
113: if (INSTANCE != null)
114: return INSTANCE;
115: INSTANCE = new IndexBuilder();
116: scheduleTask();
117: return INSTANCE;
118: }
119:
120: public void run() {
121: cachedData = null;
122: refreshIndex();
123: }
124:
125: public void stateChanged(ChangeEvent event) {
126: scheduleTask();
127: }
128:
129: /**
130: * Get the important information from the index builder.
131: * Waits for parsing to complete first, if necessary.
132: * @return two lists, one of String display names, the other of FileObject indices
133: */
134: public List[] getIndices() {
135: task.waitFinished();
136: if (cachedData != null) {
137: List[] data = (List[]) cachedData.get();
138: if (data != null) {
139: if (err.isLoggable(ErrorManager.INFORMATIONAL)) {
140: err.log("getIndices (cached)");
141: }
142: return data;
143: }
144: }
145:
146: if (err.isLoggable(ErrorManager.INFORMATIONAL)) {
147: err.log("getIndices");
148: }
149: Map m = this .filesystemInfo;
150: Iterator it = m.entrySet().iterator();
151: final Collator c = Collator.getInstance();
152: class Pair implements Comparable {
153: public String display;
154: public FileObject fo;
155:
156: public int compareTo(Object o) {
157: return c.compare(display, ((Pair) o).display);
158: }
159: }
160: SortedSet pairs = new TreeSet(); // SortedSet<Pair>
161: for (int i = 0; i < m.size(); i++) {
162: Map.Entry e = (Map.Entry) it.next();
163: FileObject f = (FileObject) e.getKey();
164: Info info = (Info) e.getValue();
165: FileObject fo = f.getFileObject(info.indexFileName);
166: if (fo == null)
167: continue;
168: Pair p = new Pair();
169: p.display = info.title;
170: p.fo = fo;
171: pairs.add(p);
172: }
173: List display = new ArrayList(pairs.size());
174: List fos = new ArrayList(pairs.size());
175: it = pairs.iterator();
176: while (it.hasNext()) {
177: Pair p = (Pair) it.next();
178: display.add(p.display);
179: fos.add(p.fo);
180: }
181: List[] data = new List[] { display, fos };
182: cachedData = new WeakReference(data);
183: return data;
184: }
185:
186: private void refreshIndex() {
187: if (err.isLoggable(ErrorManager.INFORMATIONAL)) {
188: err.log("refreshIndex");
189: }
190: Map oldMap;
191: synchronized (this ) {
192: oldMap = this .filesystemInfo;
193: }
194: //Enumeration e = FileSystemCapability.DOC.fileSystems();
195: FileObject docRoots[] = jdocRegs.getDocRoots();
196: // XXX needs to be able to listen to result; when it changes, call scheduleTask()
197: Map m = new WeakHashMap();
198: // long startTime = System.nanoTime();
199:
200: for (int ifCount = 0; ifCount < docRoots.length; ifCount++) {
201: FileObject fo = docRoots[ifCount];
202: Info oldInfo = (Info) oldMap.get(fo);
203: if (oldInfo != null) {
204: // No need to reparse.
205: m.put(fo, oldInfo);
206: continue;
207: }
208:
209: FileObject index = null;
210: for (int i = 0; i < INDEX_FILE_NAMES.length; i++) {
211: if ((index = fo.getFileObject(INDEX_FILE_NAMES[i])) != null) {
212: break;
213: }
214: }
215: if (index == null || index.getName().equals("index")) { // NOI18N
216: // For single-package doc sets, overview-summary.html is not present,
217: // and index.html is less suitable (it is framed). Look for a package
218: // summary.
219: // [PENDING] Display name is not ideal, e.g. "org.openide.windows (NetBeans Input/Output API)"
220: // where simply "NetBeans Input/Output API" is preferable... but standard title filter
221: // regexps are not so powerful (to avoid matching e.g. "Servlets (Main Documentation)").
222: FileObject packageList = fo
223: .getFileObject("package-list"); // NOI18N
224: if (packageList != null) {
225: try {
226: InputStream is = packageList.getInputStream();
227: try {
228: BufferedReader r = new BufferedReader(
229: new InputStreamReader(is));
230: String line = r.readLine();
231: if (line != null && r.readLine() == null) {
232: // Good, exactly one line as expected. A package name.
233: String resName = line.replace('.', '/')
234: + "/package-summary.html"; // NOI18N
235: FileObject pindex = fo
236: .getFileObject(resName);
237: if (pindex != null) {
238: index = pindex;
239: }
240: // else fall back to index.html if available
241: }
242: } finally {
243: is.close();
244: }
245: } catch (IOException ioe) {
246: // Oh well, skip this one.
247: ErrorManager.getDefault().notify(
248: ErrorManager.INFORMATIONAL, ioe);
249: }
250: }
251: }
252: if (index != null) {
253: // Try to find a title.
254: String title = parseTitle(index);
255: if (title != null) {
256: JavadocSearchType st = jdocRegs.findSearchType(fo);
257: if (st == null)
258: continue;
259: title = st.getOverviewTitleBase(title);
260: }
261: if (title == null || "".equals(title)) { // NOI18N
262: String filename = FileUtil
263: .getFileDisplayName(index);
264: if (filename.length() > 54) {
265: // trim to display 54 chars
266: filename = filename.substring(0, 10)
267: + "[...]" // NOI18N
268: + filename.substring(
269: filename.length() - 40,
270: filename.length());
271: }
272: title = NbBundle.getMessage(IndexBuilder.class,
273: "FMT_NoOverviewTitle",
274: new Object[] { filename }); // NOI18N
275: }
276: Info info = new Info();
277: info.title = title == null ? fo.getName() : title;
278: info.indexFileName = FileUtil
279: .getRelativePath(fo, index);
280: m.put(fo, info);
281: }
282: synchronized (this ) {
283: this .filesystemInfo = m;
284: }
285: }
286:
287: // long elapsedTime = System.nanoTime() - startTime;
288: // System.out.println("\nElapsed time[nano]: " + elapsedTime);
289: }
290:
291: /**
292: * Attempt to find the title of an HTML file object.
293: * May return null if there is no title tag, or "" if it is empty.
294: */
295: private String parseTitle(FileObject html) {
296: String title = null;
297: try {
298: // #71979: html parser used again to fix encoding issues.
299: // I have measured no difference if the parser or plain file reading
300: // is used (#32551).
301: // In case the parser is stopped as soon as it finds the title it is
302: // even faster than the previous fix.
303: InputStream is = new BufferedInputStream(html
304: .getInputStream(), 1024);
305: SimpleTitleParser tp = new SimpleTitleParser(is);
306: try {
307: tp.parse();
308: title = tp.getTitle();
309: } finally {
310: is.close();
311: }
312: } catch (IOException ioe) {
313: ErrorManager.getDefault().notify(
314: ErrorManager.INFORMATIONAL, ioe);
315: }
316: return title != null ? title.trim() : title;
317: }
318:
319: private synchronized static void scheduleTask() {
320: if (task == null) {
321: task = new RequestProcessor("Javadoc Index Builder")
322: .create(getDefault()); // NOI18N
323: }
324: // Give it a small delay to avoid restarting too many times e.g. during
325: // project switch:
326: task.schedule(100);
327: }
328:
329: static final class SimpleTitleParser {
330:
331: private char cc;
332: private InputStream is;
333: private String charset;
334: private String title;
335: private int state = CONTINUE;
336:
337: private static final int CONTINUE = 0;
338: private static final int EXIT = 0;
339:
340: SimpleTitleParser(InputStream is) {
341: this .is = is;
342: }
343:
344: public String getTitle() {
345: return title;
346: }
347:
348: public void parse() throws IOException {
349: readNext();
350: while (state == CONTINUE) {
351: switch (cc) {
352: case '<': // start of tags
353: handleOpenBrace();
354: break;
355: case (char) -1: // EOF
356: return;
357: default:
358: readNext();
359: }
360: }
361: }
362:
363: private void readNext() throws IOException {
364: cc = (char) is.read();
365: }
366:
367: private void handleOpenBrace() throws IOException {
368: StringBuilder sb = new StringBuilder();
369: while (true) {
370: readNext();
371: switch (cc) {
372: case '>': // end of tag
373: String tag = sb.toString().toLowerCase();
374: if (tag.startsWith("body")) { // NOI18N
375: state = EXIT;
376: return; // exit parsing, no title
377: } else if (tag.startsWith("meta")) { // NOI18N
378: handleMetaTag(tag);
379: return;
380: } else if (tag.startsWith("title")) { // NOI18N
381: handleTitleTag();
382: return;
383:
384: }
385: return;
386: case (char) -1: // EOF
387: return;
388: case ' ':
389: if (sb.length() == 0) // ignore leading spaces
390: break;
391: default:
392: sb.append(cc);
393: }
394: }
395:
396: }
397:
398: private void handleMetaTag(String txt) {
399: // parse something like
400: // <META http-equiv="Content-Type" content="text/html; charset=euc-jp">
401: // see http://www.w3.org/TR/REC-html32#meta
402: String name = ""; // NOI18N
403: String value = ""; // NOI18N
404:
405: char tc;
406: char[] txts = txt.toCharArray();
407: int offset = 5; // skip "meta "
408: int start = offset;
409: int state = 0;
410: while (offset < txts.length) {
411: tc = txt.charAt(offset);
412: if (tc == '=' && state == 0) { // end of name
413: name = String
414: .valueOf(txts, start, offset++ - start)
415: .trim();
416: state = 1;
417: } else if (state == 1 && (tc == '"' || tc == '\'')) { // start of value
418: start = ++offset;
419: state = 2;
420: } else if (state == 2 && (tc == '"' || tc == '\'')) { // end of value
421: value = String.valueOf(txts, start, offset++
422: - start);
423: if ("content".equals(name)) { // NOI18N
424: break;
425: }
426: name = ""; // NOI18N
427: state = 0;
428: start = offset;
429: } else {
430: ++offset;
431: }
432:
433: }
434:
435: StringTokenizer tk = new StringTokenizer(value, ";"); // NOI18N
436: while (tk.hasMoreTokens()) {
437: String str = tk.nextToken().trim();
438: if (str.startsWith("charset")) { //NOI18N
439: str = str.substring(7).trim();
440: if (str.charAt(0) == '=') {
441: this .charset = str.substring(1).trim();
442: return;
443: }
444: }
445: }
446: }
447:
448: private void handleTitleTag() throws IOException {
449: byte[] buf = new byte[200];
450: int offset = 0;
451: while (true) {
452: readNext();
453: switch (cc) {
454: case (char) -1: // EOF
455: return;
456: case '>': // </title>
457: if ("</title".equals(new String(buf, offset - 7, 7)
458: .toLowerCase())) {
459: // title is ready
460: // XXX maybe we should also resolve entities like >
461: state = EXIT;
462: if (charset == null) {
463: title = new String(buf, 0, offset - 7)
464: .trim();
465: } else {
466: title = new String(buf, 0, offset - 7,
467: charset).trim();
468: }
469: return;
470: }
471: default:
472: cc = (cc == '\n' || cc == '\r') ? ' ' : cc;
473: if (offset == buf.length) {
474: buf = enlarge(buf);
475: }
476: buf[offset++] = (byte) cc;
477:
478: }
479: }
480: }
481:
482: private static byte[] enlarge(byte[] b) {
483: byte[] b2 = new byte[b.length + 200];
484: for (int i = 0; i < b.length; i++) {
485: b2[i] = b[i];
486: }
487: return b2;
488: }
489: }
490:
491: }
|