001: /*
002: * FileIndexer.java December 2005
003: *
004: * Copyright (C) 2005, Niall Gallagher <niallg@users.sf.net>
005: *
006: * This library is free software; you can redistribute it and/or
007: * modify it under the terms of the GNU Lesser General Public
008: * License as published by the Free Software Foundation.
009: *
010: * This library is distributed in the hope that it will be useful,
011: * but WITHOUT ANY WARRANTY; without even the implied warranty of
012: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
013: * GNU Lesser General Public License for more details.
014: *
015: * You should have received a copy of the GNU Lesser General
016: * Public License along with this library; if not, write to the
017: * Free Software Foundation, Inc., 59 Temple Place, Suite 330,
018: * Boston, MA 02111-1307 USA
019: */
020:
021: package simple.http.serve;
022:
023: import java.util.MissingResourceException;
024: import java.util.ResourceBundle;
025: import simple.util.cache.Cache;
026: import simple.util.net.Path;
027: import simple.util.net.URI;
028: import java.io.IOException;
029: import java.util.Locale;
030: import java.io.File;
031:
032: /**
033: * The <code>FileIndexer</code> provides an implementation of the
034: * <code>Indexer</code> object that provides a direct mapping from
035: * a request URI as defined in RFC 2616 to the resources meta data.
036: * This uses a <code>File</code> object to define the mapping
037: * for the request URI paths. Using a <code>File</code> object
038: * allows the <code>FileIndexer</code> to be easily used with both
039: * DOS and UNIX systems.
040: * <p>
041: * This <code>Indexer</code> implementation uses a MIME database
042: * to obtain mappings for the <code>getContentType</code> method.
043: * The file used is "Content.properties", which is packaged within
044: * <code>simple.http.serve</code>. This determines the MIME type
045: * of the request URI by matching file extension of the resource
046: * with the MIME type as defined in the "Content.properties" file.
047: * The mappings in the "Content.properties" file can be overridden
048: * by any XML file named "Content.xml" within reach of the
049: * <code>Locator</code> object, this configuration file requires
050: * the mappings to be in the form of wild card patterns.
051: * <code><pre>
052: *
053: * <?xml version="1.0" encoding="UTF-8"?>
054: * <content>
055: * <resolve match="*.jpg" type="image/jpeg">
056: * <resolve match="/gif/*" type="image/gif">
057: * </content>
058: *
059: * </pre></code>
060: * For example, taking the XML configuration file described above,
061: * this will match all files ending with the ".jpg" extension to
062: * the MIME type "image/jpeg". Also, all files within the "/gif/"
063: * folder will be considered GIF images with a MIME type of
064: * "image/gif". By default most of the common file extensions
065: * already have mappings, however overriding these can be useful.
066: *
067: * @author Niall Gallagher
068: *
069: * @see simple.http.serve.ContentResolver
070: * @see simple.util.parse.URIParser
071: * @see simple.util.parse.PathParser
072: */
073: final class FileIndexer implements Indexer {
074:
075: /**
076: * This is used to load the Content.properties file for the
077: * list of the matching MIME types and file extensions.
078: */
079: private static ResourceBundle content;
080:
081: static {
082: try {
083: content = ResourceBundle
084: .getBundle("simple.http.serve.Content");
085: } catch (MissingResourceException e) {
086: e.printStackTrace();
087: }
088: }
089:
090: /**
091: * This is used to extract any user specified MIME types.
092: */
093: private ContentResolver resolver;
094:
095: /**
096: * This is used to cache the meta information acquired.
097: */
098: private Cache cache;
099:
100: /**
101: * This will be used to fetch the real OS system paths.
102: */
103: private File base;
104:
105: /**
106: * Constructor for the <code>FileIndexer</code> object. This is
107: * used to create a centralized store for meta data. The meta
108: * data created by this is acquired from the context frequently,
109: * so in order to improve performance all indexes are cached,
110: * except those URI targets that contain query parameters.
111: *
112: * @param lookup this is used to load the configuration files
113: * @param base this is the root of the context that is used
114: */
115: public FileIndexer(Locator lookup, File base) {
116: this .resolver = new ContentResolver(lookup);
117: this .cache = new Cache(20, 100);
118: this .base = base;
119: }
120:
121: /**
122: * This is an all in one method that allows all the information
123: * on the target URI to be gathered at once. The motivation for
124: * this method is primarily convenience. However it is also used
125: * to increase the performance of the <code>FileIndexer</code>
126: * by using a cache of the most recently used indexes. This will
127: * help to reduce the amount or parsing and memory required.
128: *
129: * @param target this is the request URI that is to be parsed
130: *
131: * @return this is the index of meta data for the URI target
132: */
133: public Index getIndex(String target) {
134: Object data = cache.lookup(target);
135: Index index = (Index) data;
136:
137: if (index == null) {
138: index = getIndex(this , target);
139: }
140: return index;
141: }
142:
143: /**
144: * This is an all in one method that allows all the information
145: * on the target URI to be gathered at once. The motivation for
146: * this method is primarily convenience. However it is also used
147: * to increase the performance of the <code>FileIndexer</code>
148: * by using a cache of the most recently used indexes. This will
149: * help to reduce the amount or parsing and memory required.
150: * This is used as a convinience method for caching indexes.
151: *
152: * @param indexer this is typically the current indexer object
153: * @param target this is the request URI that is to be parsed
154: *
155: * @return this is the index of meta data for the URI target
156: */
157: public Index getIndex(Indexer indexer, String target) {
158: Index index = new FileIndex(indexer, target);
159:
160: if (target.indexOf('?') < 0) {
161: cache.cache(target, index);
162: }
163: return index;
164: }
165:
166: /**
167: * This is used to translate the HTTP request URI into the
168: * <code>File</code> object that it represents. This will convert
169: * the URI to a format that the system can use and then create
170: * the <code>File</code> object for that path. So if for example
171: * the context path was "c:\path" on a DOS system and the HTTP
172: * URI given was "/index.html" this returns the <code>File</code>
173: * "c:\path\index.html". This is basically for convenience as the
174: * same could be achieved using the <code>getRealPath</code> and
175: * then creating the <code>File</code> from that OS specific path.
176: *
177: * @param target this is the HTTP request URI path that is used
178: * to retrieve the <code>File</code> object
179: *
180: * @return returns the <code>File</code> for the given path
181: */
182: public File getFile(URI target) {
183: return getFile(target.getPath());
184: }
185:
186: /**
187: * This is used to translate the HTTP request URI into the
188: * <code>Path</code> object that it represents. This enables the
189: * HTTP request URI to be examined thoroughly an allows various
190: * other files to be examined relative to it. For example if the
191: * URI referenced a path "/usr/bin/file" and some resource
192: * in the same directory is required then the <code>Path</code>
193: * can be used to acquire the relative path. This is useful if
194: * links within a HTML page are to be dynamically generated. The
195: * <code>Path.getRelative</code> provides this functionality.
196: *
197: * @param target this is the HTTP request URI path that is used
198: * to retrieve the <code>Path</code> object
199: *
200: * @return returns the <code>Path</code> for the given path
201: */
202: public Path getPath(URI target) {
203: return target.getPath();
204: }
205:
206: /**
207: * This is used to translate the request URI path into the
208: * <code>File</code> object that it represents. This will convert
209: * the path to a format that the system can use and then create
210: * the <code>File</code> object for that path. So if for example
211: * the context path was "c:\path" on a DOS system and the request
212: * URI given was "/index.html" this returns the <code>File</code>
213: * "c:\path\index.html". This is basically for convenience as the
214: * same could be achieved using the <code>getRealPath</code> and
215: * then creating the <code>File</code> from that OS specific path.
216: *
217: * @param path this is the URI path that is used to retrieve the
218: * <code>File</code> object
219: *
220: * @return returns the <code>File</code> for the given path
221: */
222: private File getFile(Path path) {
223: return new File(base, path.toString().replace('/',
224: File.separatorChar));
225: }
226:
227: /**
228: * This will parse the HTTP request URI specified and return the
229: * <code>Locale</code> for that resource. The <code>Locale</code>
230: * is extracted from the target by examining the path segment of
231: * the HTTP request URI. The path segment is the abs_path token
232: * defined in RFC 2396. It is extracted from a second extension
233: * in the file name. So for example if the HTTP request URI was
234: * "http://some.host/usr;param=value/index.en_US.html" then the
235: * file name "index.en_US.html" would have the second file
236: * extension en_US converted into a <code>Locale</code>. This
237: * will not interfere if the file name was "compressed.tar.gz",
238: * it will simply ignore the "tar" second file extension and
239: * return <code>Locale.getDefault</code>.
240: *
241: * @param target the request URI to be parsed for its locale
242: *
243: * @return this will return the locale for the specified URI
244: */
245: public Locale getLocale(URI target) {
246: return getLocale(target.getPath());
247: }
248:
249: /**
250: * This will parse the request URI path specified and return the
251: * <code>Locale</code> for that resource. The <code>Locale</code>
252: * is extracted from the target by examining the path segment of
253: * the HTTP request URI. The path segment is the abs_path token
254: * defined in RFC 2396. It is extracted from a second extension
255: * in the file name. So for example if the HTTP request URI was
256: * "http://some.host/usr;param=value/index.en_US.html" then the
257: * file name "index.en_US.html" would have the second file
258: * extension en_US converted into a <code>Locale</code>. This
259: * will not interfere if the file name was "compressed.tar.gz",
260: * it will simply ignore the "tar" second file extension and
261: * return <code>Locale.getDefault</code>.
262: *
263: * @param path the path part of the request URI to have its
264: * locale determined
265: *
266: * @return this will return the locale for the specified path
267: */
268: private Locale getLocale(Path path) {
269: String place = path.getCountry();
270: String talk = path.getLanguage();
271:
272: if (talk == null) {
273: return Locale.getDefault();
274: } else if (place == null) {
275: return new Locale(talk);
276: }
277: return new Locale(talk, place);
278: }
279:
280: /**
281: * This method will extract the type attribute of this URI. The
282: * MIME type of the request URI is extracted from the name of the
283: * target. The name for the <code>Context</code> is the last path
284: * segment in the token defined by RFC 2396 as path_segments. So
285: * for example if the target was "some.host:8080/bin/index.html"
286: * then the name for that resource would be "index.html". Once
287: * the name has been extracted the MIME is defined by the file
288: * extension, which for the example is text/html. The MIME type
289: * mappings can be directly specified in a configuration file
290: * named either "Content.xml" or "content.xml".
291: *
292: * @param target the request URI to be parsed for its type
293: *
294: * @return the type of the given request URI path refers to
295: */
296: public String getContentType(URI target) {
297: return getContentType(target.getPath());
298: }
299:
300: /**
301: * This method will extract the type attribute of this path. The
302: * MIME type of the request path is extracted from the name of the
303: * target. The name for the <code>Context</code> is the last path
304: * segment in the token defined by RFC 2396 as path_segments. So
305: * for example if the target was "some.host:8080/bin/index.html"
306: * then the name for that resource would be "index.html". Once
307: * the name has been extracted the MIME is defined by the file
308: * extension, which for the example is text/html. The MIME type
309: * mappings can be directly specified in a configuration file
310: * named either "Content.xml" or "content.xml".
311: *
312: * @param path path that is to have its MIME type determined
313: *
314: * @return the type of the given resource path refers to
315: */
316: private String getContentType(Path path) {
317: String ext = path.getExtension();
318: String target = path.getPath();
319:
320: return getContentType(target, ext);
321: }
322:
323: /**
324: * This method will extract the type attribute of this path. The
325: * MIME type of the request path is extracted from the name of the
326: * target. The name for the <code>Context</code> is the last path
327: * segment is the token defined by RFC 2396 as path_segments. So
328: * for example if the target was "some.host:8080/bin/index.html"
329: * then the name for that resource would be "index.html". Once
330: * the name has been extracted the MIME is defined by the file
331: * extension, which for the example is text/html. The MIME type
332: * mappings can be directly specified in a configuration file
333: * named either "Content.xml" or "content.xml".
334: *
335: * @param path path that is to have its MIME type determined
336: * @param ext this is the file extension for the given path
337: *
338: * @return the type of the given resource path refers to
339: */
340: private String getContentType(String path, String ext) {
341: try {
342: String type = resolver.getContentType(path);
343:
344: if (type != null) {
345: return type;
346: } else if (ext != null) {
347: return content.getString(ext);
348: }
349: } catch (MissingResourceException e) {
350: }
351: return "application/octetstream";
352: }
353: }
|