001: /*
002:
003: Licensed to the Apache Software Foundation (ASF) under one or more
004: contributor license agreements. See the NOTICE file distributed with
005: this work for additional information regarding copyright ownership.
006: The ASF licenses this file to You under the Apache License, Version 2.0
007: (the "License"); you may not use this file except in compliance with
008: the License. You may obtain a copy of the License at
009:
010: http://www.apache.org/licenses/LICENSE-2.0
011:
012: Unless required by applicable law or agreed to in writing, software
013: distributed under the License is distributed on an "AS IS" BASIS,
014: WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015: See the License for the specific language governing permissions and
016: limitations under the License.
017:
018: */
019: package org.apache.batik.util;
020:
021: import java.net.MalformedURLException;
022: import java.net.URL;
023:
024: /**
 * The default protocol handler; this handles the most common
 * protocols, such as 'file', 'http' and 'ftp'.
 * The parsing should be general enough to support most
 * 'normal' URL formats.
029: *
030: * @author <a href="mailto:deweese@apache.org">Thomas DeWeese</a>
031: * @version $Id: ParsedURLDefaultProtocolHandler.java 475477 2006-11-15 22:44:28Z cam $
032: */
033: public class ParsedURLDefaultProtocolHandler extends
034: AbstractParsedURLProtocolHandler {
035:
036: /**
037: * Default constructor sets no protocol so this becomes
038: * default handler.
039: */
040: public ParsedURLDefaultProtocolHandler() {
041: super (null);
042: }
043:
044: /**
045: * Subclass constructor allows subclasses to provide protocol,
046: * to be handled.
047: */
048: protected ParsedURLDefaultProtocolHandler(String protocol) {
049: super (protocol);
050: }
051:
052: /**
053: * Subclasses can override these method to construct alternate
054: * subclasses of ParsedURLData.
055: */
056: protected ParsedURLData constructParsedURLData() {
057: return new ParsedURLData();
058: }
059:
060: /**
061: * Subclasses can override these method to construct alternate
062: * subclasses of ParsedURLData.
063: * @param url the java.net.URL class we reference.
064: */
065: protected ParsedURLData constructParsedURLData(URL url) {
066: return new ParsedURLData(url);
067: }
068:
069: /**
070: * Parses the string and returns the results of parsing in the
071: * ParsedURLData object.
072: * @param urlStr the string to parse as a URL.
073: */
074: public ParsedURLData parseURL(String urlStr) {
075: try {
076: URL url = new URL(urlStr);
077: // System.err.println("System Parse: " + urlStr);
078: return constructParsedURLData(url);
079: } catch (MalformedURLException mue) {
080: // Built in URL wouldn't take it...
081: // mue.printStackTrace();
082: }
083:
084: // new Exception("Custom Parse: " + urlStr).printStackTrace();
085: // System.err.println("Custom Parse: " + urlStr);
086:
087: ParsedURLData ret = constructParsedURLData();
088:
089: if (urlStr == null)
090: return ret;
091:
092: int pidx = 0, idx;
093: int len = urlStr.length();
094:
095: // Pull fragement id off first...
096: idx = urlStr.indexOf('#');
097: ret.ref = null;
098: if (idx != -1) {
099: if (idx + 1 < len)
100: ret.ref = urlStr.substring(idx + 1);
101: urlStr = urlStr.substring(0, idx);
102: len = urlStr.length();
103: }
104:
105: if (len == 0)
106: return ret;
107:
108: // Protocol is only allowed to include -+.a-zA-Z
109: // So as soon as we hit something else we know we
110: // are done (if it is a ':' then we have protocol otherwise
111: // we don't.
112: idx = 0;
113: char ch = urlStr.charAt(idx);
114: while ((ch == '-') || (ch == '+') || (ch == '.')
115: || ((ch >= 'a') && (ch <= 'z'))
116: || ((ch >= 'A') && (ch <= 'Z'))) {
117: idx++;
118: if (idx == len) {
119: ch = 0;
120: break;
121: }
122: ch = urlStr.charAt(idx);
123: }
124:
125: if (ch == ':') {
126: // Has a protocol spec...
127: ret.protocol = urlStr.substring(pidx, idx).toLowerCase();
128: pidx = idx + 1; // Skip ':'
129: }
130:
131: // See if we have host/port spec.
132: idx = urlStr.indexOf('/');
133: if ((idx == -1)
134: || ((pidx + 2 < len) && (urlStr.charAt(pidx) == '/') && (urlStr
135: .charAt(pidx + 1) == '/'))) {
136: // No slashes (apache.org) or a double slash
137: // (//apache.org/....) so
138: // we should have host[:port] before next slash.
139: if (idx != -1)
140: pidx += 2; // Skip double slash...
141:
142: idx = urlStr.indexOf('/', pidx); // find end of host:Port spec
143: String hostPort;
144: if (idx == -1)
145: // Just host and port nothing following...
146: hostPort = urlStr.substring(pidx);
147: else
148: // Path spec follows...
149: hostPort = urlStr.substring(pidx, idx);
150:
151: int hidx = idx; // Remember location of '/'
152:
153: // pull apart host and port number...
154: idx = hostPort.indexOf(':');
155: ret.port = -1;
156: if (idx == -1) {
157: // Just Host...
158: if (hostPort.length() == 0)
159: ret.host = null;
160: else
161: ret.host = hostPort;
162: } else {
163: // Host and port
164: if (idx == 0)
165: ret.host = null;
166: else
167: ret.host = hostPort.substring(0, idx);
168:
169: if (idx + 1 < hostPort.length()) {
170: String portStr = hostPort.substring(idx + 1);
171: try {
172: ret.port = Integer.parseInt(portStr);
173: } catch (NumberFormatException nfe) {
174: // bad port leave as '-1'
175: }
176: }
177: }
178: if (((ret.host == null) || (ret.host.indexOf('.') == -1))
179: && (ret.port == -1))
180: // no '.' in a host spec??? and no port, probably
181: // just a path.
182: ret.host = null;
183: else
184: pidx = hidx;
185: }
186:
187: if ((pidx == -1) || (pidx >= len))
188: return ret; // Nothing follows
189:
190: ret.path = urlStr.substring(pidx);
191: return ret;
192: }
193:
194: public static String unescapeStr(String str) {
195: int idx = str.indexOf('%');
196: if (idx == -1)
197: return str; // quick out..
198:
199: int prev = 0;
200: StringBuffer ret = new StringBuffer();
201: while (idx != -1) {
202: if (idx != prev)
203: ret.append(str.substring(prev, idx));
204:
205: if (idx + 2 >= str.length())
206: break;
207: prev = idx + 3;
208: idx = str.indexOf('%', prev);
209:
210: int ch1 = charToHex(str.charAt(idx + 1));
211: int ch2 = charToHex(str.charAt(idx + 1));
212: if ((ch1 == -1) || (ch2 == -1))
213: continue;
214: ret.append((char) (ch1 << 4 | ch2));
215: }
216:
217: return ret.toString();
218: }
219:
220: public static int charToHex(int ch) {
221: switch (ch) {
222: case '0':
223: case '1':
224: case '2':
225: case '3':
226: case '4':
227: case '5':
228: case '6':
229: case '7':
230: case '8':
231: case '9':
232: return ch - '0';
233: case 'a':
234: case 'A':
235: return 10;
236: case 'b':
237: case 'B':
238: return 11;
239: case 'c':
240: case 'C':
241: return 12;
242: case 'd':
243: case 'D':
244: return 13;
245: case 'e':
246: case 'E':
247: return 14;
248: case 'f':
249: case 'F':
250: return 15;
251: default:
252: return -1;
253: }
254: }
255:
256: /**
257: * Parses the string as a sub URL of baseURL, and returns the
258: * results of parsing in the ParsedURLData object.
259: * @param baseURL the base url for parsing.
260: * @param urlStr the string to parse as a URL.
261: */
262: public ParsedURLData parseURL(ParsedURL baseURL, String urlStr) {
263: // Reference to same document (including fragment, and query).
264: if (urlStr.length() == 0)
265: return baseURL.data;
266:
267: // System.err.println("Base: " + baseURL + "\n" +
268: // "Sub: " + urlStr);
269:
270: int idx = 0, len = urlStr.length();
271: if (len == 0)
272: return baseURL.data;
273:
274: // Protocol is only allowed to include -+.a-zA-Z
275: // So as soon as we hit something else we know we
276: // are done (if it is a ':' then we have protocol otherwise
277: // we don't.
278: char ch = urlStr.charAt(idx);
279: while ((ch == '-') || (ch == '+') || (ch == '.')
280: || ((ch >= 'a') && (ch <= 'z'))
281: || ((ch >= 'A') && (ch <= 'Z'))) {
282: idx++;
283: if (idx == len) {
284: ch = 0;
285: break;
286: }
287: ch = urlStr.charAt(idx);
288: }
289: String protocol = null;
290: if (ch == ':') {
291: // Has a protocol spec...
292: protocol = urlStr.substring(0, idx).toLowerCase();
293: }
294:
295: if (protocol != null) {
296: // Temporary if we have a protocol then assume absolute
297: // URL. Technically this is the correct handling but much
298: // software supports relative URLs with a protocol that
299: // matches the base URL's protocol.
300: // if (true)
301: // return parseURL(urlStr);
302: if (!protocol.equals(baseURL.getProtocol()))
303: // Different protocols, assume absolute URL ignore base...
304: return parseURL(urlStr);
305:
306: // Same protocols, if char after ':' is a '/' then it's
307: // still absolute...
308: idx++;
309: if (idx == urlStr.length())
310: // Just a Protocol???
311: return parseURL(urlStr);
312:
313: if (urlStr.charAt(idx) == '/')
314: // Absolute URL...
315: return parseURL(urlStr);
316:
317: // Still relative just drop the protocol (we will pick it
318: // back up from the baseURL later...).
319: urlStr = urlStr.substring(idx);
320: }
321:
322: if (urlStr.startsWith("/")) {
323: if ((urlStr.length() > 1) && (urlStr.charAt(1) == '/')) {
324: // Relative but only uses protocol from base
325: return parseURL(baseURL.getProtocol() + ":" + urlStr);
326: }
327: // Relative 'absolute' path, uses protocol and authority
328: // (host) from base
329: return parseURL(baseURL.getPortStr() + urlStr);
330: }
331:
332: if (urlStr.startsWith("#")) {
333: String base = baseURL.getPortStr();
334: if (baseURL.getPath() != null)
335: base += baseURL.getPath();
336: return parseURL(base + urlStr);
337: }
338:
339: String path = baseURL.getPath();
340: // No path? well we will treat this as being relative to it's self.
341: if (path == null)
342: path = "";
343: idx = path.lastIndexOf('/');
344: if (idx == -1)
345: // baseURL is just a filename (in current dir) so use current dir
346: // as base of new URL.
347: path = "";
348: else
349: path = path.substring(0, idx + 1);
350:
351: // System.err.println("Base Path: " + path);
352: // System.err.println("Base PortStr: " + baseURL.getPortStr());
353: return parseURL(baseURL.getPortStr() + path + urlStr);
354: }
355: }
|