Source Code Cross Referenced for URI.java in  » Portal » Open-Portal » com » sun » portal » rewriter » util » uri » Java Source Code / Java Documentation | Java Source Code and Java Documentation

Java Source Code / Java Documentation
1. 6.0 JDK Core
2. 6.0 JDK Modules
3. 6.0 JDK Modules com.sun
4. 6.0 JDK Modules com.sun.java
5. 6.0 JDK Modules sun
6. 6.0 JDK Platform
7. Ajax
8. Apache Harmony Java SE
9. Aspect oriented
10. Authentication Authorization
11. Blogger System
12. Build
13. Byte Code
14. Cache
15. Chart
16. Chat
17. Code Analyzer
18. Collaboration
19. Content Management System
20. Database Client
21. Database DBMS
22. Database JDBC Connection Pool
23. Database ORM
24. Development
25. EJB Server geronimo
26. EJB Server GlassFish
27. EJB Server JBoss 4.2.1
28. EJB Server resin 3.1.5
29. ERP CRM Financial
30. ESB
31. Forum
32. GIS
33. Graphic Library
34. Groupware
35. HTML Parser
36. IDE
37. IDE Eclipse
38. IDE Netbeans
39. Installer
40. Internationalization Localization
41. Inversion of Control
42. Issue Tracking
43. J2EE
44. JBoss
45. JMS
46. JMX
47. Library
48. Mail Clients
49. Net
50. Parser
51. PDF
52. Portal
53. Profiler
54. Project Management
55. Report
56. RSS RDF
57. Rule Engine
58. Science
59. Scripting
60. Search Engine
61. Security
62. Servlet Container
63. Source Control
64. Swing Library
65. Template Engine
66. Test Coverage
67. Testing
68. UML
69. Web Crawler
70. Web Framework
71. Web Mail
72. Web Server
73. Web Services
74. Web Services apache cxf 2.0.1
75. Web Services AXIS2
76. Wiki Engine
77. Workflow Engines
78. XML
79. XML UI
Java
Java Tutorial
Java Open Source
Jar File Download
Java Articles
Java Products
Java by API
Photoshop Tutorials
Maya Tutorials
Flash Tutorials
3ds-Max Tutorials
Illustrator Tutorials
GIMP Tutorials
C# / C Sharp
C# / CSharp Tutorial
C# / CSharp Open Source
ASP.Net
ASP.NET Tutorial
JavaScript DHTML
JavaScript Tutorial
JavaScript Reference
HTML / CSS
HTML CSS Reference
C / ANSI-C
C Tutorial
C++
C++ Tutorial
Ruby
PHP
Python
Python Tutorial
Python Open Source
SQL Server / T-SQL
SQL Server / T-SQL Tutorial
Oracle PL / SQL
Oracle PL/SQL Tutorial
PostgreSQL
SQL / MySQL
MySQL Tutorial
VB.Net
VB.Net Tutorial
Flash / Flex / ActionScript
VBA / Excel / Access / Word
XML
XML Tutorial
Microsoft Office PowerPoint 2007 Tutorial
Microsoft Office Excel 2007 Tutorial
Microsoft Office Word 2007 Tutorial
Java Source Code / Java Documentation » Portal » Open Portal » com.sun.portal.rewriter.util.uri 
Source Cross Referenced  Class Diagram Java Document (Java Doc) 


0001:        /*
0002:         * Copyright 2002 Sun Microsystems, Inc. All rights reserved.
0003:         * SUN PROPRIETARY/CONFIDENTIAL. Use is subject to license terms.
0004:         */
0005:
0006:        package com.sun.portal.rewriter.util.uri;
0007:
0008:        import java.io.Serializable;
0009:
0010:        public final class URI implements  Comparable, Serializable {
0011:            private transient String scheme; // null ==> relative URI (see isAbsolute())
0012:            private transient String fragment;
0013:            private transient String authority; // Registry or server
0014:            private transient String userInfo;
0015:            private transient String host; // null ==> registry-based
0016:            private transient int port = -1; // -1 ==> undefined
0017:            private transient String path; // null ==> opaque (see isOpaque())
0018:            private transient String query;
0019:            private volatile transient String schemeSpecificPart;
0020:            private volatile transient int hash; // Zero ==> undefined; filled in lazily by hashCode()
0021:            private int length = 16; // Initial StringBuffer capacity used when building the string form; URI(String) sets it to the input length. NOTE(review): not transient, so it is serialized as well
0022:            private volatile String string; // Built lazily by defineString(). NOTE(review): described originally as "the only serializable field", but 'length' above is non-transient too - confirm intent
0023:
/**
 * Creates an empty instance. The static helpers of this class
 * (resolve and normalize) use it and then assign the component fields
 * directly.
 */
private URI() {
    // Nothing to do: every field keeps its declared default.
}
0026:
/**
 * Builds a URI from its textual form.
 *
 * The length of the input is remembered as the buffer capacity used
 * later when the string form is rebuilt, and the text is handed to a
 * Parser that is run with parse(false).
 *
 * @param str the text to parse; a null value fails with a
 *            NullPointerException on the length lookup
 * @throws URISyntaxException declared for the parsing step
 */
public URI(final String str) throws URISyntaxException {
    this.length = str.length();
    new Parser(str).parse(false);
}
0032:
0033:            public URI parseServerAuthority() throws URISyntaxException {
0034:                if ((host != null) || (authority == null)) {
0035:                    return this ;
0036:                }
0037:
0038:                defineString();
0039:                new Parser(string).parse(true);
0040:                return this ;
0041:            }//parseServerAuthority()
0042:
0043:            public URI normalize() {
0044:                return normalize(this );
0045:            }//normalize()
0046:
0047:            public URI resolve(final URI uri) {
0048:                return resolve(this , uri);
0049:            }//resolve()
0050:
0051:            public String getScheme() {
0052:                return scheme;
0053:            }//getScheme()
0054:
0055:            public boolean isAbsolute() {
0056:                return scheme != null;
0057:            }//isAbsolute()
0058:
0059:            public boolean isOpaque() {
0060:                return path == null;
0061:            }//isOpaque()
0062:
0063:            public String getAuthority() {
0064:                return authority;
0065:            }//getAuthority()
0066:
0067:            public String getUserInfo() {
0068:                return userInfo;
0069:            }//getUserInfo()
0070:
0071:            public String getHost() {
0072:                return host;
0073:            }//getHost()
0074:
0075:            public int getPort() {
0076:                return port;
0077:            }//getPort()
0078:
0079:            public String getPath() {
0080:                return path;
0081:            }//getPath()
0082:
0083:            public String getQuery() {
0084:                return query;
0085:            }//getQuery()
0086:
0087:            public String getFragment() {
0088:                return fragment;
0089:            }//getFragment()
0090:
0091:            public boolean equals(final Object ob) {
0092:                if (ob == this ) {
0093:                    return true;
0094:                }
0095:                if (!(ob instanceof  URI)) {
0096:                    return false;
0097:                }
0098:                URI that = (URI) ob;
0099:                if (this .isOpaque() != that.isOpaque()) {
0100:                    return false;
0101:                }
0102:
0103:                if (!equalIgnoringCase(this .scheme, that.scheme)) {
0104:                    return false;
0105:                }
0106:
0107:                if (!equal(this .fragment, that.fragment)) {
0108:                    return false;
0109:                }
0110:
0111:                // Opaque
0112:                if (this .isOpaque()) {
0113:                    return equal(this .schemeSpecificPart,
0114:                            that.schemeSpecificPart);
0115:                }
0116:
0117:                // Hierarchical
0118:                if (!equal(this .path, that.path)) {
0119:                    return false;
0120:                }
0121:
0122:                if (!equal(this .query, that.query)) {
0123:                    return false;
0124:                }
0125:
0126:                // Authorities
0127:                if (this .authority == that.authority) {
0128:                    return true;
0129:                }
0130:
0131:                if (this .host != null) {
0132:                    // Server-based
0133:                    if (!equal(this .userInfo, that.userInfo)) {
0134:                        return false;
0135:                    }
0136:
0137:                    if (!equalIgnoringCase(this .host, that.host)) {
0138:                        return false;
0139:                    }
0140:
0141:                    if (this .port != that.port) {
0142:                        return false;
0143:                    }
0144:                } else if (this .authority != null) {
0145:                    // Registry-based
0146:                    if (!equal(this .authority, that.authority)) {
0147:                        return false;
0148:                    }
0149:                } else if (this .authority != that.authority) {
0150:                    return false;
0151:                }
0152:
0153:                return true;
0154:            }//equals()
0155:
0156:            public int hashCode() {
0157:                if (hash != 0) {
0158:                    return hash;
0159:                }
0160:
0161:                int h = hashIgnoringCase(0, scheme);
0162:                h = hash(h, fragment);
0163:                if (isOpaque()) {
0164:                    h = hash(h, schemeSpecificPart);
0165:                } else {
0166:                    h = hash(h, path);
0167:                    h = hash(h, query);
0168:                    if (host != null) {
0169:                        h = hash(h, userInfo);
0170:                        h = hashIgnoringCase(h, host);
0171:                        h += 1949 * port;
0172:                    } else {
0173:                        h = hash(h, authority);
0174:                    }
0175:                }
0176:                hash = h;
0177:                return h;
0178:            }//hashCode()
0179:
0180:            public int compareTo(final Object ob) {
0181:                final URI that = (URI) ob;
0182:                int c;
0183:
0184:                if ((c = compareIgnoringCase(this .scheme, that.scheme)) != 0)
0185:                    return c;
0186:
0187:                if (this .isOpaque()) {
0188:                    if (that.isOpaque()) {
0189:                        // Both opaque
0190:                        if ((c = compare(this .schemeSpecificPart,
0191:                                that.schemeSpecificPart)) != 0)
0192:                            return c;
0193:                        return compare(this .fragment, that.fragment);
0194:                    }
0195:                    return +1; // Opaque > hierarchical
0196:                } else if (that.isOpaque()) {
0197:                    return -1; // Hierarchical < opaque
0198:                }
0199:
0200:                // Hierarchical
0201:                if ((this .host != null) && (that.host != null)) {
0202:                    // Both server-based
0203:                    if ((c = compare(this .userInfo, that.userInfo)) != 0)
0204:                        return c;
0205:                    if ((c = compareIgnoringCase(this .host, that.host)) != 0)
0206:                        return c;
0207:                    if ((c = this .port - that.port) != 0)
0208:                        return c;
0209:                } else {
0210:                    // If one or both authorities are registry-based then we simply
0211:                    // compare them in the usual, case-sensitive way.  If one is
0212:                    // registry-based and one is server-based then the strings are
0213:                    // guaranteed to be unequal, hence the comparison will never return
0214:                    // zero and the compareTo and equals methods will remain
0215:                    // consistent.
0216:                    if ((c = compare(this .authority, that.authority)) != 0)
0217:                        return c;
0218:                }
0219:
0220:                if ((c = compare(this .path, that.path)) != 0) {
0221:                    return c;
0222:                }
0223:
0224:                if ((c = compare(this .query, that.query)) != 0) {
0225:                    return c;
0226:                }
0227:
0228:                return compare(this .fragment, that.fragment);
0229:            }//compareTo()
0230:
0231:            public String toString() {
0232:                defineString();
0233:                return string;
0234:            }
0235:
0236:            private static int toLower(final char c) {
0237:                if ((c >= 'A') && (c <= 'Z')) {
0238:                    return c + ('a' - 'A');
0239:                }
0240:
0241:                return c;
0242:            }//toLower()
0243:
0244:            private static boolean equal(final String s, final String t) {
0245:                if (s == t) {
0246:                    return true;
0247:                }
0248:
0249:                if ((s != null) && (t != null)) {
0250:                    if (s.length() != t.length()) {
0251:                        return false;
0252:                    }
0253:
0254:                    if (s.indexOf('%') < 0) {
0255:                        return s.equals(t);
0256:                    }
0257:
0258:                    int n = s.length();
0259:                    for (int i = 0; i < n;) {
0260:                        char c = s.charAt(i);
0261:                        char d = t.charAt(i);
0262:                        if (c != '%') {
0263:                            if (c != d) {
0264:                                return false;
0265:                            }
0266:
0267:                            i++;
0268:                            continue;
0269:                        }
0270:                        i++;
0271:                        if (toLower(s.charAt(i)) != toLower(t.charAt(i))) {
0272:                            return false;
0273:                        }
0274:
0275:                        i++;
0276:                        if (toLower(s.charAt(i)) != toLower(t.charAt(i))) {
0277:                            return false;
0278:                        }
0279:
0280:                        i++;
0281:                    }
0282:                    return true;
0283:                }
0284:                return false;
0285:            }//equal()
0286:
0287:            // US-ASCII only
0288:            private static boolean equalIgnoringCase(final String s,
0289:                    final String t) {
0290:                if (s == t) {
0291:                    return true;
0292:                }
0293:
0294:                if ((s != null) && (t != null)) {
0295:                    int n = s.length();
0296:                    if (t.length() != n) {
0297:                        return false;
0298:                    }
0299:
0300:                    for (int i = 0; i < n; i++) {
0301:                        if (toLower(s.charAt(i)) != toLower(t.charAt(i))) {
0302:                            return false;
0303:                        }
0304:                    }
0305:                    return true;
0306:                }
0307:                return false;
0308:            }//equalIgnoringCase()
0309:
0310:            private static int hash(final int hash, final String s) {
0311:                if (s == null) {
0312:                    return hash;
0313:                }
0314:
0315:                return hash * 127 + s.hashCode();
0316:            }//hash()
0317:
0318:            // US-ASCII only
0319:            private static int hashIgnoringCase(final int hash, final String s) {
0320:                if (s == null) {
0321:                    return hash;
0322:                }
0323:
0324:                int h = hash;
0325:                int n = s.length();
0326:                for (int i = 0; i < n; i++) {
0327:                    h = 31 * h + toLower(s.charAt(i));
0328:                }
0329:
0330:                return h;
0331:            }//hashIgnoringCase()
0332:
0333:            private static int compare(final String s, final String t) {
0334:                if (s == t) {
0335:                    return 0;
0336:                }
0337:
0338:                if (s != null) {
0339:                    if (t != null) {
0340:                        return s.compareTo(t);
0341:                    }
0342:
0343:                    else {
0344:                        return -1;
0345:                    }
0346:                } else {
0347:                    return +1;
0348:                }
0349:            }//compare()
0350:
0351:            // US-ASCII only
0352:            private static int compareIgnoringCase(final String s,
0353:                    final String t) {
0354:                if (s == t)
0355:                    return 0;
0356:                if (s != null) {
0357:                    if (t != null) {
0358:                        int sn = s.length();
0359:                        int tn = t.length();
0360:                        int n = sn < tn ? sn : tn;
0361:                        for (int i = 0; i < n; i++) {
0362:                            int c = toLower(s.charAt(i)) - toLower(t.charAt(i));
0363:                            if (c != 0)
0364:                                return c;
0365:                        }
0366:                        return sn - tn;
0367:                    }
0368:                    return +1;
0369:                } else {
0370:                    return -1;
0371:                }
0372:            }
0373:
0374:            private static void appendAuthority(final StringBuffer sb,
0375:                    final String authority, final String userInfo,
0376:                    final String host, final int port) {
0377:                if (host != null) {
0378:                    sb.append("//");
0379:                    if (userInfo != null) {
0380:                        sb.append(quote(userInfo, L_USERINFO, H_USERINFO));
0381:                        sb.append('@');
0382:                    }
0383:                    boolean needBrackets = ((host.indexOf(':') >= 0)
0384:                            && !host.startsWith("[") && !host.endsWith("]"));
0385:                    if (needBrackets)
0386:                        sb.append('[');
0387:                    sb.append(host);
0388:                    if (needBrackets)
0389:                        sb.append(']');
0390:                    if (port != -1) {
0391:                        sb.append(':');
0392:                        sb.append(port);
0393:                    }
0394:                } else if (authority != null) {
0395:                    sb.append("//");
0396:                    sb.append(quote(authority, L_REG_NAME | L_SERVER,
0397:                            H_REG_NAME | H_SERVER));
0398:                }
0399:            }
0400:
0401:            private void appendSchemeSpecificPart(final StringBuffer sb,
0402:                    final String opaquePart, final String authority,
0403:                    final String userInfo, final String host, final int port,
0404:                    final String path, final String query) {
0405:                if (opaquePart != null) {
0406:                    sb.append(quote(opaquePart, L_URIC, H_URIC));
0407:                } else {
0408:                    appendAuthority(sb, authority, userInfo, host, port);
0409:                    if (path != null) {
0410:                        sb.append(quote(path, L_PATH, H_PATH));
0411:                    }
0412:
0413:                    if (query != null) {
0414:                        sb.append('?');
0415:                        sb.append(quote(query, L_URIC, H_URIC));
0416:                    }
0417:                }
0418:            }
0419:
0420:            private static void appendFragment(final StringBuffer sb,
0421:                    final String fragment) {
0422:                if (fragment != null) {
0423:                    sb.append('#');
0424:                    sb.append(quote(fragment, L_URIC, H_URIC));
0425:                }
0426:            }
0427:
0428:            private String toString(final String scheme,
0429:                    final String opaquePart, final String authority,
0430:                    final String userInfo, final String host, final int port,
0431:                    final String path, final String query, final String fragment) {
0432:                final StringBuffer sb = new StringBuffer(length);
0433:                if (scheme != null) {
0434:                    sb.append(scheme);
0435:                    sb.append(':');
0436:                }
0437:                appendSchemeSpecificPart(sb, opaquePart, authority, userInfo,
0438:                        host, port, path, query);
0439:                appendFragment(sb, fragment);
0440:                return sb.toString();
0441:            }
0442:
0443:            private void defineString() {
0444:                if (string != null)
0445:                    return;
0446:                string = toString(scheme, isOpaque() ? schemeSpecificPart
0447:                        : null, authority, userInfo, host, port, path, query,
0448:                        fragment);
0449:            }
0450:
0451:            // -- Normalization, resolution, and relativization --
0452:            // RFC2396 5.2 (6)
0453:            private static String resolvePath(final String base,
0454:                    final String child, final boolean absolute) {
0455:                final int i = base.lastIndexOf('/');
0456:                final int cn = child.length();
0457:                String path = "";
0458:
0459:                if (cn == 0) {
0460:                    // 5.2 (6a)
0461:                    if (i >= 0) {
0462:                        path = base.substring(0, i + 1);
0463:                    }
0464:                } else {
0465:                    StringBuffer sb = new StringBuffer(child.length() + i + 1);
0466:                    // 5.2 (6a)
0467:                    if (i >= 0) {
0468:                        sb.append(base.substring(0, i + 1));
0469:                    }
0470:                    // 5.2 (6b)
0471:                    sb.append(child);
0472:                    path = sb.toString();
0473:                }
0474:
0475:                // 5.2 (6c-f)
0476:                final String np = normalize(path);
0477:
0478:                // 5.2 (6g): If the result is absolute but the path begins with "../",
0479:                // then we simply leave the path as-is
0480:
0481:                return np;
0482:            }//resolvePath()
0483:
0484:            // RFC2396 5.2
0485:            private static URI resolve(final URI base, final URI child) {
0486:                if (base.isOpaque() || child.isOpaque()) {
0487:                    return child;
0488:                }
0489:
0490:                // 5.2 (2): Reference to current document (lone fragment)
0491:                if ((child.scheme == null) && (child.authority == null)
0492:                        && child.path.equals("") && (child.fragment != null)
0493:                        && (child.query == null)) {
0494:                    if ((base.fragment != null)
0495:                            && child.fragment.equals(base.fragment)) {
0496:                        return base;
0497:                    }
0498:                    URI ru = new URI();
0499:                    ru.scheme = base.scheme;
0500:                    ru.authority = base.authority;
0501:                    ru.userInfo = base.userInfo;
0502:                    ru.host = base.host;
0503:                    ru.port = base.port;
0504:                    ru.path = base.path;
0505:                    ru.fragment = child.fragment;
0506:                    ru.query = base.query;
0507:                    return ru;
0508:                }
0509:
0510:                // 5.2 (3): Child is absolute
0511:                if (child.scheme != null) {
0512:                    return child;
0513:                }
0514:
0515:                URI ru = new URI(); // Resolved URI
0516:                ru.scheme = base.scheme;
0517:                ru.query = child.query;
0518:                ru.fragment = child.fragment;
0519:
0520:                // 5.2 (4): Authority
0521:                if (child.authority == null) {
0522:                    ru.authority = base.authority;
0523:                    ru.host = base.host;
0524:                    ru.userInfo = base.userInfo;
0525:                    ru.port = base.port;
0526:
0527:                    String cp = (child.path == null) ? "" : child.path;
0528:                    if ((cp.length() > 0) && (cp.charAt(0) == '/')) {
0529:                        // 5.2 (5): Child path is absolute
0530:                        ru.path = child.path;
0531:                    } else {
0532:                        // 5.2 (6): Resolve relative path
0533:                        ru.path = resolvePath(base.path, cp, base.isAbsolute());
0534:                    }
0535:                } else {
0536:                    ru.authority = child.authority;
0537:                    ru.host = child.host;
0538:                    ru.userInfo = child.userInfo;
0539:                    ru.host = child.host;
0540:                    ru.port = child.port;
0541:                    ru.path = child.path;
0542:                }
0543:
0544:                // 5.2 (7): Recombine (nothing to do here)
0545:                return ru;
0546:            }
0547:
0548:            // If the given URI's path is already normal then return the URI itself;
0549:            // otherwise, return a new URI containing the normalized path.
0550:            private static URI normalize(final URI u) {
0551:                if (u.isOpaque() || (u.path == null) || (u.path.length() == 0))
0552:                    return u;
0553:
0554:                String np = normalize(u.path);
0555:                if (np == u.path)
0556:                    return u;
0557:
0558:                URI v = new URI();
0559:                v.scheme = u.scheme;
0560:                v.fragment = u.fragment;
0561:                v.authority = u.authority;
0562:                v.userInfo = u.userInfo;
0563:                v.host = u.host;
0564:                v.port = u.port;
0565:                v.path = np;
0566:                v.query = u.query;
0567:                return v;
0568:            }
0569:
0570:            private static int needsNormalization(final String path) {
0571:                boolean normal = true;
0572:                int ns = 0; // Number of segments
0573:                final int end = path.length() - 1; // Index of last char in path
0574:                int p = 0; // Index of next char in path
0575:
0576:                // Skip initial slashes
0577:                while (p <= end) {
0578:                    if (path.charAt(p) != '/') {
0579:                        break;
0580:                    }
0581:
0582:                    p++;
0583:                }
0584:                if (p > 1) {
0585:                    normal = false;
0586:                }
0587:
0588:                // Scan segments
0589:                while (p <= end) {
0590:                    if ((path.charAt(p) == '.')
0591:                            && ((p == end) || ((path.charAt(p + 1) == '/') || ((path
0592:                                    .charAt(p + 1) == '.') && ((p + 1 == end) || (path
0593:                                    .charAt(p + 2) == '/')))))) {
0594:                        normal = false;
0595:                    }
0596:                    ns++;
0597:
0598:                    while (p <= end) {
0599:                        if (path.charAt(p++) != '/') {
0600:                            continue;
0601:                        }
0602:
0603:                        while (p <= end) {
0604:                            if (path.charAt(p) != '/') {
0605:                                break;
0606:                            }
0607:
0608:                            normal = false;
0609:                            p++;
0610:                        }
0611:
0612:                        break;
0613:                    }
0614:                }
0615:
0616:                return normal ? -1 : ns;
0617:            }//needsNormalization()
0618:
0619:            private static void split(final char[] path, final int[] segs) {
0620:                final int end = path.length - 1; // Index of last char in path
0621:                int p = 0; // Index of next char in path
0622:                int i = 0; // Index of current segment
0623:
0624:                // Skip initial slashes
0625:                while (p <= end) {
0626:                    if (path[p] != '/') {
0627:                        break;
0628:                    }
0629:                    path[p] = '\0';
0630:                    p++;
0631:                }
0632:
0633:                while (p <= end) {
0634:                    // Note start of segment
0635:                    segs[i++] = p++;
0636:
0637:                    // Find beginning of next segment
0638:                    while (p <= end) {
0639:                        if (path[p++] != '/') {
0640:                            continue;
0641:                        }
0642:
0643:                        path[p - 1] = '\0';
0644:
0645:                        // Skip redundant slashes
0646:                        while (p <= end) {
0647:                            if (path[p] != '/') {
0648:                                break;
0649:                            }
0650:                            path[p++] = '\0';
0651:                        }
0652:                        break;
0653:                    }
0654:                }
0655:
0656:                if (i != segs.length) {
0657:                    throw new InternalError(); // ASSERT
0658:                }
0659:            }//split()
0660:
0661:            // Join the segments in the given path according to the given segment-index
0662:            // array, ignoring those segments whose index entries have been set to -1,
0663:            // and inserting slashes as needed.  Return the length of the resulting
0664:            // path.
0665:            //
0666:            // Preconditions:
0667:            //   segs[i] == -1 implies segment i is to be ignored
0668:            //   path computed by split, as above, with '\0' having replaced '/'
0669:            //
0670:            // Postconditions:
0671:            //   path[0] .. path[return value] == Resulting path
0672:            private static int join(final char[] path, final int[] segs) {
0673:                final int ns = segs.length; // Number of segments
0674:                final int end = path.length - 1; // Index of last char in path
0675:                int p = 0; // Index of next path char to write
0676:
0677:                if (path[p] == '\0') {
0678:                    // Restore initial slash for absolute paths
0679:                    path[p++] = '/';
0680:                }
0681:
0682:                for (int i = 0; i < ns; i++) {
0683:                    int q = segs[i]; // Current segment
0684:                    if (q == -1) {
0685:                        // Ignore this segment
0686:                        continue;
0687:                    }
0688:
0689:                    if (p == q) {
0690:                        // We're already at this segment, so just skip to its end
0691:                        while ((p <= end) && (path[p] != '\0')) {
0692:                            p++;
0693:                        }
0694:
0695:                        if (p <= end) {
0696:                            // Preserve trailing slash
0697:                            path[p++] = '/';
0698:
0699:                            //nag fix for test case for path in url:
0700:                            //"https://www.vlc.com.au/http://www.vlc.com.au/abc.html"
0701:                            //after normalization path was /http:/www.vlc.com.au/abc.html as
0702:                            //oppsed to /http://www.vlc.com.au/abc.html - observer :// after protocol
0703:                            if (!(p > end) && (path[p] == '\0')) {
0704:                                path[p++] = '/';
0705:                            }
0706:                        }
0707:                    } else if (p < q) {
0708:                        // Copy q down to p
0709:                        while ((q <= end) && (path[q] != '\0')) {
0710:                            path[p++] = path[q++];
0711:                        }
0712:
0713:                        if (q <= end) {
0714:                            // Preserve trailing slash
0715:                            path[p++] = '/';
0716:                            //nag fix for test case for path in url:
0717:                            //"https://www.vlc.com.au/../http://www.vlc.com.au/abc.html"
0718:                            //after normalization path was ../http:/www.vlc.com.au/abc.html as
0719:                            //oppsed to ../http://www.vlc.com.au/abc.html - observer :// after protocol
0720:                            if ((q + 1 <= end) && (path[q + 1] == '\0')) {
0721:                                path[p++] = '/';
0722:                            }
0723:                        }
0724:                    } else {
0725:                        throw new InternalError(); // ASSERT false
0726:                    }
0727:                }
0728:
0729:                return p;
0730:            }//join()
0731:
0732:            // Remove "." segments from the given path, and remove segment pairs
0733:            // consisting of a non-".." segment followed by a ".." segment.
0734:            private static void removeDots(final char[] path, final int[] segs) {
0735:                final int ns = segs.length;
0736:                final int end = path.length - 1;
0737:
0738:                for (int i = 0; i < ns; i++) {
0739:                    int dots = 0; // Number of dots found (0, 1, or 2)
0740:
0741:                    // Find next occurrence of "." or ".."
0742:                    do {
0743:                        int p = segs[i];
0744:                        if (path[p] == '.') {
0745:                            if (p == end) {
0746:                                dots = 1;
0747:                                break;
0748:                            } else if (path[p + 1] == '\0') {
0749:                                dots = 1;
0750:                                break;
0751:                            } else if ((path[p + 1] == '.')
0752:                                    && ((p + 1 == end) || (path[p + 2] == '\0'))) {
0753:                                dots = 2;
0754:                                break;
0755:                            }
0756:                        }
0757:                        i++;
0758:                    } while (i < ns);
0759:
0760:                    if ((i > ns) || (dots == 0)) {
0761:                        break;
0762:                    }
0763:
0764:                    if (dots == 1) {
0765:                        // Remove this occurrence of "."
0766:                        segs[i] = -1;
0767:                    } else {
0768:                        // If there is a preceding non-".." segment, remove both that
0769:                        // segment and this occurrence of ".."; otherwise, leave this
0770:                        // ".." segment as-is.
0771:                        int j;
0772:                        for (j = i - 1; j >= 0; j--) {
0773:                            if (segs[j] != -1) {
0774:                                break;
0775:                            }
0776:                        }
0777:
0778:                        if (j >= 0) {
0779:                            int q = segs[j];
0780:                            if (!((path[q] == '.') && (path[q + 1] == '.') && (path[q + 2] == '\0'))) {
0781:                                segs[i] = -1;
0782:                                segs[j] = -1;
0783:                            }
0784:                        }
0785:                    }
0786:                }//for loop
0787:            }//removeDots()
0788:
0789:            // DEVIATION: If the normalized path is relative, and if the first
0790:            // segment could be parsed as a scheme name, then prepend a "." segment
0791:            private static void maybeAddLeadingDot(final char[] path,
0792:                    final int[] segs) {
0793:
0794:                if (path[0] == '\0') {
0795:                    // The path is absolute
0796:                    return;
0797:                }
0798:
0799:                int ns = segs.length;
0800:                int f = 0; // Index of first segment
0801:                while (f < ns) {
0802:                    if (segs[f] >= 0) {
0803:                        break;
0804:                    }
0805:
0806:                    f++;
0807:                }
0808:
0809:                if ((f >= ns) || (f == 0)) {
0810:                    // The path is empty, or else the original first segment survived,
0811:                    // in which case we already know that no leading "." is needed
0812:                    return;
0813:                }
0814:
0815:                int p = segs[f];
0816:                boolean exception = false;
0817:
0818:                try {
0819:                    while ((path[p] != ':') && (path[p] != '\0')) {
0820:                        p++;
0821:                    }
0822:                } catch (Exception e) {
0823:                    exception = true;
0824:                }
0825:
0826:                if (exception || path[p] == '\0') {
0827:                    // No colon in first segment, so no "." needed
0828:                    return;
0829:                }
0830:
0831:                // At this point we know that the first segment is unused,
0832:                // hence we can insert a "." segment at that position
0833:                path[0] = '.';
0834:                path[1] = '\0';
0835:                segs[0] = 0;
0836:            }//maybeAddLeadingDot()
0837:
0838:            // Normalize the given path string.  A normal path string has no empty
0839:            // segments (i.e., occurrences of "//"), no segments equal to ".", and no
0840:            // segments equal to ".." that are preceded by a segment not equal to "..".
0841:            // In contrast to Unix-style pathname normalization, for URI paths we
0842:            // always retain trailing slashes.
0843:            public static String normalize(final String ps) {
0844:
0845:                // Does this path need normalization?
0846:                final int ns = needsNormalization(ps); // Number of segments
0847:                if (ns < 0) {
0848:                    // Nope -- just return it
0849:                    return ps;
0850:                }
0851:
0852:                char[] path = ps.toCharArray(); // Path in char-array form
0853:
0854:                // Split path into segments
0855:                int[] segs = new int[ns]; // Segment-index array
0856:                split(path, segs);
0857:
0858:                // Remove dots
0859:                removeDots(path, segs);
0860:
0861:                // Prevent scheme-name confusion
0862:                maybeAddLeadingDot(path, segs);
0863:
0864:                // Join the remaining segments and return the result
0865:                return new String(path, 0, join(path, segs));
0866:            }
0867:
0868:            // -- Character classes for parsing --
0869:
0870:            // RFC2396 precisely specifies which characters in the US-ASCII charset are
0871:            // permissible in the various components of a URI reference.  We here
0872:            // define a set of mask pairs to aid in enforcing these restrictions.  Each
0873:            // mask pair consists of two longs, a low mask and a high mask.  Taken
0874:            // together they represent a 128-bit mask, where bit i is set iff the
0875:            // character with value i is permitted.
0876:            //
0877:            // This approach is more efficient than sequentially searching arrays of
0878:            // permitted characters.  It could be made still more efficient by
0879:            // precompiling the mask information so that a character's presence in a
0880:            // given mask could be determined by a single table lookup.
0881:            // Compute the low-order mask for the characters in the given string
0882:            private static long lowMask(final String chars) {
0883:                final int n = chars.length();
0884:                long m = 0;
0885:                for (int i = 0; i < n; i++) {
0886:                    char c = chars.charAt(i);
0887:                    if (c < 64)
0888:                        m |= (1L << c);
0889:                }
0890:                return m;
0891:            }
0892:
0893:            // Compute the high-order mask for the characters in the given string
0894:            private static long highMask(final String chars) {
0895:                final int n = chars.length();
0896:                long m = 0;
0897:                for (int i = 0; i < n; i++) {
0898:                    char c = chars.charAt(i);
0899:                    if ((c >= 64) && (c < 128))
0900:                        m |= (1L << (c - 64));
0901:                }
0902:                return m;
0903:            }
0904:
0905:            // Compute a low-order mask for the characters
0906:            // between first and last, inclusive
0907:            private static long lowMask(final char first, final char last) {
0908:                long m = 0;
0909:                final int f = Math.max(Math.min(first, 63), 0);
0910:                final int l = Math.max(Math.min(last, 63), 0);
0911:                for (int i = f; i <= l; i++)
0912:                    m |= 1L << i;
0913:                return m;
0914:            }
0915:
0916:            // Compute a high-order mask for the characters
0917:            // between first and last, inclusive
0918:            private static long highMask(final char first, final char last) {
0919:                long m = 0;
0920:                final int f = Math.max(Math.min(first, 127), 64) - 64;
0921:                final int l = Math.max(Math.min(last, 127), 64) - 64;
0922:                for (int i = f; i <= l; i++) {
0923:                    m |= 1L << i;
0924:                }
0925:                return m;
0926:            }
0927:
0928:            // Tell whether the given character is permitted by the given mask pair
0929:            private static boolean match(final char c, final long lowMask,
0930:                    final long highMask) {
0931:                if (c < 64)
0932:                    return ((1L << c) & lowMask) != 0;
0933:                if (c < 128)
0934:                    return ((1L << (c - 64)) & highMask) != 0;
0935:                return false;
0936:            }
0937:
0938:            // Character-class masks, in reverse order from RFC2396 because
0939:            // initializers for static fields cannot make forward references.
0940:
0941:            // digit    = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" |
0942:            //            "8" | "9"
0943:            private static final long L_DIGIT = lowMask('0', '9');
0944:            private static final long H_DIGIT = 0L;
0945:
0946:            // upalpha  = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" |
0947:            //            "J" | "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" |
0948:            //            "S" | "T" | "U" | "V" | "W" | "X" | "Y" | "Z"
0949:            private static final long L_UPALPHA = 0L;
0950:            private static final long H_UPALPHA = highMask('A', 'Z');
0951:
0952:            // lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" |
0953:            //            "j" | "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" |
0954:            //            "s" | "t" | "u" | "v" | "w" | "x" | "y" | "z"
0955:            private static final long L_LOWALPHA = 0L;
0956:            private static final long H_LOWALPHA = highMask('a', 'z');
0957:
0958:            // alpha         = lowalpha | upalpha
0959:            private static final long L_ALPHA = L_LOWALPHA | L_UPALPHA;
0960:            private static final long H_ALPHA = H_LOWALPHA | H_UPALPHA;
0961:
0962:            // alphanum      = alpha | digit
0963:            private static final long L_ALPHANUM = L_DIGIT | L_ALPHA;
0964:            private static final long H_ALPHANUM = H_DIGIT | H_ALPHA;
0965:
0966:            // hex           = digit | "A" | "B" | "C" | "D" | "E" | "F" |
0967:            //                         "a" | "b" | "c" | "d" | "e" | "f"
0968:            private static final long L_HEX = L_DIGIT;
0969:            private static final long H_HEX = highMask('A', 'F')
0970:                    | highMask('a', 'f');
0971:
0972:            // mark          = "-" | "_" | "." | "!" | "~" | "*" | "'" |
0973:            //                 "(" | ")"
0974:            private static final long L_MARK = lowMask("-_.!~*'()");
0975:            private static final long H_MARK = highMask("-_.!~*'()");
0976:
0977:            // unreserved    = alphanum | mark
0978:            private static final long L_UNRESERVED = L_ALPHANUM | L_MARK;
0979:            private static final long H_UNRESERVED = H_ALPHANUM | H_MARK;
0980:
0981:            // reserved      = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
0982:            //                 "$" | "," | "[" | "]"
0983:            // Added per RFC2732: "[", "]"
0984:            private static final long L_RESERVED = lowMask(";/?:@&=+$,[]");
0985:            private static final long H_RESERVED = highMask(";/?:@&=+$,[]");
0986:
0987:            // The zero'th bit is used to indicate that escape pairs and non-US-ASCII
0988:            // characters are allowed; this is handled by the scanEscape method below.
0989:            private static final long L_ESCAPED = 1L;
0990:            private static final long H_ESCAPED = 0L;
0991:
0992:            // uric          = reserved | unreserved | escaped
0993:            private static final long L_URIC = L_RESERVED | L_UNRESERVED
0994:                    | L_ESCAPED;
0995:            private static final long H_URIC = H_RESERVED | H_UNRESERVED
0996:                    | H_ESCAPED;
0997:
0998:            // pchar         = unreserved | escaped |
0999:            //                 ":" | "@" | "&" | "=" | "+" | "$" | ","
1000:            private static final long L_PCHAR = L_UNRESERVED | L_ESCAPED
1001:                    | lowMask(":@&=+$,");
1002:            private static final long H_PCHAR = H_UNRESERVED | H_ESCAPED
1003:                    | highMask(":@&=+$,");
1004:
1005:            // All valid path characters
1006:            private static final long L_PATH = L_PCHAR | lowMask(";/");
1007:            private static final long H_PATH = H_PCHAR | highMask(";/");
1008:
1009:            // Dash, for use in domainlabel and toplabel
1010:            private static final long L_DASH = lowMask("-");
1011:            private static final long H_DASH = highMask("-");
1012:
1013:            // Dot, for use in hostnames
1014:            private static final long L_DOT = lowMask(".");
1015:            private static final long H_DOT = highMask(".");
1016:
1017:            // userinfo      = *( unreserved | escaped |
1018:            //                    ";" | ":" | "&" | "=" | "+" | "$" | "," )
1019:            private static final long L_USERINFO = L_UNRESERVED | L_ESCAPED
1020:                    | lowMask(";:&=+$,");
1021:            private static final long H_USERINFO = H_UNRESERVED | H_ESCAPED
1022:                    | highMask(";:&=+$,");
1023:
1024:            // reg_name      = 1*( unreserved | escaped | "$" | "," |
1025:            //                     ";" | ":" | "@" | "&" | "=" | "+" )
1026:            private static final long L_REG_NAME = L_UNRESERVED | L_ESCAPED
1027:                    | lowMask("$,;:@&=+");
1028:            private static final long H_REG_NAME = H_UNRESERVED | H_ESCAPED
1029:                    | highMask("$,;:@&=+");
1030:
1031:            // All valid characters for server-based authorities
1032:            private static final long L_SERVER = L_USERINFO | L_ALPHANUM
1033:                    | L_DASH | lowMask(".:@[]");
1034:            private static final long H_SERVER = H_USERINFO | H_ALPHANUM
1035:                    | H_DASH | highMask(".:@[]");
1036:
1037:            // scheme        = alpha *( alpha | digit | "+" | "-" | "." )
1038:            private static final long L_SCHEME = L_ALPHA | L_DIGIT
1039:                    | lowMask("+-.");
1040:            private static final long H_SCHEME = H_ALPHA | H_DIGIT
1041:                    | highMask("+-.");
1042:
1043:            private static void appendEscape(final StringBuffer sb, final byte b) {
1044:                //Bug No:  4701655 - Don't escape the chars
1045:                sb.append((char) b);
1046:
1047:                /*sb.append( '%' );
1048:                sb.append( hexDigits[( b >> 4 ) & 0x0f] );
1049:                sb.append( hexDigits[( b >> 0 ) & 0x0f] );*/
1050:            }//appendEscape()
1051:
1052:            private static void appendEncoded(final StringBuffer sb,
1053:                    final char c) {
1054:                /*ByteBuffer bb = null;
1055:                try
1056:                {
1057:                    bb = ThreadLocalCoders.encoderFor( "UTF-8" )
1058:                	    .encode( CharBuffer.wrap( "" + c ) );
1059:                }
1060:                catch ( CharacterCodingException x )
1061:                {
1062:                    //assert
1063:                    false;
1064:                }
1065:                while ( bb.hasRemaining() )
1066:                {
1067:                    int b = bb.get() & 0xff;
1068:                    if ( b >= 0x80 )
1069:                	appendEscape( sb, (byte) b );
1070:                    else
1071:                	sb.append( (char) b );
1072:                }*/
1073:                sb.append(c);
1074:            }//appendEncoded()
1075:
1076:            // Quote any characters in s that are not permitted
1077:            // by the given mask pair
1078:            private static String quote(final String s, final long lowMask,
1079:                    final long highMask) {
1080:                StringBuffer sb = null;
1081:                final boolean allowNonASCII = ((lowMask & L_ESCAPED) != 0);
1082:                for (int i = 0; i < s.length(); i++) {
1083:                    char c = s.charAt(i);
1084:                    if (c < '\u0080') {
1085:                        if (!match(c, lowMask, highMask)) {
1086:                            if (sb == null) {
1087:                                sb = new StringBuffer();
1088:                                sb.append(s.substring(0, i));
1089:                            }
1090:
1091:                            appendEscape(sb, (byte) c);
1092:                        } else {
1093:                            if (sb != null) {
1094:                                sb.append(c);
1095:                            }
1096:                        }
1097:                    } else if (allowNonASCII
1098:                            && (Character.isSpaceChar(c) || Character
1099:                                    .isISOControl(c))) {
1100:                        if (sb == null) {
1101:                            sb = new StringBuffer();
1102:                            sb.append(s.substring(0, i));
1103:                        }
1104:
1105:                        appendEncoded(sb, c);
1106:                    } else {
1107:                        if (sb != null) {
1108:                            sb.append(c);
1109:                        }
1110:                    }
1111:                }
1112:
1113:                return (sb == null) ? s : sb.toString();
1114:            }
1115:
1116:            // -- Parsing --
1117:            // For convenience we wrap the input URI string in a new instance of the
1118:            // following internal class.  This saves always having to pass the input
1119:            // string as an argument to each internal scan/parse method.
1120:            private class Parser {
1121:                private String input; // URI input string
1122:                private boolean requireServerAuthority = false;
1123:
1124:                Parser(final String s) {
1125:                    input = s;
1126:                    string = s;
1127:                }//constructor
1128:
1129:                // -- Methods for throwing URISyntaxException in various ways --
1130:                private void fail(final String reason)
1131:                        throws URISyntaxException {
1132:                    throw new URISyntaxException(input, reason);
1133:                }//fail()
1134:
1135:                private void fail(final String reason, final int p)
1136:                        throws URISyntaxException {
1137:                    throw new URISyntaxException(input, reason, p);
1138:                }//fail
1139:
1140:                private void failExpecting(final String expected, final int p)
1141:                        throws URISyntaxException {
1142:                    fail("Expected " + expected, p);
1143:                }//failExpecting()
1144:
1145:                // -- Simple access to the input string --
1146:                // Return a substring of the input string
1147:                private String substring(final int start, final int end) {
1148:                    return input.substring(start, end);
1149:                }//substring()
1150:
1151:                // Return the char at position p,
1152:                // assuming that p < input.length()
1153:                private char charAt(final int p) {
1154:                    return input.charAt(p);
1155:                }//charAt()
1156:
1157:                // Tells whether start < end and, if so, whether charAt(start) == c
1158:                private boolean at(final int start, final int end, final char c) {
1159:                    return (start < end) && (charAt(start) == c);
1160:                }//at()
1161:
1162:                // Tells whether start + s.length() <= end and, if so,
1163:                // whether the chars at the start position match s exactly
1164:                private boolean at(final int start, final int end,
1165:                        final String s) {
1166:                    int p = start;
1167:                    final int sn = s.length();
1168:                    if (sn > end - p)
1169:                        return false;
1170:                    int i = 0;
1171:                    while (i < sn) {
1172:                        if (charAt(p++) != s.charAt(i++)) {
1173:                            break;
1174:                        }
1175:                    }
1176:
1177:                    return (i == sn);
1178:                }//at()
1179:
1180:                // -- Scanning --
1181:
1182:                // The various scan and parse methods that follow use a uniform
1183:                // convention of taking the current start position and end index as
1184:                // their first two arguments.  The start is inclusive while the end is
1185:                // exclusive, just as in the String class, i.e., a start/end pair
1186:                // denotes the half-open interval [start, end) of the input string.
1187:                //
1188:                // These methods never proceed past the end position.  They may return
1189:                // -1 to indicate outright failure, but more often they simply return
1190:                // the position of the first char after the last char scanned.  Thus
1191:                // a typical idiom is
1192:                //
1193:                //     int p = start;
1194:                //     int q = scan(p, end, ...);
1195:                //     if (q > p)
1196:                //         // We scanned something
1197:                //         ...;
1198:                //     else if (q == p)
1199:                //         // We scanned nothing
1200:                //         ...;
1201:                //     else if (q == -1)
1202:                //         // Something went wrong
1203:                //         ...;
1204:
1205:                // Scan a specific char: If the char at the given start position is
1206:                // equal to c, return the index of the next char; otherwise, return the
1207:                // start position.
1208:                private int scan(final int start, final int end, final char c) {
1209:                    if ((start < end) && (charAt(start) == c))
1210:                        return start + 1;
1211:                    return start;
1212:                }
1213:
1214:                // Scan forward from the given start position.  Stop at the first char
1215:                // in the err string (in which case -1 is returned), or the first char
1216:                // in the stop string (in which case the index of that char is
1217:                // returned), or the end position (in which case the end index is
1218:                // returned).  Returns the start position if the first char is a stop
1219:                // char or if start >= end.
1220:                private int scan(final int start, final int end,
1221:                        final String err, final String stop) {
1222:                    int p = start;
1223:                    while (p < end) {
1224:                        char c = charAt(p);
1225:
1226:                        if (err.indexOf(c) >= 0) {
1227:                            return -1;
1228:                        }
1229:
1230:                        if (stop.indexOf(c) >= 0) {
1231:                            break;
1232:                        }
1233:                        p++;
1234:                    }
1235:                    return p;
1236:                }//scan()
1237:
1238:                // Scan a potential escape sequence, starting at the given position,
1239:                // with the given first char (i.e., charAt(start) == c).
1240:                //
1241:                // This method assumes that if escapes are allowed then visible
1242:                // non-US-ASCII chars are also allowed.
1243:                private int scanEscape(final int start, final int n,
1244:                        final char first) throws URISyntaxException {
1245:                    final int p = start;
1246:                    final char c = first;
1247:                    if (c == '%') {
1248:                        //by default only hex numbers are allowed..
1249:                        // Process escape pair
1250:                        if ((p + 3 <= n) && match(charAt(p + 1), L_HEX, H_HEX)
1251:                                && match(charAt(p + 2), L_HEX, H_HEX)) {
1252:                            return p + 3;
1253:                        }
1254:                        //nag fix - '&' followed by '%' is allowed
1255:                        else if (charAt(p + 1) == '&') {
1256:                            return p + 1;
1257:                        }
1258:
1259:                        fail("Malformed escape pair", p);
1260:                    } else if ((c > 128) && !Character.isSpaceChar(c)
1261:                            && !Character.isISOControl(c)) {
1262:                        // Allow unescaped but visible non-US-ASCII chars
1263:                        return p + 1;
1264:                    }
1265:                    return p;
1266:                }
1267:
1268:                // Browsers such as Netscape 6.2 escape any unescaped chars in a URI
1269:                // when they make a request, so tolerate the invalid chars here.
1270:                // Scan chars that match the given mask pair
1271:                private int scan(final int start, final int n,
1272:                        final long lowMask, final long highMask)
1273:                        throws URISyntaxException {
1274:                    int p = start;
1275:                    while (p < n) {
1276:                        char c = charAt(p);
1277:                        //Bug No:  4701655 - Accept windows style URI's i.e .\abc.html
1278:                        if ((c == '\\') || match(c, lowMask, highMask)) {
1279:                            p++;
1280:                            continue;
1281:                        }
1282:                        if ((lowMask & L_ESCAPED) != 0) {
1283:                            int q = scanEscape(p, n, c);
1284:                            if (q > p) {
1285:                                p = q;
1286:                                continue;
1287:                            }
1288:                        }
1289:                        break;
1290:                    }
1291:                    return p;
1292:                }//scan()
1293:
1294:                // Check that each of the chars in [start, end) matches the given mask
1295:                private void checkChars(final int start, final int end,
1296:                        final long lowMask, final long highMask,
1297:                        final String what) throws URISyntaxException {
1298:                    final int p = scan(start, end, lowMask, highMask);
1299:
1300:                    if (p < end) {
1301:                        fail("Illegal character in " + what, p);
1302:                    }
1303:                }//checkChars()
1304:
1305:                // Check that the char at position p matches the given mask
1306:                private void checkChar(final int p, final long lowMask,
1307:                        final long highMask, final String what)
1308:                        throws URISyntaxException {
1309:                    checkChars(p, p + 1, lowMask, highMask, what);
1310:                }
1311:
1312:                // -- Parsing --
1313:
1314:                // [<scheme>:]<scheme-specific-part>[#<fragment>]
1315:                void parse(final boolean rsa) throws URISyntaxException {
1316:                    requireServerAuthority = rsa;
1317:                    final int ssp; // Start of scheme-specific part
1318:                    final int n = input.length();
1319:                    int p = scan(0, n, "/?#", ":");
1320:                    if ((p >= 0) && at(p, n, ':')) {
1321:                        if (p == 0) {
1322:                            failExpecting("scheme name", 0);
1323:                        }
1324:
1325:                        checkChar(0, L_ALPHA, H_ALPHA, "scheme name");
1326:                        checkChars(1, p, L_SCHEME, H_SCHEME, "scheme name");
1327:                        scheme = substring(0, p);
1328:                        p++; // Skip ':'
1329:                        ssp = p;
1330:
1331:                        if (at(p, n, '/')) {
1332:                            p = parseHierarchical(p, n);
1333:                        } else {
1334:                            int q = scan(p, n, "", "#");
1335:                            if (q <= p) {
1336:                                failExpecting("scheme-specific part", p);
1337:                            }
1338:
1339:                            checkChars(p, q, L_URIC, H_URIC, "opaque part");
1340:                            p = q;
1341:                        }
1342:                    } else {
1343:                        ssp = 0;
1344:                        p = parseHierarchical(0, n);
1345:                    }
1346:
1347:                    schemeSpecificPart = substring(ssp, p);
1348:
1349:                    if (at(p, n, '#')) {
1350:                        //reference check
1351:                        //BugNo:4762844, 4744455
1352:                        //Don't Check the validity of the Reference
1353:                        //checkChars( p + 1, n, L_URIC, H_URIC, "fragment" );
1354:                        fragment = substring(p + 1, n);
1355:                        p = n;
1356:                    }
1357:
1358:                    if (p < n) {
1359:                        fail("end of URI", p);
1360:                    }
1361:                }//parse()
1362:
1363:                // [//authority]<path>[?<query>]
1364:                //
1365:                // DEVIATION from RFC2396: We allow an empty authority component as
1366:                // long as it's followed by a non-empty path, query component, or
1367:                // fragment component.  This is so that URIs such as "file:///foo/bar"
1368:                // will parse.  This seems to be the intent of RFC2396, though the
1369:                // grammar does not permit it.  If the authority is empty then the
1370:                // userInfo, host, and port components are undefined.
1371:                //
1372:                // DEVIATION from RFC2396: We allow empty relative paths.  This seems
1373:                // to be the intent of RFC2396, but the grammar does not permit it.
1374:                // The primary consequence of this deviation is that "#f" parses as a
1375:                // relative URI with an empty path.
1376:                private int parseHierarchical(final int start, final int n)
1377:                        throws URISyntaxException {
1378:                    int p = start;
1379:                    if (at(p, n, '/') && at(p + 1, n, '/')) {
1380:                        p += 2;
1381:                        int q = scan(p, n, "", "/?#");
1382:                        if (q > p) {
1383:                            p = parseAuthority(p, q);
1384:                        } else if (q < n) {
1385:                            // DEVIATION: Allow empty authority prior to non-empty
1386:                            // path, query component or fragment identifier
1387:                        } else {
1388:                            failExpecting("authority", p);
1389:                        }
1390:                    }
1391:
1392:                    int q = scan(p, n, "", "?#"); // DEVIATION: May be empty
1393:                    //Nag As the browser do the encoding automatically, let us not validate the
1394:                    //path and query
1395:                    //Bug No:4744455, 4762844
1396:                    //checkChars( p, q, L_PATH, H_PATH, "path" );
1397:                    //Bug No:4744455
1398:                    path = substring(p, q);
1399:                    p = q;
1400:
1401:                    if (at(p, n, '?')) {
1402:                        p++;
1403:                        q = scan(p, n, "", "#");
1404:                        //Nag:
1405:                        //As the browser does the encoding automatically, let us not validate the
1406:                        //path and query
1407:                        //Bug No:4744455, 4762844
1408:                        //checkChars( p, q, L_URIC, H_URIC, "query" );
1409:                        //Bug No:4744455
1410:
1411:                        query = substring(p, q);
1412:                        p = q;
1413:                    }
1414:                    return p;
1415:                }//parseHierarchical()
1416:
1417:                // authority     = server | reg_name
1418:                //
1419:                // Ambiguity: An authority that is a registry name rather than a server
1420:                // might have a prefix that parses as a server.  We use the fact that
1421:                // the authority component is always followed by '/' or the end of the
1422:                // input string to resolve this: If the complete authority did not
1423:                // parse as a server then we try to parse it as a registry name.
1424:                private int parseAuthority(final int start, final int n)
1425:                        throws URISyntaxException {
1426:                    final int p = start;
1427:                    int q = p;
1428:                    URISyntaxException ex = null;
1429:
1430:                    final boolean serverChars = (scan(p, n, L_SERVER, H_SERVER) == n);
1431:                    final boolean regChars = (scan(p, n, L_REG_NAME, H_REG_NAME) == n);
1432:
1433:                    if (regChars && !serverChars) {
1434:                        // Must be a registry-based authority
1435:                        authority = substring(p, n);
1436:                        return n;
1437:                    }
1438:
1439:                    if (serverChars) {
1440:                        // Might be (probably is) a server-based authority, so attempt
1441:                        // to parse it as such.  If the attempt fails, try to treat it
1442:                        // as a registry-based authority.
1443:                        try {
1444:                            q = parseServer(p, n);
1445:                            if (q < n)
1446:                                failExpecting("end of authority", q);
1447:                            authority = substring(p, n);
1448:                        } catch (URISyntaxException x) {
1449:                            // Undo results of failed parse
1450:                            userInfo = null;
1451:                            host = null;
1452:                            port = -1;
1453:                            if (requireServerAuthority) {
1454:                                // If we're insisting upon a server-based authority,
1455:                                // then just re-throw the exception
1456:                                throw x;
1457:                            } else {
1458:                                // Save the exception in case it doesn't parse as a
1459:                                // registry either
1460:                                ex = x;
1461:                                q = p;
1462:                            }
1463:                        }
1464:                    }
1465:
1466:                    if (q < n) {
1467:                        if (regChars) {
1468:                            // Registry-based authority
1469:                            authority = substring(p, n);
1470:                        } else if (ex != null) {
1471:                            // Re-throw exception; it was probably due to
1472:                            // a malformed IPv6 address
1473:                            throw ex;
1474:                        } else {
1475:                            fail("Illegal character in authority", q);
1476:                        }
1477:                    }
1478:
1479:                    return n;
1480:                }//parseAuthority()
1481:
1482:                // [<userinfo>@]<host>[:<port>]
1483:                private int parseServer(final int start, final int n)
1484:                        throws URISyntaxException {
1485:                    int p = start;
1486:                    int q;
1487:
1488:                    // userinfo
1489:                    q = scan(p, n, "/?#", "@");
1490:                    if ((q >= p) && at(q, n, '@')) {
1491:                        checkChars(p, q, L_USERINFO, H_USERINFO, "user info");
1492:                        userInfo = substring(p, q);
1493:                        p = q + 1; // Skip '@'
1494:                    }
1495:
1496:                    // hostname, IPv4 address, or IPv6 address
1497:                    if (at(p, n, '[')) {
1498:                        // DEVIATION from RFC2396: Support IPv6 addresses, per RFC2732
1499:                        p++;
1500:                        q = scan(p, n, "/?#", "]");
1501:                        if ((q > p) && at(q, n, ']')) {
1502:                            parseIPv6Reference(p, q);
1503:                            p = q + 1;
1504:                        } else {
1505:                            failExpecting("closing bracket for IPv6 address", q);
1506:                        }
1507:                    } else {
1508:                        q = parseIPv4Address(p, n);
1509:                        if (q <= p)
1510:                            q = parseHostname(p, n);
1511:                        p = q;
1512:                    }
1513:
1514:                    // port
1515:                    if (at(p, n, ':')) {
1516:                        p++;
1517:                        q = scan(p, n, "", "/");
1518:                        if (q > p) {
1519:                            checkChars(p, q, L_DIGIT, H_DIGIT, "port number");
1520:                            try {
1521:                                port = Integer.parseInt(substring(p, q));
1522:                            } catch (NumberFormatException x) {
1523:                                fail("Malformed port number", p);
1524:                            }
1525:                            p = q;
1526:                        }
1527:                    }
1528:                    if (p < n)
1529:                        failExpecting("port number", p);
1530:
1531:                    return p;
1532:                }//parseServer()
1533:
1534:                // Scan a string of decimal digits whose value fits in a byte
1535:                private int scanByte(final int start, final int n)
1536:                        throws URISyntaxException {
1537:                    final int p = start;
1538:                    final int q = scan(p, n, L_DIGIT, H_DIGIT);
1539:
1540:                    if (q <= p) {
1541:                        return q;
1542:                    }
1543:
1544:                    if (Integer.parseInt(substring(p, q)) > 255) {
1545:                        return p;
1546:                    }
1547:
1548:                    return q;
1549:                }//scanByte()
1550:
1551:                // Scan an IPv4 address.
1552:                //
1553:                // If the strict argument is true then we require that the given
1554:                // interval contain nothing besides an IPv4 address; if it is false
1555:                // then we only require that it start with an IPv4 address.
1556:                //
1557:                // If the interval does not contain or start with (depending upon the
1558:                // strict argument) a legal IPv4 address characters then we return -1
1559:                // immediately; otherwise we insist that these characters parse as a
1560:                // legal IPv4 address and throw an exception on failure.
1561:                //
1562:                // We assume that any string of decimal digits and dots must be an IPv4
1563:                // address.  It won't parse as a hostname anyway, so making that
1564:                // assumption here allows more meaningful exceptions to be thrown.
1565:                private int scanIPv4Address(final int start, final int n,
1566:                        final boolean strict) throws URISyntaxException {
1567:                    int p = start;
1568:                    int q;
1569:                    final int m = scan(p, n, L_DIGIT | L_DOT, H_DIGIT | H_DOT);
1570:                    if ((m <= p) || (strict && (m != n))) {
1571:                        return -1;
1572:                    }
1573:
1574:                    for (;;) {
1575:                        // Per RFC2732: At most three digits per byte
1576:                        // Further constraint: Each element fits in a byte
1577:                        if ((q = scanByte(p, m)) <= p) {
1578:                            break;
1579:                        }
1580:
1581:                        p = q;
1582:                        if ((q = scan(p, m, '.')) <= p) {
1583:                            break;
1584:                        }
1585:
1586:                        p = q;
1587:                        if ((q = scanByte(p, m)) <= p) {
1588:                            break;
1589:                        }
1590:
1591:                        p = q;
1592:                        if ((q = scan(p, m, '.')) <= p) {
1593:                            break;
1594:                        }
1595:
1596:                        p = q;
1597:                        if ((q = scanByte(p, m)) <= p) {
1598:                            break;
1599:                        }
1600:
1601:                        p = q;
1602:                        if ((q = scan(p, m, '.')) <= p) {
1603:                            break;
1604:                        }
1605:
1606:                        p = q;
1607:                        if ((q = scanByte(p, m)) <= p) {
1608:                            break;
1609:                        }
1610:
1611:                        p = q;
1612:                        if (q < m) {
1613:                            break;
1614:                        }
1615:
1616:                        return q;
1617:                    }
1618:
1619:                    fail("Malformed IPv4 address", q);
1620:                    return -1;
1621:                }//scanIPv4Address()
1622:
1623:                // Take an IPv4 address: Throw an exception if the given interval
1624:                // contains anything except an IPv4 address
1625:                private int takeIPv4Address(final int start, final int n,
1626:                        final String expected) throws URISyntaxException {
1627:                    final int p = scanIPv4Address(start, n, true);
1628:                    if (p <= start)
1629:                        failExpecting(expected, start);
1630:                    return p;
1631:                }
1632:
1633:                // Attempt to parse an IPv4 address, returning -1 on failure but
1634:                // allowing the given interval to contain characters after the IPv4
1635:                // address (e.g., [:<port>])
1636:                private int parseIPv4Address(final int start, final int n)
1637:                        throws URISyntaxException {
1638:                    final int p = scanIPv4Address(start, n, false);
1639:                    if (p > start)
1640:                        host = substring(start, p);
1641:                    return p;
1642:                }
1643:
1644:                // hostname      = *( domainlabel "." ) toplabel [ "." ]
1645:                // domainlabel   = alphanum | alphanum *( alphanum | "-" ) alphanum
1646:                // toplabel      = alpha | alpha *( alphanum | "-" ) alphanum
1647:                private int parseHostname(final int start, final int n)
1648:                        throws URISyntaxException {
1649:                    int p = start;
1650:                    int q;
1651:                    int l = -1; // Start of last parsed label
1652:
1653:                    do {
1654:                        // domainlabel = alphanum [ *( alphanum | "-" ) alphanum ]
1655:                        q = scan(p, n, L_ALPHANUM, H_ALPHANUM);
1656:                        if (q <= p)
1657:                            break;
1658:                        l = p;
1659:                        if (q > p) {
1660:                            p = q;
1661:                            q = scan(p, n, L_ALPHANUM | L_DASH, H_ALPHANUM
1662:                                    | H_DASH);
1663:                            if (q > p) {
1664:                                if (charAt(q - 1) == '-')
1665:                                    fail("Illegal character in hostname", q - 1);
1666:                                p = q;
1667:                            }
1668:                        }
1669:                        q = scan(p, n, '.');
1670:                        if (q <= p)
1671:                            break;
1672:                        p = q;
1673:                    } while (p < n);
1674:
1675:                    if ((p < n) && !at(p, n, ':'))
1676:                        fail("Illegal character in hostname", p);
1677:
1678:                    if (l < 0)
1679:                        failExpecting("hostname", start);
1680:
1681:                    // Make sure last parsed label (= toplabel) starts with a letter
1682:                    if (!match(charAt(l), L_ALPHA, H_ALPHA))
1683:                        fail("Illegal character in hostname", l);
1684:
1685:                    host = substring(start, p);
1686:                    return p;
1687:                }
1688:
1689:                // IPv6 address parsing, from RFC2373: IPv6 Addressing Architecture
1690:                //
1691:                // Bug: The grammar in RFC2373 Appendix B does not allow addresses of
1692:                // the form ::12.34.56.78, which are clearly shown in the examples
1693:                // earlier in the document.  Here is the original grammar:
1694:                //
1695:                //   IPv6address = hexpart [ ":" IPv4address ]
1696:                //   hexpart     = hexseq | hexseq "::" [ hexseq ] | "::" [ hexseq ]
1697:                //   hexseq      = hex4 *( ":" hex4)
1698:                //   hex4        = 1*4HEXDIG
1699:                //
1700:                // We therefore use the following revised grammar:
1701:                //
1702:                //   IPv6address = hexseq [ ":" IPv4address ]
1703:                //                 | hexseq [ "::" [ hexpost ] ]
1704:                //                 | "::" [ hexpost ]
1705:                //   hexpost     = hexseq | hexseq ":" IPv4address | IPv4address
1706:                //   hexseq      = hex4 *( ":" hex4)
1707:                //   hex4        = 1*4HEXDIG
1708:                //
1709:                // This covers all and only the following cases:
1710:                //
1711:                //   hexseq
1712:                //   hexseq : IPv4address
1713:                //   hexseq ::
1714:                //   hexseq :: hexseq
1715:                //   hexseq :: hexseq : IPv4address
1716:                //   hexseq :: IPv4address
1717:                //   :: hexseq
1718:                //   :: hexseq : IPv4address
1719:                //   :: IPv4address
1720:                //   ::
1721:                //
1722:                // Finally, we also limit the length of an IPv6 address so that no more
1723:                // than sixteen bytes may be specified.
1724:
1725:                private int ipv6byteCount = 0;
1726:
1727:                private int parseIPv6Reference(final int start, final int n)
1728:                        throws URISyntaxException {
1729:                    int p = start;
1730:                    final int q;
1731:
1732:                    q = scanHexSeq(p, n);
1733:                    if (q > p) {
1734:                        p = q;
1735:                        if (at(p, n, "::"))
1736:                            p = scanHexPost(p + 2, n);
1737:                        else if (at(p, n, ':')) {
1738:                            p = takeIPv4Address(p + 1, n, "IPv4 address");
1739:                            ipv6byteCount += 4;
1740:                        }
1741:                    } else if (at(p, n, "::")) {
1742:                        p = scanHexPost(p + 2, n);
1743:                    }
1744:                    if (p < n)
1745:                        fail("Malformed IPv6 address", start);
1746:                    if (ipv6byteCount > 16)
1747:                        fail("IPv6 address too long", start);
1748:
1749:                    host = substring(start - 1, p + 1);
1750:                    return p;
1751:                }
1752:
1753:                private int scanHexPost(final int start, final int n)
1754:                        throws URISyntaxException {
1755:                    int p = start;
1756:                    int q;
1757:
1758:                    if (p == n)
1759:                        return p;
1760:
1761:                    q = scanHexSeq(p, n);
1762:                    if (q > p) {
1763:                        p = q;
1764:                        if (at(p, n, ':')) {
1765:                            p++;
1766:                            p = takeIPv4Address(p, n,
1767:                                    "hex digits or IPv4 address");
1768:                            ipv6byteCount += 4;
1769:                        }
1770:                    } else {
1771:                        p = takeIPv4Address(p, n, "hex digits or IPv4 address");
1772:                        ipv6byteCount += 4;
1773:                    }
1774:                    return p;
1775:                }
1776:
1777:                // Scan a hex sequence; return -1 if one could not be scanned
1778:                private int scanHexSeq(final int start, final int n)
1779:                        throws URISyntaxException {
1780:                    int p = start;
1781:                    int q;
1782:
1783:                    q = scan(p, n, L_HEX, H_HEX);
1784:                    if (q <= p) {
1785:                        return -1;
1786:                    }
1787:
1788:                    if (at(q, n, '.')) // Beginning of IPv4 address
1789:                    {
1790:                        return -1;
1791:                    }
1792:
1793:                    ipv6byteCount += 2;
1794:                    p = q;
1795:                    while (p < n) {
1796:                        if (!at(p, n, ':')) {
1797:                            break;
1798:                        }
1799:
1800:                        if (at(p + 1, n, ':')) {
1801:                            break; // "::"
1802:                        }
1803:
1804:                        p++;
1805:                        q = scan(p, n, L_HEX, H_HEX);
1806:                        if (q <= p) {
1807:                            failExpecting("digits for an IPv6 address", p);
1808:                        }
1809:
1810:                        if (at(q, n, '.')) { // Beginning of IPv4 address
1811:                            p--;
1812:                            break;
1813:                        }
1814:
1815:                        if (q > p + 4) {
1816:                            fail("IPv6 hexadecimal digit sequence too long", p);
1817:                        }
1818:
1819:                        ipv6byteCount += 2;
1820:                        p = q;
1821:                    }
1822:
1823:                    return p;
1824:                }
1825:            }//class Parser
1826:
1827:            public static void main(String[] args) throws Exception {
1828:                String uri = "#";
1829:
1830:                //"http://rajanagendra.India.Sun.COM/ips/desktop?action=content&provider=ipsdtPopupContainer&last=false&leafChannel=ipsdtSampleRSS&fontFace1=Sans-serif&size=100%&containerName=ipsdtTableContainer3&action=content&provider=ipsdtTabContainer&provider_cmds=%3CA+HREF%3D%22http%3A%2F%2Frajanagendra.India.Sun.COM%2Fips-static%2Fdocs%2Fen%2Fdesktop%2Frsschann.htm%22+target%3D%22wthelp%22+onClick%3D%22javascript%3A+var+helpWin%3Dwindow.open%28%27http%3A%2F%2Frajanagendra.India.Sun.COM%2Fips-static%2Fdocs%2Fen%2Fdesktop%2Frsschann.htm%27%2C+%27wthelp%27%2C+%27width%3D600%2Cheight%3D500%2Chotkeys%3Dno%2Cstatus%3Dno%2Cresizable%3Dyes%2Cscrollbars%3Dyes%2Ctoolbar%3Dyes%27%29%3B+helpWin.focus%28%29%3Breturn+false%3B%22%3E%3CIMG+SRC%3D%27%2Fips-static%2Fdesktop%2Fimages%2Fb_help.gif%27+ALT%3D%27Help%27+BORDER%3D0%3E%3C%2FA%3E%3CA+HREF%3D%22javascript%3Avoid%280%29%22+onClick%3D%22openUrlInParent%28%27desktop%3Faction%3Dprocess%26provider%3DipsdtTableContainer3%26ipsdtTableContainer3.channelAction%3Dattach%26ipsdtTableContainer3.targetProvider%3DipsdtSampleRSS%27%29%3B+window.close%28%29%22%3E%3CIMG+SRC%3D%22%2Fips-static%2Fdesktop%2Fimages%2Fb_attach.gif%22+ALT%3D%22Attach++ipsdtSampleRSS%22+BORDER%3D0%3E%3C%2FA%3E%3CA+HREF%3D%22javascript%3Avoid%280%29%22+onClick%3D%22openUrlInParent%28%27desktop%3Faction%3Dprocess%26provider%3DipsdtTableContainer3%26ipsdtTableContainer3.channelAction%3Dremove%26ipsdtTableContainer3.targetProvider%3DipsdtSampleRSS%27%29%3B+window.close%28%29%22%3E%3CIMG+SRC%3D%22%2Fips-static%2Fdesktop%2Fimages%2Fb_remove.gif%22+ALT%3D%22Remove++ipsdtSampleRSS%22+BORDER%3D0%3E%3C%2FA%3E";
1831:
1832:                System.out.println(new URI(uri));
1833:                //System.out.println( normalize( "null" ) );
1834:            }//main()
1835:        }//class URI
www.java2java.com | Contact Us
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.