Source for java.net.URI

   1: /* URI.java -- An URI class
   2:    Copyright (C) 2002, 2004, 2005, 2006, 2008  Free Software Foundation, Inc.
   3: 
   4: This file is part of GNU Classpath.
   5: 
   6: GNU Classpath is free software; you can redistribute it and/or modify
   7: it under the terms of the GNU General Public License as published by
   8: the Free Software Foundation; either version 2, or (at your option)
   9: any later version.
  10: 
  11: GNU Classpath is distributed in the hope that it will be useful, but
  12: WITHOUT ANY WARRANTY; without even the implied warranty of
  13: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14: General Public License for more details.
  15: 
  16: You should have received a copy of the GNU General Public License
  17: along with GNU Classpath; see the file COPYING.  If not, write to the
  18: Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19: 02110-1301 USA.
  20: 
  21: Linking this library statically or dynamically with other modules is
  22: making a combined work based on this library.  Thus, the terms and
  23: conditions of the GNU General Public License cover the whole
  24: combination.
  25: 
  26: As a special exception, the copyright holders of this library give you
  27: permission to link this library with independent modules to produce an
  28: executable, regardless of the license terms of these independent
  29: modules, and to copy and distribute the resulting executable under
  30: terms of your choice, provided that you also meet, for each linked
  31: independent module, the terms and conditions of the license of that
  32: module.  An independent module is a module which is not derived from
  33: or based on this library.  If you modify this library, you may extend
  34: this exception to your version of the library, but you are not
  35: obligated to do so.  If you do not wish to do so, delete this
  36: exception statement from your version. */
  37: 
  38: 
  39: package java.net;
  40: 
  41: import gnu.java.lang.CPStringBuilder;
  42: 
  43: import java.io.IOException;
  44: import java.io.ObjectInputStream;
  45: import java.io.ObjectOutputStream;
  46: import java.io.Serializable;
  47: import java.util.regex.Matcher;
  48: import java.util.regex.Pattern;
  49: 
  50: /**
  51:  * <p>
  52:  * A URI instance represents that defined by
  53:  * <a href="http://www.ietf.org/rfc/rfc3986.txt">RFC3986</a>,
  54:  * with some deviations.
  55:  * </p>
  56:  * <p>
  57:  * At its highest level, a URI consists of:
  58:  * </p>
  59:  * <p><code>[<em>scheme</em><strong>:</strong>]<em>scheme-specific-part</em>
  60:  * [<strong>#</strong><em>fragment</em>]</code>
  61:  * </p>
  62:  * <p>
  63:  * where <strong>#</strong> and <strong>:</strong> are literal characters,
  64:  * and those parts enclosed in square brackets are optional.
  65:  * </p>
  66:  * <p>
  67:  * There are two main types of URI.  An <em>opaque</em> URI is one
  68:  * which just consists of the above three parts, and is not further
  69:  * defined.  An example of such a URI would be a <em>mailto:</em> URI.
  70:  * In contrast, <em>hierarchical</em> URIs give further definition
  71:  * to the scheme-specific part, so as to represent some part of a
  72:  * hierarchical structure:
  73:  * </p>
  74:  * <p>
  75:  * <code>[<strong>//</strong><em>authority</em>][<em>path</em>]
  76:  * [<strong>?</strong><em>query</em>]</code>
  77:  * </p>
  78:  * <p>
  79:  * with <strong>/</strong> and <strong>?</strong> being literal characters.
  80:  * When server-based, the authority section is further subdivided into:
  81:  * </p>
  82:  * <p>
  83:  * <code>[<em>user-info</em><strong>@</strong>]<em>host</em>
  84:  * [<strong>:</strong><em>port</em>]</code>
  85:  * </p>
  86:  * <p>
  87:  * with <strong>@</strong> and <strong>:</strong> as literal characters.
  88:  * Authority sections that are not server-based are said to be registry-based.
  89:  * </p>
  90:  * <p>
  91:  * Hierarchical URIs can be either relative or absolute.  Absolute URIs
  92:  * specify a scheme, and the scheme-specific part of an absolute hierarchical
  93:  * URI starts with a `<strong>/</strong>', while relative URIs don't specify a scheme.  Opaque URIs are always absolute.
  94:  * </p>
  95:  * <p>
  96:  * Each part of the URI may have one of three states: undefined, empty
  97:  * or containing some content.  The former two of these are represented
  98:  * by <code>null</code> and the empty string in Java, respectively.
  99:  * The scheme-specific part may never be undefined.  It also follows from
 100:  * this that the path sub-part may also not be undefined, so as to ensure
 101:  * the former.
 102:  * </p>
 103:  * <h2>Character Escaping and Quoting</h2>
 104:  * <p>
 105:  * The characters that can be used within a valid URI are restricted.
 106:  * There are two main classes of characters which can't be used as is
 107:  * within the URI:
 108:  * </p>
 109:  * <ol>
 110:  * <li><strong>Characters outside the US-ASCII character set</strong>.
 111:  * These have to be <strong>escaped</strong> in order to create
 112:  * an RFC-compliant URI; this means replacing the character with the
 113:  * appropriate hexadecimal value, preceded by a `%'.</li>
 114:  * <li><strong>Illegal characters</strong> (e.g. space characters,
 115:  * control characters) are quoted, which results in them being encoded
 116:  * in the same way as non-US-ASCII characters.</li>
 117:  * </ol>
 118:  * <p>
 119:  * The set of valid characters differs depending on the section of the URI:
 120:  * </p>
 121:  * <ul>
 122:  * <li><strong>Scheme</strong>: Must be an alphanumeric, `-', `.' or '+'.</li>
 123:  * <li><strong>Authority</strong>:Composed of the username, host, port, `@'
 124:  * and `:'.</li>
 125:  * <li><strong>Username</strong>: Allows unreserved or percent-encoded
 126:  * characters, sub-delimiters and `:'.</li>
 127:  * <li><strong>Host</strong>: Allows unreserved or percent-encoded
 128:  * characters, sub-delimiters and square brackets (`[' and `]') for IPv6
 129:  * addresses.</li>
 130:  * <li><strong>Port</strong>: Digits only.</li>
 131:  * <li><strong>Path</strong>: Allows the path characters and `/'.</li>
 132:  * <li><strong>Query</strong>: Allows the path characters, `?' and '/'.</li>
 133:  * <li><strong>Fragment</strong>: Allows the path characters, `?' and '/'.</li>
 134:  * </ul>
 135:  * <p>
 136:  * These definitions reference the following sets of characters:
 137:  * </p>
 138:  * <ul>
 139:  * <li><strong>Unreserved characters</strong>: The alphanumerics plus
 140:  * `-', `.', `_', and `~'.</li>
 141:  * <li><strong>Sub-delimiters</strong>: `!', `$', `&', `(', `)', `*',
 142:  * `+', `,', `;', `=' and the single-quote itself.</li>
 143:  * <li><strong>Path characters</strong>: Unreserved and percent-encoded
 144:  * characters and the sub-delimiters along with `@' and `:'.</li>
 145:  * </ul>
 146:  * <p>
 147:  * The constructors and accessor methods allow the use and retrieval of
 148:  * URI components which contain non-US-ASCII characters directly.
 149:  * They are only escaped when the <code>toASCIIString()</code> method
 150:  * is used.  In contrast, illegal characters are always quoted, with the
 151:  * exception of the return values of the non-raw accessors.
 152:  * </p>
 153:  *
 154:  * @author Ito Kazumitsu (ito.kazumitsu@hitachi-cable.co.jp)
 155:  * @author Dalibor Topic (robilad@kaffe.org)
 156:  * @author Michael Koch (konqueror@gmx.de)
 157:  * @author Andrew John Hughes (gnu_andrew@member.fsf.org)
 158:  * @since 1.4
 159:  */
 160: public final class URI
 161:   implements Comparable<URI>, Serializable
 162: {
  /**
   * For serialization compatibility.
   */
  static final long serialVersionUID = -6052424284110960213L;

  /**
   * Regular expression for parsing URIs.
   *
   * Taken from RFC 2396, Appendix B.
   * This expression doesn't parse IPv6 addresses.
   *
   * The capture groups that are read back are named by the
   * <code>*_GROUP</code> constants below: 2 = scheme, 3 = scheme-specific
   * part, 5 = authority, 6 = path, 8 = query and 10 = fragment.  Groups
   * 1, 4, 7 and 9 are the same components together with their delimiter
   * (`:', `//', `?' and `#'); <code>getURIGroup</code> inspects them to
   * tell an empty component from an undefined one.
   */
  private static final String URI_REGEXP =
    "^(([^:/?#]+):)?((//([^/?#]*))?([^?#]*)(\\?([^#]*))?)?(#(.*))?";

  /**
   * Regular expression for parsing the authority segment.
   *
   * Group 2 is the user information (group 1 includes the trailing `@'),
   * group 3 is the host and group 5 is the port (group 4 includes the
   * leading `:').  The host group excludes `:', so every colon after the
   * user information is treated as the start of the port.
   */
  private static final String AUTHORITY_REGEXP =
    "(([^?#]*)@)?([^?#:]*)(:([0-9]*))?";

  /**
   * Valid characters (taken from rfc2396/3986)
   *
   * Each constant is used as a set of characters: a character is a member
   * if it occurs somewhere in the string.  The larger sets are built by
   * concatenating the smaller ones.
   */
  private static final String RFC2396_DIGIT = "0123456789";
  private static final String RFC2396_LOWALPHA = "abcdefghijklmnopqrstuvwxyz";
  private static final String RFC2396_UPALPHA = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
  private static final String RFC2396_ALPHA =
    RFC2396_LOWALPHA + RFC2396_UPALPHA;
  private static final String RFC2396_ALPHANUM = RFC2396_DIGIT + RFC2396_ALPHA;
  private static final String RFC3986_UNRESERVED = RFC2396_ALPHANUM + "-._~";
  private static final String RFC3986_SUBDELIMS = "!$&'()*+,;=";
  // `%' is a member of the sets below, so an existing percent sign is
  // never quoted again by quote(String, String).
  private static final String RFC3986_REG_NAME =
    RFC3986_UNRESERVED + RFC3986_SUBDELIMS + "%";
  private static final String RFC3986_PCHAR = RFC3986_UNRESERVED +
    RFC3986_SUBDELIMS + ":@%";
  private static final String RFC3986_SEGMENT = RFC3986_PCHAR;
  private static final String RFC3986_PATH_SEGMENTS = RFC3986_SEGMENT + "/";
  // Used for the scheme-specific part, the query and the fragment.
  private static final String RFC3986_SSP = RFC3986_PCHAR + "?/";
  private static final String RFC3986_HOST = RFC3986_REG_NAME + "[]";
  private static final String RFC3986_USERINFO = RFC3986_REG_NAME + ":";

  /**
   * Index of scheme component in parsed URI.
   */
  private static final int SCHEME_GROUP = 2;

  /**
   * Index of scheme-specific-part in parsed URI.
   */
  private static final int SCHEME_SPEC_PART_GROUP = 3;

  /**
   * Index of authority component in parsed URI.
   */
  private static final int AUTHORITY_GROUP = 5;

  /**
   * Index of path component in parsed URI.
   */
  private static final int PATH_GROUP = 6;

  /**
   * Index of query component in parsed URI.
   */
  private static final int QUERY_GROUP = 8;

  /**
   * Index of fragment component in parsed URI.
   */
  private static final int FRAGMENT_GROUP = 10;

  /**
   * Index of userinfo component in parsed authority section.
   */
  private static final int AUTHORITY_USERINFO_GROUP = 2;

  /**
   * Index of host component in parsed authority section.
   */
  private static final int AUTHORITY_HOST_GROUP = 3;

  /**
   * Index of port component in parsed authority section.
   */
  private static final int AUTHORITY_PORT_GROUP = 5;

  /**
   * The compiled version of the URI regular expression.
   * Assigned once in the static initializer.
   */
  private static final Pattern URI_PATTERN;

  /**
   * The compiled version of the authority regular expression.
   * Assigned once in the static initializer.
   */
  private static final Pattern AUTHORITY_PATTERN;

  /**
   * The set of valid hexadecimal characters.
   * Indexed by nibble value when a character is quoted, so quoted
   * characters are always written with upper-case hexadecimal digits.
   */
  private static final String HEX = "0123456789ABCDEF";
 263: 
  // The parsed components.  Each one is kept both in its raw (still
  // quoted) form and in its unquoted form; parseURI fills in both.
  // They are all transient: writeObject stores only the string form
  // and readObject parses it again.
  private transient String scheme;
  private transient String rawSchemeSpecificPart;
  private transient String schemeSpecificPart;
  private transient String rawAuthority;
  private transient String authority;
  private transient String rawUserInfo;
  private transient String userInfo;
  private transient String rawHost;
  private transient String host;
  // -1 is the value the component constructors treat as "no port given".
  private transient int port = -1;
  private transient String rawPath;
  private transient String path;
  private transient String rawQuery;
  private transient String query;
  private transient String rawFragment;
  private transient String fragment;
  // The string form of the URI; the only field written by writeObject.
  private String string;

  /**
   * Static initializer to pre-compile the regular expressions.
   */
  static
  {
    URI_PATTERN = Pattern.compile(URI_REGEXP);
    AUTHORITY_PATTERN = Pattern.compile(AUTHORITY_REGEXP);
  }
 290: 
 291:   private void readObject(ObjectInputStream is)
 292:     throws ClassNotFoundException, IOException
 293:   {
 294:     this.string = (String) is.readObject();
 295:     try
 296:       {
 297:         parseURI(this.string);
 298:       }
 299:     catch (URISyntaxException x)
 300:       {
 301:         // Should not happen.
 302:         throw new RuntimeException(x);
 303:       }
 304:   }
 305: 
 306:   private void writeObject(ObjectOutputStream os) throws IOException
 307:   {
 308:     if (string == null)
 309:       string = toString();
 310:     os.writeObject(string);
 311:   }
 312: 
 313:   /**
 314:    * <p>
 315:    * Returns the string content of the specified group of the supplied
 316:    * matcher.  The returned value is modified according to the following:
 317:    * </p>
 318:    * <ul>
 319:    * <li>If the resulting string has a length greater than 0, then
 320:    * that string is returned.</li>
 321:    * <li>If a string of zero length, is matched, then the content
 322:    * of the preceding group is considered.  If this is also an empty
 323:    * string, then <code>null</code> is returned to indicate an undefined
 324:    * value.  Otherwise, the value is truly the empty string and this is
 325:    * the returned value.</li>
 326:    * </ul>
 327:    * <p>
 328:    * This method is used for matching against all parts of the URI
 329:    * that may be either undefined or empty (i.e. all those but the
 330:    * scheme-specific part and the path).  In each case, the preceding
 331:    * group is the content of the original group, along with some
 332:    * additional distinguishing feature.  For example, the preceding
 333:    * group for the query includes the preceding question mark,
 334:    * while that of the fragment includes the hash symbol.  The presence
 335:    * of these features enables disambiguation between the two cases
 336:    * of a completely unspecified value and a simple non-existant value.
 337:    * The scheme differs in that it will never return an empty string;
 338:    * the delimiter follows the scheme rather than preceding it, so
 339:    * it becomes part of the following section.  The same is true
 340:    * of the user information.
 341:    * </p>
 342:    *
 343:    * @param match the matcher, which contains the results of the URI
 344:    *              matched against the URI regular expression.
 345:    * @return either the matched content, <code>null</code> for undefined
 346:    *         values, or an empty string for a URI part with empty content.
 347:    */
 348:   private static String getURIGroup(Matcher match, int group)
 349:   {
 350:     String matched = match.group(group);
 351:     if (matched == null || matched.length() == 0)
 352:       {
 353:         String prevMatched = match.group(group -1);
 354:         if (prevMatched == null || prevMatched.length() == 0)
 355:           return null;
 356:         else
 357:           return "";
 358:       }
 359:     return matched;
 360:   }
 361: 
 362:   /**
 363:    * Sets fields of this URI by parsing the given string.
 364:    *
 365:    * @param str The string to parse
 366:    *
 367:    * @exception URISyntaxException If the given string violates RFC 2396
 368:    */
 369:   private void parseURI(String str) throws URISyntaxException
 370:   {
 371:     Matcher matcher = URI_PATTERN.matcher(str);
 372: 
 373:     if (matcher.matches())
 374:       {
 375:         scheme = getURIGroup(matcher, SCHEME_GROUP);
 376:         rawSchemeSpecificPart = matcher.group(SCHEME_SPEC_PART_GROUP);
 377:         schemeSpecificPart = unquote(rawSchemeSpecificPart);
 378:         if (!isOpaque())
 379:           {
 380:             rawAuthority = getURIGroup(matcher, AUTHORITY_GROUP);
 381:             rawPath = matcher.group(PATH_GROUP);
 382:             rawQuery = getURIGroup(matcher, QUERY_GROUP);
 383:           }
 384:         rawFragment = getURIGroup(matcher, FRAGMENT_GROUP);
 385:       }
 386:     else
 387:       throw new URISyntaxException(str,
 388:                                    "doesn't match URI regular expression");
 389:     parseServerAuthority();
 390: 
 391:     // We must eagerly unquote the parts, because this is the only time
 392:     // we may throw an exception.
 393:     authority = unquote(rawAuthority);
 394:     userInfo = unquote(rawUserInfo);
 395:     host = unquote(rawHost);
 396:     path = unquote(rawPath);
 397:     query = unquote(rawQuery);
 398:     fragment = unquote(rawFragment);
 399:   }
 400: 
 401:   /**
 402:    * Unquote "%" + hex quotes characters
 403:    *
 404:    * @param str The string to unquote or null.
 405:    *
 406:    * @return The unquoted string or null if str was null.
 407:    *
 408:    * @exception URISyntaxException If the given string contains invalid
 409:    * escape sequences.
 410:    */
 411:   private static String unquote(String str) throws URISyntaxException
 412:   {
 413:     if (str == null)
 414:       return null;
 415:     byte[] buf = new byte[str.length()];
 416:     int pos = 0;
 417:     for (int i = 0; i < str.length(); i++)
 418:       {
 419:         char c = str.charAt(i);
 420:         if (c == '%')
 421:           {
 422:             if (i + 2 >= str.length())
 423:               throw new URISyntaxException(str, "Invalid quoted character");
 424:             int hi = Character.digit(str.charAt(++i), 16);
 425:             int lo = Character.digit(str.charAt(++i), 16);
 426:             if (lo < 0 || hi < 0)
 427:               throw new URISyntaxException(str, "Invalid quoted character");
 428:             buf[pos++] = (byte) (hi * 16 + lo);
 429:           }
 430:         else
 431:           buf[pos++] = (byte) c;
 432:       }
 433:     try
 434:       {
 435:         return new String(buf, 0, pos, "utf-8");
 436:       }
 437:     catch (java.io.UnsupportedEncodingException x2)
 438:       {
 439:         throw (Error) new InternalError().initCause(x2);
 440:       }
 441:   }
 442: 
  /**
   * Quote characters illegal in URIs in given string.
   *
   * Delegates to <code>quote(String, String)</code> with
   * <code>RFC3986_SSP</code> as the set of legal characters.  The
   * constructors in this class use it for the scheme-specific part,
   * the query and the fragment.
   *
   * @param str The string to quote
   *
   * @return The quoted string.
   */
  private static String quote(String str)
  {
    return quote(str, RFC3986_SSP);
  }
 458: 
 459:   /**
 460:    * Quote characters illegal in URI authorities in given string.
 461:    *
 462:    * Replace illegal characters by encoding their UTF-8
 463:    * representation as "%" + hex code for each resulting
 464:    * UTF-8 character.
 465:    *
 466:    * @param str The string to quote
 467:    *
 468:    * @return The quoted string.
 469:    */
 470:   private static String quoteAuthority(String str)
 471:   {
 472:     // Technically, we should be using RFC2396_AUTHORITY, but
 473:     // it contains no additional characters.
 474:     return quote(str, RFC3986_REG_NAME);
 475:   }
 476: 
 477:   /**
 478:    * Quotes the characters in the supplied string that are not part of
 479:    * the specified set of legal characters.
 480:    *
 481:    * @param str the string to quote
 482:    * @param legalCharacters the set of legal characters
 483:    *
 484:    * @return the quoted string.
 485:    */
 486:   private static String quote(String str, String legalCharacters)
 487:   {
 488:     CPStringBuilder sb = new CPStringBuilder(str.length());
 489:     for (int i = 0; i < str.length(); i++)
 490:       {
 491:         char c = str.charAt(i);
 492:         if ((legalCharacters.indexOf(c) == -1)
 493:             && (c <= 127))
 494:           {
 495:             sb.append('%');
 496:             sb.append(HEX.charAt(c / 16));
 497:             sb.append(HEX.charAt(c % 16));
 498:           }
 499:         else
 500:           sb.append(c);
 501:       }
 502:     return sb.toString();
 503:   }
 504: 
  /**
   * Quote characters illegal in URI hosts in given string.
   *
   * Delegates to <code>quote(String, String)</code> with
   * <code>RFC3986_HOST</code> as the set of legal characters, which
   * includes the square brackets `[' and `]'.
   *
   * @param str The string to quote
   *
   * @return The quoted string.
   */
  private static String quoteHost(String str)
  {
    return quote(str, RFC3986_HOST);
  }
 520: 
  /**
   * Quote characters illegal in URI paths in given string.
   *
   * Delegates to <code>quote(String, String)</code> with
   * <code>RFC3986_PATH_SEGMENTS</code> as the set of legal characters,
   * i.e. the path characters plus `/'.
   *
   * @param str The string to quote
   *
   * @return The quoted string.
   */
  private static String quotePath(String str)
  {
    // Technically, we should be using RFC2396_PATH, but
    // it contains no additional characters.
    return quote(str, RFC3986_PATH_SEGMENTS);
  }
 538: 
  /**
   * Quote characters illegal in URI user infos in given string.
   *
   * Delegates to <code>quote(String, String)</code> with
   * <code>RFC3986_USERINFO</code> as the set of legal characters, which
   * includes `:'.
   *
   * @param str The string to quote
   *
   * @return The quoted string.
   */
  private static String quoteUserInfo(String str)
  {
    return quote(str, RFC3986_USERINFO);
  }
 554: 
  /**
   * Creates an URI from the given string.  The string is stored as the
   * string form of this URI and then parsed into its components.
   *
   * @param str The string to create the URI from
   *
   * @exception URISyntaxException If the given string cannot be parsed
   * as a URI
   * @exception NullPointerException If str is null
   */
  public URI(String str) throws URISyntaxException
  {
    // The string form has to be stored first: the authority parser
    // reports errors against it.
    this.string = str;
    parseURI(str);
  }
 568: 
 569:   /**
 570:    * Create an URI from the given components
 571:    *
 572:    * @param scheme The scheme name
 573:    * @param userInfo The username and authorization info
 574:    * @param host The hostname
 575:    * @param port The port number
 576:    * @param path The path
 577:    * @param query The query
 578:    * @param fragment The fragment
 579:    *
 580:    * @exception URISyntaxException If the given string violates RFC 2396
 581:    */
 582:   public URI(String scheme, String userInfo, String host, int port,
 583:              String path, String query, String fragment)
 584:     throws URISyntaxException
 585:   {
 586:     this((scheme == null ? "" : scheme + ":")
 587:          + (userInfo == null && host == null && port == -1 ? "" : "//")
 588:          + (userInfo == null ? "" : quoteUserInfo(userInfo) + "@")
 589:          + (host == null ? "" : quoteHost(host))
 590:          + (port == -1 ? "" : ":" + String.valueOf(port))
 591:          + (path == null ? "" : quotePath(path))
 592:          + (query == null ? "" : "?" + quote(query))
 593:          + (fragment == null ? "" : "#" + quote(fragment)));
 594:   }
 595: 
 596:   /**
 597:    * Create an URI from the given components
 598:    *
 599:    * @param scheme The scheme name
 600:    * @param authority The authority
 601:    * @param path The apth
 602:    * @param query The query
 603:    * @param fragment The fragment
 604:    *
 605:    * @exception URISyntaxException If the given string violates RFC 2396
 606:    */
 607:   public URI(String scheme, String authority, String path, String query,
 608:              String fragment) throws URISyntaxException
 609:   {
 610:     this((scheme == null ? "" : scheme + ":")
 611:          + (authority == null ? "" : "//" + quoteAuthority(authority))
 612:          + (path == null ? "" : quotePath(path))
 613:          + (query == null ? "" : "?" + quote(query))
 614:          + (fragment == null ? "" : "#" + quote(fragment)));
 615:   }
 616: 
  /**
   * Create an URI from the given components.  This is a shorthand for
   * the seven-argument constructor with no user information, no port
   * (-1) and no query.
   *
   * @param scheme The scheme name
   * @param host The hostname
   * @param path The path
   * @param fragment The fragment
   *
   * @exception URISyntaxException If the assembled string cannot be
   * parsed as a URI
   */
  public URI(String scheme, String host, String path, String fragment)
    throws URISyntaxException
  {
    this(scheme, null, host, -1, path, null, fragment);
  }
 632: 
 633:   /**
 634:    * Create an URI from the given components
 635:    *
 636:    * @param scheme The scheme name
 637:    * @param ssp The scheme specific part
 638:    * @param fragment The fragment
 639:    *
 640:    * @exception URISyntaxException If the given string violates RFC 2396
 641:    */
 642:   public URI(String scheme, String ssp, String fragment)
 643:     throws URISyntaxException
 644:   {
 645:     this((scheme == null ? "" : scheme + ":")
 646:          + (ssp == null ? "" : quote(ssp))
 647:          + (fragment == null ? "" : "#" + quote(fragment)));
 648:   }
 649: 
 650:   /**
 651:    * Create an URI from the given string
 652:    *
 653:    * @param str The string to create the URI from
 654:    *
 655:    * @exception IllegalArgumentException If the given string violates RFC 2396
 656:    * @exception NullPointerException If str is null
 657:    */
 658:   public static URI create(String str)
 659:   {
 660:     try
 661:       {
 662:         return new URI(str);
 663:       }
 664:     catch (URISyntaxException e)
 665:       {
 666:         throw (IllegalArgumentException) new IllegalArgumentException()
 667:               .initCause(e);
 668:       }
 669:   }
 670: 
 671:   /**
 672:    * Attempts to parse this URI's authority component, if defined,
 673:    * into user-information, host, and port components.  The purpose
 674:    * of this method was to disambiguate between some authority sections,
 675:    * which form invalid server-based authories, but valid registry
 676:    * based authorities.  In the updated RFC 3986, the authority section
 677:    * is defined differently, with registry-based authorities part of
 678:    * the host section.  Thus, this method is now simply an explicit
 679:    * way of parsing any authority section.
 680:    *
 681:    * @return the URI, with the authority section parsed into user
 682:    *         information, host and port components.
 683:    * @throws URISyntaxException if the given string violates RFC 2396
 684:    */
 685:   public URI parseServerAuthority() throws URISyntaxException
 686:   {
 687:     if (rawAuthority != null)
 688:       {
 689:         Matcher matcher = AUTHORITY_PATTERN.matcher(rawAuthority);
 690: 
 691:         if (matcher.matches())
 692:           {
 693:             rawUserInfo = getURIGroup(matcher, AUTHORITY_USERINFO_GROUP);
 694:             rawHost = getURIGroup(matcher, AUTHORITY_HOST_GROUP);
 695: 
 696:             String portStr = getURIGroup(matcher, AUTHORITY_PORT_GROUP);
 697: 
 698:             if (portStr != null && ! portStr.isEmpty())
 699:               try
 700:                 {
 701:                   port = Integer.parseInt(portStr);
 702:                 }
 703:               catch (NumberFormatException e)
 704:                 {
 705:                   URISyntaxException use =
 706:                     new URISyntaxException
 707:                       (string, "doesn't match URI regular expression");
 708:                   use.initCause(e);
 709:                   throw use;
 710:                 }
 711:           }
 712:         else
 713:           throw new URISyntaxException(string,
 714:                                        "doesn't match URI regular expression");
 715:       }
 716:     return this;
 717:   }
 718: 
 719:   /**
 720:    * <p>
 721:    * Returns a normalized version of the URI.  If the URI is opaque,
 722:    * or its path is already in normal form, then this URI is simply
 723:    * returned.  Otherwise, the following transformation of the path
 724:    * element takes place:
 725:    * </p>
 726:    * <ol>
 727:    * <li>All `.' segments are removed.</li>
 728:    * <li>Each `..' segment which can be paired with a prior non-`..' segment
 729:    * is removed along with the preceding segment.</li>
 730:    * <li>A `.' segment is added to the front if the first segment contains
 731:    * a colon (`:').  This is a deviation from the RFC, which prevents
 732:    * confusion between the path and the scheme.</li>
 733:    * </ol>
 734:    * <p>
 735:    * The resulting URI will be free of `.' and `..' segments, barring those
 736:    * that were prepended or which couldn't be paired, respectively.
 737:    * </p>
 738:    *
 739:    * @return the normalized URI.
 740:    */
 741:   public URI normalize()
 742:   {
 743:     if (isOpaque() || path.indexOf("/./") == -1 && path.indexOf("/../") == -1)
 744:       return this;
 745:     try
 746:       {
 747:         return new URI(scheme, authority, normalizePath(path), query,
 748:                        fragment);
 749:       }
 750:     catch (URISyntaxException e)
 751:       {
 752:         throw (Error) new InternalError("Normalized URI variant could not "+
 753:                                         "be constructed").initCause(e);
 754:       }
 755:   }
 756: 
 757:   /**
 758:    * <p>
 759:    * Normalize the given path.  The following transformation takes place:
 760:    * </p>
 761:    * <ol>
 762:    * <li>All `.' segments are removed.</li>
 763:    * <li>Each `..' segment which can be paired with a prior non-`..' segment
 764:    * is removed along with the preceding segment.</li>
 765:    * <li>A `.' segment is added to the front if the first segment contains
 766:    * a colon (`:').  This is a deviation from the RFC, which prevents
 767:    * confusion between the path and the scheme.</li>
 768:    * </ol>
 769:    * <p>
 770:    * The resulting URI will be free of `.' and `..' segments, barring those
 771:    * that were prepended or which couldn't be paired, respectively.
 772:    * </p>
 773:    *
 774:    * @param relativePath the relative path to be normalized.
 775:    * @return the normalized path.
 776:    */
 777:   private String normalizePath(String relativePath)
 778:   {
 779:     /*
 780:        This follows the algorithm in section 5.2.4. of RFC3986,
 781:        but doesn't modify the input buffer.
 782:     */
 783:     CPStringBuilder input = new CPStringBuilder(relativePath);
 784:     CPStringBuilder output = new CPStringBuilder();
 785:     int start = 0;
 786:     while (start < input.length())
 787:       {
 788:         /* A */
 789:         if (input.indexOf("../",start) == start)
 790:           {
 791:             start += 3;
 792:             continue;
 793:           }
 794:         if (input.indexOf("./",start) == start)
 795:           {
 796:             start += 2;
 797:             continue;
 798:           }
 799:         /* B */
 800:         if (input.indexOf("/./",start) == start)
 801:           {
 802:             start += 2;
 803:             continue;
 804:           }
 805:         if (input.indexOf("/.",start) == start
 806:             && input.charAt(start + 2) != '.')
 807:           {
 808:             start += 1;
 809:             input.setCharAt(start,'/');
 810:             continue;
 811:           }
 812:         /* C */
 813:         if (input.indexOf("/../",start) == start)
 814:           {
 815:             start += 3;
 816:             removeLastSegment(output);
 817:             continue;
 818:           }
 819:         if (input.indexOf("/..",start) == start)
 820:           {
 821:             start += 2;
 822:             input.setCharAt(start,'/');
 823:             removeLastSegment(output);
 824:             continue;
 825:           }
 826:         /* D */
 827:         if (start == input.length() - 1 && input.indexOf(".",start) == start)
 828:           {
 829:             input.delete(0,1);
 830:             continue;
 831:           }
 832:         if (start == input.length() - 2 && input.indexOf("..",start) == start)
 833:           {
 834:             input.delete(0,2);
 835:             continue;
 836:           }
 837:         /* E */
 838:         int indexOfSlash = input.indexOf("/",start);
 839:         while (indexOfSlash == start)
 840:           {
 841:             output.append("/");
 842:             ++start;
 843:             indexOfSlash = input.indexOf("/",start);
 844:           }
 845:         if (indexOfSlash == -1)
 846:           indexOfSlash = input.length();
 847:         output.append(input.substring(start, indexOfSlash));
 848:         start = indexOfSlash;
 849:       }
 850:     return output.toString();
 851:   }
 852: 
 853:   /**
 854:    * Removes the last segment of the path from the specified buffer.
 855:    *
 856:    * @param buffer the buffer containing the path.
 857:    */
 858:   private void removeLastSegment(CPStringBuilder buffer)
 859:   {
 860:     int lastSlash = buffer.lastIndexOf("/");
 861:     if (lastSlash == -1)
 862:       buffer.setLength(0);
 863:     else
 864:       buffer.setLength(lastSlash);
 865:   }
 866: 
 867:   /**
 868:    * Resolves the given URI against this URI
 869:    *
 870:    * @param uri The URI to resolve against this URI
 871:    *
 872:    * @return The resulting URI, or null when it couldn't be resolved
 873:    * for some reason.
 874:    *
 875:    * @throws NullPointerException if uri is null
 876:    */
 877:   public URI resolve(URI uri)
 878:   {
 879:     if (uri.isAbsolute())
 880:       return uri;
 881:     if (uri.isOpaque())
 882:       return uri;
 883: 
 884:     String scheme = uri.getScheme();
 885:     String schemeSpecificPart = uri.getSchemeSpecificPart();
 886:     String authority = uri.getAuthority();
 887:     String path = uri.getPath();
 888:     String query = uri.getQuery();
 889:     String fragment = uri.getFragment();
 890: 
 891:     try
 892:       {
 893:         if (fragment != null && path != null && path.equals("")
 894:             && scheme == null && authority == null && query == null)
 895:           return new URI(this.scheme, this.schemeSpecificPart, fragment);
 896: 
 897:         if (authority == null)
 898:           {
 899:             authority = this.authority;
 900:             if (path == null)
 901:               path = "";
 902:             if (! (path.startsWith("/")))
 903:               {
 904:                 CPStringBuilder basepath = new CPStringBuilder(this.path);
 905:                 int i = this.path.lastIndexOf('/');
 906: 
 907:                 if (i >= 0)
 908:                   basepath.delete(i + 1, basepath.length());
 909: 
 910:                 basepath.append(path);
 911:                 path = normalizePath(basepath.toString());
 912:               }
 913:           }
 914:         return new URI(this.scheme, authority, path, query, fragment);
 915:       }
 916:     catch (URISyntaxException e)
 917:       {
 918:         throw (Error) new InternalError("Resolved URI variant could not "+
 919:                                         "be constructed").initCause(e);
 920:       }
 921:   }
 922: 
 923:   /**
 924:    * Resolves the given URI string against this URI
 925:    *
 926:    * @param str The URI as string to resolve against this URI
 927:    *
 928:    * @return The resulting URI
 929:    *
 930:    * @throws IllegalArgumentException If the given URI string
 931:    * violates RFC 2396
 932:    * @throws NullPointerException If uri is null
 933:    */
 934:   public URI resolve(String str) throws IllegalArgumentException
 935:   {
 936:     return resolve(create(str));
 937:   }
 938: 
 939:   /**
 940:    * <p>
 941:    * Relativizes the given URI against this URI.  The following
 942:    * algorithm is used:
 943:    * </p>
 944:    * <ul>
 945:    * <li>If either URI is opaque, the given URI is returned.</li>
 946:    * <li>If the schemes of the URIs differ, the given URI is returned.</li>
 947:    * <li>If the authority components of the URIs differ, then the given
 948:    * URI is returned.</li>
 949:    * <li>If the path of this URI is not a prefix of the supplied URI,
 950:    * then the given URI is returned.</li>
 951:    * <li>If all the above conditions hold, a new URI is created using the
 952:    * query and fragment components of the given URI, along with a path
 953:    * computed by removing the path of this URI from the start of the path
 954:    * of the supplied URI.</li>
 955:    * </ul>
 956:    *
 957:    * @param uri the URI to relativize agsint this URI
 958:    * @return the resulting URI
 959:    * @throws NullPointerException if the uri is null
 960:    */
 961:   public URI relativize(URI uri)
 962:   {
 963:     if (isOpaque() || uri.isOpaque())
 964:       return uri;
 965:     if (scheme == null && uri.getScheme() != null)
 966:       return uri;
 967:     if (scheme != null && !(scheme.equals(uri.getScheme())))
 968:       return uri;
 969:     if (rawAuthority == null && uri.getRawAuthority() != null)
 970:       return uri;
 971:     if (rawAuthority != null && !(rawAuthority.equals(uri.getRawAuthority())))
 972:       return uri;
 973:     String basePath = rawPath;
 974:     if (!(uri.getRawPath().equals(rawPath)))
 975:       {
 976:         if (!(basePath.endsWith("/")))
 977:           basePath = basePath.concat("/");
 978:         if (!(uri.getRawPath().startsWith(basePath)))
 979:           return uri;
 980:       }
 981:     try
 982:       {
 983:         return new URI(null, null,
 984:                        uri.getRawPath().substring(basePath.length()),
 985:                        uri.getRawQuery(), uri.getRawFragment());
 986:       }
 987:     catch (URISyntaxException e)
 988:       {
 989:         throw (Error) new InternalError("Relativized URI variant could not "+
 990:                                         "be constructed").initCause(e);
 991:       }
 992:   }
 993: 
 994:   /**
 995:    * Creates an URL from an URI
 996:    *
 997:    * @throws MalformedURLException If a protocol handler for the URL could
 998:    * not be found, or if some other error occurred while constructing the URL
 999:    * @throws IllegalArgumentException If the URI is not absolute
1000:    */
1001:   public URL toURL() throws IllegalArgumentException, MalformedURLException
1002:   {
1003:     if (isAbsolute())
1004:       return new URL(this.toString());
1005: 
1006:     throw new IllegalArgumentException("not absolute");
1007:   }
1008: 
1009:   /**
1010:    * Returns the scheme of the URI
1011:    */
1012:   public String getScheme()
1013:   {
1014:     return scheme;
1015:   }
1016: 
1017:   /**
1018:    * Tells whether this URI is absolute or not
1019:    */
1020:   public boolean isAbsolute()
1021:   {
1022:     return scheme != null;
1023:   }
1024: 
1025:   /**
1026:    * Tell whether this URI is opaque or not
1027:    */
1028:   public boolean isOpaque()
1029:   {
1030:     return ((scheme != null) && ! (schemeSpecificPart.startsWith("/")));
1031:   }
1032: 
1033:   /**
1034:    * Returns the raw scheme specific part of this URI.
1035:    * The scheme-specific part is never undefined, though it may be empty
1036:    */
1037:   public String getRawSchemeSpecificPart()
1038:   {
1039:     return rawSchemeSpecificPart;
1040:   }
1041: 
1042:   /**
1043:    * Returns the decoded scheme specific part of this URI.
1044:    */
1045:   public String getSchemeSpecificPart()
1046:   {
1047:     return schemeSpecificPart;
1048:   }
1049: 
1050:   /**
1051:    * Returns the raw authority part of this URI
1052:    */
1053:   public String getRawAuthority()
1054:   {
1055:     return rawAuthority;
1056:   }
1057: 
1058:   /**
1059:    * Returns the decoded authority part of this URI
1060:    */
1061:   public String getAuthority()
1062:   {
1063:     return authority;
1064:   }
1065: 
1066:   /**
1067:    * Returns the raw user info part of this URI
1068:    */
1069:   public String getRawUserInfo()
1070:   {
1071:     return rawUserInfo;
1072:   }
1073: 
1074:   /**
1075:    * Returns the decoded user info part of this URI
1076:    */
1077:   public String getUserInfo()
1078:   {
1079:     return userInfo;
1080:   }
1081: 
1082:   /**
1083:    * Returns the hostname of the URI
1084:    */
1085:   public String getHost()
1086:   {
1087:     return host;
1088:   }
1089: 
1090:   /**
1091:    * Returns the port number of the URI
1092:    */
1093:   public int getPort()
1094:   {
1095:     return port;
1096:   }
1097: 
1098:   /**
1099:    * Returns the raw path part of this URI
1100:    */
1101:   public String getRawPath()
1102:   {
1103:     return rawPath;
1104:   }
1105: 
1106:   /**
1107:    * Returns the path of the URI
1108:    */
1109:   public String getPath()
1110:   {
1111:     return path;
1112:   }
1113: 
1114:   /**
1115:    * Returns the raw query part of this URI
1116:    */
1117:   public String getRawQuery()
1118:   {
1119:     return rawQuery;
1120:   }
1121: 
1122:   /**
1123:    * Returns the query of the URI
1124:    */
1125:   public String getQuery()
1126:   {
1127:     return query;
1128:   }
1129: 
1130:   /**
1131:    * Return the raw fragment part of this URI
1132:    */
1133:   public String getRawFragment()
1134:   {
1135:     return rawFragment;
1136:   }
1137: 
1138:   /**
1139:    * Returns the fragment of the URI
1140:    */
1141:   public String getFragment()
1142:   {
1143:     return fragment;
1144:   }
1145: 
1146:   /**
1147:    * <p>
1148:    * Compares the URI with the given object for equality.  If the
1149:    * object is not a <code>URI</code>, then the method returns false.
1150:    * Otherwise, the following criteria are observed:
1151:    * </p>
1152:    * <ul>
1153:    * <li>The scheme of the URIs must either be null (undefined) in both cases,
1154:    * or equal, ignorant of case.</li>
1155:    * <li>The raw fragment of the URIs must either be null (undefined) in both
1156:    * cases, or equal, ignorant of case.</li>
1157:    * <li>Both URIs must be of the same type (opaque or hierarchial)</li>
1158:    * <li><strong>For opaque URIs:</strong></li>
1159:    * <ul>
1160:    * <li>The raw scheme-specific parts must be equal.</li>
1161:    * </ul>
1162:    * <li>For hierarchical URIs:</li>
1163:    * <ul>
1164:    * <li>The raw paths must be equal, ignorant of case.</li>
1165:    * <li>The raw queries are either both undefined or both equal, ignorant
1166:    * of case.</li>
1167:    * <li>The raw authority sections are either both undefined or:</li>
1168:    * <li><strong>For registry-based authorities:</strong></li>
1169:    * <ul><li>they are equal.</li></ul>
1170:    * <li><strong>For server-based authorities:</strong></li>
1171:    * <ul>
1172:    * <li>the hosts are equal, ignoring case</li>
1173:    * <li>the ports are equal</li>
1174:    * <li>the user information components are equal</li>
1175:    * </ul>
1176:    * </ul>
1177:    * </ul>
1178:    *
1179:    * @param obj the obj to compare the URI with.
1180:    * @return <code>true</code> if the objects are equal, according to
1181:    *         the specification above.
1182:    */
1183:   public boolean equals(Object obj)
1184:   {
1185:     if (!(obj instanceof URI))
1186:       return false;
1187:     URI uriObj = (URI) obj;
1188:     if (scheme == null)
1189:       {
1190:         if (uriObj.getScheme() != null)
1191:           return false;
1192:       }
1193:     else
1194:       if (!(scheme.equalsIgnoreCase(uriObj.getScheme())))
1195:         return false;
1196:     if (rawFragment == null)
1197:       {
1198:         if (uriObj.getRawFragment() != null)
1199:           return false;
1200:       }
1201:     else
1202:       if (!(rawFragment.equalsIgnoreCase(uriObj.getRawFragment())))
1203:         return false;
1204:     boolean opaqueThis = isOpaque();
1205:     boolean opaqueObj = uriObj.isOpaque();
1206:     if (opaqueThis && opaqueObj)
1207:       return rawSchemeSpecificPart.equals(uriObj.getRawSchemeSpecificPart());
1208:     else if (!opaqueThis && !opaqueObj)
1209:       {
1210:         boolean common = rawPath.equalsIgnoreCase(uriObj.getRawPath())
1211:           && ((rawQuery == null && uriObj.getRawQuery() == null)
1212:               || rawQuery.equalsIgnoreCase(uriObj.getRawQuery()));
1213:         if (rawAuthority == null && uriObj.getRawAuthority() == null)
1214:           return common;
1215:         if (host == null)
1216:           return common
1217:             && rawAuthority.equalsIgnoreCase(uriObj.getRawAuthority());
1218:         return common
1219:           && host.equalsIgnoreCase(uriObj.getHost())
1220:           && port == uriObj.getPort()
1221:           && (rawUserInfo == null ?
1222:               uriObj.getRawUserInfo() == null :
1223:               rawUserInfo.equalsIgnoreCase(uriObj.getRawUserInfo()));
1224:       }
1225:     else
1226:       return false;
1227:   }
1228: 
1229:   /**
1230:    * Computes the hashcode of the URI
1231:    */
1232:   public int hashCode()
1233:   {
1234:     return (getScheme() == null ? 0 : 13 * getScheme().hashCode())
1235:       + 17 * getRawSchemeSpecificPart().hashCode()
1236:       + (getRawFragment() == null ? 0 : 21 + getRawFragment().hashCode());
1237:   }
1238: 
1239:   /**
1240:    * Compare the URI with another URI.
1241:    * Undefined components are taken to be less than any other component.
1242:    * The following criteria are observed:
1243:    * </p>
1244:    * <ul>
1245:    * <li>Two URIs with different schemes are compared according to their
1246:    * scheme, regardless of case.</li>
1247:    * <li>A hierarchical URI is less than an opaque URI with the same
1248:    * scheme.</li>
1249:    * <li><strong>For opaque URIs:</strong></li>
1250:    * <ul>
1251:    * <li>URIs with differing scheme-specific parts are ordered according
1252:    * to the ordering of the scheme-specific part.</li>
1253:    * <li>URIs with the same scheme-specific part are ordered by the
1254:    * raw fragment.</li>
1255:    * </ul>
1256:    * <li>For hierarchical URIs:</li>
1257:    * <ul>
1258:    * <li>URIs are ordered according to their raw authority sections,
1259:    * if they are unequal.</li>
1260:    * <li><strong>For registry-based authorities:</strong></li>
1261:    * <ul><li>they are ordered according to the ordering of the authority
1262:    * component.</li></ul>
1263:    * <li><strong>For server-based authorities:</strong></li>
1264:    * <ul>
1265:    * <li>URIs are ordered according to the raw user information.</li>
1266:    * <li>URIs with the same user information are ordered by the host,
1267:    * ignoring case.</li>
1268:    * <lI>URIs with the same host are ordered by the port.</li>
1269:    * </ul>
1270:    * <li>URIs with the same authority section are ordered by the raw path.</li>
1271:    * <li>URIs with the same path are ordered by their raw query.</li>
1272:    * <li>URIs with the same query are ordered by their raw fragments.</li>
1273:    * </ul>
1274:    * </ul>
1275:    *
1276:    * @param uri The other URI to compare this URI with
1277:    * @return a negative integer, zero or a positive integer depending
1278:    *         on whether this URI is less than, equal to or greater
1279:    *         than that supplied, respectively.
1280:    */
1281:   public int compareTo(URI uri)
1282:     throws ClassCastException
1283:   {
1284:     if (scheme == null && uri.getScheme() != null)
1285:       return -1;
1286:     if (scheme != null)
1287:       {
1288:         int sCompare = scheme.compareToIgnoreCase(uri.getScheme());
1289:         if (sCompare != 0)
1290:           return sCompare;
1291:       }
1292:     boolean opaqueThis = isOpaque();
1293:     boolean opaqueObj = uri.isOpaque();
1294:     if (opaqueThis && !opaqueObj)
1295:       return 1;
1296:     if (!opaqueThis && opaqueObj)
1297:       return -1;
1298:     if (opaqueThis)
1299:       {
1300:         int ssCompare =
1301:           rawSchemeSpecificPart.compareTo(uri.getRawSchemeSpecificPart());
1302:         if (ssCompare == 0)
1303:           return compareFragments(uri);
1304:         else
1305:           return ssCompare;
1306:       }
1307:     if (rawAuthority == null && uri.getRawAuthority() != null)
1308:       return -1;
1309:     if (rawAuthority != null)
1310:       {
1311:         int aCompare = rawAuthority.compareTo(uri.getRawAuthority());
1312:         if (aCompare != 0)
1313:           {
1314:             if (host == null)
1315:               return aCompare;
1316:             if (rawUserInfo == null && uri.getRawUserInfo() != null)
1317:               return -1;
1318:             int uCompare = rawUserInfo.compareTo(uri.getRawUserInfo());
1319:             if (uCompare != 0)
1320:               return uCompare;
1321:             if (host == null && uri.getHost() != null)
1322:               return -1;
1323:             int hCompare = host.compareTo(uri.getHost());
1324:             if (hCompare != 0)
1325:               return hCompare;
1326:             int uriPort = uri.getPort();
1327:             return (uriPort == port) ? 0 : (uriPort > port) ? -1 : 1;
1328:           }
1329:       }
1330:     if (rawPath == null && uri.getRawPath() != null)
1331:       return -1;
1332:     if (rawPath != null)
1333:       {
1334:         int pCompare = rawPath.compareTo(uri.getRawPath());
1335:         if (pCompare != 0)
1336:           return pCompare;
1337:       }
1338:     if (rawQuery == null && uri.getRawQuery() != null)
1339:       return -1;
1340:     if (rawQuery != null)
1341:       {
1342:         int qCompare = rawQuery.compareTo(uri.getRawQuery());
1343:         if (qCompare != 0)
1344:           return qCompare;
1345:       }
1346:     return compareFragments(uri);
1347:   }
1348: 
1349:   /**
1350:    * Compares the fragment of this URI with that of the supplied URI.
1351:    *
1352:    * @param uri the URI to compare with this one.
1353:    * @return a negative integer, zero or a positive integer depending
1354:    *         on whether this uri's fragment is less than, equal to
1355:    *         or greater than the fragment of the uri supplied, respectively.
1356:    */
1357:   private int compareFragments(URI uri)
1358:   {
1359:     if (rawFragment == null && uri.getRawFragment() != null)
1360:       return -1;
1361:     else if (rawFragment == null)
1362:       return 0;
1363:     else
1364:       return rawFragment.compareTo(uri.getRawFragment());
1365:   }
1366: 
1367:   /**
1368:    * Returns the URI as a String.  If the URI was created using a constructor,
1369:    * then this will be the same as the original input string.
1370:    *
1371:    * @return a string representation of the URI.
1372:    */
1373:   public String toString()
1374:   {
1375:     return (scheme == null ? "" : scheme + ":")
1376:       + rawSchemeSpecificPart
1377:       + (rawFragment == null ? "" : "#" + rawFragment);
1378:   }
1379: 
1380:   /**
1381:    * Returns the URI as US-ASCII string.  This is the same as the result
1382:    * from <code>toString()</code> for URIs that don't contain any non-US-ASCII
1383:    * characters.  Otherwise, the non-US-ASCII characters are replaced
1384:    * by their percent-encoded representations.
1385:    *
1386:    * @return a string representation of the URI, containing only US-ASCII
1387:    *         characters.
1388:    */
1389:   public String toASCIIString()
1390:   {
1391:     String strRep = toString();
1392:     boolean inNonAsciiBlock = false;
1393:     CPStringBuilder buffer = new CPStringBuilder();
1394:     CPStringBuilder encBuffer = null;
1395:     for (int i = 0; i < strRep.length(); i++)
1396:       {
1397:         char c = strRep.charAt(i);
1398:         if (c <= 127)
1399:           {
1400:             if (inNonAsciiBlock)
1401:               {
1402:                 buffer.append(escapeCharacters(encBuffer.toString()));
1403:                 inNonAsciiBlock = false;
1404:               }
1405:             buffer.append(c);
1406:           }
1407:         else
1408:           {
1409:             if (!inNonAsciiBlock)
1410:               {
1411:                 encBuffer = new CPStringBuilder();
1412:                 inNonAsciiBlock = true;
1413:               }
1414:             encBuffer.append(c);
1415:           }
1416:       }
1417:     return buffer.toString();
1418:   }
1419: 
1420:   /**
1421:    * Converts the non-ASCII characters in the supplied string
1422:    * to their equivalent percent-encoded representations.
1423:    * That is, they are replaced by "%" followed by their hexadecimal value.
1424:    *
1425:    * @param str a string including non-ASCII characters.
1426:    * @return the string with the non-ASCII characters converted to their
1427:    *         percent-encoded representations.
1428:    */
1429:   private static String escapeCharacters(String str)
1430:   {
1431:     try
1432:       {
1433:         CPStringBuilder sb = new CPStringBuilder();
1434:         // this is far from optimal, but it works
1435:         byte[] utf8 = str.getBytes("utf-8");
1436:         for (int j = 0; j < utf8.length; j++)
1437:           {
1438:             sb.append('%');
1439:             sb.append(HEX.charAt((utf8[j] & 0xff) / 16));
1440:             sb.append(HEX.charAt((utf8[j] & 0xff) % 16));
1441:           }
1442:         return sb.toString();
1443:       }
1444:     catch (java.io.UnsupportedEncodingException x)
1445:       {
1446:         throw (Error) new InternalError("Escaping error").initCause(x);
1447:       }
1448:   }
1449: 
1450: }