New src/java.base/share/classes/java/net/URI.java

   1 /*
   2  * Copyright (c) 2000, 2025, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package java.net;
  27 
  28 import java.io.File;
  29 import java.io.IOException;
  30 import java.io.InvalidObjectException;
  31 import java.io.ObjectInputStream;
  32 import java.io.ObjectOutputStream;
  33 import java.io.Serializable;
  34 import java.nio.ByteBuffer;
  35 import java.nio.CharBuffer;
  36 import java.nio.charset.CharsetDecoder;
  37 import java.nio.charset.CharsetEncoder;
  38 import java.nio.charset.CoderResult;
  39 import java.nio.charset.CodingErrorAction;
  40 import java.nio.charset.CharacterCodingException;
  41 import java.nio.file.Path;
  42 import java.text.Normalizer;
  43 import jdk.internal.access.JavaNetUriAccess;
  44 import jdk.internal.access.SharedSecrets;
  45 import jdk.internal.util.Exceptions;
  46 import sun.nio.cs.UTF_8;
  47 
  48 import static jdk.internal.util.Exceptions.filterNonSocketInfo;
  49 import static jdk.internal.util.Exceptions.formatMsg;
  50 
  51 /**
  52  * Represents a Uniform Resource Identifier (URI) reference.
  53  *
  54  * <p> Aside from some minor deviations noted below, an instance of this
  55  * class represents a URI reference as defined by
  56  * <a href="http://www.ietf.org/rfc/rfc2396.txt"><i>RFC&nbsp;2396: Uniform
  57  * Resource Identifiers (URI): Generic Syntax</i></a>, amended by <a
  58  * href="http://www.ietf.org/rfc/rfc2732.txt"><i>RFC&nbsp;2732: Format for
  59  * Literal IPv6 Addresses in URLs</i></a>. The Literal IPv6 address format
  60  * also supports scope_ids. The syntax and usage of scope_ids is described
  61  * <a href="Inet6Address.html#scoped">here</a>.
  62  * This class provides constructors for creating URI instances from
  63  * their components or by parsing their string forms, methods for accessing the
  64  * various components of an instance, and methods for normalizing, resolving,
  65  * and relativizing URI instances.  Instances of this class are immutable.
  66  *
  67  *
  68  * <h2> URI syntax and components </h2>
  69  *
  70  * At the highest level a URI reference (hereinafter simply "URI") in string
  71  * form has the syntax
  72  *
  73  * <blockquote>
  74  * [<i>scheme</i><b>{@code :}</b>]<i>scheme-specific-part</i>[<b>{@code #}</b><i>fragment</i>]
  75  * </blockquote>
  76  *
  77  * where square brackets [...] delineate optional components and the characters
  78  * <b>{@code :}</b> and <b>{@code #}</b> stand for themselves.
  79  *
  80  * <p> An <i>absolute</i> URI specifies a scheme; a URI that is not absolute is
  81  * said to be <i>relative</i>.  URIs are also classified according to whether
  82  * they are <i>opaque</i> or <i>hierarchical</i>.
  83  *
  84  * <p> An <i>opaque</i> URI is an absolute URI whose scheme-specific part does
  85  * not begin with a slash character ({@code '/'}).  Opaque URIs are not
  86  * subject to further parsing.  Some examples of opaque URIs are:
  87  *
  88  * <blockquote><ul style="list-style-type:none">
  89  * <li>{@code mailto:java-net@www.example.com}</li>
  90  * <li>{@code news:comp.lang.java}</li>
  91  * <li>{@code urn:isbn:096139210x}</li>
  92  * </ul></blockquote>
  93  *
  94  * <p> A <i>hierarchical</i> URI is either an absolute URI whose
  95  * scheme-specific part begins with a slash character, or a relative URI, that
  96  * is, a URI that does not specify a scheme.  Some examples of hierarchical
  97  * URIs are:
  98  *
  99  * <blockquote>
 100  * {@code http://example.com/languages/java/}<br>
 101  * {@code sample/a/index.html#28}<br>
 102  * {@code ../../demo/b/index.html}<br>
 103  * {@code file:///~/calendar}
 104  * </blockquote>
 105  *
 106  * <p> A hierarchical URI is subject to further parsing according to the syntax
 107  *
 108  * <blockquote>
 109  * [<i>scheme</i><b>{@code :}</b>][<b>{@code //}</b><i>authority</i>][<i>path</i>][<b>{@code ?}</b><i>query</i>][<b>{@code #}</b><i>fragment</i>]
 110  * </blockquote>
 111  *
 112  * where the characters <b>{@code :}</b>, <b>{@code /}</b>,
 113  * <b>{@code ?}</b>, and <b>{@code #}</b> stand for themselves.  The
 114  * scheme-specific part of a hierarchical URI consists of the characters
 115  * between the scheme and fragment components.
 116  *
 117  * <p> The authority component of a hierarchical URI is, if specified, either
 118  * <i>server-based</i> or <i>registry-based</i>.  A server-based authority
 119  * parses according to the familiar syntax
 120  *
 121  * <blockquote>
 122  * [<i>user-info</i><b>{@code @}</b>]<i>host</i>[<b>{@code :}</b><i>port</i>]
 123  * </blockquote>
 124  *
 125  * where the characters <b>{@code @}</b> and <b>{@code :}</b> stand for
 126  * themselves.  Nearly all URI schemes currently in use are server-based.  An
 127  * authority component that does not parse in this way is considered to be
 128  * registry-based.
 129  *
 130  * <p> The path component of a hierarchical URI is itself said to be absolute
 131  * if it begins with a slash character ({@code '/'}); otherwise it is
 132  * relative.  The path of a hierarchical URI that is either absolute or
 133  * specifies an authority is always absolute.
 134  *
 135  * <p> All told, then, a URI instance has the following nine components:
 136  *
 137  * <table class="striped" style="margin-left:2em">
 138  * <caption style="display:none">Describes the components of a URI: scheme, scheme-specific-part, authority, user-info, host, port, path, query, fragment</caption>
 139  * <thead>
 140  * <tr><th scope="col">Component</th><th scope="col">Type</th></tr>
 141  * </thead>
 142  * <tbody style="text-align:left">
 143  * <tr><th scope="row">scheme</th><td>{@code String}</td></tr>
 144  * <tr><th scope="row">scheme-specific-part</th><td>{@code String}</td></tr>
 145  * <tr><th scope="row">authority</th><td>{@code String}</td></tr>
 146  * <tr><th scope="row">user-info</th><td>{@code String}</td></tr>
 147  * <tr><th scope="row">host</th><td>{@code String}</td></tr>
 148  * <tr><th scope="row">port</th><td>{@code int}</td></tr>
 149  * <tr><th scope="row">path</th><td>{@code String}</td></tr>
 150  * <tr><th scope="row">query</th><td>{@code String}</td></tr>
 151  * <tr><th scope="row">fragment</th><td>{@code String}</td></tr>
 152  * </tbody>
 153  * </table>
 154  *
 155  * In a given instance any particular component is either <i>undefined</i> or
 156  * <i>defined</i> with a distinct value.  Undefined string components are
 157  * represented by {@code null}, while undefined integer components are
 158  * represented by {@code -1}.  A string component may be defined to have the
 159  * empty string as its value; this is not equivalent to that component being
 160  * undefined.
 161  *
 162  * <p> Whether a particular component is or is not defined in an instance
 163  * depends upon the type of the URI being represented.  An absolute URI has a
 164  * scheme component.  An opaque URI has a scheme, a scheme-specific part, and
 165  * possibly a fragment, but has no other components.  A hierarchical URI always
 166  * has a path (though it may be empty) and a scheme-specific-part (which at
 167  * least contains the path), and may have any of the other components.  If the
 168  * authority component is present and is server-based then the host component
 169  * will be defined and the user-information and port components may be defined.
 170  *
 171  *
 172  * <h3> Operations on URI instances </h3>
 173  *
 174  * The key operations supported by this class are those of
 175  * <i>normalization</i>, <i>resolution</i>, and <i>relativization</i>.
 176  *
 177  * <p> <i>Normalization</i> is the process of removing unnecessary {@code "."}
 178  * and {@code ".."} segments from the path component of a hierarchical URI.
 179  * Each {@code "."} segment is simply removed.  A {@code ".."} segment is
 180  * removed only if it is preceded by a non-{@code ".."} segment.
 181  * Normalization has no effect upon opaque URIs.
 182  *
 183  * <p> <i>Resolution</i> is the process of resolving one URI against another,
 184  * <i>base</i> URI.  The resulting URI is constructed from components of both
 185  * URIs in the manner specified by RFC&nbsp;2396, taking components from the
 186  * base URI for those not specified in the original.  For hierarchical URIs,
 187  * the path of the original is resolved against the path of the base and then
 188  * normalized.  The result, for example, of resolving
 189  *
 190  * <blockquote>
 191  * {@code sample/a/index.html#28}
 192  * &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
 193  * &nbsp;&nbsp;&nbsp;&nbsp;(1)
 194  * </blockquote>
 195  *
 196  * against the base URI {@code http://example.com/languages/java/} is the result
 197  * URI
 198  *
 199  * <blockquote>
 200  * {@code http://example.com/languages/java/sample/a/index.html#28}
 201  * </blockquote>
 202  *
 203  * Resolving the relative URI
 204  *
 205  * <blockquote>
 206  * {@code ../../demo/b/index.html}&nbsp;&nbsp;&nbsp;&nbsp;(2)
 207  * </blockquote>
 208  *
 209  * against this result yields, in turn,
 210  *
 211  * <blockquote>
 212  * {@code http://example.com/languages/java/demo/b/index.html}
 213  * </blockquote>
 214  *
 215  * Resolution of both absolute and relative URIs, and of both absolute and
 216  * relative paths in the case of hierarchical URIs, is supported.  Resolving
 217  * the URI {@code file:///~/calendar} against any other URI simply yields the
 218  * original URI, since it is absolute.  Resolving the relative URI (2) above
 219  * against the relative base URI (1) yields the normalized, but still relative,
 220  * URI
 221  *
 222  * <blockquote>
 223  * {@code demo/b/index.html}
 224  * </blockquote>
 225  *
 226  * <p> <i>Relativization</i>, finally, can be regarded as the inverse of resolution.
 227  * Let <i>u</i> be any normalized absolute URI ending with a slash character ({@code '/'})
 228  * and <i>v</i> be any normalized relative URI not beginning with a period character ({@code '.'})
 229  * or slash character ({@code '/'}). Then, the following statement is true:
 230  *
 231  * <blockquote>
 232  *   <i>u</i>{@code .relativize(}<i>u</i>{@code .resolve(}<i>v</i>{@code )).equals(}<i>v</i>{@code )}
 233  * </blockquote>
 234  *
 235  * Let <i>u</i> be any normalized absolute URI ending with a slash character ({@code '/'})
 236  * and <i>v</i> be any normalized absolute URI. Then, the following statement is true:
 237  *
 238  * <blockquote>
 239  *   <i>u</i>{@code .resolve(}<i>u</i>{@code .relativize(}<i>v</i>{@code )).equals(}<i>v</i>{@code )}
 240  * </blockquote>
 241  *
 242  * This operation is often useful when constructing a document containing URIs
 243  * that must be made relative to the base URI of the document wherever
 244  * possible.  For example, relativizing the URI
 245  *
 246  * <blockquote>
 247  * {@code http://example.com/languages/java/sample/a/index.html#28}
 248  * </blockquote>
 249  *
 250  * against the base URI
 251  *
 252  * <blockquote>
 253  * {@code http://example.com/languages/java/}
 254  * </blockquote>
 255  *
 256  * yields the relative URI {@code sample/a/index.html#28}.
 257  *
 258  *
 259  * <h3> Character categories </h3>
 260  *
 261  * RFC&nbsp;2396 specifies precisely which characters are permitted in the
 262  * various components of a URI reference.  The following categories, most of
 263  * which are taken from that specification, are used below to describe these
 264  * constraints:
 265  *
 266  * <table class="striped" style="margin-left:2em">
 267  * <caption style="display:none">Describes categories alpha, digit, alphanum, unreserved, punct, reserved, escaped, and other</caption>
 268  *   <thead>
 269  *   <tr><th scope="col">Category</th><th scope="col">Description</th></tr>
 270  *   </thead>
 271  *   <tbody style="text-align:left">
 272  *   <tr><th scope="row" style="vertical-align:top">alpha</th>
 273  *       <td>The US-ASCII alphabetic characters,
 274  *        {@code 'A'}&nbsp;through&nbsp;{@code 'Z'}
 275  *        and {@code 'a'}&nbsp;through&nbsp;{@code 'z'}</td></tr>
 276  *   <tr><th scope="row" style="vertical-align:top">digit</th>
 277  *       <td>The US-ASCII decimal digit characters,
 278  *       {@code '0'}&nbsp;through&nbsp;{@code '9'}</td></tr>
 279  *   <tr><th scope="row" style="vertical-align:top">alphanum</th>
 280  *       <td>All <i>alpha</i> and <i>digit</i> characters</td></tr>
 281  *   <tr><th scope="row" style="vertical-align:top">unreserved</th>
 282  *       <td>All <i>alphanum</i> characters together with those in the string
 283  *        {@code "_-!.~'()*"}</td></tr>
 284  *   <tr><th scope="row" style="vertical-align:top">punct</th>
 285  *       <td>The characters in the string {@code ",;:$&+="}</td></tr>
 286  *   <tr><th scope="row" style="vertical-align:top">reserved</th>
 287  *       <td>All <i>punct</i> characters together with those in the string
 288  *        {@code "?/[]@"}</td></tr>
 289  *   <tr><th scope="row" style="vertical-align:top">escaped</th>
 290  *       <td>Escaped octets, that is, triplets consisting of the percent
 291  *           character ({@code '%'}) followed by two hexadecimal digits
 292  *           ({@code '0'}-{@code '9'}, {@code 'A'}-{@code 'F'}, and
 293  *           {@code 'a'}-{@code 'f'})</td></tr>
 294  *   <tr><th scope="row" style="vertical-align:top">other</th>
 295  *       <td>The Unicode characters that are not in the US-ASCII character set,
 296  *           are not control characters (according to the {@link
 297  *           java.lang.Character#isISOControl(char) Character.isISOControl}
 298  *           method), and are not space characters (according to the {@link
 299  *           java.lang.Character#isSpaceChar(char) Character.isSpaceChar}
 300  *           method)&nbsp;&nbsp;<i>(<b>Deviation from RFC 2396</b>, which is
 301  *           limited to US-ASCII)</i></td></tr>
 302  * </tbody>
 303  * </table>
 304  *
 305  * <p><a id="legal-chars"></a> The set of all legal URI characters consists of
 306  * the <i>unreserved</i>, <i>reserved</i>, <i>escaped</i>, and <i>other</i>
 307  * characters.
 308  *
 309  *
 310  * <h3> Escaped octets, quotation, encoding, and decoding </h3>
 311  *
 312  * RFC 2396 allows escaped octets to appear in the user-info, path, query, and
 313  * fragment components.  Escaping serves two purposes in URIs:
 314  *
 315  * <ul>
 316  *
 317  *   <li><p> To <i>encode</i> non-US-ASCII characters when a URI is required to
 318  *   conform strictly to RFC&nbsp;2396 by not containing any <i>other</i>
 319  *   characters.  </p></li>
 320  *
 321  *   <li><p> To <i>quote</i> characters that are otherwise illegal in a
 322  *   component.  The user-info, path, query, and fragment components differ
 323  *   slightly in terms of which characters are considered legal and illegal.
 324  *   </p></li>
 325  *
 326  * </ul>
 327  *
 328  * These purposes are served in this class by three related operations:
 329  *
 330  * <ul>
 331  *
 332  *   <li><p><a id="encode"></a> A character is <i>encoded</i> by replacing it
 333  *   with the sequence of escaped octets that represent that character in the
 334  *   UTF-8 character set.  The Euro currency symbol ({@code '\u005Cu20AC'}),
 335  *   for example, is encoded as {@code "%E2%82%AC"}.  <i>(<b>Deviation from
 336  *   RFC&nbsp;2396</b>, which does not specify any particular character
 337  *   set.)</i> </p></li>
 338  *
 339  *   <li><p><a id="quote"></a> An illegal character is <i>quoted</i> simply by
 340  *   encoding it.  The space character, for example, is quoted by replacing it
 341  *   with {@code "%20"}.  UTF-8 contains US-ASCII, hence for US-ASCII
 342  *   characters this transformation has exactly the effect required by
 343  *   RFC&nbsp;2396. </p></li>
 344  *
 345  *   <li><p><a id="decode"></a>
 346  *   A sequence of escaped octets is <i>decoded</i> by
 347  *   replacing it with the sequence of characters that it represents in the
 348  *   UTF-8 character set.  UTF-8 contains US-ASCII, hence decoding has the
 349  *   effect of de-quoting any quoted US-ASCII characters as well as that of
 350  *   decoding any encoded non-US-ASCII characters.  If a <a
 351  *   href="../nio/charset/CharsetDecoder.html#ce">decoding error</a> occurs
 352  *   when decoding the escaped octets then the erroneous octets are replaced by
 353  *   {@code '\u005CuFFFD'}, the Unicode replacement character.  </p></li>
 354  *
 355  * </ul>
 356  *
 357  * These operations are exposed in the constructors and methods of this class
 358  * as follows:
 359  *
 360  * <ul>
 361  *
 362  *   <li><p> The {@linkplain #URI(java.lang.String) single-argument
 363  *   constructor} requires any illegal characters in its argument to be
 364  *   quoted and preserves any escaped octets and <i>other</i> characters that
 365  *   are present.  </p></li>
 366  *
 367  *   <li><p> The {@linkplain
 368  *   #URI(java.lang.String,java.lang.String,java.lang.String,int,java.lang.String,java.lang.String,java.lang.String)
 369  *   multi-argument constructors} quote illegal characters as
 370  *   required by the components in which they appear.  The percent character
 371  *   ({@code '%'}) is always quoted by these constructors.  Any <i>other</i>
 372  *   characters are preserved.  </p></li>
 373  *
 374  *   <li><p> The {@link #getRawUserInfo() getRawUserInfo}, {@link #getRawPath()
 375  *   getRawPath}, {@link #getRawQuery() getRawQuery}, {@link #getRawFragment()
 376  *   getRawFragment}, {@link #getRawAuthority() getRawAuthority}, and {@link
 377  *   #getRawSchemeSpecificPart() getRawSchemeSpecificPart} methods return the
 378  *   values of their corresponding components in raw form, without interpreting
 379  *   any escaped octets.  The strings returned by these methods may contain
 380  *   both escaped octets and <i>other</i> characters, and will not contain any
 381  *   illegal characters.  </p></li>
 382  *
 383  *   <li><p> The {@link #getUserInfo() getUserInfo}, {@link #getPath()
 384  *   getPath}, {@link #getQuery() getQuery}, {@link #getFragment()
 385  *   getFragment}, {@link #getAuthority() getAuthority}, and {@link
 386  *   #getSchemeSpecificPart() getSchemeSpecificPart} methods decode any escaped
 387  *   octets in their corresponding components.  The strings returned by these
 388  *   methods may contain both <i>other</i> characters and illegal characters,
 389  *   and will not contain any escaped octets.  </p></li>
 390  *
 391  *   <li><p> The {@link #toString() toString} method returns a URI string with
 392  *   all necessary quotation but which may contain <i>other</i> characters.
 393  *   </p></li>
 394  *
 395  *   <li><p> The {@link #toASCIIString() toASCIIString} method returns a fully
 396  *   quoted and encoded URI string that does not contain any <i>other</i>
 397  *   characters.  </p></li>
 398  *
 399  * </ul>
 400  *
 401  *
 402  * <h3> Identities </h3>
 403  *
 404  * For any URI <i>u</i>, it is always the case that
 405  *
 406  * <blockquote>
 407  * {@code new URI(}<i>u</i>{@code .toString()).equals(}<i>u</i>{@code )}&nbsp;.
 408  * </blockquote>
 409  *
 410  * For any URI <i>u</i> that does not contain redundant syntax such as two
 411  * slashes before an empty authority (as in {@code file:///tmp/}&nbsp;) or a
 412  * colon following a host name but no port (as in
 413  * {@code http://www.example.com:}&nbsp;), and that does not encode characters
 414  * except those that must be quoted, the following identities also hold:
 415  * <pre>
 416  *     new URI(<i>u</i>.getScheme(),
 417  *             <i>u</i>.getSchemeSpecificPart(),
 418  *             <i>u</i>.getFragment())
 419  *     .equals(<i>u</i>)</pre>
 420  * in all cases,
 421  * <pre>
 422  *     new URI(<i>u</i>.getScheme(),
 423  *             <i>u</i>.getAuthority(),
 424  *             <i>u</i>.getPath(), <i>u</i>.getQuery(),
 425  *             <i>u</i>.getFragment())
 426  *     .equals(<i>u</i>)</pre>
 427  * if <i>u</i> is hierarchical, and
 428  * <pre>
 429  *     new URI(<i>u</i>.getScheme(),
 430  *             <i>u</i>.getUserInfo(), <i>u</i>.getHost(), <i>u</i>.getPort(),
 431  *             <i>u</i>.getPath(), <i>u</i>.getQuery(),
 432  *             <i>u</i>.getFragment())
 433  *     .equals(<i>u</i>)</pre>
 434  * if <i>u</i> is hierarchical and has either no authority or a server-based
 435  * authority.
 436  *
 437  *
 438  * <h3> URIs, URLs, and URNs </h3>
 439  *
 440  * A URI is a uniform resource <i>identifier</i> while a URL is a uniform
 441  * resource <i>locator</i>.  Hence every URL is a URI, abstractly speaking, but
 442  * not every URI is a URL.  This is because there is another subcategory of
 443  * URIs, uniform resource <i>names</i> (URNs), which name resources but do not
 444  * specify how to locate them.  The {@code mailto}, {@code news}, and
 445  * {@code isbn} URIs shown above are examples of URNs.
 446  *
 447  * <p> The conceptual distinction between URIs and URLs is reflected in the
 448  * differences between this class and the {@link URL} class.
 449  *
 450  * <p> An instance of this class represents a URI reference in the syntactic
 451  * sense defined by RFC&nbsp;2396.  A URI may be either absolute or relative.
 452  * A URI string is parsed according to the generic syntax without regard to the
 453  * scheme, if any, that it specifies.  No lookup of the host, if any, is
 454  * performed, and no scheme-dependent stream handler is constructed.  Equality,
 455  * hashing, and comparison are defined strictly in terms of the character
 456  * content of the instance.  In other words, a URI instance is little more than
 457  * a structured string that supports the syntactic, scheme-independent
 458  * operations of comparison, normalization, resolution, and relativization.
 459  *
 460  * <p> An instance of the {@link URL} class, by contrast, represents the
 461  * syntactic components of a URL together with some of the information required
 462  * to access the resource that it describes.  A URL must be absolute, that is,
 463  * it must always specify a scheme.  A URL string is parsed according to its
 464  * scheme.  A stream handler is always established for a URL, and in fact it is
 465  * impossible to create a URL instance for a scheme for which no handler is
 466  * available.  Equality and hashing depend upon both the scheme and the
 467  * Internet address of the host, if any; comparison is not defined.  In other
 468  * words, a URL is a structured string that supports the syntactic operation of
 469  * resolution as well as the network I/O operations of looking up the host and
 470  * opening a connection to the specified resource.
 471  *
 472  * @apiNote
 473  *
 474  * Applications working with file paths and file URIs should take great
 475  * care to use the appropriate methods to convert between the two.
 476  * The {@link Path#of(URI)} factory method and the {@link File#File(URI)}
 477  * constructor can be used to create {@link Path} or {@link File}
 478  * objects from a file URI. {@link Path#toUri()} and {@link File#toURI()}
 479  * can be used to create a {@link URI} from a file path.
 480  * Applications should never try to {@linkplain
 481  * #URI(String, String, String, int, String, String, String)
 482  * construct}, {@linkplain #URI(String) parse}, or
 483  * {@linkplain #resolve(String) resolve} a {@code URI}
 484  * from the direct string representation of a {@code File} or {@code Path}
 485  * instance.
 486  * <p>
 487  * Some components of a URL or URI, such as <i>userinfo</i>, may
 488  * be abused to construct misleading URLs or URIs. Applications
 489  * that deal with URLs or URIs should take into account
 490  * the recommendations advised in <a
 491  * href="https://tools.ietf.org/html/rfc3986#section-7">RFC3986,
 492  * Section 7, Security Considerations</a>.
 493  *
 494  * @author Mark Reinhold
 495  * @since 1.4
 496  *
 497  * @spec https://www.rfc-editor.org/info/rfc2279
 498  *      RFC 2279: UTF-8, a transformation format of ISO 10646
 499  * @spec https://www.rfc-editor.org/info/rfc2373
 500  *      RFC 2373: IP Version 6 Addressing Architecture
 501  * @spec https://www.rfc-editor.org/info/rfc2396
 502  *      RFC 2396: Uniform Resource Identifiers (URI): Generic Syntax
 503  * @spec https://www.rfc-editor.org/info/rfc2732
 504  *      RFC 2732: Format for Literal IPv6 Addresses in URL's
 505  * @spec https://www.rfc-editor.org/info/rfc3986
 506  *      RFC 3986: Uniform Resource Identifier (URI): Generic Syntax
 507  *
 508  * @see <a href="http://www.ietf.org/rfc/rfc2279.txt"><i>RFC&nbsp;2279: UTF-8, a
 509  * transformation format of ISO 10646</i></a>
 510  * @see <a href="http://www.ietf.org/rfc/rfc2373.txt"><i>RFC&nbsp;2373: IPv6 Addressing
 511  * Architecture</i></a>
 512  * @see <a href="http://www.ietf.org/rfc/rfc2396.txt"><i>RFC&nbsp;2396: Uniform
 513  * Resource Identifiers (URI): Generic Syntax</i></a>
 514  * @see <a href="http://www.ietf.org/rfc/rfc2732.txt"><i>RFC&nbsp;2732: Format for
 515  * Literal IPv6 Addresses in URLs</i></a>
 516  * @see <a href="URISyntaxException.html">URISyntaxException</a>
 517  */
 518 
 519 public final class URI
 520     implements Comparable<URI>, Serializable
 521 {
 522 
 523     // Note: Comments containing the word "ASSERT" indicate places where a
 524     // throw of an InternalError should be replaced by an appropriate assertion
 525     // statement once asserts are enabled in the build.
 526     @java.io.Serial
 527     static final long serialVersionUID = -6052424284110960213L;
 528 
 529 
 530     // -- Properties and components of this instance --
 531 
 532     // Components of all URIs: [<scheme>:]<scheme-specific-part>[#<fragment>]
 533     private transient String scheme;            // null ==> relative URI
 534     private transient String fragment;          // null ==> undefined
 535 
 536     // Hierarchical URI components: [//<authority>]<path>[?<query>]
 537     private transient String authority;         // Registry or server
 538 
 539     // Server-based authority: [<userInfo>@]<host>[:<port>]
 540     private transient String userInfo;          // null ==> undefined
 541     private transient String host;              // null ==> registry-based
 542     private transient int port = -1;            // -1 ==> undefined
 543 
 544     // Remaining components of hierarchical URIs
 545     private transient String path;              // null ==> opaque
 546     private transient String query;             // null ==> undefined
 547 
 548     // The remaining fields may be computed on demand, which is safe even in
 549     // the face of multiple threads racing to initialize them
 550     private transient String schemeSpecificPart;
 551     private transient int hash;        // Zero ==> undefined
 552     // Per their names, decoded forms of the components above; also computed on demand
 553     private transient String decodedUserInfo;
 554     private transient String decodedAuthority;
 555     private transient String decodedPath;
 556     private transient String decodedQuery;
 557     private transient String decodedFragment;
 558     private transient String decodedSchemeSpecificPart;
 559 
 560     /**
 561      * The string form of this URI.
 562      *
 563      * @serial
 564      */
 565     private volatile String string;             // The only serializable field
 566 
 567 
 568 
 569     // -- Constructors and factories --
 570 
 571     private URI() { }                           // Used internally; sets no components
 572 
 573     /**
 574      * Constructs a URI by parsing the given string.
 575      *
 576      * <p> This constructor parses the given string exactly as specified by the
 577      * grammar in <a
 578      * href="http://www.ietf.org/rfc/rfc2396.txt">RFC&nbsp;2396</a>,
 579      * Appendix&nbsp;A, <b><i>except for the following deviations:</i></b> </p>
 580      *
 581      * <ul>
 582      *
 583      *   <li><p> An empty authority component is permitted as long as it is
 584      *   followed by a non-empty path, a query component, or a fragment
 585      *   component.  This allows the parsing of URIs such as
 586      *   {@code "file:///foo/bar"}, which seems to be the intent of
 587      *   RFC&nbsp;2396 although the grammar does not permit it.  If the
 588      *   authority component is empty then the user-information, host, and port
 589      *   components are undefined. </p></li>
 590      *
 591      *   <li><p> Empty relative paths are permitted; this seems to be the
 592      *   intent of RFC&nbsp;2396 although the grammar does not permit it.  The
 593      *   primary consequence of this deviation is that a standalone fragment
 594      *   such as {@code "#foo"} parses as a relative URI with an empty path
 595      *   and the given fragment, and can be usefully <a
 596      *   href="#resolve-frag">resolved</a> against a base URI.
 597      *
 598      *   <li><p> IPv4 addresses in host components are parsed rigorously, as
 599      *   specified by <a
 600      *   href="http://www.ietf.org/rfc/rfc2732.txt">RFC&nbsp;2732</a>: Each
 601      *   element of a dotted-quad address must contain no more than three
 602      *   decimal digits.  Each element is further constrained to have a value
 603      *   no greater than 255. </p></li>
 604      *
 605      *   <li> <p> Hostnames in host components that comprise only a single
 606      *   domain label are permitted to start with an <i>alphanum</i>
 607      *   character. This seems to be the intent of <a
 608      *   href="http://www.ietf.org/rfc/rfc2396.txt">RFC&nbsp;2396</a>
 609      *   section&nbsp;3.2.2 although the grammar does not permit it. The
 610      *   consequence of this deviation is that the authority component of a
 611      *   hierarchical URI such as {@code s://123}, will parse as a server-based
 612      *   authority. </p></li>
 613      *
 614      *   <li><p> IPv6 addresses are permitted for the host component.  An IPv6
 615      *   address must be enclosed in square brackets ({@code '['} and
 616      *   {@code ']'}) as specified by <a
 617      *   href="http://www.ietf.org/rfc/rfc2732.txt">RFC&nbsp;2732</a>.  The
 618      *   IPv6 address itself must parse according to <a
 619      *   href="http://www.ietf.org/rfc/rfc2373.txt">RFC&nbsp;2373</a>.  IPv6
 620      *   addresses are further constrained to describe no more than sixteen
 621      *   bytes of address information, a constraint implicit in RFC&nbsp;2373
 622      *   but not expressible in the grammar. </p></li>
 623      *
 624      *   <li><p> Characters in the <i>other</i> category are permitted wherever
 625      *   RFC&nbsp;2396 permits <i>escaped</i> octets, that is, in the
 626      *   user-information, path, query, and fragment components, as well as in
 627      *   the authority component if the authority is registry-based.  This
 628      *   allows URIs to contain Unicode characters beyond those in the US-ASCII
 629      *   character set. </p></li>
 630      *
 631      * </ul>
 632      *
 633      * @param  str   The string to be parsed into a URI
 634      *
 635      * @throws  NullPointerException
 636      *          If {@code str} is {@code null}
 637      *
 638      * @throws  URISyntaxException
 639      *          If the given string violates RFC&nbsp;2396, as augmented
 640      *          by the above deviations
 641      * @spec https://www.rfc-editor.org/info/rfc2373
 642      *      RFC 2373: IP Version 6 Addressing Architecture
 643      * @spec https://www.rfc-editor.org/info/rfc2396
 644      *      RFC 2396: Uniform Resource Identifiers (URI): Generic Syntax
 645      * @spec https://www.rfc-editor.org/info/rfc2732
 646      *      RFC 2732: Format for Literal IPv6 Addresses in URL's
 647      */
 648     public URI(String str) throws URISyntaxException {
 649         new Parser(str).parse(false); // false: presumably "authority need not be server-based" (cf. parse(true) elsewhere)
 650     }
 651 
 652     /**
 653      * Constructs a hierarchical URI from the given components.
 654      *
 655      * <p> If a scheme is given then the path, if also given, must either be
 656      * empty or begin with a slash character ({@code '/'}).  Otherwise a
 657      * component of the new URI may be left undefined by passing {@code null}
 658      * for the corresponding parameter or, in the case of the {@code port}
 659      * parameter, by passing {@code -1}.
 660      *
 661      * <p> This constructor first builds a URI string from the given components
 662      * according to the rules specified in <a
 663      * href="http://www.ietf.org/rfc/rfc2396.txt">RFC&nbsp;2396</a>,
 664      * section&nbsp;5.2, step&nbsp;7: </p>
 665      *
 666      * <ol>
 667      *
 668      *   <li><p> Initially, the result string is empty. </p></li>
 669      *
 670      *   <li><p> If a scheme is given then it is appended to the result,
 671      *   followed by a colon character ({@code ':'}).  </p></li>
 672      *
 673      *   <li><p> If user information, a host, or a port are given then the
 674      *   string {@code "//"} is appended.  </p></li>
 675      *
 676      *   <li><p> If user information is given then it is appended, followed by
 677      *   a commercial-at character ({@code '@'}).  Any character not in the
 678      *   <i>unreserved</i>, <i>punct</i>, <i>escaped</i>, or <i>other</i>
 679      *   categories is <a href="#quote">quoted</a>.  </p></li>
 680      *
 681      *   <li><p> If a host is given then it is appended.  If the host is a
 682      *   literal IPv6 address but is not enclosed in square brackets
 683      *   ({@code '['} and {@code ']'}) then the square brackets are added.
 684      *   </p></li>
 685      *
 686      *   <li><p> If a port number is given then a colon character
 687      *   ({@code ':'}) is appended, followed by the port number in decimal.
 688      *   </p></li>
 689      *
 690      *   <li><p> If a path is given then it is appended.  Any character not in
 691      *   the <i>unreserved</i>, <i>punct</i>, <i>escaped</i>, or <i>other</i>
 692      *   categories, and not equal to the slash character ({@code '/'}) or the
 693      *   commercial-at character ({@code '@'}), is quoted.  </p></li>
 694      *
 695      *   <li><p> If a query is given then a question-mark character
 696      *   ({@code '?'}) is appended, followed by the query.  Any character that
 697      *   is not a <a href="#legal-chars">legal URI character</a> is quoted.
 698      *   </p></li>
 699      *
 700      *   <li><p> Finally, if a fragment is given then a hash character
 701      *   ({@code '#'}) is appended, followed by the fragment.  Any character
 702      *   that is not a legal URI character is quoted.  </p></li>
 703      *
 704      * </ol>
 705      *
 706      * <p> The resulting URI string is then parsed as if by invoking the {@link
 707      * #URI(String)} constructor and then invoking the {@link
 708      * #parseServerAuthority()} method upon the result; this may cause a {@link
 709      * URISyntaxException} to be thrown.  </p>
 710      *
 711      * @param   scheme    Scheme name
 712      * @param   userInfo  User name and authorization information
 713      * @param   host      Host name
 714      * @param   port      Port number
 715      * @param   path      Path
 716      * @param   query     Query
 717      * @param   fragment  Fragment
 718      *
 719      * @throws URISyntaxException
 720      *         If both a scheme and a path are given but the path is relative,
 721      *         if the URI string constructed from the given components violates
 722      *         RFC&nbsp;2396, or if the authority component of the string is
 723      *         present but cannot be parsed as a server-based authority
 724      * @spec https://www.rfc-editor.org/info/rfc2396
 725      *      RFC 2396: Uniform Resource Identifiers (URI): Generic Syntax
 726      */
 727     public URI(String scheme,
 728                String userInfo, String host, int port,
 729                String path, String query, String fragment)
 730         throws URISyntaxException
 731     {
 732         String s = toString(scheme, null,
 733                             null, userInfo, host, port,
 734                             path, query, fragment);
 735         checkPath(s, scheme, path);
 736         new Parser(s).parse(true);
 737     }
 738 
 739     /**
 740      * Constructs a hierarchical URI from the given components.
 741      *
 742      * <p> If a scheme is given then the path, if also given, must either be
 743      * empty or begin with a slash character ({@code '/'}).  Otherwise a
 744      * component of the new URI may be left undefined by passing {@code null}
 745      * for the corresponding parameter.
 746      *
 747      * <p> This constructor first builds a URI string from the given components
 748      * according to the rules specified in <a
 749      * href="http://www.ietf.org/rfc/rfc2396.txt">RFC&nbsp;2396</a>,
 750      * section&nbsp;5.2, step&nbsp;7: </p>
 751      *
 752      * <ol>
 753      *
 754      *   <li><p> Initially, the result string is empty.  </p></li>
 755      *
 756      *   <li><p> If a scheme is given then it is appended to the result,
 757      *   followed by a colon character ({@code ':'}).  </p></li>
 758      *
 759      *   <li><p> If an authority is given then the string {@code "//"} is
 760      *   appended, followed by the authority.  If the authority contains a
 761      *   literal IPv6 address then the address must be enclosed in square
 762      *   brackets ({@code '['} and {@code ']'}).  Any character not in the
 763      *   <i>unreserved</i>, <i>punct</i>, <i>escaped</i>, or <i>other</i>
 764      *   categories, and not equal to the commercial-at character
 765      *   ({@code '@'}), is <a href="#quote">quoted</a>.  </p></li>
 766      *
 767      *   <li><p> If a path is given then it is appended.  Any character not in
 768      *   the <i>unreserved</i>, <i>punct</i>, <i>escaped</i>, or <i>other</i>
 769      *   categories, and not equal to the slash character ({@code '/'}) or the
 770      *   commercial-at character ({@code '@'}), is quoted.  </p></li>
 771      *
 772      *   <li><p> If a query is given then a question-mark character
 773      *   ({@code '?'}) is appended, followed by the query.  Any character that
 774      *   is not a <a href="#legal-chars">legal URI character</a> is quoted.
 775      *   </p></li>
 776      *
 777      *   <li><p> Finally, if a fragment is given then a hash character
 778      *   ({@code '#'}) is appended, followed by the fragment.  Any character
 779      *   that is not a legal URI character is quoted.  </p></li>
 780      *
 781      * </ol>
 782      *
 783      * <p> The resulting URI string is then parsed as if by invoking the {@link
 784      * #URI(String)} constructor and then invoking the {@link
 785      * #parseServerAuthority()} method upon the result; this may cause a {@link
 786      * URISyntaxException} to be thrown.  </p>
 787      *
 788      * @param   scheme     Scheme name
 789      * @param   authority  Authority
 790      * @param   path       Path
 791      * @param   query      Query
 792      * @param   fragment   Fragment
 793      *
 794      * @throws URISyntaxException
 795      *         If both a scheme and a path are given but the path is relative,
 796      *         if the URI string constructed from the given components violates
 797      *         RFC&nbsp;2396, or if the authority component of the string is
 798      *         present but cannot be parsed as a server-based authority
 799      * @spec https://www.rfc-editor.org/info/rfc2396
 800      *      RFC 2396: Uniform Resource Identifiers (URI): Generic Syntax
 801      */
 802     public URI(String scheme,
 803                String authority,
 804                String path, String query, String fragment)
 805         throws URISyntaxException
 806     {
 807         String s = toString(scheme, null,
 808                             authority, null, null, -1,
 809                             path, query, fragment);
 810         checkPath(s, scheme, path);
 811         new Parser(s).parse(false);
 812     }
 813 
 814     /**
 815      * Constructs a hierarchical URI from the given components.
 816      *
 817      * <p> A component may be left undefined by passing {@code null}.
 818      *
 819      * <p> This convenience constructor works as if by invoking the
 820      * seven-argument constructor as follows:
 821      *
 822      * <blockquote>
 823      * {@code new} {@link #URI(String, String, String, int, String, String, String)
 824      * URI}{@code (scheme, null, host, -1, path, null, fragment);}
 825      * </blockquote>
 826      *
 827      * @param   scheme    Scheme name
 828      * @param   host      Host name
 829      * @param   path      Path
 830      * @param   fragment  Fragment
 831      *
 832      * @throws  URISyntaxException
 833      *          If the URI string constructed from the given components
 834      *          violates RFC&nbsp;2396
 835      */
 836     public URI(String scheme, String host, String path, String fragment)
 837         throws URISyntaxException
 838     {
 839         this(scheme, null, host, -1, path, null, fragment); // userInfo, port and query are left undefined
 840     }
 841 
 842     /**
 843      * Constructs a URI from the given components.
 844      *
 845      * <p> A component may be left undefined by passing {@code null}.
 846      *
 847      * <p> This constructor first builds a URI in string form using the given
 848      * components as follows:  </p>
 849      *
 850      * <ol>
 851      *
 852      *   <li><p> Initially, the result string is empty.  </p></li>
 853      *
 854      *   <li><p> If a scheme is given then it is appended to the result,
 855      *   followed by a colon character ({@code ':'}).  </p></li>
 856      *
 857      *   <li><p> If a scheme-specific part is given then it is appended.  Any
 858      *   character that is not a <a href="#legal-chars">legal URI character</a>
 859      *   is <a href="#quote">quoted</a>.  </p></li>
 860      *
 861      *   <li><p> Finally, if a fragment is given then a hash character
 862      *   ({@code '#'}) is appended to the string, followed by the fragment.
 863      *   Any character that is not a legal URI character is quoted.  </p></li>
 864      *
 865      * </ol>
 866      *
 867      * <p> The resulting URI string is then parsed in order to create the new
 868      * URI instance as if by invoking the {@link #URI(String)} constructor;
 869      * this may cause a {@link URISyntaxException} to be thrown.  </p>
 870      *
 871      * @param   scheme    Scheme name
 872      * @param   ssp       Scheme-specific part
 873      * @param   fragment  Fragment
 874      *
 875      * @throws  URISyntaxException
 876      *          If the URI string constructed from the given components
 877      *          violates RFC&nbsp;2396
 878      */
 879     public URI(String scheme, String ssp, String fragment)
 880         throws URISyntaxException
 881     {
 882         new Parser(toString(scheme, ssp,
 883                             null, null, null, -1,
 884                             null, null, fragment))
 885             .parse(false);
 886     }
 887 
 888     /**
 889      * Constructs a simple URI consisting of only a scheme and a pre-validated
 890      * path. Provides a fast-path for some internal cases.
 891      */
 892     URI(String scheme, String path) {
 893         assert validSchemeAndPath(scheme, path); // checked only when assertions are enabled
 894         this.scheme = scheme; // stored directly; this fast path does not run the Parser
 895         this.path = path;
 896     }
 897 
 898     private static boolean validSchemeAndPath(String scheme, String path) {
 899         try {
 900             URI u = new URI(scheme + ':' + path);
 901             return scheme.equals(u.scheme) && path.equals(u.path);
 902         } catch (URISyntaxException e) {
 903             return false;
 904         }
 905     }
 906 
 907     /**
 908      * Creates a URI by parsing the given string.
 909      *
 910      * <p> This convenience factory method works as if by invoking the {@link
 911      * #URI(String)} constructor; any {@link URISyntaxException} thrown by the
 912      * constructor is caught and wrapped in a new {@link
 913      * IllegalArgumentException} object, which is then thrown.
 914      *
 915      * <p> This method is provided for use in situations where it is known that
 916      * the given string is a legal URI, for example for URI constants declared
 917      * within a program, and so it would be considered a programming error
 918      * for the string not to parse as such.  The constructors, which throw
 919      * {@link URISyntaxException} directly, should be used in situations where a
 920      * URI is being constructed from user input or from some other source that
 921      * may be prone to errors.  </p>
 922      *
 923      * @param  str   The string to be parsed into a URI
 924      * @return The new URI
 925      *
 926      * @throws  NullPointerException
 927      *          If {@code str} is {@code null}
 928      *
 929      * @throws  IllegalArgumentException
 930      *          If the given string violates RFC&nbsp;2396
 931      */
 932     public static URI create(String str) {
 933         try {
 934             return new URI(str);
 935         } catch (URISyntaxException x) {
 936             throw new IllegalArgumentException(x.getMessage(), x); // unchecked wrapper: same message, x kept as cause
 937         }
 938     }
 939 
 940 
 941     // -- Operations --
 942 
 943     /**
 944      * Attempts to parse this URI's authority component, if defined, into
 945      * user-information, host, and port components.
 946      *
 947      * <p> If this URI's authority component has already been recognized as
 948      * being server-based then it will already have been parsed into
 949      * user-information, host, and port components.  In this case, or if this
 950      * URI has no authority component, this method simply returns this URI.
 951      *
 952      * <p> Otherwise this method attempts once more to parse the authority
 953      * component into user-information, host, and port components, and throws
 954      * an exception describing why the authority component could not be parsed
 955      * in that way.
 956      *
 957      * <p> This method is provided because the generic URI syntax specified in
 958      * <a href="http://www.ietf.org/rfc/rfc2396.txt">RFC&nbsp;2396</a>
 959      * cannot always distinguish a malformed server-based authority from a
 960      * legitimate registry-based authority.  It must therefore treat some
 961      * instances of the former as instances of the latter.  The authority
 962      * component in the URI string {@code "//foo:bar"}, for example, is not a
 963      * legal server-based authority but it is legal as a registry-based
 964      * authority.
 965      *
 966      * <p> In many common situations, for example when working with URIs that
 967      * are known to be either URNs or URLs, the hierarchical URIs being used
 968      * will always be server-based.  They therefore must either be parsed as
 969      * such or treated as an error.  In these cases a statement such as
 970      *
 971      * <blockquote>
 972      * {@code URI }<i>u</i>{@code  = new URI(str).parseServerAuthority();}
 973      * </blockquote>
 974      *
 975      * <p> can be used to ensure that <i>u</i> always refers to a URI that, if
 976      * it has an authority component, has a server-based authority with proper
 977      * user-information, host, and port components.  Invoking this method also
 978      * ensures that if the authority could not be parsed in that way then an
 979      * appropriate diagnostic message can be issued based upon the exception
 980      * that is thrown. </p>
 981      *
 982      * @return  A URI whose authority field has been parsed
 983      *          as a server-based authority
 984      *
 985      * @throws  URISyntaxException
 986      *          If the authority component of this URI is defined
 987      *          but cannot be parsed as a server-based authority
 988      *          according to RFC&nbsp;2396
 989      *
 990      * @spec https://www.rfc-editor.org/info/rfc2396
 991      *      RFC 2396: Uniform Resource Identifiers (URI): Generic Syntax
 992      */
 993     public URI parseServerAuthority()
 994         throws URISyntaxException
 995     {
 996         // Deliberately re-parses: caching the message and index of the exception
 997         // from the original parse would need more fields or a more obscure form.
 998         final boolean strictParseNeeded = (host == null) && (authority != null);
 999         if (strictParseNeeded) { // authority present but no host was recognized
1000             new Parser(toString()).parse(true);
1001         }
1002         return this;
1003     }
1004 
1005     /**
1006      * Normalizes this URI's path.
1007      *
1008      * <p> If this URI is opaque, or if its path is already in normal form,
1009      * then this URI is returned.  Otherwise a new URI is constructed that is
1010      * identical to this URI except that its path is computed by normalizing
1011      * this URI's path in a manner consistent with <a
1012      * href="http://www.ietf.org/rfc/rfc2396.txt">RFC&nbsp;2396</a>,
1013      * section&nbsp;5.2, step&nbsp;6, sub-steps&nbsp;c through&nbsp;f; that is:
1014      * </p>
1015      *
1016      * <ol>
1017      *
1018      *   <li><p> All {@code "."} segments are removed. </p></li>
1019      *
1020      *   <li><p> If a {@code ".."} segment is preceded by a non-{@code ".."}
1021      *   segment then both of these segments are removed.  This step is
1022      *   repeated until it is no longer applicable. </p></li>
1023      *
1024      *   <li><p> If the path is relative, and if its first segment contains a
1025      *   colon character ({@code ':'}), then a {@code "."} segment is
1026      *   prepended.  This prevents a relative URI with a path such as
1027      *   {@code "a:b/c/d"} from later being re-parsed as an opaque URI with a
1028      *   scheme of {@code "a"} and a scheme-specific part of {@code "b/c/d"}.
1029      *   <b><i>(Deviation from RFC&nbsp;2396)</i></b> </p></li>
1030      *
1031      * </ol>
1032      *
1033      * <p> A normalized path will begin with one or more {@code ".."} segments
1034      * if there were insufficient non-{@code ".."} segments preceding them to
1035      * allow their removal.  A normalized path will begin with a {@code "."}
1036      * segment if one was inserted by step 3 above.  Otherwise, a normalized
1037      * path will not contain any {@code "."} or {@code ".."} segments. </p>
1038      *
1039      * @return  A URI equivalent to this URI,
1040      *          but whose path is in normal form
1041      * @spec https://www.rfc-editor.org/info/rfc2396
1042      *      RFC 2396: Uniform Resource Identifiers (URI): Generic Syntax
1043      */
1044     public URI normalize() {
1045         return normalize(this); // the work is done by the one-argument normalize overload
1046     }
1047 
1048     /**
1049      * Resolves the given URI against this URI.
1050      *
1051      * <p> If the given URI is already absolute, or if this URI is opaque, then
1052      * the given URI is returned.
1053      *
1054      * <p><a id="resolve-frag"></a> If the given URI's fragment component is
1055      * defined, its path component is empty, and its scheme, authority, and
1056      * query components are undefined, then a URI with the given fragment but
1057      * with all other components equal to those of this URI is returned.  This
1058      * allows a URI representing a standalone fragment reference, such as
1059      * {@code "#foo"}, to be usefully resolved against a base URI.
1060      *
1061      * <p> Otherwise this method constructs a new hierarchical URI in a manner
1062      * consistent with <a
1063      * href="http://www.ietf.org/rfc/rfc2396.txt">RFC&nbsp;2396</a>,
1064      * section&nbsp;5.2; that is: </p>
1065      *
1066      * <ol>
1067      *
1068      *   <li><p> A new URI is constructed with this URI's scheme and the given
1069      *   URI's query and fragment components. </p></li>
1070      *
1071      *   <li><p> If the given URI has an authority component then the new URI's
1072      *   authority and path are taken from the given URI. </p></li>
1073      *
1074      *   <li><p> Otherwise the new URI's authority component is copied from
1075      *   this URI, and its path is computed as follows: </p>
1076      *
1077      *   <ol>
1078      *
1079      *     <li><p> If the given URI's path is absolute then the new URI's path
1080      *     is taken from the given URI. </p></li>
1081      *
1082      *     <li><p> Otherwise the given URI's path is relative, and so the new
1083      *     URI's path is computed by resolving the path of the given URI
1084      *     against the path of this URI.  This is done by concatenating all but
1085      *     the last segment of this URI's path, if any, with the given URI's
1086      *     path and then normalizing the result as if by invoking the {@link
1087      *     #normalize() normalize} method. </p></li>
1088      *
1089      *   </ol></li>
1090      *
1091      * </ol>
1092      *
1093      * <p> The result of this method is absolute if, and only if, either this
1094      * URI is absolute or the given URI is absolute.  </p>
1095      *
1096      * @param  uri  The URI to be resolved against this URI
1097      * @return The resulting URI
1098      *
1099      * @throws  NullPointerException
1100      *          If {@code uri} is {@code null}
1101      * @spec https://www.rfc-editor.org/info/rfc2396
1102      *      RFC 2396: Uniform Resource Identifiers (URI): Generic Syntax
1103      */
1104     public URI resolve(URI uri) {
1105         return resolve(this, uri); // this URI serves as the base, uri as the reference to resolve
1106     }
1107 
1108     /**
1109      * Constructs a new URI by parsing the given string and then resolving it
1110      * against this URI.
1111      *
1112      * <p> This convenience method works as if invoking it were equivalent to
1113      * evaluating the expression {@link #resolve(java.net.URI)
1114      * resolve}{@code (URI.}{@link #create(String) create}{@code (str))}. </p>
1115      *
1116      * @param  str   The string to be parsed into a URI
1117      * @return The resulting URI
1118      *
1119      * @throws  NullPointerException
1120      *          If {@code str} is {@code null}
1121      *
1122      * @throws  IllegalArgumentException
1123      *          If the given string violates RFC&nbsp;2396
1124      */
1125     public URI resolve(String str) {
1126         return resolve(URI.create(str)); // create() reports bad syntax as IllegalArgumentException
1127     }
1128 
1129     /**
1130      * Relativizes the given URI against this URI.
1131      *
1132      * <p> The relativization of the given URI against this URI is computed as
1133      * follows: </p>
1134      *
1135      * <ol>
1136      *
1137      *   <li><p> If either this URI or the given URI is opaque, or if the
1138      *   scheme and authority components of the two URIs are not identical, or
1139      *   if the path of this URI is not a prefix of the path of the given URI,
1140      *   then the given URI is returned. </p></li>
1141      *
1142      *   <li><p> Otherwise a new relative hierarchical URI is constructed with
1143      *   query and fragment components taken from the given URI and with a path
1144      *   component computed by removing this URI's path from the beginning of
1145      *   the given URI's path. </p></li>
1146      *
1147      * </ol>
1148      *
1149      * @param  uri  The URI to be relativized against this URI
1150      * @return The resulting URI
1151      *
1152      * @throws  NullPointerException
1153      *          If {@code uri} is {@code null}
1154      */
1155     public URI relativize(URI uri) {
1156         return relativize(this, uri); // this URI is the base that uri is made relative to
1157     }
1158 
1159     /**
1160      * Constructs a URL from this URI.
1161      *
1162      * <p> This convenience method works as if invoking it were equivalent to
1163      * evaluating the expression {@code new URL(this.toString())} after
1164      * first checking that this URI is absolute. </p>
1165      *
1166      * @return  A URL constructed from this URI
1167      *
1168      * @throws  IllegalArgumentException
1169      *          If this URI is not absolute
1170      *
1171      * @throws  MalformedURLException
1172      *          If a protocol handler for the URL could not be found,
1173      *          or if some other error occurred while constructing the URL
1174      */
1175     public URL toURL() throws MalformedURLException {
1176         return URL.of(this, null); // null: presumably "no explicit stream handler"; verify against URL.of
1177     }
1178 
1179     // -- Component access methods --
1180 
1181     /**
1182      * Returns the scheme component of this URI.
1183      *
1184      * <p> The scheme component of a URI, if defined, only contains characters
1185      * in the <i>alphanum</i> category and in the string {@code "-.+"}.  A
1186      * scheme always starts with an <i>alpha</i> character. </p>
1187      *
1188      * <p> The scheme component of a URI cannot contain escaped octets, hence
1189      * this method does not perform any decoding. </p>
1190      *
1191      * @return  The scheme component of this URI,
1192      *          or {@code null} if the scheme is undefined
1193      */
1194     public String getScheme() {
1195         return scheme; // returned as stored, no decoding; null when no scheme is defined
1196     }
1197 
1198     /**
1199      * Tells whether or not this URI is absolute.
1200      *
1201      * <p> A URI is absolute if, and only if, it has a scheme component. </p>
1202      *
1203      * @return  {@code true} if, and only if, this URI is absolute
1204      */
1205     public boolean isAbsolute() {
1206         return scheme != null; // absolute simply means "has a scheme"
1207     }
1208 
1209     /**
1210      * Tells whether or not this URI is opaque.
1211      *
1212      * <p> A URI is opaque if, and only if, it is absolute and its
1213      * scheme-specific part does not begin with a slash character ('/').
1214      * An opaque URI has a scheme, a scheme-specific part, and possibly
1215      * a fragment; all other components are undefined. </p>
1216      *
1217      * @return  {@code true} if, and only if, this URI is opaque
1218      */
1219     public boolean isOpaque() {
1220         return path == null; // an undefined (null) path is what marks a URI as opaque here
1221     }
1222 
1223     /**
1224      * Returns the raw scheme-specific part of this URI.  The scheme-specific
1225      * part is never undefined, though it may be empty.
1226      *
1227      * <p> The scheme-specific part of a URI only contains legal URI
1228      * characters. </p>
1229      *
1230      * @return  The raw scheme-specific part of this URI
1231      *          (never {@code null})
1232      */
1233     public String getRawSchemeSpecificPart() {
1234         final String cached = schemeSpecificPart;
1235         if (cached != null) {
1236             return cached;
1237         }
1238 
1239         final String whole = string;
1240         final String computed;
1241         if (whole == null) {
1242             // No string form is available: assemble the part from the
1243             // individual components instead.
1244             StringBuilder buf = new StringBuilder();
1245             appendSchemeSpecificPart(buf, null, getAuthority(), getUserInfo(),
1246                                  host, port, getPath(), getQuery());
1247             computed = buf.toString();
1248         } else {
1249             // if string is defined, components will have been parsed
1250             // Drop "scheme:" from the front and "#fragment" from the back.
1251             int from = (scheme != null) ? scheme.length() + 1 : 0;
1252             int to = whole.length();
1253             if (fragment != null) {
1254                 to -= fragment.length() + 1;
1255             }
1256             // When the path is exactly as long as the remaining range, hand
1257             // back the path itself rather than taking a substring.
1258             boolean pathOnly = (path != null) && (path.length() == to - from);
1259             computed = pathOnly ? path : whole.substring(from, to);
1260         }
1261         return schemeSpecificPart = computed; // remember the result for later calls
1262     }
1263 
1264     /**
1265      * Returns the decoded scheme-specific part of this URI.
1266      *
1267      * <p> The string returned by this method is equal to that returned by the
1268      * {@link #getRawSchemeSpecificPart() getRawSchemeSpecificPart} method
1269      * except that all sequences of escaped octets are <a
1270      * href="#decode">decoded</a>.  </p>
1271      *
1272      * @return  The decoded scheme-specific part of this URI
1273      *          (never {@code null})
1274      */
1275     public String getSchemeSpecificPart() {
1276         final String cached = decodedSchemeSpecificPart;
1277         if (cached != null) {
1278             return cached; // decoded on an earlier call
1279         }
1280         return decodedSchemeSpecificPart = decode(getRawSchemeSpecificPart());
1281     }
1282 
1283     /**
1284      * Returns the raw authority component of this URI.
1285      *
1286      * <p> The authority component of a URI, if defined, only contains the
1287      * commercial-at character ({@code '@'}) and characters in the
1288      * <i>unreserved</i>, <i>punct</i>, <i>escaped</i>, and <i>other</i>
1289      * categories.  If the authority is server-based then it is further
1290      * constrained to have valid user-information, host, and port
1291      * components. </p>
1292      *
1293      * @return  The raw authority component of this URI,
1294      *          or {@code null} if the authority is undefined
1295      */
1296     public String getRawAuthority() {
1297         return authority; // raw form, escapes untouched; getAuthority() is the decoded variant
1298     }
1299 
1300     /**
1301      * Returns the decoded authority component of this URI.
1302      *
1303      * <p> The string returned by this method is equal to that returned by the
1304      * {@link #getRawAuthority() getRawAuthority} method except that all
1305      * sequences of escaped octets are <a href="#decode">decoded</a>.  </p>
1306      *
1307      * @return  The decoded authority component of this URI,
1308      *          or {@code null} if the authority is undefined
1309      */
1310     public String getAuthority() {
1311         final String cached = decodedAuthority;
1312         if ((cached != null) || (authority == null)) {
1313             return cached; // decoded earlier, or null because there is no authority
1314         }
1315         return decodedAuthority = decode(authority);
1316     }
1317 
1318     /**
1319      * Returns the raw user-information component of this URI.
1320      *
1321      * <p> The user-information component of a URI, if defined, only contains
1322      * characters in the <i>unreserved</i>, <i>punct</i>, <i>escaped</i>, and
1323      * <i>other</i> categories. </p>
1324      *
1325      * @return  The raw user-information component of this URI,
1326      *          or {@code null} if the user information is undefined
1327      */
1328     public String getRawUserInfo() {
1329         return userInfo; // raw form, escapes untouched; getUserInfo() is the decoded variant
1330     }
1331 
1332     /**
1333      * Returns the decoded user-information component of this URI.
1334      *
1335      * <p> The string returned by this method is equal to that returned by the
1336      * {@link #getRawUserInfo() getRawUserInfo} method except that all
1337      * sequences of escaped octets are <a href="#decode">decoded</a>.  </p>
1338      *
1339      * @return  The decoded user-information component of this URI,
1340      *          or {@code null} if the user information is undefined
1341      */
1342     public String getUserInfo() {
1343         String user = decodedUserInfo;
1344         if ((user == null) && (userInfo != null)) {
1345             decodedUserInfo = user = decode(userInfo);
1346         }
1347         return user;
1348     }
1349 
1350     /**
1351      * Returns the host component of this URI.
1352      *
1353      * <p> The host component of a URI, if defined, will have one of the
1354      * following forms: </p>
1355      *
1356      * <ul>
1357      *
1358      *   <li><p> A domain name consisting of one or more <i>labels</i>
1359      *   separated by period characters ({@code '.'}), optionally followed by
1360      *   a period character.  Each label consists of <i>alphanum</i> characters
1361      *   as well as hyphen characters ({@code '-'}), though hyphens never
1362      *   occur as the first or last characters in a label. The rightmost
1363      *   label of a domain name consisting of two or more labels, begins
1364      *   with an <i>alpha</i> character. </li>
1365      *
1366      *   <li><p> A dotted-quad IPv4 address of the form
1367      *   <i>digit</i>{@code +.}<i>digit</i>{@code +.}<i>digit</i>{@code +.}<i>digit</i>{@code +},
1368      *   where no <i>digit</i> sequence is longer than three characters and no
1369      *   sequence has a value larger than 255. </p></li>
1370      *
1371      *   <li><p> An IPv6 address enclosed in square brackets ({@code '['} and
1372      *   {@code ']'}) and consisting of hexadecimal digits, colon characters
1373      *   ({@code ':'}), and possibly an embedded IPv4 address.  The full
1374      *   syntax of IPv6 addresses is specified in <a
1375      *   href="http://www.ietf.org/rfc/rfc2373.txt"><i>RFC&nbsp;2373: IPv6
1376      *   Addressing Architecture</i></a>.  </p></li>
1377      *
1378      * </ul>
1379      *
1380      * The host component of a URI cannot contain escaped octets, hence this
1381      * method does not perform any decoding.
1382      *
1383      * @return  The host component of this URI,
1384      *          or {@code null} if the host is undefined
1385      * @spec https://www.rfc-editor.org/info/rfc2373
1386      *      RFC 2373: IP Version 6 Addressing Architecture
1387      */
1388     public String getHost() {
1389         return host;
1390     }
1391 
1392     /**
1393      * Returns the port number of this URI.
1394      *
1395      * <p> The port component of a URI, if defined, is a non-negative
1396      * integer. </p>
1397      *
1398      * @return  The port component of this URI,
1399      *          or {@code -1} if the port is undefined
1400      */
1401     public int getPort() {
1402         return port;
1403     }
1404 
1405     /**
1406      * Returns the raw path component of this URI.
1407      *
1408      * <p> The path component of a URI, if defined, only contains the slash
1409      * character ({@code '/'}), the commercial-at character ({@code '@'}),
1410      * and characters in the <i>unreserved</i>, <i>punct</i>, <i>escaped</i>,
1411      * and <i>other</i> categories. </p>
1412      *
1413      * @return  The path component of this URI,
1414      *          or {@code null} if the path is undefined
1415      */
1416     public String getRawPath() {
1417         return path;
1418     }
1419 
1420     /**
1421      * Returns the decoded path component of this URI.
1422      *
1423      * <p> The string returned by this method is equal to that returned by the
1424      * {@link #getRawPath() getRawPath} method except that all sequences of
1425      * escaped octets are <a href="#decode">decoded</a>.  </p>
1426      *
1427      * @return  The decoded path component of this URI,
1428      *          or {@code null} if the path is undefined
1429      */
1430     public String getPath() {
1431         String decoded = decodedPath;
1432         if ((decoded == null) && (path != null)) {
1433             decodedPath = decoded = decode(path);
1434         }
1435         return decoded;
1436     }
1437 
1438     /**
1439      * Returns the raw query component of this URI.
1440      *
1441      * <p> The query component of a URI, if defined, only contains legal URI
1442      * characters. </p>
1443      *
1444      * @return  The raw query component of this URI,
1445      *          or {@code null} if the query is undefined
1446      */
1447     public String getRawQuery() {
1448         return query;
1449     }
1450 
1451     /**
1452      * Returns the decoded query component of this URI.
1453      *
1454      * <p> The string returned by this method is equal to that returned by the
1455      * {@link #getRawQuery() getRawQuery} method except that all sequences of
1456      * escaped octets are <a href="#decode">decoded</a>.  </p>
1457      *
1458      * @return  The decoded query component of this URI,
1459      *          or {@code null} if the query is undefined
1460      */
1461     public String getQuery() {
1462         String decoded = decodedQuery;
1463         if ((decoded == null) && (query != null)) {
1464             decodedQuery = decoded = decode(query, false);
1465         }
1466         return decoded;
1467     }
1468 
1469     /**
1470      * Returns the raw fragment component of this URI.
1471      *
1472      * <p> The fragment component of a URI, if defined, only contains legal URI
1473      * characters. </p>
1474      *
1475      * @return  The raw fragment component of this URI,
1476      *          or {@code null} if the fragment is undefined
1477      */
1478     public String getRawFragment() {
1479         return fragment;
1480     }
1481 
1482     /**
1483      * Returns the decoded fragment component of this URI.
1484      *
1485      * <p> The string returned by this method is equal to that returned by the
1486      * {@link #getRawFragment() getRawFragment} method except that all
1487      * sequences of escaped octets are <a href="#decode">decoded</a>.  </p>
1488      *
1489      * @return  The decoded fragment component of this URI,
1490      *          or {@code null} if the fragment is undefined
1491      */
1492     public String getFragment() {
1493         String decoded = decodedFragment;
1494         if ((decoded == null) && (fragment != null)) {
1495             decodedFragment = decoded = decode(fragment, false);
1496         }
1497         return decoded;
1498     }
1499 
1500 
1501     // -- Equality, comparison, hash code, toString, and serialization --
1502 
1503     /**
1504      * Tests this URI for equality with another object.
1505      *
1506      * <p> If the given object is not a URI then this method immediately
1507      * returns {@code false}.
1508      *
1509      * <p> For two URIs to be considered equal requires that either both are
1510      * opaque or both are hierarchical.  Their schemes must either both be
1511      * undefined or else be equal without regard to case. Their fragments
1512      * must either both be undefined or else be equal.
1513      *
1514      * <p> For two opaque URIs to be considered equal, their scheme-specific
1515      * parts must be equal.
1516      *
1517      * <p> For two hierarchical URIs to be considered equal, their paths must
1518      * be equal and their queries must either both be undefined or else be
1519      * equal.  Their authorities must either both be undefined, or both be
1520      * registry-based, or both be server-based.  If their authorities are
1521      * defined and are registry-based, then they must be equal.  If their
1522      * authorities are defined and are server-based, then their hosts must be
1523      * equal without regard to case, their port numbers must be equal, and
1524      * their user-information components must be equal.
1525      *
1526      * <p> When testing the user-information, path, query, fragment, authority,
1527      * or scheme-specific parts of two URIs for equality, the raw forms rather
1528      * than the encoded forms of these components are compared and the
1529      * hexadecimal digits of escaped octets are compared without regard to
1530      * case.
1531      *
1532      * <p> This method satisfies the general contract of the {@link
1533      * java.lang.Object#equals(Object) Object.equals} method. </p>
1534      *
1535      * @param   ob   The object to which this object is to be compared
1536      *
1537      * @return  {@code true} if, and only if, the given object is a URI that
1538      *          is identical to this URI
1539      */
1540     public boolean equals(Object ob) {
1541         if (ob == this)
1542             return true;
1543         if (!(ob instanceof URI that))
1544             return false;
1545         if (this.isOpaque() != that.isOpaque()) return false;
1546         if (!equalIgnoringCase(this.scheme, that.scheme)) return false;
1547         if (!equal(this.fragment, that.fragment)) return false;
1548 
1549         // Opaque
1550         if (this.isOpaque())
1551             return equal(this.schemeSpecificPart, that.schemeSpecificPart);
1552 
1553         // Hierarchical
1554         if (!equal(this.path, that.path)) return false;
1555         if (!equal(this.query, that.query)) return false;
1556 
1557         // Authorities
1558         if (this.authority == that.authority) return true;
1559         if (this.host != null) {
1560             // Server-based
1561             if (!equal(this.userInfo, that.userInfo)) return false;
1562             if (!equalIgnoringCase(this.host, that.host)) return false;
1563             if (this.port != that.port) return false;
1564         } else if (this.authority != null) {
1565             // Registry-based
1566             if (!equal(this.authority, that.authority)) return false;
1567         } else if (this.authority != that.authority) {
1568             return false;
1569         }
1570 
1571         return true;
1572     }
1573 
1574     /**
1575      * Returns a hash-code value for this URI.  The hash code is based upon all
1576      * of the URI's components, and satisfies the general contract of the
1577      * {@link java.lang.Object#hashCode() Object.hashCode} method.
1578      *
1579      * @return  A hash-code value for this URI
1580      */
1581     public int hashCode() {
1582         int h = hash;
1583         if (h == 0) {
1584             h = hashIgnoringCase(0, scheme);
1585             h = hash(h, fragment);
1586             if (isOpaque()) {
1587                 h = hash(h, schemeSpecificPart);
1588             } else {
1589                 h = hash(h, path);
1590                 h = hash(h, query);
1591                 if (host != null) {
1592                     h = hash(h, userInfo);
1593                     h = hashIgnoringCase(h, host);
1594                     h += 1949 * port;
1595                 } else {
1596                     h = hash(h, authority);
1597                 }
1598             }
1599             if (h != 0) {
1600                 hash = h;
1601             }
1602         }
1603         return h;
1604     }
1605 
1606     /**
1607      * Compares this URI to another object, which must be a URI.
1608      *
1609      * <p> When comparing corresponding components of two URIs, if one
1610      * component is undefined but the other is defined then the first is
1611      * considered to be less than the second.  Unless otherwise noted, string
1612      * components are ordered according to their natural, case-sensitive
1613      * ordering as defined by the {@link java.lang.String#compareTo(String)
1614      * String.compareTo} method.  String components that are subject to
1615      * encoding are compared by comparing their raw forms rather than their
1616      * encoded forms and the hexadecimal digits of escaped octets are compared
1617      * without regard to case.
1618      *
1619      * <p> The ordering of URIs is defined as follows: </p>
1620      *
1621      * <ul>
1622      *
     *   <li><p> Two URIs with different schemes are ordered according to the
     *   ordering of their schemes, without regard to case. </p></li>
1625      *
1626      *   <li><p> A hierarchical URI is considered to be less than an opaque URI
1627      *   with an identical scheme. </p></li>
1628      *
1629      *   <li><p> Two opaque URIs with identical schemes are ordered according
1630      *   to the ordering of their scheme-specific parts. </p></li>
1631      *
1632      *   <li><p> Two opaque URIs with identical schemes and scheme-specific
1633      *   parts are ordered according to the ordering of their
1634      *   fragments. </p></li>
1635      *
1636      *   <li><p> Two hierarchical URIs with identical schemes are ordered
1637      *   according to the ordering of their authority components: </p>
1638      *
1639      *   <ul>
1640      *
1641      *     <li><p> If both authority components are server-based then the URIs
1642      *     are ordered according to their user-information components; if these
1643      *     components are identical then the URIs are ordered according to the
1644      *     ordering of their hosts, without regard to case; if the hosts are
1645      *     identical then the URIs are ordered according to the ordering of
1646      *     their ports. </p></li>
1647      *
1648      *     <li><p> If one or both authority components are registry-based then
1649      *     the URIs are ordered according to the ordering of their authority
1650      *     components. </p></li>
1651      *
1652      *   </ul></li>
1653      *
1654      *   <li><p> Finally, two hierarchical URIs with identical schemes and
1655      *   authority components are ordered according to the ordering of their
1656      *   paths; if their paths are identical then they are ordered according to
1657      *   the ordering of their queries; if the queries are identical then they
1658      *   are ordered according to the order of their fragments. </p></li>
1659      *
1660      * </ul>
1661      *
1662      * <p> This method satisfies the general contract of the {@link
1663      * java.lang.Comparable#compareTo(Object) Comparable.compareTo}
1664      * method. </p>
1665      *
1666      * @param   that
1667      *          The object to which this URI is to be compared
1668      *
1669      * @return  A negative integer, zero, or a positive integer as this URI is
1670      *          less than, equal to, or greater than the given URI
1671      *
1672      * @throws  ClassCastException
1673      *          If the given object is not a URI
1674      */
1675     public int compareTo(URI that) {
1676         int c;
1677 
1678         if ((c = compareIgnoringCase(this.scheme, that.scheme)) != 0)
1679             return c;
1680 
1681         if (this.isOpaque()) {
1682             if (that.isOpaque()) {
1683                 // Both opaque
1684                 if ((c = compare(this.schemeSpecificPart,
1685                                  that.schemeSpecificPart)) != 0)
1686                     return c;
1687                 return compare(this.fragment, that.fragment);
1688             }
1689             return +1;                  // Opaque > hierarchical
1690         } else if (that.isOpaque()) {
1691             return -1;                  // Hierarchical < opaque
1692         }
1693 
1694         // Hierarchical
1695         if ((this.host != null) && (that.host != null)) {
1696             // Both server-based
1697             if ((c = compare(this.userInfo, that.userInfo)) != 0)
1698                 return c;
1699             if ((c = compareIgnoringCase(this.host, that.host)) != 0)
1700                 return c;
1701             if ((c = this.port - that.port) != 0)
1702                 return c;
1703         } else {
1704             // If one or both authorities are registry-based then we simply
1705             // compare them in the usual, case-sensitive way.  If one is
1706             // registry-based and one is server-based then the strings are
1707             // guaranteed to be unequal, hence the comparison will never return
1708             // zero and the compareTo and equals methods will remain
1709             // consistent.
1710             if ((c = compare(this.authority, that.authority)) != 0) return c;
1711         }
1712 
1713         if ((c = compare(this.path, that.path)) != 0) return c;
1714         if ((c = compare(this.query, that.query)) != 0) return c;
1715         return compare(this.fragment, that.fragment);
1716     }
1717 
1718     /**
1719      * Returns the content of this URI as a string.
1720      *
1721      * <p> If this URI was created by invoking one of the constructors in this
1722      * class then a string equivalent to the original input string, or to the
1723      * string computed from the originally-given components, as appropriate, is
1724      * returned.  Otherwise this URI was created by normalization, resolution,
1725      * or relativization, and so a string is constructed from this URI's
1726      * components according to the rules specified in <a
1727      * href="http://www.ietf.org/rfc/rfc2396.txt">RFC&nbsp;2396</a>,
1728      * section&nbsp;5.2, step&nbsp;7. </p>
1729      *
1730      * @return  The string form of this URI
1731      * @spec https://www.rfc-editor.org/info/rfc2396
1732      *      RFC 2396: Uniform Resource Identifiers (URI): Generic Syntax
1733      */
1734     public String toString() {
1735         String s = string;
1736         if (s == null) {
1737             s = defineString();
1738         }
1739         return s;
1740     }
1741 
1742     private String defineString() {
1743         String s = string;
1744         if (s != null) {
1745             return s;
1746         }
1747 
1748         StringBuilder sb = new StringBuilder();
1749         if (scheme != null) {
1750             sb.append(scheme);
1751             sb.append(':');
1752         }
1753         if (isOpaque()) {
1754             sb.append(schemeSpecificPart);
1755         } else {
1756             if (host != null) {
1757                 sb.append("//");
1758                 if (userInfo != null) {
1759                     sb.append(userInfo);
1760                     sb.append('@');
1761                 }
1762                 boolean needBrackets = ((host.indexOf(':') >= 0)
1763                         && !host.startsWith("[")
1764                         && !host.endsWith("]"));
1765                 if (needBrackets) sb.append('[');
1766                 sb.append(host);
1767                 if (needBrackets) sb.append(']');
1768                 if (port != -1) {
1769                     sb.append(':');
1770                     sb.append(port);
1771                 }
1772             } else if (authority != null) {
1773                 sb.append("//");
1774                 sb.append(authority);
1775             }
1776             if (path != null)
1777                 sb.append(path);
1778             if (query != null) {
1779                 sb.append('?');
1780                 sb.append(query);
1781             }
1782         }
1783         if (fragment != null) {
1784             sb.append('#');
1785             sb.append(fragment);
1786         }
1787         return string = sb.toString();
1788     }
1789 
1790     /**
1791      * Returns the content of this URI as a US-ASCII string.
1792      *
1793      * <p> If this URI does not contain any characters in the <i>other</i>
1794      * category then an invocation of this method will return the same value as
1795      * an invocation of the {@link #toString() toString} method.  Otherwise
1796      * this method works as if by invoking that method and then <a
1797      * href="#encode">encoding</a> the result.  </p>
1798      *
1799      * @return  The string form of this URI, encoded as needed
1800      *          so that it only contains characters in the US-ASCII
1801      *          charset
1802      */
1803     public String toASCIIString() {
1804         return encode(toString());
1805     }
1806 
1807 
1808     // -- Serialization support --
1809 
1810     /**
1811      * Saves the content of this URI to the given serial stream.
1812      *
1813      * <p> The only serializable field of a URI instance is its {@code string}
1814      * field.  That field is given a value, if it does not have one already,
1815      * and then the {@link java.io.ObjectOutputStream#defaultWriteObject()}
1816      * method of the given object-output stream is invoked. </p>
1817      *
1818      * @param  os  The object-output stream to which this object
1819      *             is to be written
1820      *
1821      * @throws IOException
1822      *         If an I/O error occurs
1823      */
1824     @java.io.Serial
1825     private void writeObject(ObjectOutputStream os)
1826         throws IOException
1827     {
1828         defineString();
1829         os.defaultWriteObject();        // Writes the string field only
1830     }
1831 
1832     /**
1833      * Reconstitutes a URI from the given serial stream.
1834      *
1835      * <p> The {@link java.io.ObjectInputStream#defaultReadObject()} method is
1836      * invoked to read the value of the {@code string} field.  The result is
1837      * then parsed in the usual way.
1838      *
1839      * @param  is  The object-input stream from which this object
1840      *             is being read
1841      *
1842      * @throws IOException
1843      *         If an I/O error occurs
1844      *
1845      * @throws ClassNotFoundException
1846      *         If a serialized class cannot be loaded
1847      */
1848     @java.io.Serial
1849     private void readObject(ObjectInputStream is)
1850         throws ClassNotFoundException, IOException
1851     {
1852         port = -1;                      // Argh
1853         is.defaultReadObject();
1854         try {
1855             new Parser(string).parse(false);
1856         } catch (URISyntaxException x) {
1857             IOException y = new InvalidObjectException("Invalid URI");
1858             y.initCause(x);
1859             throw y;
1860         }
1861     }
1862 
1863 
1864     // -- End of public methods --
1865 
1866 
1867     // -- Utility methods for string-field comparison and hashing --
1868 
1869     // These methods return appropriate values for null string arguments,
1870     // thereby simplifying the equals, hashCode, and compareTo methods.
1871     //
1872     // The case-ignoring methods should only be applied to strings whose
1873     // characters are all known to be US-ASCII.  Because of this restriction,
1874     // these methods are faster than the similar methods in the String class.
1875 
1876     // US-ASCII only
1877     private static int toLower(char c) {
1878         if ((c >= 'A') && (c <= 'Z'))
1879             return c + ('a' - 'A');
1880         return c;
1881     }
1882 
1883     // US-ASCII only
1884     private static int toUpper(char c) {
1885         if ((c >= 'a') && (c <= 'z'))
1886             return c - ('a' - 'A');
1887         return c;
1888     }
1889 
1890     private static boolean equal(String s, String t) {
1891         boolean testForEquality = true;
1892         int result = percentNormalizedComparison(s, t, testForEquality);
1893         return result == 0;
1894     }
1895 
1896     // US-ASCII only
1897     private static boolean equalIgnoringCase(String s, String t) {
1898         if (s == t) return true;
1899         if ((s != null) && (t != null)) {
1900             int n = s.length();
1901             if (t.length() != n)
1902                 return false;
1903             for (int i = 0; i < n; i++) {
1904                 if (toLower(s.charAt(i)) != toLower(t.charAt(i)))
1905                     return false;
1906             }
1907             return true;
1908         }
1909         return false;
1910     }
1911 
1912     private static int hash(int hash, String s) {
1913         if (s == null) return hash;
1914         return s.indexOf('%') < 0 ? hash * 127 + s.hashCode()
1915                                   : normalizedHash(hash, s);
1916     }
1917 
1918 
1919     private static int normalizedHash(int hash, String s) {
1920         int h = 0;
1921         for (int index = 0; index < s.length(); index++) {
1922             char ch = s.charAt(index);
1923             h = 31 * h + ch;
1924             if (ch == '%') {
1925                 /*
1926                  * Process the next two encoded characters
1927                  */
1928                 for (int i = index + 1; i < index + 3; i++)
1929                     h = 31 * h + toUpper(s.charAt(i));
1930                 index += 2;
1931             }
1932         }
1933         return hash * 127 + h;
1934     }
1935 
1936     // US-ASCII only
1937     private static int hashIgnoringCase(int hash, String s) {
1938         if (s == null) return hash;
1939         int h = hash;
1940         int n = s.length();
1941         for (int i = 0; i < n; i++)
1942             h = 31 * h + toLower(s.charAt(i));
1943         return h;
1944     }
1945 
1946     private static int compare(String s, String t) {
1947         boolean testForEquality = false;
1948         int result = percentNormalizedComparison(s, t, testForEquality);
1949         return result;
1950     }
1951 
1952     // The percentNormalizedComparison method does not verify two
1953     // characters that follow the % sign are hexadecimal digits.
1954     // Reason being:
1955     // 1) percentNormalizedComparison method is not called with
1956     // 'decoded' strings
1957     // 2) The only place where a percent can be followed by anything
1958     // other than hexadecimal digits is in the authority component
1959     // (for a IPv6 scope) and the whole authority component is case
1960     // insensitive.
1961     private static int percentNormalizedComparison(String s, String t,
1962                                                    boolean testForEquality) {
1963 
1964         if (s == t) return 0;
1965         if (s != null) {
1966             if (t != null) {
1967                 if (s.indexOf('%') < 0) {
1968                     return s.compareTo(t);
1969                 }
1970                 int sn = s.length();
1971                 int tn = t.length();
1972                 if ((sn != tn) && testForEquality)
1973                     return sn - tn;
1974                 int val = 0;
1975                 int n = Math.min(sn, tn);
1976                 for (int i = 0; i < n; ) {
1977                     char c = s.charAt(i);
1978                     char d = t.charAt(i);
1979                     val = c - d;
1980                     if (c != '%') {
1981                         if (val != 0)
1982                             return val;
1983                         i++;
1984                         continue;
1985                     }
1986                     if (d != '%') {
1987                         if (val != 0)
1988                             return val;
1989                     }
1990                     i++;
1991                     val = toLower(s.charAt(i)) - toLower(t.charAt(i));
1992                     if (val != 0)
1993                         return val;
1994                     i++;
1995                     val = toLower(s.charAt(i)) - toLower(t.charAt(i));
1996                     if (val != 0)
1997                         return val;
1998                     i++;
1999                 }
2000                 return sn - tn;
2001             } else
2002                 return +1;
2003         } else {
2004             return -1;
2005         }
2006     }
2007 
2008     // US-ASCII only
2009     private static int compareIgnoringCase(String s, String t) {
2010         if (s == t) return 0;
2011         if (s != null) {
2012             if (t != null) {
2013                 int sn = s.length();
2014                 int tn = t.length();
2015                 int n = sn < tn ? sn : tn;
2016                 for (int i = 0; i < n; i++) {
2017                     int c = toLower(s.charAt(i)) - toLower(t.charAt(i));
2018                     if (c != 0)
2019                         return c;
2020                 }
2021                 return sn - tn;
2022             }
2023             return +1;
2024         } else {
2025             return -1;
2026         }
2027     }
2028 
2029 
2030     // -- String construction --
2031 
2032     // If a scheme is given then the path, if given, must be absolute
2033     //
2034     private static void checkPath(String s, String scheme, String path)
2035         throws URISyntaxException
2036     {
2037         if (scheme != null) {
2038             if (path != null && !path.isEmpty() && path.charAt(0) != '/')
2039                 throw new URISyntaxException(formatMsg("%s", filterNonSocketInfo(s)),
2040                                              "Relative path in absolute URI");
2041         }
2042     }
2043 
2044     private void appendAuthority(StringBuilder sb,
2045                                  String authority,
2046                                  String userInfo,
2047                                  String host,
2048                                  int port)
2049     {
2050         if (host != null) {
2051             sb.append("//");
2052             if (userInfo != null) {
2053                 sb.append(quote(userInfo, L_USERINFO, H_USERINFO));
2054                 sb.append('@');
2055             }
2056             boolean needBrackets = ((host.indexOf(':') >= 0)
2057                                     && !host.startsWith("[")
2058                                     && !host.endsWith("]"));
2059             if (needBrackets) sb.append('[');
2060             sb.append(host);
2061             if (needBrackets) sb.append(']');
2062             if (port != -1) {
2063                 sb.append(':');
2064                 sb.append(port);
2065             }
2066         } else if (authority != null) {
2067             sb.append("//");
2068             if (authority.startsWith("[")) {
2069                 // authority should (but may not) contain an embedded IPv6 address
2070                 int end = authority.indexOf(']');
2071                 String doquote = authority;
2072                 if (end != -1 && authority.indexOf(':') != -1) {
2073                     // the authority contains an IPv6 address
2074                     sb.append(authority, 0, end + 1);
2075                     doquote = authority.substring(end + 1);
2076                 }
2077                 sb.append(quote(doquote,
2078                             L_REG_NAME | L_SERVER,
2079                             H_REG_NAME | H_SERVER));
2080             } else {
2081                 sb.append(quote(authority,
2082                             L_REG_NAME | L_SERVER,
2083                             H_REG_NAME | H_SERVER));
2084             }
2085         }
2086     }
2087 
2088     private void appendSchemeSpecificPart(StringBuilder sb,
2089                                           String opaquePart,
2090                                           String authority,
2091                                           String userInfo,
2092                                           String host,
2093                                           int port,
2094                                           String path,
2095                                           String query)
2096     {
2097         if (opaquePart != null) {
2098             /* check if SSP begins with an IPv6 address
2099              * because we must not quote a literal IPv6 address
2100              */
2101             if (opaquePart.startsWith("//[")) {
2102                 int end =  opaquePart.indexOf(']');
2103                 if (end != -1 && opaquePart.indexOf(':')!=-1) {
2104                     String doquote = opaquePart.substring(end + 1);
2105                     sb.append(opaquePart, 0, end + 1);
2106                     sb.append(quote(doquote, L_URIC, H_URIC));
2107                 }
2108             } else {
2109                 sb.append(quote(opaquePart, L_URIC, H_URIC));
2110             }
2111         } else {
2112             appendAuthority(sb, authority, userInfo, host, port);
2113             if (path != null)
2114                 sb.append(quote(path, L_PATH, H_PATH));
2115             if (query != null) {
2116                 sb.append('?');
2117                 sb.append(quote(query, L_URIC, H_URIC));
2118             }
2119         }
2120     }
2121 
2122     private void appendFragment(StringBuilder sb, String fragment) {
2123         if (fragment != null) {
2124             sb.append('#');
2125             sb.append(quote(fragment, L_URIC, H_URIC));
2126         }
2127     }
2128 
2129     private String toString(String scheme,
2130                             String opaquePart,
2131                             String authority,
2132                             String userInfo,
2133                             String host,
2134                             int port,
2135                             String path,
2136                             String query,
2137                             String fragment)
2138     {
2139         StringBuilder sb = new StringBuilder();
2140         if (scheme != null) {
2141             sb.append(scheme);
2142             sb.append(':');
2143         }
2144         appendSchemeSpecificPart(sb, opaquePart,
2145                                  authority, userInfo, host, port,
2146                                  path, query);
2147         appendFragment(sb, fragment);
2148         return sb.toString();
2149     }
2150 
2151     // -- Normalization, resolution, and relativization --
2152 
2153     // RFC2396 5.2 (6)
2154     private static String resolvePath(String base, String child, boolean absolute)
2155     {
2156         int i = base.lastIndexOf('/');
2157         int cn = child.length();
2158         String path = "";
2159 
2160         if (cn == 0) {
2161             // 5.2 (6a)
2162             if (i >= 0)
2163                 path = base.substring(0, i + 1);
2164         } else {
2165             // 5.2 (6a-b)
2166             if (i >= 0 || !absolute) {
2167                 path = base.substring(0, i + 1).concat(child);
2168             } else {
2169                 path = "/".concat(child);
2170             }
2171 
2172         }
2173 
2174         // 5.2 (6c-f)
2175         String np = normalize(path);
2176 
2177         // 5.2 (6g): If the result is absolute but the path begins with "../",
2178         // then we simply leave the path as-is
2179 
2180         return np;
2181     }
2182 
2183     // RFC2396 5.2
2184     private static URI resolve(URI base, URI child) {
2185         // check if child if opaque first so that NPE is thrown
2186         // if child is null.
2187         if (child.isOpaque() || base.isOpaque())
2188             return child;
2189 
2190         // 5.2 (2): Reference to current document (lone fragment)
2191         if ((child.scheme == null) && (child.authority == null)
2192             && child.path.isEmpty() && (child.fragment != null)
2193             && (child.query == null)) {
2194             if ((base.fragment != null)
2195                 && child.fragment.equals(base.fragment)) {
2196                 return base;
2197             }
2198             URI ru = new URI();
2199             ru.scheme = base.scheme;
2200             ru.authority = base.authority;
2201             ru.userInfo = base.userInfo;
2202             ru.host = base.host;
2203             ru.port = base.port;
2204             ru.path = base.path;
2205             ru.fragment = child.fragment;
2206             ru.query = base.query;
2207             return ru;
2208         }
2209 
2210         // 5.2 (3): Child is absolute
2211         if (child.scheme != null)
2212             return child;
2213 
2214         URI ru = new URI();             // Resolved URI
2215         ru.scheme = base.scheme;
2216         ru.query = child.query;
2217         ru.fragment = child.fragment;
2218 
2219         // 5.2 (4): Authority
2220         if (child.authority == null) {
2221             ru.authority = base.authority;
2222             ru.host = base.host;
2223             ru.userInfo = base.userInfo;
2224             ru.port = base.port;
2225 
2226             String cp = child.path;
2227             if (!cp.isEmpty() && cp.charAt(0) == '/') {
2228                 // 5.2 (5): Child path is absolute
2229                 ru.path = child.path;
2230             } else {
2231                 // 5.2 (6): Resolve relative path
2232                 ru.path = resolvePath(base.path, cp, base.isAbsolute());
2233             }
2234         } else {
2235             ru.authority = child.authority;
2236             ru.host = child.host;
2237             ru.userInfo = child.userInfo;
2238             ru.port = child.port;
2239             ru.path = child.path;
2240         }
2241 
2242         // 5.2 (7): Recombine (nothing to do here)
2243         return ru;
2244     }
2245 
2246     // If the given URI's path is normal then return the URI;
2247     // o.w., return a new URI containing the normalized path.
2248     //
2249     private static URI normalize(URI u) {
2250         if (u.isOpaque() || u.path == null || u.path.isEmpty())
2251             return u;
2252 
2253         String np = normalize(u.path);
2254         if (np == u.path)
2255             return u;
2256 
2257         URI v = new URI();
2258         v.scheme = u.scheme;
2259         v.fragment = u.fragment;
2260         v.authority = u.authority;
2261         v.userInfo = u.userInfo;
2262         v.host = u.host;
2263         v.port = u.port;
2264         v.path = np;
2265         v.query = u.query;
2266         return v;
2267     }
2268 
2269     // If both URIs are hierarchical, their scheme and authority components are
2270     // identical, and the base path is a prefix of the child's path, then
2271     // return a relative URI that, when resolved against the base, yields the
2272     // child; otherwise, return the child.
2273     //
2274     private static URI relativize(URI base, URI child) {
2275         // check if child if opaque first so that NPE is thrown
2276         // if child is null.
2277         if (child.isOpaque() || base.isOpaque())
2278             return child;
2279         if (!equalIgnoringCase(base.scheme, child.scheme)
2280             || !equal(base.authority, child.authority))
2281             return child;
2282 
2283         String bp = normalize(base.path);
2284         String cp = normalize(child.path);
2285         if (!bp.equals(cp)) {
2286             if (!bp.endsWith("/"))
2287                 bp = bp + "/";
2288             if (!cp.startsWith(bp))
2289                 return child;
2290         }
2291 
2292         URI v = new URI();
2293         v.path = cp.substring(bp.length());
2294         v.query = child.query;
2295         v.fragment = child.fragment;
2296         return v;
2297     }
2298 
2299 
2300 
2301     // -- Path normalization --
2302 
2303     // The following algorithm for path normalization avoids the creation of a
2304     // string object for each segment, as well as the use of a string buffer to
2305     // compute the final result, by using a single char array and editing it in
2306     // place.  The array is first split into segments, replacing each slash
2307     // with '\0' and creating a segment-index array, each element of which is
2308     // the index of the first char in the corresponding segment.  We then walk
2309     // through both arrays, removing ".", "..", and other segments as necessary
2310     // by setting their entries in the index array to -1.  Finally, the two
2311     // arrays are used to rejoin the segments and compute the final result.
2312     //
2313     // This code is based upon src/solaris/native/java/io/canonicalize_md.c
2314 
2315 
2316     // Check the given path to see if it might need normalization.  A path
2317     // might need normalization if it contains duplicate slashes, a "."
2318     // segment, or a ".." segment.  Return -1 if no further normalization is
2319     // possible, otherwise return the number of segments found.
2320     //
2321     // This method takes a string argument rather than a char array so that
2322     // this test can be performed without invoking path.toCharArray().
2323     //
2324     private static int needsNormalization(String path) {
2325         boolean normal = true;
2326         int ns = 0;                     // Number of segments
2327         int end = path.length() - 1;    // Index of last char in path
2328         int p = 0;                      // Index of next char in path
2329 
2330         // Skip initial slashes
2331         while (p <= end) {
2332             if (path.charAt(p) != '/') break;
2333             p++;
2334         }
2335         if (p > 1) normal = false;
2336 
2337         // Scan segments
2338         while (p <= end) {
2339 
2340             // Looking at "." or ".." ?
2341             if ((path.charAt(p) == '.')
2342                 && ((p == end)
2343                     || ((path.charAt(p + 1) == '/')
2344                         || ((path.charAt(p + 1) == '.')
2345                             && ((p + 1 == end)
2346                                 || (path.charAt(p + 2) == '/')))))) {
2347                 normal = false;
2348             }
2349             ns++;
2350 
2351             // Find beginning of next segment
2352             while (p <= end) {
2353                 if (path.charAt(p++) != '/')
2354                     continue;
2355 
2356                 // Skip redundant slashes
2357                 while (p <= end) {
2358                     if (path.charAt(p) != '/') break;
2359                     normal = false;
2360                     p++;
2361                 }
2362 
2363                 break;
2364             }
2365         }
2366 
2367         return normal ? -1 : ns;
2368     }
2369 
2370 
2371     // Split the given path into segments, replacing slashes with nulls and
2372     // filling in the given segment-index array.
2373     //
2374     // Preconditions:
2375     //   segs.length == Number of segments in path
2376     //
2377     // Postconditions:
2378     //   All slashes in path replaced by '\0'
2379     //   segs[i] == Index of first char in segment i (0 <= i < segs.length)
2380     //
2381     private static void split(char[] path, int[] segs) {
2382         int end = path.length - 1;      // Index of last char in path
2383         int p = 0;                      // Index of next char in path
2384         int i = 0;                      // Index of current segment
2385 
2386         // Skip initial slashes
2387         while (p <= end) {
2388             if (path[p] != '/') break;
2389             path[p] = '\0';
2390             p++;
2391         }
2392 
2393         while (p <= end) {
2394 
2395             // Note start of segment
2396             segs[i++] = p++;
2397 
2398             // Find beginning of next segment
2399             while (p <= end) {
2400                 if (path[p++] != '/')
2401                     continue;
2402                 path[p - 1] = '\0';
2403 
2404                 // Skip redundant slashes
2405                 while (p <= end) {
2406                     if (path[p] != '/') break;
2407                     path[p++] = '\0';
2408                 }
2409                 break;
2410             }
2411         }
2412 
2413         if (i != segs.length)
2414             throw new InternalError();  // ASSERT
2415     }
2416 
2417 
2418     // Join the segments in the given path according to the given segment-index
2419     // array, ignoring those segments whose index entries have been set to -1,
2420     // and inserting slashes as needed.  Return the length of the resulting
2421     // path.
2422     //
2423     // Preconditions:
2424     //   segs[i] == -1 implies segment i is to be ignored
2425     //   path computed by split, as above, with '\0' having replaced '/'
2426     //
2427     // Postconditions:
    //   path[0] .. path[return value - 1] == Resulting path
2429     //
2430     private static int join(char[] path, int[] segs) {
2431         int ns = segs.length;           // Number of segments
2432         int end = path.length - 1;      // Index of last char in path
2433         int p = 0;                      // Index of next path char to write
2434 
2435         if (path[p] == '\0') {
2436             // Restore initial slash for absolute paths
2437             path[p++] = '/';
2438         }
2439 
2440         for (int i = 0; i < ns; i++) {
2441             int q = segs[i];            // Current segment
2442             if (q == -1)
2443                 // Ignore this segment
2444                 continue;
2445 
2446             if (p == q) {
2447                 // We're already at this segment, so just skip to its end
2448                 while ((p <= end) && (path[p] != '\0'))
2449                     p++;
2450                 if (p <= end) {
2451                     // Preserve trailing slash
2452                     path[p++] = '/';
2453                 }
2454             } else if (p < q) {
2455                 // Copy q down to p
2456                 while ((q <= end) && (path[q] != '\0'))
2457                     path[p++] = path[q++];
2458                 if (q <= end) {
2459                     // Preserve trailing slash
2460                     path[p++] = '/';
2461                 }
2462             } else
2463                 throw new InternalError(); // ASSERT false
2464         }
2465 
2466         return p;
2467     }
2468 
2469 
2470     // Remove "." segments from the given path, and remove segment pairs
2471     // consisting of a non-".." segment followed by a ".." segment.
2472     //
2473     private static void removeDots(char[] path, int[] segs) {
2474         int ns = segs.length;
2475         int end = path.length - 1;
2476 
2477         for (int i = 0; i < ns; i++) {
2478             int dots = 0;               // Number of dots found (0, 1, or 2)
2479 
2480             // Find next occurrence of "." or ".."
2481             do {
2482                 int p = segs[i];
2483                 if (path[p] == '.') {
2484                     if (p == end) {
2485                         dots = 1;
2486                         break;
2487                     } else if (path[p + 1] == '\0') {
2488                         dots = 1;
2489                         break;
2490                     } else if ((path[p + 1] == '.')
2491                                && ((p + 1 == end)
2492                                    || (path[p + 2] == '\0'))) {
2493                         dots = 2;
2494                         break;
2495                     }
2496                 }
2497                 i++;
2498             } while (i < ns);
2499             if ((i > ns) || (dots == 0))
2500                 break;
2501 
2502             if (dots == 1) {
2503                 // Remove this occurrence of "."
2504                 segs[i] = -1;
2505             } else {
2506                 // If there is a preceding non-".." segment, remove both that
2507                 // segment and this occurrence of ".."; otherwise, leave this
2508                 // ".." segment as-is.
2509                 int j;
2510                 for (j = i - 1; j >= 0; j--) {
2511                     if (segs[j] != -1) break;
2512                 }
2513                 if (j >= 0) {
2514                     int q = segs[j];
2515                     if (!((path[q] == '.')
2516                           && (path[q + 1] == '.')
2517                           && (path[q + 2] == '\0'))) {
2518                         segs[i] = -1;
2519                         segs[j] = -1;
2520                     }
2521                 }
2522             }
2523         }
2524     }
2525 
2526 
2527     // DEVIATION: If the normalized path is relative, and if the first
2528     // segment could be parsed as a scheme name, then prepend a "." segment
2529     //
2530     private static void maybeAddLeadingDot(char[] path, int[] segs) {
2531 
2532         if (path[0] == '\0')
2533             // The path is absolute
2534             return;
2535 
2536         int ns = segs.length;
2537         int f = 0;                      // Index of first segment
2538         while (f < ns) {
2539             if (segs[f] >= 0)
2540                 break;
2541             f++;
2542         }
2543         if ((f >= ns) || (f == 0))
2544             // The path is empty, or else the original first segment survived,
2545             // in which case we already know that no leading "." is needed
2546             return;
2547 
2548         int p = segs[f];
2549         while ((p < path.length) && (path[p] != ':') && (path[p] != '\0')) p++;
2550         if (p >= path.length || path[p] == '\0')
2551             // No colon in first segment, so no "." needed
2552             return;
2553 
2554         // At this point we know that the first segment is unused,
2555         // hence we can insert a "." segment at that position
2556         path[0] = '.';
2557         path[1] = '\0';
2558         segs[0] = 0;
2559     }
2560 
2561 
2562     // Normalize the given path string.  A normal path string has no empty
2563     // segments (i.e., occurrences of "//"), no segments equal to ".", and no
2564     // segments equal to ".." that are preceded by a segment not equal to "..".
2565     // In contrast to Unix-style pathname normalization, for URI paths we
2566     // always retain trailing slashes.
2567     //
2568     private static String normalize(String ps) {
2569 
2570         // Does this path need normalization?
2571         int ns = needsNormalization(ps);        // Number of segments
2572         if (ns < 0)
2573             // Nope -- just return it
2574             return ps;
2575 
2576         char[] path = ps.toCharArray();         // Path in char-array form
2577 
2578         // Split path into segments
2579         int[] segs = new int[ns];               // Segment-index array
2580         split(path, segs);
2581 
2582         // Remove dots
2583         removeDots(path, segs);
2584 
2585         // Prevent scheme-name confusion
2586         maybeAddLeadingDot(path, segs);
2587 
2588         // Join the remaining segments and return the result
2589         String s = new String(path, 0, join(path, segs));
2590         if (s.equals(ps)) {
2591             // string was already normalized
2592             return ps;
2593         }
2594         return s;
2595     }
2596 
2597 
2598 
2599     // -- Character classes for parsing --
2600 
2601     // RFC2396 precisely specifies which characters in the US-ASCII charset are
2602     // permissible in the various components of a URI reference.  We here
2603     // define a set of mask pairs to aid in enforcing these restrictions.  Each
2604     // mask pair consists of two longs, a low mask and a high mask.  Taken
2605     // together they represent a 128-bit mask, where bit i is set iff the
2606     // character with value i is permitted.
2607     //
2608     // This approach is more efficient than sequentially searching arrays of
2609     // permitted characters.  It could be made still more efficient by
2610     // precompiling the mask information so that a character's presence in a
2611     // given mask could be determined by a single table lookup.
2612 
2613     // To save startup time, we manually calculate the low-/highMask constants.
2614     // For reference, the following methods were used to calculate the values:
2615 
2616     // Compute the low-order mask for the characters in the given string
2617     //     private static long lowMask(String chars) {
2618     //        int n = chars.length();
2619     //        long m = 0;
2620     //        for (int i = 0; i < n; i++) {
2621     //            char c = chars.charAt(i);
2622     //            if (c < 64)
2623     //                m |= (1L << c);
2624     //        }
2625     //        return m;
2626     //    }
2627 
2628     // Compute the high-order mask for the characters in the given string
2629     //    private static long highMask(String chars) {
2630     //        int n = chars.length();
2631     //        long m = 0;
2632     //        for (int i = 0; i < n; i++) {
2633     //            char c = chars.charAt(i);
2634     //            if ((c >= 64) && (c < 128))
2635     //                m |= (1L << (c - 64));
2636     //        }
2637     //        return m;
2638     //    }
2639 
2640     // Compute a low-order mask for the characters
2641     // between first and last, inclusive
2642     //    private static long lowMask(char first, char last) {
2643     //        long m = 0;
2644     //        int f = Math.max(Math.min(first, 63), 0);
2645     //        int l = Math.max(Math.min(last, 63), 0);
2646     //        for (int i = f; i <= l; i++)
2647     //            m |= 1L << i;
2648     //        return m;
2649     //    }
2650 
2651     // Compute a high-order mask for the characters
2652     // between first and last, inclusive
2653     //    private static long highMask(char first, char last) {
2654     //        long m = 0;
2655     //        int f = Math.max(Math.min(first, 127), 64) - 64;
2656     //        int l = Math.max(Math.min(last, 127), 64) - 64;
2657     //        for (int i = f; i <= l; i++)
2658     //            m |= 1L << i;
2659     //        return m;
2660     //    }
2661 
2662     // Tell whether the given character is permitted by the given mask pair
2663     private static boolean match(char c, long lowMask, long highMask) {
2664         if (c == 0) // 0 doesn't have a slot in the mask. So, it never matches.
2665             return false;
2666         if (c < 64)
2667             return ((1L << c) & lowMask) != 0;
2668         if (c < 128)
2669             return ((1L << (c - 64)) & highMask) != 0;
2670         return false;
2671     }
2672 
2673     // Character-class masks, in reverse order from RFC2396 because
2674     // initializers for static fields cannot make forward references.
2675 
2676     // digit    = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" |
2677     //            "8" | "9"
    // Each class below is a pair: L_xxx holds the bits for characters
    // 0..63 and H_xxx holds the bits for characters 64..127 (bit c - 64).
    // The trailing comment on a literal names the lowMask/highMask call
    // that the literal was computed from.
    private static final long L_DIGIT = 0x3FF000000000000L; // lowMask('0', '9');
    private static final long H_DIGIT = 0L; // all digits are below 64

    // upalpha  = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" |
    //            "J" | "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" |
    //            "S" | "T" | "U" | "V" | "W" | "X" | "Y" | "Z"
    private static final long L_UPALPHA = 0L; // all upper-case letters are >= 64
    private static final long H_UPALPHA = 0x7FFFFFEL; // highMask('A', 'Z');

    // lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" |
    //            "j" | "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" |
    //            "s" | "t" | "u" | "v" | "w" | "x" | "y" | "z"
    private static final long L_LOWALPHA = 0L; // all lower-case letters are >= 64
    private static final long H_LOWALPHA = 0x7FFFFFE00000000L; // highMask('a', 'z');

    // alpha         = lowalpha | upalpha
    private static final long L_ALPHA = L_LOWALPHA | L_UPALPHA;
    private static final long H_ALPHA = H_LOWALPHA | H_UPALPHA;

    // alphanum      = alpha | digit
    private static final long L_ALPHANUM = L_DIGIT | L_ALPHA;
    private static final long H_ALPHANUM = H_DIGIT | H_ALPHA;

    // hex           = digit | "A" | "B" | "C" | "D" | "E" | "F" |
    //                         "a" | "b" | "c" | "d" | "e" | "f"
    private static final long L_HEX = L_DIGIT;
    private static final long H_HEX = 0x7E0000007EL; // highMask('A', 'F') | highMask('a', 'f');

    // mark          = "-" | "_" | "." | "!" | "~" | "*" | "'" |
    //                 "(" | ")"
    // ("_" and "~" are >= 64 and therefore live in the high mask)
    private static final long L_MARK = 0x678200000000L; // lowMask("-_.!~*'()");
    private static final long H_MARK = 0x4000000080000000L; // highMask("-_.!~*'()");

    // unreserved    = alphanum | mark
    private static final long L_UNRESERVED = L_ALPHANUM | L_MARK;
    private static final long H_UNRESERVED = H_ALPHANUM | H_MARK;

    // reserved      = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
    //                 "$" | "," | "[" | "]"
    // Added per RFC2732: "[", "]"
    // ("@", "[" and "]" are >= 64 and therefore live in the high mask)
    private static final long L_RESERVED = 0xAC00985000000000L; // lowMask(";/?:@&=+$,[]");
    private static final long H_RESERVED = 0x28000001L; // highMask(";/?:@&=+$,[]");

    // The zero'th bit is used to indicate that escape pairs and non-US-ASCII
    // characters are allowed; this is handled by the scanEscape method below.
    // Bit zero is free for this purpose because match() never reports
    // character 0 as permitted.
    private static final long L_ESCAPED = 1L;
    private static final long H_ESCAPED = 0L;

    // uric          = reserved | unreserved | escaped
    private static final long L_URIC = L_RESERVED | L_UNRESERVED | L_ESCAPED;
    private static final long H_URIC = H_RESERVED | H_UNRESERVED | H_ESCAPED;

    // pchar         = unreserved | escaped |
    //                 ":" | "@" | "&" | "=" | "+" | "$" | ","
    private static final long L_PCHAR
        = L_UNRESERVED | L_ESCAPED | 0x2400185000000000L; // lowMask(":@&=+$,");
    private static final long H_PCHAR
        = H_UNRESERVED | H_ESCAPED | 0x1L; // highMask(":@&=+$,");

    // All valid path characters
    private static final long L_PATH = L_PCHAR | 0x800800000000000L; // lowMask(";/");
    private static final long H_PATH = H_PCHAR; // highMask(";/") == 0x0L;

    // Dash, for use in domainlabel and toplabel
    private static final long L_DASH = 0x200000000000L; // lowMask("-");
    private static final long H_DASH = 0x0L; // highMask("-");

    // Dot, for use in hostnames
    private static final long L_DOT = 0x400000000000L; // lowMask(".");
    private static final long H_DOT = 0x0L; // highMask(".");

    // userinfo      = *( unreserved | escaped |
    //                    ";" | ":" | "&" | "=" | "+" | "$" | "," )
    private static final long L_USERINFO
        = L_UNRESERVED | L_ESCAPED | 0x2C00185000000000L; // lowMask(";:&=+$,");
    private static final long H_USERINFO
        = H_UNRESERVED | H_ESCAPED; // | highMask(";:&=+$,") == 0L;

    // reg_name      = 1*( unreserved | escaped | "$" | "," |
    //                     ";" | ":" | "@" | "&" | "=" | "+" )
    // (differs from userinfo only by "@", which is in the high mask)
    private static final long L_REG_NAME
        = L_UNRESERVED | L_ESCAPED | 0x2C00185000000000L; // lowMask("$,;:@&=+");
    private static final long H_REG_NAME
        = H_UNRESERVED | H_ESCAPED | 0x1L; // highMask("$,;:@&=+");

    // All valid characters for server-based authorities
    private static final long L_SERVER
        = L_USERINFO | L_ALPHANUM | L_DASH | 0x400400000000000L; // lowMask(".:@[]");
    private static final long H_SERVER
        = H_USERINFO | H_ALPHANUM | H_DASH | 0x28000001L; // highMask(".:@[]");

    // Special case of server authority that represents an IPv6 address
    // In this case, a % does not signify an escape sequence
    private static final long L_SERVER_PERCENT
        = L_SERVER | 0x2000000000L; // lowMask("%");
    private static final long H_SERVER_PERCENT
        = H_SERVER; // | highMask("%") == 0L;

    // scheme        = alpha *( alpha | digit | "+" | "-" | "." )
    private static final long L_SCHEME = L_ALPHA | L_DIGIT | 0x680000000000L; // lowMask("+-.");
    private static final long H_SCHEME = H_ALPHA | H_DIGIT; // | highMask("+-.") == 0L

    // scope_id = alpha | digit | "_" | "."
    // ("." is in the low mask, "_" in the high mask)
    private static final long L_SCOPE_ID
        = L_ALPHANUM | 0x400000000000L; // lowMask("_.");
    private static final long H_SCOPE_ID
        = H_ALPHANUM | 0x80000000L; // highMask("_.");
2785 
2786     // -- Escaping and encoding --
2787 
2788     private static final char[] hexDigits = {
2789         '0', '1', '2', '3', '4', '5', '6', '7',
2790         '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'
2791     };
2792 
2793     private static void appendEscape(StringBuilder sb, byte b) {
2794         sb.append('%');
2795         sb.append(hexDigits[(b >> 4) & 0x0f]);
2796         sb.append(hexDigits[(b >> 0) & 0x0f]);
2797     }
2798 
2799     private static void appendEncoded(CharsetEncoder encoder, StringBuilder sb, char c) {
2800         ByteBuffer bb = null;
2801         try {
2802             bb = encoder.encode(CharBuffer.wrap(new char[]{c}));
2803         } catch (CharacterCodingException x) {
2804             assert false;
2805         }
2806         while (bb.hasRemaining()) {
2807             int b = bb.get() & 0xff;
2808             if (b >= 0x80)
2809                 appendEscape(sb, (byte)b);
2810             else
2811                 sb.append((char)b);
2812         }
2813     }
2814 
2815     // Quote any characters in s that are not permitted
2816     // by the given mask pair
2817     //
2818     private static String quote(String s, long lowMask, long highMask) {
2819         StringBuilder sb = null;
2820         CharsetEncoder encoder = null;
2821         boolean allowNonASCII = ((lowMask & L_ESCAPED) != 0);
2822         for (int i = 0; i < s.length(); i++) {
2823             char c = s.charAt(i);
2824             if (c < '\u0080') {
2825                 if (!match(c, lowMask, highMask)) {
2826                     if (sb == null) {
2827                         sb = new StringBuilder();
2828                         sb.append(s, 0, i);
2829                     }
2830                     appendEscape(sb, (byte)c);
2831                 } else {
2832                     if (sb != null)
2833                         sb.append(c);
2834                 }
2835             } else if (allowNonASCII
2836                        && (Character.isSpaceChar(c)
2837                            || Character.isISOControl(c))) {
2838                 if (encoder == null)
2839                     encoder = UTF_8.INSTANCE.newEncoder();
2840                 if (sb == null) {
2841                     sb = new StringBuilder();
2842                     sb.append(s, 0, i);
2843                 }
2844                 appendEncoded(encoder, sb, c);
2845             } else {
2846                 if (sb != null)
2847                     sb.append(c);
2848             }
2849         }
2850         return (sb == null) ? s : sb.toString();
2851     }
2852 
2853     // Encodes all characters >= \u0080 into escaped, normalized UTF-8 octets,
2854     // assuming that s is otherwise legal
2855     //
2856     private static String encode(String s) {
2857         int n = s.length();
2858         if (n == 0)
2859             return s;
2860 
2861         // First check whether we actually need to encode
2862         for (int i = 0;;) {
2863             if (s.charAt(i) >= '\u0080')
2864                 break;
2865             if (++i >= n)
2866                 return s;
2867         }
2868 
2869         String ns = Normalizer.normalize(s, Normalizer.Form.NFC);
2870         ByteBuffer bb = null;
2871         try {
2872             bb = UTF_8.INSTANCE.newEncoder()
2873                 .encode(CharBuffer.wrap(ns));
2874 
2875         } catch (CharacterCodingException x) {
2876             assert false;
2877         }
2878 
2879         StringBuilder sb = new StringBuilder();
2880         while (bb.hasRemaining()) {
2881             int b = bb.get() & 0xff;
2882             if (b >= 0x80)
2883                 appendEscape(sb, (byte)b);
2884             else
2885                 sb.append((char)b);
2886         }
2887         return sb.toString();
2888     }
2889 
2890     private static int decode(char c) {
2891         if ((c >= '0') && (c <= '9'))
2892             return c - '0';
2893         if ((c >= 'a') && (c <= 'f'))
2894             return c - 'a' + 10;
2895         if ((c >= 'A') && (c <= 'F'))
2896             return c - 'A' + 10;
2897         assert false;
2898         return -1;
2899     }
2900 
2901     private static byte decode(char c1, char c2) {
2902         return (byte)(  ((decode(c1) & 0xf) << 4)
2903                       | ((decode(c2) & 0xf) << 0));
2904     }
2905 
2906     // Evaluates all escapes in s, applying UTF-8 decoding if needed.  Assumes
2907     // that escapes are well-formed syntactically, i.e., of the form %XX.  If a
2908     // sequence of escaped octets is not valid UTF-8 then the erroneous octets
2909     // are replaced with '\uFFFD'.
2910     // Exception: any "%" found between "[]" is left alone. It is an IPv6 literal
2911     //            with a scope_id
2912     //
2913     private static String decode(String s) {
2914         return decode(s, true);
2915     }
2916 
2917     // This method was introduced as a generalization of URI.decode method
2918     // to provide a fix for JDK-8037396
2919     private static String decode(String s, boolean ignorePercentInBrackets) {
2920         if (s == null)
2921             return s;
2922         int n = s.length();
2923         if (n == 0)
2924             return s;
2925         if (s.indexOf('%') < 0)
2926             return s;
2927 
2928         StringBuilder sb = new StringBuilder(n);
2929         ByteBuffer bb = ByteBuffer.allocate(n);
2930         CharBuffer cb = CharBuffer.allocate(n);
2931         CharsetDecoder dec = UTF_8.INSTANCE.newDecoder()
2932             .onMalformedInput(CodingErrorAction.REPLACE)
2933             .onUnmappableCharacter(CodingErrorAction.REPLACE);
2934 
2935         // This is not horribly efficient, but it will do for now
2936         char c = s.charAt(0);
2937         boolean betweenBrackets = false;
2938 
2939         for (int i = 0; i < n;) {
2940             assert c == s.charAt(i);    // Loop invariant
2941             if (c == '[') {
2942                 betweenBrackets = true;
2943             } else if (betweenBrackets && c == ']') {
2944                 betweenBrackets = false;
2945             }
2946             if (c != '%' || (betweenBrackets && ignorePercentInBrackets)) {
2947                 sb.append(c);
2948                 if (++i >= n)
2949                     break;
2950                 c = s.charAt(i);
2951                 continue;
2952             }
2953             bb.clear();
2954             for (;;) {
2955                 assert (n - i >= 2);
2956                 bb.put(decode(s.charAt(++i), s.charAt(++i)));
2957                 if (++i >= n)
2958                     break;
2959                 c = s.charAt(i);
2960                 if (c != '%')
2961                     break;
2962             }
2963             bb.flip();
2964             cb.clear();
2965             dec.reset();
2966             CoderResult cr = dec.decode(bb, cb, true);
2967             assert cr.isUnderflow();
2968             cr = dec.flush(cb);
2969             assert cr.isUnderflow();
2970             sb.append(cb.flip().toString());
2971         }
2972 
2973         return sb.toString();
2974     }
2975 
2976 
2977     // -- Parsing --
2978 
2979     // For convenience we wrap the input URI string in a new instance of the
2980     // following internal class.  This saves always having to pass the input
2981     // string as an argument to each internal scan/parse method.
2982 
2983     private class Parser {
2984 
2985         private final String input;           // URI input string
2986         private boolean requireServerAuthority = false;
2987 
2988         Parser(String s) {
2989             input = s;
2990             string = s;
2991         }
2992 
2993         // -- Methods for throwing URISyntaxException in various ways --
2994 
2995         private void fail(String reason) throws URISyntaxException {
2996             throw new URISyntaxException(formatMsg("%s", filterNonSocketInfo(input)), reason);
2997         }
2998 
2999         private void fail(String reason, int p) throws URISyntaxException {
3000             if (!Exceptions.enhancedNonSocketExceptions()) {
3001                 p = -1;
3002             }
3003             throw new URISyntaxException(formatMsg("%s", filterNonSocketInfo(input)), reason, p);
3004         }
3005 
3006         private void failExpecting(String expected, int p)
3007             throws URISyntaxException
3008         {
3009             fail("Expected " + expected, p);
3010         }
3011 
3012 
3013         // -- Simple access to the input string --
3014 
3015         // Tells whether start < end and, if so, whether charAt(start) == c
3016         //
3017         private boolean at(int start, int end, char c) {
3018             return (start < end) && (input.charAt(start) == c);
3019         }
3020 
3021         // Tells whether start + s.length() < end and, if so,
3022         // whether the chars at the start position match s exactly
3023         //
3024         private boolean at(int start, int end, String s) {
3025             int p = start;
3026             int sn = s.length();
3027             if (sn > end - p)
3028                 return false;
3029             int i = 0;
3030             while (i < sn) {
3031                 if (input.charAt(p++) != s.charAt(i)) {
3032                     break;
3033                 }
3034                 i++;
3035             }
3036             return (i == sn);
3037         }
3038 
3039 
3040         // -- Scanning --
3041 
3042         // The various scan and parse methods that follow use a uniform
3043         // convention of taking the current start position and end index as
3044         // their first two arguments.  The start is inclusive while the end is
3045         // exclusive, just as in the String class, i.e., a start/end pair
        // denotes the half-open interval [start, end) of the input string.
3047         //
3048         // These methods never proceed past the end position.  They may return
3049         // -1 to indicate outright failure, but more often they simply return
3050         // the position of the first char after the last char scanned.  Thus
3051         // a typical idiom is
3052         //
3053         //     int p = start;
3054         //     int q = scan(p, end, ...);
3055         //     if (q > p)
3056         //         // We scanned something
3057         //         ...;
3058         //     else if (q == p)
3059         //         // We scanned nothing
3060         //         ...;
3061         //     else if (q == -1)
3062         //         // Something went wrong
3063         //         ...;
3064 
3065 
3066         // Scan a specific char: If the char at the given start position is
3067         // equal to c, return the index of the next char; otherwise, return the
3068         // start position.
3069         //
3070         private int scan(int start, int end, char c) {
3071             if ((start < end) && (input.charAt(start) == c))
3072                 return start + 1;
3073             return start;
3074         }
3075 
3076         // Scan forward from the given start position.  Stop at the first char
3077         // in the err string (in which case -1 is returned), or the first char
3078         // in the stop string (in which case the index of the preceding char is
3079         // returned), or the end of the input string (in which case the length
3080         // of the input string is returned).  May return the start position if
3081         // nothing matches.
3082         //
3083         private int scan(int start, int end, String err, String stop) {
3084             int p = start;
3085             while (p < end) {
3086                 char c = input.charAt(p);
3087                 if (err.indexOf(c) >= 0)
3088                     return -1;
3089                 if (stop.indexOf(c) >= 0)
3090                     break;
3091                 p++;
3092             }
3093             return p;
3094         }
3095 
3096         // Scan forward from the given start position.  Stop at the first char
3097         // in the stop string (in which case the index of the preceding char is
3098         // returned), or the end of the input string (in which case the length
3099         // of the input string is returned).  May return the start position if
3100         // nothing matches.
3101         //
3102         private int scan(int start, int end, String stop) {
3103             int p = start;
3104             while (p < end) {
3105                 char c = input.charAt(p);
3106                 if (stop.indexOf(c) >= 0)
3107                     break;
3108                 p++;
3109             }
3110             return p;
3111         }
3112 
3113         // Scan a potential escape sequence, starting at the given position,
3114         // with the given first char (i.e., charAt(start) == c).
3115         //
3116         // This method assumes that if escapes are allowed then visible
3117         // non-US-ASCII chars are also allowed.
3118         //
3119         private int scanEscape(int start, int n, char first)
3120             throws URISyntaxException
3121         {
3122             int p = start;
3123             char c = first;
3124             if (c == '%') {
3125                 // Process escape pair
3126                 if ((p + 3 <= n)
3127                     && match(input.charAt(p + 1), L_HEX, H_HEX)
3128                     && match(input.charAt(p + 2), L_HEX, H_HEX)) {
3129                     return p + 3;
3130                 }
3131                 fail("Malformed escape pair", p);
3132             } else if ((c > 128)
3133                        && !Character.isSpaceChar(c)
3134                        && !Character.isISOControl(c)) {
3135                 // Allow unescaped but visible non-US-ASCII chars
3136                 return p + 1;
3137             }
3138             return p;
3139         }
3140 
3141         // Scan chars that match the given mask pair
3142         //
3143         private int scan(int start, int n, long lowMask, long highMask)
3144             throws URISyntaxException
3145         {
3146             int p = start;
3147             while (p < n) {
3148                 char c = input.charAt(p);
3149                 if (match(c, lowMask, highMask)) {
3150                     p++;
3151                     continue;
3152                 }
3153                 if ((lowMask & L_ESCAPED) != 0) {
3154                     int q = scanEscape(p, n, c);
3155                     if (q > p) {
3156                         p = q;
3157                         continue;
3158                     }
3159                 }
3160                 break;
3161             }
3162             return p;
3163         }
3164 
3165         // Check that each of the chars in [start, end) matches the given mask
3166         //
3167         private void checkChars(int start, int end,
3168                                 long lowMask, long highMask,
3169                                 String what)
3170             throws URISyntaxException
3171         {
3172             int p = scan(start, end, lowMask, highMask);
3173             if (p < end)
3174                 fail("Illegal character in " + what, p);
3175         }
3176 
3177         // Check that the char at position p matches the given mask
3178         //
3179         private void checkChar(int p,
3180                                long lowMask, long highMask,
3181                                String what)
3182             throws URISyntaxException
3183         {
3184             checkChars(p, p + 1, lowMask, highMask, what);
3185         }
3186 
3187 
3188         // -- Parsing --
3189 
3190         // [<scheme>:]<scheme-specific-part>[#<fragment>]
3191         //
3192         void parse(boolean rsa) throws URISyntaxException {
3193             requireServerAuthority = rsa;
3194             int n = input.length();
3195             int p = scan(0, n, "/?#", ":");
3196             if ((p >= 0) && at(p, n, ':')) {
3197                 if (p == 0)
3198                     failExpecting("scheme name", 0);
3199                 checkChar(0, L_ALPHA, H_ALPHA, "scheme name");
3200                 checkChars(1, p, L_SCHEME, H_SCHEME, "scheme name");
3201                 scheme = input.substring(0, p);
3202                 p++;                    // Skip ':'
3203                 if (at(p, n, '/')) {
3204                     p = parseHierarchical(p, n);
3205                 } else {
3206                     // opaque; need to create the schemeSpecificPart
3207                     int q = scan(p, n, "#");
3208                     if (q <= p)
3209                         failExpecting("scheme-specific part", p);
3210                     checkChars(p, q, L_URIC, H_URIC, "opaque part");
3211                     schemeSpecificPart = input.substring(p, q);
3212                     p = q;
3213                 }
3214             } else {
3215                 p = parseHierarchical(0, n);
3216             }
3217             if (at(p, n, '#')) {
3218                 checkChars(p + 1, n, L_URIC, H_URIC, "fragment");
3219                 fragment = input.substring(p + 1, n);
3220                 p = n;
3221             }
3222             if (p < n)
3223                 fail("end of URI", p);
3224         }
3225 
3226         // [//authority]<path>[?<query>]
3227         //
3228         // DEVIATION from RFC2396: We allow an empty authority component as
3229         // long as it's followed by a non-empty path, query component, or
3230         // fragment component.  This is so that URIs such as "file:///foo/bar"
3231         // will parse.  This seems to be the intent of RFC2396, though the
3232         // grammar does not permit it.  If the authority is empty then the
3233         // userInfo, host, and port components are undefined.
3234         //
3235         // DEVIATION from RFC2396: We allow empty relative paths.  This seems
3236         // to be the intent of RFC2396, but the grammar does not permit it.
3237         // The primary consequence of this deviation is that "#f" parses as a
3238         // relative URI with an empty path.
3239         //
3240         private int parseHierarchical(int start, int n)
3241             throws URISyntaxException
3242         {
3243             int p = start;
3244             if (at(p, n, '/') && at(p + 1, n, '/')) {
3245                 p += 2;
3246                 int q = scan(p, n, "/?#");
3247                 if (q > p) {
3248                     p = parseAuthority(p, q);
3249                 } else if (q < n) {
3250                     // DEVIATION: Allow empty authority prior to non-empty
3251                     // path, query component or fragment identifier
3252                 } else
3253                     failExpecting("authority", p);
3254             }
3255             int q = scan(p, n, "?#"); // DEVIATION: May be empty
3256             checkChars(p, q, L_PATH, H_PATH, "path");
3257             path = input.substring(p, q);
3258             p = q;
3259             if (at(p, n, '?')) {
3260                 p++;
3261                 q = scan(p, n, "#");
3262                 checkChars(p, q, L_URIC, H_URIC, "query");
3263                 query = input.substring(p, q);
3264                 p = q;
3265             }
3266             return p;
3267         }
3268 
        // authority     = server | reg_name
        //
        // Ambiguity: An authority that is a registry name rather than a server
        // might have a prefix that parses as a server.  We use the fact that
        // the authority component is always followed by '/' or the end of the
        // input string to resolve this: If the complete authority did not
        // parse as a server then we try to parse it as a registry name.
        //
        // Parses the authority occupying [start, n).  On success the authority
        // field is set to that substring and n is returned; when the server
        // parse succeeds, parseServer has also assigned userInfo, host and
        // port as applicable.  Throws URISyntaxException if the text is
        // acceptable neither as a server nor as a registry name, or if a
        // server-based authority is required and the server parse fails.
        //
        private int parseAuthority(int start, int n)
            throws URISyntaxException
        {
            int p = start;
            int q = p;                  // How far the server parse got
            int qreg = p;               // How far the reg_name scan got
            URISyntaxException ex = null;   // Saved server-parse failure

            boolean serverChars;        // All chars are legal in a server
            boolean regChars;           // All chars are legal in a reg_name
            boolean skipParseException;

            // NOTE(review): scan(p, n, "]") returns n when no ']' is present,
            // so this condition holds for any non-empty authority that does
            // not begin with ']', not only for ones containing a bracketed
            // literal -- confirm whether that is intended.
            if (scan(p, n, "]") > p) {
                // contains a literal IPv6 address, therefore % is allowed
                serverChars = (scan(p, n, L_SERVER_PERCENT, H_SERVER_PERCENT) == n);
            } else {
                serverChars = (scan(p, n, L_SERVER, H_SERVER) == n);
            }
            regChars = ((qreg = scan(p, n, L_REG_NAME, H_REG_NAME)) == n);

            if (regChars && !serverChars) {
                // Must be a registry-based authority
                authority = input.substring(p, n);
                return n;
            }

            // When parsing a URI, skip creating exception objects if the server-based
            // authority is not required and the registry parse is successful.
            //
            skipParseException = (!requireServerAuthority && regChars);
            if (serverChars) {
                // Might be (probably is) a server-based authority, so attempt
                // to parse it as such.  If the attempt fails, try to treat it
                // as a registry-based authority.
                try {
                    q = parseServer(p, n, skipParseException);
                    if (q < n) {
                        // parseServer stopped short of the end
                        if (skipParseException) {
                            // Discard partial results and rewind so that the
                            // registry-based handling below takes over
                            userInfo = null;
                            host = null;
                            port = -1;
                            q = p;
                        } else {
                            failExpecting("end of authority", q);
                        }
                    } else {
                        authority = input.substring(p, n);
                    }
                } catch (URISyntaxException x) {
                    // Undo results of failed parse
                    userInfo = null;
                    host = null;
                    port = -1;
                    if (requireServerAuthority) {
                        // If we're insisting upon a server-based authority,
                        // then just re-throw the exception
                        throw x;
                    } else {
                        // Save the exception in case it doesn't parse as a
                        // registry either
                        ex = x;
                        q = p;
                    }
                }
            }

            // Reached with q < n when the server parse was not attempted or
            // did not consume the whole authority
            if (q < n) {
                if (regChars) {
                    // Registry-based authority
                    authority = input.substring(p, n);
                } else if (ex != null) {
                    // Re-throw exception; it was probably due to
                    // a malformed IPv6 address
                    throw ex;
                } else {
                    // Report the first offending char found by whichever
                    // scan is relevant
                    fail("Illegal character in authority", serverChars ? q : qreg);
                }
            }

            return n;
        }
3358 
3359 
        // [<userinfo>@]<host>[:<port>]
        //
        // Parses a server-based authority in [start, n), assigning userInfo,
        // host and port as each is recognized, and returns the index at which
        // parsing stopped.  That index is n unless skipParseException is true,
        // in which case an earlier index may be returned instead of throwing
        // when unparsable text follows the host.  Other failures throw
        // URISyntaxException.
        //
        private int parseServer(int start, int n, boolean skipParseException)
            throws URISyntaxException
        {
            int p = start;
            int q;

            // userinfo
            // scan returns -1 if one of "/?#" occurs before any '@'; the
            // q >= p test filters that case out
            q = scan(p, n, "/?#", "@");
            if ((q >= p) && at(q, n, '@')) {
                checkChars(p, q, L_USERINFO, H_USERINFO, "user info");
                userInfo = input.substring(p, q);
                p = q + 1;              // Skip '@'
            }

            // hostname, IPv4 address, or IPv6 address
            if (at(p, n, '[')) {
                // DEVIATION from RFC2396: Support IPv6 addresses, per RFC2732
                p++;
                q = scan(p, n, "/?#", "]");
                if ((q > p) && at(q, n, ']')) {
                    // look for a "%" scope id
                    // NOTE(review): scan returns q when there is no '%', so
                    // r > p also holds in that case; the scope id checks
                    // below then operate on an empty range and pass.
                    int r = scan (p, q, "%");
                    if (r > p) {
                        parseIPv6Reference(p, r);
                        if (r+1 == q) {
                            fail ("scope id expected");
                        }
                        checkChars (r+1, q, L_SCOPE_ID, H_SCOPE_ID,
                                                "scope id");
                    } else {
                        parseIPv6Reference(p, q);
                    }
                    // The host keeps its enclosing brackets
                    host = input.substring(p-1, q+1);
                    p = q + 1;
                } else {
                    failExpecting("closing bracket for IPv6 address", q);
                }
            } else {
                // Try an IPv4 address first, then fall back to a hostname;
                // both assign host themselves on success
                q = parseIPv4Address(p, n);
                if (q <= p)
                    q = parseHostname(p, n, skipParseException);
                p = q;
            }

            // port
            if (at(p, n, ':')) {
                p++;
                q = scan(p, n, "/");
                // An empty port (nothing after the ':') leaves port unset
                if (q > p) {
                    checkChars(p, q, L_DIGIT, H_DIGIT, "port number");
                    try {
                        port = Integer.parseInt(input, p, q, 10);
                    } catch (NumberFormatException x) {
                        fail("Malformed port number", p);
                    }
                    p = q;
                }
            } else if (p < n && skipParseException) {
                // Leftover text and the caller asked not to get an
                // exception: report how far we got instead
                return p;
            }

            if (p < n)
                failExpecting("port number", p);

            return p;
        }
3428 
3429         // Scan a string of decimal digits whose value fits in a byte
3430         //
3431         private int scanByte(int start, int n)
3432             throws URISyntaxException
3433         {
3434             int p = start;
3435             int q = scan(p, n, L_DIGIT, H_DIGIT);
3436             if (q <= p) return q;
3437 
3438             // Handle leading zeros
3439             int i = p, j;
3440             while ((j = scan(i, q, '0')) > i) i = j;
3441 
3442             // Calculate the number of significant digits (after leading zeros)
3443             int significantDigitsNum = q - i;
3444 
3445             if (significantDigitsNum < 3)  return q; // definitely < 255
3446 
3447             // If more than 3 significant digits, it's definitely > 255
3448             if (significantDigitsNum > 3) return p;
3449 
3450             if (Integer.parseInt(input, p, q, 10) > 255) return p;
3451             return q;
3452         }
3453 
3454         // Scan an IPv4 address.
3455         //
3456         // If the strict argument is true then we require that the given
3457         // interval contain nothing besides an IPv4 address; if it is false
3458         // then we only require that it start with an IPv4 address.
3459         //
3460         // If the interval does not contain or start with (depending upon the
3461         // strict argument) a legal IPv4 address characters then we return -1
3462         // immediately; otherwise we insist that these characters parse as a
3463         // legal IPv4 address and throw an exception on failure.
3464         //
3465         // We assume that any string of decimal digits and dots must be an IPv4
3466         // address.  It won't parse as a hostname anyway, so making that
3467         // assumption here allows more meaningful exceptions to be thrown.
3468         //
3469         private int scanIPv4Address(int start, int n, boolean strict)
3470             throws URISyntaxException
3471         {
3472             int p = start;
3473             int q;
3474             int m = scan(p, n, L_DIGIT | L_DOT, H_DIGIT | H_DOT);
3475             if ((m <= p) || (strict && (m != n)))
3476                 return -1;
3477             for (;;) {
3478                 // Per RFC2732: At most three digits per byte
3479                 // Further constraint: Each element fits in a byte
3480                 if ((q = scanByte(p, m)) <= p) break;   p = q;
3481                 if ((q = scan(p, m, '.')) <= p) break;  p = q;
3482                 if ((q = scanByte(p, m)) <= p) break;   p = q;
3483                 if ((q = scan(p, m, '.')) <= p) break;  p = q;
3484                 if ((q = scanByte(p, m)) <= p) break;   p = q;
3485                 if ((q = scan(p, m, '.')) <= p) break;  p = q;
3486                 if ((q = scanByte(p, m)) <= p) break;   p = q;
3487                 if (q < m) break;
3488                 return q;
3489             }
3490             if (strict) fail("Malformed IPv4 address", q);
3491             return -1;
3492         }
3493 
3494         // Take an IPv4 address: Throw an exception if the given interval
3495         // contains anything except an IPv4 address
3496         //
3497         private int takeIPv4Address(int start, int n, String expected)
3498             throws URISyntaxException
3499         {
3500             int p = scanIPv4Address(start, n, true);
3501             if (p <= start)
3502                 failExpecting(expected, start);
3503             return p;
3504         }
3505 
3506         // Attempt to parse an IPv4 address, returning -1 on failure but
3507         // allowing the given interval to contain [:<characters>] after
3508         // the IPv4 address.
3509         //
3510         private int parseIPv4Address(int start, int n) {
3511             int p;
3512 
3513             try {
3514                 p = scanIPv4Address(start, n, false);
3515             } catch (URISyntaxException | NumberFormatException x) {
3516                 return -1;
3517             }
3518 
3519             if (p == -1) {
3520                 return p;
3521             }
3522 
3523             if (p > start && p < n) {
3524                 // IPv4 address is followed by something - check that
3525                 // it's a ":" as this is the only valid character to
3526                 // follow an address.
3527                 if (input.charAt(p) != ':') {
3528                     return -1;
3529                 }
3530             }
3531 
3532             if (p > start)
3533                 host = input.substring(start, p);
3534 
3535             return p;
3536         }
3537 
3538         // hostname      = domainlabel [ "." ] | 1*( domainlabel "." ) toplabel [ "." ]
3539         // domainlabel   = alphanum | alphanum *( alphanum | "-" ) alphanum
3540         // toplabel      = alpha | alpha *( alphanum | "-" ) alphanum
3541         //
3542         private int parseHostname(int start, int n, boolean skipParseException)
3543             throws URISyntaxException
3544         {
3545             int p = start;
3546             int q;
3547             int l = -1;                 // Start of last parsed label
3548 
3549             do {
3550                 // domainlabel = alphanum [ *( alphanum | "-" ) alphanum ]
3551                 q = scan(p, n, L_ALPHANUM, H_ALPHANUM);
3552                 if (q <= p)
3553                     break;
3554                 l = p;
3555                 p = q;
3556                 q = scan(p, n, L_ALPHANUM | L_DASH, H_ALPHANUM | H_DASH);
3557                 if (q > p) {
3558                     if (input.charAt(q - 1) == '-')
3559                         fail("Illegal character in hostname", q - 1);
3560                     p = q;
3561                 }
3562                 q = scan(p, n, '.');
3563                 if (q <= p)
3564                     break;
3565                 p = q;
3566             } while (p < n);
3567 
3568             if ((p < n) && !at(p, n, ':')) {
3569                 if (skipParseException) {
3570                     return p;
3571                 }
3572                 fail("Illegal character in hostname", p);
3573             }
3574             if (l < 0)
3575                 failExpecting("hostname", start);
3576 
3577             // for a fully qualified hostname check that the rightmost
3578             // label starts with an alpha character.
3579             if (l > start && !match(input.charAt(l), L_ALPHA, H_ALPHA)) {
3580                 fail("Illegal character in hostname", l);
3581             }
3582 
3583             host = input.substring(start, p);
3584             return p;
3585         }
3586 
3587 
3588         // IPv6 address parsing, from RFC2373: IPv6 Addressing Architecture
3589         //
3590         // Bug: The grammar in RFC2373 Appendix B does not allow addresses of
3591         // the form ::12.34.56.78, which are clearly shown in the examples
3592         // earlier in the document.  Here is the original grammar:
3593         //
3594         //   IPv6address = hexpart [ ":" IPv4address ]
3595         //   hexpart     = hexseq | hexseq "::" [ hexseq ] | "::" [ hexseq ]
3596         //   hexseq      = hex4 *( ":" hex4)
3597         //   hex4        = 1*4HEXDIG
3598         //
3599         // We therefore use the following revised grammar:
3600         //
3601         //   IPv6address = hexseq [ ":" IPv4address ]
3602         //                 | hexseq [ "::" [ hexpost ] ]
3603         //                 | "::" [ hexpost ]
3604         //   hexpost     = hexseq | hexseq ":" IPv4address | IPv4address
3605         //   hexseq      = hex4 *( ":" hex4)
3606         //   hex4        = 1*4HEXDIG
3607         //
3608         // This covers all and only the following cases:
3609         //
3610         //   hexseq
3611         //   hexseq : IPv4address
3612         //   hexseq ::
3613         //   hexseq :: hexseq
3614         //   hexseq :: hexseq : IPv4address
3615         //   hexseq :: IPv4address
3616         //   :: hexseq
3617         //   :: hexseq : IPv4address
3618         //   :: IPv4address
3619         //   ::
3620         //
3621         // Additionally we constrain the IPv6 address as follows :-
3622         //
3623         //  i.  IPv6 addresses without compressed zeros should contain
3624         //      exactly 16 bytes.
3625         //
3626         //  ii. IPv6 addresses with compressed zeros should contain
3627         //      less than 16 bytes.
3628 
3629         private int ipv6byteCount = 0;
3630 
3631         private int parseIPv6Reference(int start, int n)
3632             throws URISyntaxException
3633         {
3634             int p = start;
3635             int q;
3636             boolean compressedZeros = false;
3637 
3638             q = scanHexSeq(p, n);
3639 
3640             if (q > p) {
3641                 p = q;
3642                 if (at(p, n, "::")) {
3643                     compressedZeros = true;
3644                     p = scanHexPost(p + 2, n);
3645                 } else if (at(p, n, ':')) {
3646                     p = takeIPv4Address(p + 1,  n, "IPv4 address");
3647                     ipv6byteCount += 4;
3648                 }
3649             } else if (at(p, n, "::")) {
3650                 compressedZeros = true;
3651                 p = scanHexPost(p + 2, n);
3652             }
3653             if (p < n)
3654                 fail("Malformed IPv6 address", start);
3655             if (ipv6byteCount > 16)
3656                 fail("IPv6 address too long", start);
3657             if (!compressedZeros && ipv6byteCount < 16)
3658                 fail("IPv6 address too short", start);
3659             if (compressedZeros && ipv6byteCount == 16)
3660                 fail("Malformed IPv6 address", start);
3661 
3662             return p;
3663         }
3664 
3665         private int scanHexPost(int start, int n)
3666             throws URISyntaxException
3667         {
3668             int p = start;
3669             int q;
3670 
3671             if (p == n)
3672                 return p;
3673 
3674             q = scanHexSeq(p, n);
3675             if (q > p) {
3676                 p = q;
3677                 if (at(p, n, ':')) {
3678                     p++;
3679                     p = takeIPv4Address(p, n, "hex digits or IPv4 address");
3680                     ipv6byteCount += 4;
3681                 }
3682             } else {
3683                 p = takeIPv4Address(p, n, "hex digits or IPv4 address");
3684                 ipv6byteCount += 4;
3685             }
3686             return p;
3687         }
3688 
3689         // Scan a hex sequence; return -1 if one could not be scanned
3690         //
3691         private int scanHexSeq(int start, int n)
3692             throws URISyntaxException
3693         {
3694             int p = start;
3695             int q;
3696 
3697             q = scan(p, n, L_HEX, H_HEX);
3698             if (q <= p)
3699                 return -1;
3700             if (at(q, n, '.'))          // Beginning of IPv4 address
3701                 return -1;
3702             if (q > p + 4)
3703                 fail("IPv6 hexadecimal digit sequence too long", p);
3704             ipv6byteCount += 2;
3705             p = q;
3706             while (p < n) {
3707                 if (!at(p, n, ':'))
3708                     break;
3709                 if (at(p + 1, n, ':'))
3710                     break;              // "::"
3711                 p++;
3712                 q = scan(p, n, L_HEX, H_HEX);
3713                 if (q <= p)
3714                     failExpecting("digits for an IPv6 address", p);
3715                 if (at(q, n, '.')) {    // Beginning of IPv4 address
3716                     p--;
3717                     break;
3718                 }
3719                 if (q > p + 4)
3720                     fail("IPv6 hexadecimal digit sequence too long", p);
3721                 ipv6byteCount += 2;
3722                 p = q;
3723             }
3724 
3725             return p;
3726         }
3727 
3728     }
3729 
3730     static {
3731         runtimeSetup();
3732     }
3733 
3734     // Called from JVM when loading an AOT cache
3735     private static void runtimeSetup() {
3736         SharedSecrets.setJavaNetUriAccess(
3737             new JavaNetUriAccess() {
3738                 public URI create(String scheme, String path) {
3739                     return new URI(scheme, path);
3740                 }
3741             }
3742         );
3743     }
3744 }