1 /*
   2  * Copyright (c) 1994, 2023, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package java.lang;
  27 
  28 import java.io.ObjectStreamField;
  29 import java.io.UnsupportedEncodingException;
  30 import java.lang.annotation.Native;
  31 import java.lang.foreign.MemorySegment;
  32 import java.lang.foreign.ValueLayout;
  33 import java.lang.invoke.MethodHandles;
  34 import java.lang.constant.Constable;
  35 import java.lang.constant.ConstantDesc;
  36 import java.nio.ByteBuffer;
  37 import java.nio.CharBuffer;
  38 import java.nio.charset.*;
  39 import java.util.ArrayList;
  40 import java.util.Arrays;
  41 import java.util.Comparator;
  42 import java.util.Formatter;
  43 import java.util.List;
  44 import java.util.Locale;
  45 import java.util.Objects;
  46 import java.util.Optional;
  47 import java.util.Spliterator;
  48 import java.util.function.Function;
  49 import java.util.regex.Pattern;
  50 import java.util.regex.PatternSyntaxException;
  51 import java.util.stream.Collectors;
  52 import java.util.stream.IntStream;
  53 import java.util.stream.Stream;
  54 import java.util.stream.StreamSupport;
  55 
  56 import jdk.internal.util.ArraysSupport;
  57 import jdk.internal.util.Preconditions;
  58 import jdk.internal.vm.annotation.ForceInline;
  59 import jdk.internal.vm.annotation.IntrinsicCandidate;
  60 import jdk.internal.vm.annotation.Stable;
  61 import sun.nio.cs.ArrayDecoder;
  62 import sun.nio.cs.ArrayEncoder;
  63 
  64 import sun.nio.cs.ISO_8859_1;
  65 import sun.nio.cs.US_ASCII;
  66 import sun.nio.cs.UTF_8;
  67 
  68 /**
  69  * The {@code String} class represents character strings. All
  70  * string literals in Java programs, such as {@code "abc"}, are
  71  * implemented as instances of this class.
  72  * <p>
  73  * Strings are constant; their values cannot be changed after they
  74  * are created. String buffers support mutable strings.
  75  * Because String objects are immutable they can be shared. For example:
  76  * <blockquote><pre>
  77  *     String str = "abc";
  78  * </pre></blockquote><p>
  79  * is equivalent to:
  80  * <blockquote><pre>
  81  *     char data[] = {'a', 'b', 'c'};
  82  *     String str = new String(data);
  83  * </pre></blockquote><p>
  84  * Here are some more examples of how strings can be used:
  85  * <blockquote><pre>
  86  *     System.out.println("abc");
  87  *     String cde = "cde";
  88  *     System.out.println("abc" + cde);
  89  *     String c = "abc".substring(2, 3);
  90  *     String d = cde.substring(1, 2);
  91  * </pre></blockquote>
  92  * <p>
  93  * The class {@code String} includes methods for examining
  94  * individual characters of the sequence, for comparing strings, for
  95  * searching strings, for extracting substrings, and for creating a
  96  * copy of a string with all characters translated to uppercase or to
  97  * lowercase. Case mapping is based on the Unicode Standard version
  98  * specified by the {@link java.lang.Character Character} class.
  99  * <p>
 100  * The Java language provides special support for the string
 101  * concatenation operator (&nbsp;+&nbsp;), and for conversion of
 102  * other objects to strings. For additional information on string
 103  * concatenation and conversion, see <i>The Java Language Specification</i>.
 104  *
 105  * <p> Unless otherwise noted, passing a {@code null} argument to a constructor
 106  * or method in this class will cause a {@link NullPointerException} to be
 107  * thrown.
 108  *
 109  * <p>A {@code String} represents a string in the UTF-16 format
 110  * in which <em>supplementary characters</em> are represented by <em>surrogate
 111  * pairs</em> (see the section <a href="Character.html#unicode">Unicode
 112  * Character Representations</a> in the {@code Character} class for
 113  * more information).
 114  * Index values refer to {@code char} code units, so a supplementary
 115  * character uses two positions in a {@code String}.
 116  * <p>The {@code String} class provides methods for dealing with
 117  * Unicode code points (i.e., characters), in addition to those for
 118  * dealing with Unicode code units (i.e., {@code char} values).
 119  *
 120  * <p>Unless otherwise noted, methods for comparing Strings do not take locale
 121  * into account.  The {@link java.text.Collator} class provides methods for
 122  * finer-grain, locale-sensitive String comparison.
 123  *
 124  * @implNote The implementation of the string concatenation operator is left to
 125  * the discretion of a Java compiler, as long as the compiler ultimately conforms
 126  * to <i>The Java Language Specification</i>. For example, the {@code javac} compiler
 127  * may implement the operator with {@code StringBuffer}, {@code StringBuilder},
 128  * or {@code java.lang.invoke.StringConcatFactory} depending on the JDK version. The
 129  * implementation of string conversion is typically through the method {@code toString},
 130  * defined by {@code Object} and inherited by all classes in Java.
 131  *
 132  * @author  Lee Boynton
 133  * @author  Arthur van Hoff
 134  * @author  Martin Buchholz
 135  * @author  Ulf Zibis
 136  * @see     java.lang.Object#toString()
 137  * @see     java.lang.StringBuffer
 138  * @see     java.lang.StringBuilder
 139  * @see     java.nio.charset.Charset
 140  * @since   1.0
 141  * @jls     15.18.1 String Concatenation Operator +
 142  */
 143 
 144 public final class String
 145     implements java.io.Serializable, Comparable<String>, CharSequence,
 146                Constable, ConstantDesc {
 147 
 148     /**
 149      * The value is used for character storage.
 150      *
     * @implNote This field is trusted by the VM, and is subject to
     * constant folding if the String instance is constant. Overwriting this
     * field after construction will cause problems.
 154      *
 155      * Additionally, it is marked with {@link Stable} to trust the contents
 156      * of the array. No other facility in JDK provides this functionality (yet).
 157      * {@link Stable} is safe here, because value is never null.
 158      */
 159     @Stable
 160     private final byte[] value;
 161 
 162     /**
 163      * The identifier of the encoding used to encode the bytes in
 164      * {@code value}. The supported values in this implementation are
 165      *
 166      * LATIN1
 167      * UTF16
 168      *
     * @implNote This field is trusted by the VM, and is subject to
     * constant folding if the String instance is constant. Overwriting this
     * field after construction will cause problems.
 172      */
 173     private final byte coder;
 174 
 175     /** Cache the hash code for the string */
 176     private int hash; // Default to 0
 177 
 178     /**
 179      * Cache if the hash has been calculated as actually being zero, enabling
 180      * us to avoid recalculating this.
 181      */
 182     private boolean hashIsZero; // Default to false;
 183 
 184     /** use serialVersionUID from JDK 1.0.2 for interoperability */
 185     @java.io.Serial
 186     private static final long serialVersionUID = -6849794470754667710L;
 187 
 188     /**
 189      * If String compaction is disabled, the bytes in {@code value} are
 190      * always encoded in UTF16.
 191      *
 192      * For methods with several possible implementation paths, when String
 193      * compaction is disabled, only one code path is taken.
 194      *
     * The instance field value is generally opaque to optimizing JIT
     * compilers. Therefore, in performance-sensitive places, an explicit
     * check of the static boolean {@code COMPACT_STRINGS} is done first
     * before checking the {@code coder} field since the static boolean
     * {@code COMPACT_STRINGS} would be constant folded away by an
     * optimizing JIT compiler. The idioms for these cases are as follows.
 201      *
 202      * For code such as:
 203      *
 204      *    if (coder == LATIN1) { ... }
 205      *
 206      * can be written more optimally as
 207      *
 208      *    if (coder() == LATIN1) { ... }
 209      *
 210      * or:
 211      *
 212      *    if (COMPACT_STRINGS && coder == LATIN1) { ... }
 213      *
 214      * An optimizing JIT compiler can fold the above conditional as:
 215      *
 216      *    COMPACT_STRINGS == true  => if (coder == LATIN1) { ... }
 217      *    COMPACT_STRINGS == false => if (false)           { ... }
 218      *
 219      * @implNote
 220      * The actual value for this field is injected by JVM. The static
 221      * initialization block is used to set the value here to communicate
 222      * that this static final field is not statically foldable, and to
 223      * avoid any possible circular dependency during vm initialization.
 224      */
 225     static final boolean COMPACT_STRINGS;
 226 
 227     static {
 228         COMPACT_STRINGS = true;
 229     }
 230 
 231     /**
 232      * Class String is special cased within the Serialization Stream Protocol.
 233      *
 234      * A String instance is written into an ObjectOutputStream according to
 235      * <a href="{@docRoot}/../specs/serialization/protocol.html#stream-elements">
 236      * <cite>Java Object Serialization Specification</cite>, Section 6.2, "Stream Elements"</a>
 237      */
 238     @java.io.Serial
 239     private static final ObjectStreamField[] serialPersistentFields =
 240         new ObjectStreamField[0];
 241 
 242     /**
 243      * Initializes a newly created {@code String} object so that it represents
 244      * an empty character sequence.  Note that use of this constructor is
 245      * unnecessary since Strings are immutable.
 246      */
 247     public String() {
 248         this.value = "".value;
 249         this.coder = "".coder;
 250     }
 251 
 252     /**
 253      * Initializes a newly created {@code String} object so that it represents
 254      * the same sequence of characters as the argument; in other words, the
 255      * newly created string is a copy of the argument string. Unless an
 256      * explicit copy of {@code original} is needed, use of this constructor is
 257      * unnecessary since Strings are immutable.
 258      *
 259      * @param  original
 260      *         A {@code String}
 261      */
 262     @IntrinsicCandidate
 263     public String(String original) {
 264         this.value = original.value;
 265         this.coder = original.coder;
 266         this.hash = original.hash;
 267         this.hashIsZero = original.hashIsZero;
 268     }
 269 
 270     /**
 271      * Allocates a new {@code String} so that it represents the sequence of
 272      * characters currently contained in the character array argument. The
 273      * contents of the character array are copied; subsequent modification of
 274      * the character array does not affect the newly created string.
 275      *
 276      * @param  value
 277      *         The initial value of the string
 278      */
 279     public String(char[] value) {
 280         this(value, 0, value.length, null);
 281     }
 282 
 283     /**
 284      * Allocates a new {@code String} that contains characters from a subarray
 285      * of the character array argument. The {@code offset} argument is the
 286      * index of the first character of the subarray and the {@code count}
 287      * argument specifies the length of the subarray. The contents of the
 288      * subarray are copied; subsequent modification of the character array does
 289      * not affect the newly created string.
 290      *
 291      * @param  value
 292      *         Array that is the source of characters
 293      *
 294      * @param  offset
 295      *         The initial offset
 296      *
 297      * @param  count
 298      *         The length
 299      *
 300      * @throws  IndexOutOfBoundsException
 301      *          If {@code offset} is negative, {@code count} is negative, or
 302      *          {@code offset} is greater than {@code value.length - count}
 303      */
 304     public String(char[] value, int offset, int count) {
 305         this(value, offset, count, rangeCheck(value, offset, count));
 306     }
 307 
 308     private static Void rangeCheck(char[] value, int offset, int count) {
 309         checkBoundsOffCount(offset, count, value.length);
 310         return null;
 311     }
 312 
 313     /**
 314      * Allocates a new {@code String} that contains characters from a subarray
 315      * of the <a href="Character.html#unicode">Unicode code point</a> array
 316      * argument.  The {@code offset} argument is the index of the first code
 317      * point of the subarray and the {@code count} argument specifies the
 318      * length of the subarray.  The contents of the subarray are converted to
 319      * {@code char}s; subsequent modification of the {@code int} array does not
 320      * affect the newly created string.
 321      *
 322      * @param  codePoints
 323      *         Array that is the source of Unicode code points
 324      *
 325      * @param  offset
 326      *         The initial offset
 327      *
 328      * @param  count
 329      *         The length
 330      *
 331      * @throws  IllegalArgumentException
 332      *          If any invalid Unicode code point is found in {@code
 333      *          codePoints}
 334      *
 335      * @throws  IndexOutOfBoundsException
 336      *          If {@code offset} is negative, {@code count} is negative, or
 337      *          {@code offset} is greater than {@code codePoints.length - count}
 338      *
 339      * @since  1.5
 340      */
 341     public String(int[] codePoints, int offset, int count) {
 342         checkBoundsOffCount(offset, count, codePoints.length);
 343         if (count == 0) {
 344             this.value = "".value;
 345             this.coder = "".coder;
 346             return;
 347         }
 348         if (COMPACT_STRINGS) {
 349             byte[] val = StringLatin1.toBytes(codePoints, offset, count);
 350             if (val != null) {
 351                 this.coder = LATIN1;
 352                 this.value = val;
 353                 return;
 354             }
 355         }
 356         this.coder = UTF16;
 357         this.value = StringUTF16.toBytes(codePoints, offset, count);
 358     }
 359 
 360     /**
 361      * Allocates a new {@code String} constructed from a subarray of an array
 362      * of 8-bit integer values.
 363      *
 364      * <p> The {@code offset} argument is the index of the first byte of the
 365      * subarray, and the {@code count} argument specifies the length of the
 366      * subarray.
 367      *
 368      * <p> Each {@code byte} in the subarray is converted to a {@code char} as
 369      * specified in the {@link #String(byte[],int) String(byte[],int)} constructor.
 370      *
 371      * @deprecated This method does not properly convert bytes into characters.
 372      * As of JDK&nbsp;1.1, the preferred way to do this is via the
 373      * {@code String} constructors that take a {@link Charset}, charset name,
 374      * or that use the {@link Charset#defaultCharset() default charset}.
 375      *
 376      * @param  ascii
 377      *         The bytes to be converted to characters
 378      *
 379      * @param  hibyte
 380      *         The top 8 bits of each 16-bit Unicode code unit
 381      *
 382      * @param  offset
 383      *         The initial offset
 384      * @param  count
 385      *         The length
 386      *
 387      * @throws  IndexOutOfBoundsException
 388      *          If {@code offset} is negative, {@code count} is negative, or
 389      *          {@code offset} is greater than {@code ascii.length - count}
 390      *
 391      * @see  #String(byte[], int)
 392      * @see  #String(byte[], int, int, java.lang.String)
 393      * @see  #String(byte[], int, int, java.nio.charset.Charset)
 394      * @see  #String(byte[], int, int)
 395      * @see  #String(byte[], java.lang.String)
 396      * @see  #String(byte[], java.nio.charset.Charset)
 397      * @see  #String(byte[])
 398      */
 399     @Deprecated(since="1.1")
 400     public String(byte[] ascii, int hibyte, int offset, int count) {
 401         checkBoundsOffCount(offset, count, ascii.length);
 402         if (count == 0) {
 403             this.value = "".value;
 404             this.coder = "".coder;
 405             return;
 406         }
 407         if (COMPACT_STRINGS && (byte)hibyte == 0) {
 408             this.value = Arrays.copyOfRange(ascii, offset, offset + count);
 409             this.coder = LATIN1;
 410         } else {
 411             hibyte <<= 8;
 412             byte[] val = StringUTF16.newBytesFor(count);
 413             for (int i = 0; i < count; i++) {
 414                 StringUTF16.putChar(val, i, hibyte | (ascii[offset++] & 0xff));
 415             }
 416             this.value = val;
 417             this.coder = UTF16;
 418         }
 419     }
 420 
 421     /**
 422      * Allocates a new {@code String} containing characters constructed from
 423      * an array of 8-bit integer values. Each character <i>c</i> in the
 424      * resulting string is constructed from the corresponding component
 425      * <i>b</i> in the byte array such that:
 426      *
 427      * <blockquote><pre>
 428      *     <b><i>c</i></b> == (char)(((hibyte &amp; 0xff) &lt;&lt; 8)
 429      *                         | (<b><i>b</i></b> &amp; 0xff))
 430      * </pre></blockquote>
 431      *
 432      * @deprecated  This method does not properly convert bytes into
 433      * characters.  As of JDK&nbsp;1.1, the preferred way to do this is via the
 434      * {@code String} constructors that take a {@link Charset}, charset name,
 435      * or that use the {@link Charset#defaultCharset() default charset}.
 436      *
 437      * @param  ascii
 438      *         The bytes to be converted to characters
 439      *
 440      * @param  hibyte
 441      *         The top 8 bits of each 16-bit Unicode code unit
 442      *
 443      * @see  #String(byte[], int, int, java.lang.String)
 444      * @see  #String(byte[], int, int, java.nio.charset.Charset)
 445      * @see  #String(byte[], int, int)
 446      * @see  #String(byte[], java.lang.String)
 447      * @see  #String(byte[], java.nio.charset.Charset)
 448      * @see  #String(byte[])
 449      */
 450     @Deprecated(since="1.1")
 451     public String(byte[] ascii, int hibyte) {
 452         this(ascii, hibyte, 0, ascii.length);
 453     }
 454 
 455     /**
 456      * Constructs a new {@code String} by decoding the specified subarray of
 457      * bytes using the specified charset.  The length of the new {@code String}
 458      * is a function of the charset, and hence may not be equal to the length
 459      * of the subarray.
 460      *
 461      * <p> The behavior of this constructor when the given bytes are not valid
 462      * in the given charset is unspecified.  The {@link
 463      * java.nio.charset.CharsetDecoder} class should be used when more control
 464      * over the decoding process is required.
 465      *
 466      * @param  bytes
 467      *         The bytes to be decoded into characters
 468      *
 469      * @param  offset
 470      *         The index of the first byte to decode
 471      *
 472      * @param  length
 473      *         The number of bytes to decode
 474      *
 475      * @param  charsetName
 476      *         The name of a supported {@linkplain java.nio.charset.Charset
 477      *         charset}
 478      *
 479      * @throws  UnsupportedEncodingException
 480      *          If the named charset is not supported
 481      *
 482      * @throws  IndexOutOfBoundsException
 483      *          If {@code offset} is negative, {@code length} is negative, or
 484      *          {@code offset} is greater than {@code bytes.length - length}
 485      *
 486      * @since  1.1
 487      */
 488     public String(byte[] bytes, int offset, int length, String charsetName)
 489             throws UnsupportedEncodingException {
 490         this(lookupCharset(charsetName), bytes, checkBoundsOffCount(offset, length, bytes.length), length);
 491     }
 492 
 493     /**
 494      * Constructs a new {@code String} by decoding the specified subarray of
 495      * bytes using the specified {@linkplain java.nio.charset.Charset charset}.
 496      * The length of the new {@code String} is a function of the charset, and
 497      * hence may not be equal to the length of the subarray.
 498      *
 499      * <p> This method always replaces malformed-input and unmappable-character
 500      * sequences with this charset's default replacement string.  The {@link
 501      * java.nio.charset.CharsetDecoder} class should be used when more control
 502      * over the decoding process is required.
 503      *
 504      * @param  bytes
 505      *         The bytes to be decoded into characters
 506      *
 507      * @param  offset
 508      *         The index of the first byte to decode
 509      *
 510      * @param  length
 511      *         The number of bytes to decode
 512      *
 513      * @param  charset
 514      *         The {@linkplain java.nio.charset.Charset charset} to be used to
 515      *         decode the {@code bytes}
 516      *
 517      * @throws  IndexOutOfBoundsException
 518      *          If {@code offset} is negative, {@code length} is negative, or
 519      *          {@code offset} is greater than {@code bytes.length - length}
 520      *
 521      * @since  1.6
 522      */
 523     public String(byte[] bytes, int offset, int length, Charset charset) {
 524         this(Objects.requireNonNull(charset), bytes, checkBoundsOffCount(offset, length, bytes.length), length);
 525     }
 526 
 527     /**
 528      * This method does not do any precondition checks on its arguments.
 529      * <p>
 530      * Important: parameter order of this method is deliberately changed in order to
 531      * disambiguate it against other similar methods of this class.
 532      */
 533     @SuppressWarnings("removal")
 534     private String(Charset charset, byte[] bytes, int offset, int length) {
 535         if (length == 0) {
 536             this.value = "".value;
 537             this.coder = "".coder;
 538         } else if (charset == UTF_8.INSTANCE) {
 539             if (COMPACT_STRINGS) {
 540                 int dp = StringCoding.countPositives(bytes, offset, length);
 541                 if (dp == length) {
 542                     this.value = Arrays.copyOfRange(bytes, offset, offset + length);
 543                     this.coder = LATIN1;
 544                     return;
 545                 }
 546                 int sl = offset + length;
 547                 byte[] dst = new byte[length];
 548                 if (dp > 0) {
 549                     System.arraycopy(bytes, offset, dst, 0, dp);
 550                     offset += dp;
 551                 }
 552                 while (offset < sl) {
 553                     int b1 = bytes[offset++];
 554                     if (b1 >= 0) {
 555                         dst[dp++] = (byte)b1;
 556                         continue;
 557                     }
 558                     if ((b1 & 0xfe) == 0xc2 && offset < sl) { // b1 either 0xc2 or 0xc3
 559                         int b2 = bytes[offset];
 560                         if (b2 < -64) { // continuation bytes are always negative values in the range -128 to -65
 561                             dst[dp++] = (byte)decode2(b1, b2);
 562                             offset++;
 563                             continue;
 564                         }
 565                     }
 566                     // anything not a latin1, including the REPL
 567                     // we have to go with the utf16
 568                     offset--;
 569                     break;
 570                 }
 571                 if (offset == sl) {
 572                     if (dp != dst.length) {
 573                         dst = Arrays.copyOf(dst, dp);
 574                     }
 575                     this.value = dst;
 576                     this.coder = LATIN1;
 577                     return;
 578                 }
 579                 byte[] buf = new byte[length << 1];
 580                 StringLatin1.inflate(dst, 0, buf, 0, dp);
 581                 dst = buf;
 582                 dp = decodeUTF8_UTF16(bytes, offset, sl, dst, dp, true);
 583                 if (dp != length) {
 584                     dst = Arrays.copyOf(dst, dp << 1);
 585                 }
 586                 this.value = dst;
 587                 this.coder = UTF16;
 588             } else { // !COMPACT_STRINGS
 589                 byte[] dst = new byte[length << 1];
 590                 int dp = decodeUTF8_UTF16(bytes, offset, offset + length, dst, 0, true);
 591                 if (dp != length) {
 592                     dst = Arrays.copyOf(dst, dp << 1);
 593                 }
 594                 this.value = dst;
 595                 this.coder = UTF16;
 596             }
 597         } else if (charset == ISO_8859_1.INSTANCE) {
 598             if (COMPACT_STRINGS) {
 599                 this.value = Arrays.copyOfRange(bytes, offset, offset + length);
 600                 this.coder = LATIN1;
 601             } else {
 602                 this.value = StringLatin1.inflate(bytes, offset, length);
 603                 this.coder = UTF16;
 604             }
 605         } else if (charset == US_ASCII.INSTANCE) {
 606             if (COMPACT_STRINGS && !StringCoding.hasNegatives(bytes, offset, length)) {
 607                 this.value = Arrays.copyOfRange(bytes, offset, offset + length);
 608                 this.coder = LATIN1;
 609             } else {
 610                 byte[] dst = new byte[length << 1];
 611                 int dp = 0;
 612                 while (dp < length) {
 613                     int b = bytes[offset++];
 614                     StringUTF16.putChar(dst, dp++, (b >= 0) ? (char) b : REPL);
 615                 }
 616                 this.value = dst;
 617                 this.coder = UTF16;
 618             }
 619         } else {
 620             // (1)We never cache the "external" cs, the only benefit of creating
 621             // an additional StringDe/Encoder object to wrap it is to share the
 622             // de/encode() method. These SD/E objects are short-lived, the young-gen
 623             // gc should be able to take care of them well. But the best approach
 624             // is still not to generate them if not really necessary.
 625             // (2)The defensive copy of the input byte/char[] has a big performance
 626             // impact, as well as the outgoing result byte/char[]. Need to do the
 627             // optimization check of (sm==null && classLoader0==null) for both.
 628             CharsetDecoder cd = charset.newDecoder();
 629             // ArrayDecoder fastpaths
 630             if (cd instanceof ArrayDecoder ad) {
 631                 // ascii
 632                 if (ad.isASCIICompatible() && !StringCoding.hasNegatives(bytes, offset, length)) {
 633                     if (COMPACT_STRINGS) {
 634                         this.value = Arrays.copyOfRange(bytes, offset, offset + length);
 635                         this.coder = LATIN1;
 636                         return;
 637                     }
 638                     this.value = StringLatin1.inflate(bytes, offset, length);
 639                     this.coder = UTF16;
 640                     return;
 641                 }
 642 
 643                 // fastpath for always Latin1 decodable single byte
 644                 if (COMPACT_STRINGS && ad.isLatin1Decodable()) {
 645                     byte[] dst = new byte[length];
 646                     ad.decodeToLatin1(bytes, offset, length, dst);
 647                     this.value = dst;
 648                     this.coder = LATIN1;
 649                     return;
 650                 }
 651 
 652                 int en = scale(length, cd.maxCharsPerByte());
 653                 cd.onMalformedInput(CodingErrorAction.REPLACE)
 654                         .onUnmappableCharacter(CodingErrorAction.REPLACE);
 655                 char[] ca = new char[en];
 656                 int clen = ad.decode(bytes, offset, length, ca);
 657                 if (COMPACT_STRINGS) {
 658                     byte[] bs = StringUTF16.compress(ca, 0, clen);
 659                     if (bs != null) {
 660                         value = bs;
 661                         coder = LATIN1;
 662                         return;
 663                     }
 664                 }
 665                 coder = UTF16;
 666                 value = StringUTF16.toBytes(ca, 0, clen);
 667                 return;
 668             }
 669 
 670             // decode using CharsetDecoder
 671             int en = scale(length, cd.maxCharsPerByte());
 672             cd.onMalformedInput(CodingErrorAction.REPLACE)
 673                     .onUnmappableCharacter(CodingErrorAction.REPLACE);
 674             char[] ca = new char[en];
 675             if (charset.getClass().getClassLoader0() != null &&
 676                     System.getSecurityManager() != null) {
 677                 bytes = Arrays.copyOfRange(bytes, offset, offset + length);
 678                 offset = 0;
 679             }
 680 
 681             int caLen;
 682             try {
 683                 caLen = decodeWithDecoder(cd, ca, bytes, offset, length);
 684             } catch (CharacterCodingException x) {
 685                 // Substitution is enabled, so this shouldn't happen
 686                 throw new Error(x);
 687             }
 688             if (COMPACT_STRINGS) {
 689                 byte[] bs = StringUTF16.compress(ca, 0, caLen);
 690                 if (bs != null) {
 691                     value = bs;
 692                     coder = LATIN1;
 693                     return;
 694                 }
 695             }
 696             coder = UTF16;
 697             value = StringUTF16.toBytes(ca, 0, caLen);
 698         }
 699     }
 700 
    /*
     * Decodes the given range of UTF-8 bytes into a String.
     *
     * Throws iae, instead of replacing, if malformed or unmappable.
     * (Input that leaves the Latin-1 fast path below is handed to
     * decodeUTF8_UTF16 with doReplace == false.)
     *
     * @param  noShare
     *         {@code true} if the resulting string MUST NOT share the byte array,
     *         {@code false} if the byte array can be exclusively used to construct
     *         the string and is not modified or used for any other purpose.
     */
    static String newStringUTF8NoRepl(byte[] bytes, int offset, int length, boolean noShare) {
        checkBoundsOffCount(offset, length, bytes.length);
        if (length == 0) {
            return "";
        }
        int dp;
        byte[] dst;
        if (COMPACT_STRINGS) {
            // dp starts as the count of leading bytes reported as non-negative (ASCII).
            dp = StringCoding.countPositives(bytes, offset, length);
            int sl = offset + length;
            if (dp == length) {
                // The whole range is ASCII. The caller's array is adopted directly
                // only when sharing is allowed and the range spans the entire array.
                if (noShare || length != bytes.length) {
                    return new String(Arrays.copyOfRange(bytes, offset, offset + length), LATIN1);
                } else {
                    return new String(bytes, LATIN1);
                }
            }
            // Optimistically decode into one byte per char, starting with the ASCII prefix.
            dst = new byte[length];
            System.arraycopy(bytes, offset, dst, 0, dp);
            offset += dp;
            while (offset < sl) {
                int b1 = bytes[offset++];
                if (b1 >= 0) {
                    dst[dp++] = (byte)b1;
                    continue;
                }
                if ((b1 & 0xfe) == 0xc2 && offset < sl) { // b1 either 0xc2 or 0xc3
                    int b2 = bytes[offset];
                    if (b2 < -64) { // continuation bytes are always negative values in the range -128 to -65
                        dst[dp++] = (byte)decode2(b1, b2);
                        offset++;
                        continue;
                    }
                }
                // anything not a latin1, including the REPL
                // we have to go with the utf16
                offset--;   // step back so the UTF-16 decoder re-reads this lead byte
                break;
            }
            if (offset == sl) {
                // Every byte was consumed on the Latin-1 path; trim to the chars produced.
                if (dp != dst.length) {
                    dst = Arrays.copyOf(dst, dp);
                }
                return new String(dst, LATIN1);
            }
            // Switch to two bytes per char, carrying over the dp chars decoded so far.
            if (dp == 0) {
                dst = new byte[length << 1];
            } else {
                byte[] buf = new byte[length << 1];
                StringLatin1.inflate(dst, 0, buf, 0, dp);
                dst = buf;
            }
            // Resume at 'offset', writing from char index dp; doReplace == false.
            dp = decodeUTF8_UTF16(bytes, offset, sl, dst, dp, false);
        } else { // !COMPACT_STRINGS
            dst = new byte[length << 1];
            dp = decodeUTF8_UTF16(bytes, offset, offset + length, dst, 0, false);
        }
        // dst was sized for 'length' chars; shrink it when fewer (dp) were produced.
        if (dp != length) {
            dst = Arrays.copyOf(dst, dp << 1);
        }
        return new String(dst, UTF16);
    }
 771 
 772     static String newStringNoRepl(byte[] src, Charset cs) throws CharacterCodingException {
 773         try {
 774             return newStringNoRepl1(src, cs);
 775         } catch (IllegalArgumentException e) {
 776             //newStringNoRepl1 throws IAE with MalformedInputException or CCE as the cause
 777             Throwable cause = e.getCause();
 778             if (cause instanceof MalformedInputException mie) {
 779                 throw mie;
 780             }
 781             throw (CharacterCodingException)cause;
 782         }
 783     }
 784 
 785     @SuppressWarnings("removal")
 786     private static String newStringNoRepl1(byte[] src, Charset cs) {
 787         int len = src.length;
 788         if (len == 0) {
 789             return "";
 790         }
 791         if (cs == UTF_8.INSTANCE) {
 792             return newStringUTF8NoRepl(src, 0, src.length, false);
 793         }
 794         if (cs == ISO_8859_1.INSTANCE) {
 795             if (COMPACT_STRINGS)
 796                 return new String(src, LATIN1);
 797             return new String(StringLatin1.inflate(src, 0, src.length), UTF16);
 798         }
 799         if (cs == US_ASCII.INSTANCE) {
 800             if (!StringCoding.hasNegatives(src, 0, src.length)) {
 801                 if (COMPACT_STRINGS)
 802                     return new String(src, LATIN1);
 803                 return new String(StringLatin1.inflate(src, 0, src.length), UTF16);
 804             } else {
 805                 throwMalformed(src);
 806             }
 807         }
 808 
 809         CharsetDecoder cd = cs.newDecoder();
 810         // ascii fastpath
 811         if (cd instanceof ArrayDecoder ad &&
 812                 ad.isASCIICompatible() &&
 813                 !StringCoding.hasNegatives(src, 0, src.length)) {
 814             if (COMPACT_STRINGS)
 815                 return new String(src, LATIN1);
 816             return new String(src, 0, src.length, ISO_8859_1.INSTANCE);
 817         }
 818         int en = scale(len, cd.maxCharsPerByte());
 819         char[] ca = new char[en];
 820         if (cs.getClass().getClassLoader0() != null &&
 821                 System.getSecurityManager() != null) {
 822             src = Arrays.copyOf(src, len);
 823         }
 824         int caLen;
 825         try {
 826             caLen = decodeWithDecoder(cd, ca, src, 0, src.length);
 827         } catch (CharacterCodingException x) {
 828             // throw via IAE
 829             throw new IllegalArgumentException(x);
 830         }
 831         if (COMPACT_STRINGS) {
 832             byte[] bs = StringUTF16.compress(ca, 0, caLen);
 833             if (bs != null) {
 834                 return new String(bs, LATIN1);
 835             }
 836         }
 837         return new String(StringUTF16.toBytes(ca, 0, caLen), UTF16);
 838     }
 839 
    // U+FFFD: the char decodeUTF8_UTF16 writes in place of malformed input when doReplace is set.
    private static final char REPL = '\ufffd';
 841 
 842     // Trim the given byte array to the given length
 843     @SuppressWarnings("removal")
 844     private static byte[] safeTrim(byte[] ba, int len, boolean isTrusted) {
 845         if (len == ba.length && (isTrusted || System.getSecurityManager() == null)) {
 846             return ba;
 847         } else {
 848             return Arrays.copyOf(ba, len);
 849         }
 850     }
 851 
 852     private static int scale(int len, float expansionFactor) {
 853         // We need to perform double, not float, arithmetic; otherwise
 854         // we lose low order bits when len is larger than 2**24.
 855         return (int)(len * (double)expansionFactor);
 856     }
 857 
 858     private static Charset lookupCharset(String csn) throws UnsupportedEncodingException {
 859         Objects.requireNonNull(csn);
 860         try {
 861             return Charset.forName(csn);
 862         } catch (UnsupportedCharsetException | IllegalCharsetNameException x) {
 863             throw new UnsupportedEncodingException(csn);
 864         }
 865     }
 866 
 867     private static byte[] encode(Charset cs, byte coder, byte[] val) {
 868         if (cs == UTF_8.INSTANCE) {
 869             return encodeUTF8(coder, val, true);
 870         }
 871         if (cs == ISO_8859_1.INSTANCE) {
 872             return encode8859_1(coder, val);
 873         }
 874         if (cs == US_ASCII.INSTANCE) {
 875             return encodeASCII(coder, val);
 876         }
 877         return encodeWithEncoder(cs, coder, val, true);
 878     }
 879 
    /*
     * Encodes string contents with a CharsetEncoder obtained from cs.
     *
     * @param  coder     LATIN1 or UTF16; also used as the shift that turns
     *                   val.length into a char count
     * @param  val       the string's bytes in that representation
     * @param  doReplace true to substitute on malformed/unmappable input;
     *                   false to report it as an IllegalArgumentException
     *                   wrapping the CharacterCodingException
     */
    private static byte[] encodeWithEncoder(Charset cs, byte coder, byte[] val, boolean doReplace) {
        CharsetEncoder ce = cs.newEncoder();
        int len = val.length >> coder;  // assume LATIN1=0/UTF16=1;
        // output buffer size: char count scaled by the encoder's max bytes per char
        int en = scale(len, ce.maxBytesPerChar());
        // fastpath with ArrayEncoder implies `doReplace`.
        if (doReplace && ce instanceof ArrayEncoder ae) {
            // fastpath for ascii compatible
            if (coder == LATIN1 &&
                    ae.isASCIICompatible() &&
                    !StringCoding.hasNegatives(val, 0, val.length)) {
                // no negative bytes: a copy of the Latin-1 bytes is returned as the result
                return val.clone();
            }
            byte[] ba = new byte[en];
            if (len == 0) {
                return ba;
            }

            int blen = (coder == LATIN1) ? ae.encodeFromLatin1(val, 0, len, ba)
                    : ae.encodeFromUTF16(val, 0, len, ba);
            // NOTE(review): -1 presumably means the array encoder declined this
            // input; in that case control falls through to the general path below.
            if (blen != -1) {
                return safeTrim(ba, blen, true);
            }
        }

        // general path: drive the CharsetEncoder through NIO buffers
        byte[] ba = new byte[en];
        if (len == 0) {
            return ba;
        }
        if (doReplace) {
            ce.onMalformedInput(CodingErrorAction.REPLACE)
                    .onUnmappableCharacter(CodingErrorAction.REPLACE);
        }
        char[] ca = (coder == LATIN1 ) ? StringLatin1.toChars(val)
                : StringUTF16.toChars(val);
        ByteBuffer bb = ByteBuffer.wrap(ba);
        CharBuffer cb = CharBuffer.wrap(ca, 0, len);
        try {
            // any result other than underflow, from encode or flush, is raised as an exception
            CoderResult cr = ce.encode(cb, bb, true);
            if (!cr.isUnderflow())
                cr.throwException();
            cr = ce.flush(bb);
            if (!cr.isUnderflow())
                cr.throwException();
        } catch (CharacterCodingException x) {
            if (!doReplace) {
                throw new IllegalArgumentException(x);
            } else {
                // the REPLACE action was configured above, so this is treated as fatal
                throw new Error(x);
            }
        }
        // the result is passed as "trusted" only when the charset class has a null class loader
        return safeTrim(ba, bb.position(), cs.getClass().getClassLoader0() == null);
    }
 932 
 933     /*
 934      * Throws iae, instead of replacing, if unmappable.
 935      */
 936     static byte[] getBytesUTF8NoRepl(String s) {
 937         return encodeUTF8(s.coder(), s.value(), false);
 938     }
 939 
 940     private static boolean isASCII(byte[] src) {
 941         return !StringCoding.hasNegatives(src, 0, src.length);
 942     }
 943 
 944     /*
 945      * Throws CCE, instead of replacing, if unmappable.
 946      */
 947     static byte[] getBytesNoRepl(String s, Charset cs) throws CharacterCodingException {
 948         try {
 949             return getBytesNoRepl1(s, cs);
 950         } catch (IllegalArgumentException e) {
 951             //getBytesNoRepl1 throws IAE with UnmappableCharacterException or CCE as the cause
 952             Throwable cause = e.getCause();
 953             if (cause instanceof UnmappableCharacterException) {
 954                 throw (UnmappableCharacterException)cause;
 955             }
 956             throw (CharacterCodingException)cause;
 957         }
 958     }
 959 
 960     private static byte[] getBytesNoRepl1(String s, Charset cs) {
 961         byte[] val = s.value();
 962         byte coder = s.coder();
 963         if (cs == UTF_8.INSTANCE) {
 964             if (coder == LATIN1 && isASCII(val)) {
 965                 return val;
 966             }
 967             return encodeUTF8(coder, val, false);
 968         }
 969         if (cs == ISO_8859_1.INSTANCE) {
 970             if (coder == LATIN1) {
 971                 return val;
 972             }
 973             return encode8859_1(coder, val, false);
 974         }
 975         if (cs == US_ASCII.INSTANCE) {
 976             if (coder == LATIN1) {
 977                 if (isASCII(val)) {
 978                     return val;
 979                 } else {
 980                     throwUnmappable(val);
 981                 }
 982             }
 983         }
 984         return encodeWithEncoder(cs, coder, val, false);
 985     }
 986 
 987     private static byte[] encodeASCII(byte coder, byte[] val) {
 988         if (coder == LATIN1) {
 989             int positives = StringCoding.countPositives(val, 0, val.length);
 990             byte[] dst = val.clone();
 991             if (positives < dst.length) {
 992                 replaceNegatives(dst, positives);
 993             }
 994             return dst;
 995         }
 996         int len = val.length >> 1;
 997         byte[] dst = new byte[len];
 998         int dp = 0;
 999         for (int i = 0; i < len; i++) {
1000             char c = StringUTF16.getChar(val, i);
1001             if (c < 0x80) {
1002                 dst[dp++] = (byte)c;
1003                 continue;
1004             }
1005             if (Character.isHighSurrogate(c) && i + 1 < len &&
1006                     Character.isLowSurrogate(StringUTF16.getChar(val, i + 1))) {
1007                 i++;
1008             }
1009             dst[dp++] = '?';
1010         }
1011         if (len == dp) {
1012             return dst;
1013         }
1014         return Arrays.copyOf(dst, dp);
1015     }
1016 
1017     private static void replaceNegatives(byte[] val, int fromIndex) {
1018         for (int i = fromIndex; i < val.length; i++) {
1019             if (val[i] < 0) {
1020                 val[i] = '?';
1021             }
1022         }
1023     }
1024 
1025     private static byte[] encode8859_1(byte coder, byte[] val) {
1026         return encode8859_1(coder, val, true);
1027     }
1028 
    /*
     * Encodes string contents as ISO-8859-1.
     *
     * @param  coder     LATIN1 or UTF16
     * @param  val       the string's bytes in that representation
     * @param  doReplace true to write '?' for a char that cannot be encoded;
     *                   false to raise it via throwUnmappable
     */
    private static byte[] encode8859_1(byte coder, byte[] val, boolean doReplace) {
        if (coder == LATIN1) {
            // already one byte per char: the result is a copy of the contents
            return val.clone();
        }
        int len = val.length >> 1;
        byte[] dst = new byte[len];
        int dp = 0;
        int sp = 0;
        int sl = len;
        while (sp < sl) {
            // ret is used as the number of chars converted by this call: it
            // advances both the source (sp) and destination (dp) positions
            int ret = StringCoding.implEncodeISOArray(val, sp, dst, dp, len);
            sp = sp + ret;
            dp = dp + ret;
            if (ret != len) {
                // the call stopped early, so the char now at sp was not converted
                if (!doReplace) {
                    throwUnmappable(sp);
                }
                char c = StringUTF16.getChar(val, sp++);
                // a high surrogate followed by a low surrogate is consumed as one unit
                if (Character.isHighSurrogate(c) && sp < sl &&
                        Character.isLowSurrogate(StringUTF16.getChar(val, sp))) {
                    sp++;
                }
                dst[dp++] = '?';
                len = sl - sp;  // chars still to convert
            }
        }
        // surrogate pairs collapse to a single '?', so fewer bytes than chars may have been written
        if (dp == dst.length) {
            return dst;
        }
        return Arrays.copyOf(dst, dp);
    }
1060 
1061     //////////////////////////////// utf8 ////////////////////////////////////
1062 
1063     /**
1064      * Decodes ASCII from the source byte array into the destination
1065      * char array. Used via JavaLangAccess from UTF_8 and other charset
1066      * decoders.
1067      *
1068      * @return the number of bytes successfully decoded, at most len
1069      */
1070     /* package-private */
1071     static int decodeASCII(byte[] sa, int sp, char[] da, int dp, int len) {
1072         int count = StringCoding.countPositives(sa, sp, len);
1073         while (count < len) {
1074             if (sa[sp + count] < 0) {
1075                 break;
1076             }
1077             count++;
1078         }
1079         StringLatin1.inflate(sa, sp, da, dp, count);
1080         return count;
1081     }
1082 
1083     private static boolean isNotContinuation(int b) {
1084         return (b & 0xc0) != 0x80;
1085     }
1086 
1087     private static boolean isMalformed3(int b1, int b2, int b3) {
1088         return (b1 == (byte)0xe0 && (b2 & 0xe0) == 0x80) ||
1089                 (b2 & 0xc0) != 0x80 || (b3 & 0xc0) != 0x80;
1090     }
1091 
1092     private static boolean isMalformed3_2(int b1, int b2) {
1093         return (b1 == (byte)0xe0 && (b2 & 0xe0) == 0x80) ||
1094                 (b2 & 0xc0) != 0x80;
1095     }
1096 
1097     private static boolean isMalformed4(int b2, int b3, int b4) {
1098         return (b2 & 0xc0) != 0x80 || (b3 & 0xc0) != 0x80 ||
1099                 (b4 & 0xc0) != 0x80;
1100     }
1101 
1102     private static boolean isMalformed4_2(int b1, int b2) {
1103         return (b1 == 0xf0 && (b2 < 0x90 || b2 > 0xbf)) ||
1104                 (b1 == 0xf4 && (b2 & 0xf0) != 0x80) ||
1105                 (b2 & 0xc0) != 0x80;
1106     }
1107 
1108     private static boolean isMalformed4_3(int b3) {
1109         return (b3 & 0xc0) != 0x80;
1110     }
1111 
1112     private static char decode2(int b1, int b2) {
1113         return (char)(((b1 << 6) ^ b2) ^
1114                 (((byte) 0xC0 << 6) ^
1115                         ((byte) 0x80 << 0)));
1116     }
1117 
1118     private static char decode3(int b1, int b2, int b3) {
1119         return (char)((b1 << 12) ^
1120                 (b2 <<  6) ^
1121                 (b3 ^
1122                         (((byte) 0xE0 << 12) ^
1123                                 ((byte) 0x80 <<  6) ^
1124                                 ((byte) 0x80 <<  0))));
1125     }
1126 
1127     private static int decode4(int b1, int b2, int b3, int b4) {
1128         return ((b1 << 18) ^
1129                 (b2 << 12) ^
1130                 (b3 <<  6) ^
1131                 (b4 ^
1132                         (((byte) 0xF0 << 18) ^
1133                                 ((byte) 0x80 << 12) ^
1134                                 ((byte) 0x80 <<  6) ^
1135                                 ((byte) 0x80 <<  0))));
1136     }
1137 
    /*
     * Decodes the UTF-8 bytes src[sp, sl) into dst, storing chars through
     * StringUTF16.putChar starting at char index dp.
     *
     * With doReplace set, each malformed sequence produces one REPL char;
     * otherwise throwMalformed raises an IllegalArgumentException.
     *
     * @return the char index following the last char stored
     */
    private static int decodeUTF8_UTF16(byte[] src, int sp, int sl, byte[] dst, int dp, boolean doReplace) {
        while (sp < sl) {
            int b1 = src[sp++];     // sign-extended: values >= 0x80 are negative here
            if (b1 >= 0) {
                // single byte, stored as-is
                StringUTF16.putChar(dst, dp++, (char) b1);
            } else if ((b1 >> 5) == -2 && (b1 & 0x1e) != 0) {
                // lead byte 110xxxxx, excluding 0xC0 and 0xC1: two-byte sequence
                if (sp < sl) {
                    int b2 = src[sp++];
                    if (isNotContinuation(b2)) {
                        if (!doReplace) {
                            throwMalformed(sp - 1, 1);
                        }
                        StringUTF16.putChar(dst, dp++, REPL);
                        sp--;   // re-examine b2 as the start of the next sequence
                    } else {
                        StringUTF16.putChar(dst, dp++, decode2(b1, b2));
                    }
                    continue;
                }
                // input ends right after the lead byte
                if (!doReplace) {
                    throwMalformed(sp, 1);  // underflow()
                }
                StringUTF16.putChar(dst, dp++, REPL);
                break;
            } else if ((b1 >> 4) == -2) {
                // lead byte 1110xxxx: three-byte sequence
                if (sp + 1 < sl) {
                    int b2 = src[sp++];
                    int b3 = src[sp++];
                    if (isMalformed3(b1, b2, b3)) {
                        if (!doReplace) {
                            throwMalformed(sp - 3, 3);
                        }
                        StringUTF16.putChar(dst, dp++, REPL);
                        // rewind to the lead byte, then skip the 1 or 2 bytes malformed3 reports
                        sp -= 3;
                        sp += malformed3(src, sp);
                    } else {
                        char c = decode3(b1, b2, b3);
                        // a three-byte sequence that decodes to a surrogate is rejected
                        if (Character.isSurrogate(c)) {
                            if (!doReplace) {
                                throwMalformed(sp - 3, 3);
                            }
                            StringUTF16.putChar(dst, dp++, REPL);
                        } else {
                            StringUTF16.putChar(dst, dp++, c);
                        }
                    }
                    continue;
                }
                // fewer than two bytes remain after the lead byte
                if (sp < sl && isMalformed3_2(b1, src[sp])) {
                    if (!doReplace) {
                        throwMalformed(sp - 1, 2);
                    }
                    StringUTF16.putChar(dst, dp++, REPL);
                    continue;
                }
                if (!doReplace) {
                    throwMalformed(sp, 1);
                }
                StringUTF16.putChar(dst, dp++, REPL);
                break;
            } else if ((b1 >> 3) == -2) {
                // lead byte 11110xxx: four-byte sequence
                if (sp + 2 < sl) {
                    int b2 = src[sp++];
                    int b3 = src[sp++];
                    int b4 = src[sp++];
                    int uc = decode4(b1, b2, b3, b4);
                    if (isMalformed4(b2, b3, b4) ||
                            !Character.isSupplementaryCodePoint(uc)) { // shortest form check
                        if (!doReplace) {
                            throwMalformed(sp - 4, 4);
                        }
                        StringUTF16.putChar(dst, dp++, REPL);
                        // rewind to the lead byte, then skip the 1 to 3 bytes malformed4 reports
                        sp -= 4;
                        sp += malformed4(src, sp);
                    } else {
                        // valid supplementary code point: stored as a surrogate pair
                        StringUTF16.putChar(dst, dp++, Character.highSurrogate(uc));
                        StringUTF16.putChar(dst, dp++, Character.lowSurrogate(uc));
                    }
                    continue;
                }
                // fewer than three bytes remain after the lead byte
                b1 &= 0xff;     // isMalformed4_2 compares unsigned values
                if (b1 > 0xf4 || sp < sl && isMalformed4_2(b1, src[sp] & 0xff)) {
                    if (!doReplace) {
                        throwMalformed(sp - 1, 1);  // or 2
                    }
                    StringUTF16.putChar(dst, dp++, REPL);
                    continue;
                }
                if (!doReplace) {
                    throwMalformed(sp - 1, 1);
                }
                sp++;
                StringUTF16.putChar(dst, dp++, REPL);
                if (sp < sl && isMalformed4_3(src[sp])) {
                    continue;
                }
                break;
            } else {
                // any other byte value cannot start a sequence
                if (!doReplace) {
                    throwMalformed(sp - 1, 1);
                }
                StringUTF16.putChar(dst, dp++, REPL);
            }
        }
        return dp;
    }
1244 
1245     private static int decodeWithDecoder(CharsetDecoder cd, char[] dst, byte[] src, int offset, int length)
1246                                             throws CharacterCodingException {
1247         ByteBuffer bb = ByteBuffer.wrap(src, offset, length);
1248         CharBuffer cb = CharBuffer.wrap(dst, 0, dst.length);
1249         CoderResult cr = cd.decode(bb, cb, true);
1250         if (!cr.isUnderflow())
1251             cr.throwException();
1252         cr = cd.flush(cb);
1253         if (!cr.isUnderflow())
1254             cr.throwException();
1255         return cb.position();
1256     }
1257 
1258     private static int malformed3(byte[] src, int sp) {
1259         int b1 = src[sp++];
1260         int b2 = src[sp];    // no need to lookup b3
1261         return ((b1 == (byte)0xe0 && (b2 & 0xe0) == 0x80) ||
1262                 isNotContinuation(b2)) ? 1 : 2;
1263     }
1264 
1265     private static int malformed4(byte[] src, int sp) {
1266         // we don't care the speed here
1267         int b1 = src[sp++] & 0xff;
1268         int b2 = src[sp++] & 0xff;
1269         if (b1 > 0xf4 ||
1270                 (b1 == 0xf0 && (b2 < 0x90 || b2 > 0xbf)) ||
1271                 (b1 == 0xf4 && (b2 & 0xf0) != 0x80) ||
1272                 isNotContinuation(b2))
1273             return 1;
1274         if (isNotContinuation(src[sp]))
1275             return 2;
1276         return 3;
1277     }
1278 
1279     private static void throwMalformed(int off, int nb) {
1280         String msg = "malformed input off : " + off + ", length : " + nb;
1281         throw new IllegalArgumentException(msg, new MalformedInputException(nb));
1282     }
1283 
1284     private static void throwMalformed(byte[] val) {
1285         int dp = StringCoding.countPositives(val, 0, val.length);
1286         throwMalformed(dp, 1);
1287     }
1288 
1289     private static void throwUnmappable(int off) {
1290         String msg = "malformed input off : " + off + ", length : 1";
1291         throw new IllegalArgumentException(msg, new UnmappableCharacterException(1));
1292     }
1293 
1294     private static void throwUnmappable(byte[] val) {
1295         int dp = StringCoding.countPositives(val, 0, val.length);
1296         throwUnmappable(dp);
1297     }
1298 
1299     private static byte[] encodeUTF8(byte coder, byte[] val, boolean doReplace) {
1300         if (coder == UTF16) {
1301             return encodeUTF8_UTF16(val, doReplace);
1302         }
1303 
1304         if (!StringCoding.hasNegatives(val, 0, val.length)) {
1305             return val.clone();
1306         }
1307 
1308         int dp = 0;
1309         byte[] dst = new byte[val.length << 1];
1310         for (byte c : val) {
1311             if (c < 0) {
1312                 dst[dp++] = (byte) (0xc0 | ((c & 0xff) >> 6));
1313                 dst[dp++] = (byte) (0x80 | (c & 0x3f));
1314             } else {
1315                 dst[dp++] = c;
1316             }
1317         }
1318         if (dp == dst.length) {
1319             return dst;
1320         }
1321         return Arrays.copyOf(dst, dp);
1322     }
1323 
1324     private static byte[] encodeUTF8_UTF16(byte[] val, boolean doReplace) {
1325         int dp = 0;
1326         int sp = 0;
1327         int sl = val.length >> 1;
1328         byte[] dst = new byte[sl * 3];
1329         while (sp < sl) {
1330             // ascii fast loop;
1331             char c = StringUTF16.getChar(val, sp);
1332             if (c >= '\u0080') {
1333                 break;
1334             }
1335             dst[dp++] = (byte)c;
1336             sp++;
1337         }
1338         while (sp < sl) {
1339             char c = StringUTF16.getChar(val, sp++);
1340             if (c < 0x80) {
1341                 dst[dp++] = (byte)c;
1342             } else if (c < 0x800) {
1343                 dst[dp++] = (byte)(0xc0 | (c >> 6));
1344                 dst[dp++] = (byte)(0x80 | (c & 0x3f));
1345             } else if (Character.isSurrogate(c)) {
1346                 int uc = -1;
1347                 char c2;
1348                 if (Character.isHighSurrogate(c) && sp < sl &&
1349                         Character.isLowSurrogate(c2 = StringUTF16.getChar(val, sp))) {
1350                     uc = Character.toCodePoint(c, c2);
1351                 }
1352                 if (uc < 0) {
1353                     if (doReplace) {
1354                         dst[dp++] = '?';
1355                     } else {
1356                         throwUnmappable(sp - 1);
1357                     }
1358                 } else {
1359                     dst[dp++] = (byte)(0xf0 | ((uc >> 18)));
1360                     dst[dp++] = (byte)(0x80 | ((uc >> 12) & 0x3f));
1361                     dst[dp++] = (byte)(0x80 | ((uc >>  6) & 0x3f));
1362                     dst[dp++] = (byte)(0x80 | (uc & 0x3f));
1363                     sp++;  // 2 chars
1364                 }
1365             } else {
1366                 // 3 bytes, 16 bits
1367                 dst[dp++] = (byte)(0xe0 | ((c >> 12)));
1368                 dst[dp++] = (byte)(0x80 | ((c >>  6) & 0x3f));
1369                 dst[dp++] = (byte)(0x80 | (c & 0x3f));
1370             }
1371         }
1372         if (dp == dst.length) {
1373             return dst;
1374         }
1375         return Arrays.copyOf(dst, dp);
1376     }
1377 
    /**
     * Constructs a new {@code String} by decoding the specified array of bytes
     * using the specified {@linkplain java.nio.charset.Charset charset}.  The
     * length of the new {@code String} is a function of the charset, and hence
     * may not be equal to the length of the byte array.
     *
     * <p> The behavior of this constructor when the given bytes are not valid
     * in the given charset is unspecified.  The {@link
     * java.nio.charset.CharsetDecoder} class should be used when more control
     * over the decoding process is required.
     *
     * @param  bytes
     *         The bytes to be decoded into characters
     *
     * @param  charsetName
     *         The name of a supported {@linkplain java.nio.charset.Charset
     *         charset}
     *
     * @throws  UnsupportedEncodingException
     *          If the named charset is not supported
     *
     * @since  1.1
     */
    public String(byte[] bytes, String charsetName)
            throws UnsupportedEncodingException {
        // lookupCharset rejects a null name and converts an illegal or
        // unsupported name into UnsupportedEncodingException; the whole
        // array (offset 0, bytes.length) is then decoded.
        this(lookupCharset(charsetName), bytes, 0, bytes.length);
    }
1405 
    /**
     * Constructs a new {@code String} by decoding the specified array of
     * bytes using the specified {@linkplain java.nio.charset.Charset charset}.
     * The length of the new {@code String} is a function of the charset, and
     * hence may not be equal to the length of the byte array.
     *
     * <p> This method always replaces malformed-input and unmappable-character
     * sequences with this charset's default replacement string.  The {@link
     * java.nio.charset.CharsetDecoder} class should be used when more control
     * over the decoding process is required.
     *
     * @param  bytes
     *         The bytes to be decoded into characters
     *
     * @param  charset
     *         The {@linkplain java.nio.charset.Charset charset} to be used to
     *         decode the {@code bytes}
     *
     * @since  1.6
     */
    public String(byte[] bytes, Charset charset) {
        // The charset is null-checked first; the whole array (offset 0,
        // bytes.length) is then decoded.
        this(Objects.requireNonNull(charset), bytes, 0, bytes.length);
    }
1429 
    /**
     * Constructs a new {@code String} by decoding the specified subarray of
     * bytes using the {@link Charset#defaultCharset() default charset}.
     * The length of the new {@code String} is a function of the charset,
     * and hence may not be equal to the length of the subarray.
     *
     * <p> The behavior of this constructor when the given bytes are not valid
     * in the default charset is unspecified.  The {@link
     * java.nio.charset.CharsetDecoder} class should be used when more control
     * over the decoding process is required.
     *
     * @param  bytes
     *         The bytes to be decoded into characters
     *
     * @param  offset
     *         The index of the first byte to decode
     *
     * @param  length
     *         The number of bytes to decode
     *
     * @throws  IndexOutOfBoundsException
     *          If {@code offset} is negative, {@code length} is negative, or
     *          {@code offset} is greater than {@code bytes.length - length}
     *
     * @since  1.1
     */
    public String(byte[] bytes, int offset, int length) {
        // The range check runs while the third argument is evaluated; the
        // value it returns is what gets passed on as the offset.
        // NOTE(review): presumably checkBoundsOffCount returns the validated
        // offset unchanged - confirm against its definition.
        this(Charset.defaultCharset(), bytes, checkBoundsOffCount(offset, length, bytes.length), length);
    }
1459 
    /**
     * Constructs a new {@code String} by decoding the specified array of bytes
     * using the {@link Charset#defaultCharset() default charset}. The length
     * of the new {@code String} is a function of the charset, and hence may not
     * be equal to the length of the byte array.
     *
     * <p> The behavior of this constructor when the given bytes are not valid
     * in the default charset is unspecified.  The {@link
     * java.nio.charset.CharsetDecoder} class should be used when more control
     * over the decoding process is required.
     *
     * @param  bytes
     *         The bytes to be decoded into characters
     *
     * @since  1.1
     */
    public String(byte[] bytes) {
        // Decodes the whole array (offset 0, bytes.length) with the default charset.
        this(Charset.defaultCharset(), bytes, 0, bytes.length);
    }
1479 
1480     /**
1481      * Allocates a new string that contains the sequence of characters
1482      * currently contained in the string buffer argument. The contents of the
1483      * string buffer are copied; subsequent modification of the string buffer
1484      * does not affect the newly created string.
1485      *
1486      * @param  buffer
1487      *         A {@code StringBuffer}
1488      */
    public String(StringBuffer buffer) {
        // Takes a snapshot through buffer.toString() and delegates to the
        // constructor that accepts the resulting String. A null buffer fails
        // right here with a NullPointerException.
        this(buffer.toString());
    }
1492 
1493     /**
1494      * Allocates a new string that contains the sequence of characters
1495      * currently contained in the string builder argument. The contents of the
1496      * string builder are copied; subsequent modification of the string builder
1497      * does not affect the newly created string.
1498      *
1499      * <p> This constructor is provided to ease migration to {@code
1500      * StringBuilder}. Obtaining a string from a string builder via the {@code
1501      * toString} method is likely to run faster and is generally preferred.
1502      *
1503      * @param   builder
1504      *          A {@code StringBuilder}
1505      *
1506      * @since  1.5
1507      */
    public String(StringBuilder builder) {
        // Delegates to a two-argument constructor, passing null as the second
        // argument.
        // NOTE(review): that constructor is declared outside this excerpt; the
        // null presumably exists only to select that overload - confirm.
        this(builder, null);
    }
1511 
1512     /**
1513      * Returns the length of this string.
1514      * The length is equal to the number of <a href="Character.html#unicode">Unicode
1515      * code units</a> in the string.
1516      *
1517      * @return  the length of the sequence of characters represented by this
1518      *          object.
1519      */
1520     public int length() {
1521         return value.length >> coder();
1522     }
1523 
1524     /**
1525      * Returns {@code true} if, and only if, {@link #length()} is {@code 0}.
1526      *
1527      * @return {@code true} if {@link #length()} is {@code 0}, otherwise
1528      * {@code false}
1529      *
1530      * @since 1.6
1531      */
1532     @Override
1533     public boolean isEmpty() {
1534         return value.length == 0;
1535     }
1536 
1537     /**
1538      * Returns the {@code char} value at the
1539      * specified index. An index ranges from {@code 0} to
1540      * {@code length() - 1}. The first {@code char} value of the sequence
1541      * is at index {@code 0}, the next at index {@code 1},
1542      * and so on, as for array indexing.
1543      *
1544      * <p>If the {@code char} value specified by the index is a
1545      * <a href="Character.html#unicode">surrogate</a>, the surrogate
1546      * value is returned.
1547      *
1548      * @param      index   the index of the {@code char} value.
1549      * @return     the {@code char} value at the specified index of this string.
1550      *             The first {@code char} value is at index {@code 0}.
1551      * @throws     IndexOutOfBoundsException  if the {@code index}
1552      *             argument is negative or not less than the length of this
1553      *             string.
1554      */
1555     public char charAt(int index) {
1556         if (isLatin1()) {
1557             return StringLatin1.charAt(value, index);
1558         } else {
1559             return StringUTF16.charAt(value, index);
1560         }
1561     }
1562 
1563     /**
1564      * Returns the character (Unicode code point) at the specified
1565      * index. The index refers to {@code char} values
1566      * (Unicode code units) and ranges from {@code 0} to
1567      * {@link #length()}{@code  - 1}.
1568      *
1569      * <p> If the {@code char} value specified at the given index
1570      * is in the high-surrogate range, the following index is less
1571      * than the length of this {@code String}, and the
1572      * {@code char} value at the following index is in the
1573      * low-surrogate range, then the supplementary code point
1574      * corresponding to this surrogate pair is returned. Otherwise,
1575      * the {@code char} value at the given index is returned.
1576      *
1577      * @param      index the index to the {@code char} values
1578      * @return     the code point value of the character at the
1579      *             {@code index}
1580      * @throws     IndexOutOfBoundsException  if the {@code index}
1581      *             argument is negative or not less than the length of this
1582      *             string.
1583      * @since      1.5
1584      */
1585     public int codePointAt(int index) {
1586         if (isLatin1()) {
1587             checkIndex(index, value.length);
1588             return value[index] & 0xff;
1589         }
1590         int length = value.length >> 1;
1591         checkIndex(index, length);
1592         return StringUTF16.codePointAt(value, index, length);
1593     }
1594 
1595     /**
1596      * Returns the character (Unicode code point) before the specified
1597      * index. The index refers to {@code char} values
1598      * (Unicode code units) and ranges from {@code 1} to {@link
1599      * CharSequence#length() length}.
1600      *
1601      * <p> If the {@code char} value at {@code (index - 1)}
1602      * is in the low-surrogate range, {@code (index - 2)} is not
1603      * negative, and the {@code char} value at {@code (index -
1604      * 2)} is in the high-surrogate range, then the
1605      * supplementary code point value of the surrogate pair is
1606      * returned. If the {@code char} value at {@code index -
1607      * 1} is an unpaired low-surrogate or a high-surrogate, the
1608      * surrogate value is returned.
1609      *
1610      * @param     index the index following the code point that should be returned
1611      * @return    the Unicode code point value before the given index.
1612      * @throws    IndexOutOfBoundsException if the {@code index}
1613      *            argument is less than 1 or greater than the length
1614      *            of this string.
1615      * @since     1.5
1616      */
1617     public int codePointBefore(int index) {
1618         int i = index - 1;
1619         checkIndex(i, length());
1620         if (isLatin1()) {
1621             return (value[i] & 0xff);
1622         }
1623         return StringUTF16.codePointBefore(value, index);
1624     }
1625 
1626     /**
1627      * Returns the number of Unicode code points in the specified text
1628      * range of this {@code String}. The text range begins at the
1629      * specified {@code beginIndex} and extends to the
1630      * {@code char} at index {@code endIndex - 1}. Thus the
1631      * length (in {@code char}s) of the text range is
1632      * {@code endIndex-beginIndex}. Unpaired surrogates within
1633      * the text range count as one code point each.
1634      *
1635      * @param beginIndex the index to the first {@code char} of
1636      * the text range.
1637      * @param endIndex the index after the last {@code char} of
1638      * the text range.
1639      * @return the number of Unicode code points in the specified text
1640      * range
1641      * @throws    IndexOutOfBoundsException if the
1642      * {@code beginIndex} is negative, or {@code endIndex}
1643      * is larger than the length of this {@code String}, or
1644      * {@code beginIndex} is larger than {@code endIndex}.
1645      * @since  1.5
1646      */
1647     public int codePointCount(int beginIndex, int endIndex) {
1648         Objects.checkFromToIndex(beginIndex, endIndex, length());
1649         if (isLatin1()) {
1650             return endIndex - beginIndex;
1651         }
1652         return StringUTF16.codePointCount(value, beginIndex, endIndex);
1653     }
1654 
1655     /**
1656      * Returns the index within this {@code String} that is
1657      * offset from the given {@code index} by
1658      * {@code codePointOffset} code points. Unpaired surrogates
1659      * within the text range given by {@code index} and
1660      * {@code codePointOffset} count as one code point each.
1661      *
1662      * @param index the index to be offset
1663      * @param codePointOffset the offset in code points
1664      * @return the index within this {@code String}
1665      * @throws    IndexOutOfBoundsException if {@code index}
1666      *   is negative or larger than the length of this
1667      *   {@code String}, or if {@code codePointOffset} is positive
1668      *   and the substring starting with {@code index} has fewer
1669      *   than {@code codePointOffset} code points,
1670      *   or if {@code codePointOffset} is negative and the substring
1671      *   before {@code index} has fewer than the absolute value
1672      *   of {@code codePointOffset} code points.
1673      * @since 1.5
1674      */
1675     public int offsetByCodePoints(int index, int codePointOffset) {
1676         return Character.offsetByCodePoints(this, index, codePointOffset);
1677     }
1678 
1679     /**
1680      * Copies characters from this string into the destination character
1681      * array.
1682      * <p>
1683      * The first character to be copied is at index {@code srcBegin};
1684      * the last character to be copied is at index {@code srcEnd-1}
1685      * (thus the total number of characters to be copied is
1686      * {@code srcEnd-srcBegin}). The characters are copied into the
1687      * subarray of {@code dst} starting at index {@code dstBegin}
1688      * and ending at index:
1689      * <blockquote><pre>
1690      *     dstBegin + (srcEnd-srcBegin) - 1
1691      * </pre></blockquote>
1692      *
1693      * @param      srcBegin   index of the first character in the string
1694      *                        to copy.
1695      * @param      srcEnd     index after the last character in the string
1696      *                        to copy.
1697      * @param      dst        the destination array.
1698      * @param      dstBegin   the start offset in the destination array.
1699      * @throws    IndexOutOfBoundsException If any of the following
1700      *            is true:
1701      *            <ul><li>{@code srcBegin} is negative.
1702      *            <li>{@code srcBegin} is greater than {@code srcEnd}
1703      *            <li>{@code srcEnd} is greater than the length of this
1704      *                string
1705      *            <li>{@code dstBegin} is negative
1706      *            <li>{@code dstBegin+(srcEnd-srcBegin)} is larger than
1707      *                {@code dst.length}</ul>
1708      */
1709     public void getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin) {
1710         checkBoundsBeginEnd(srcBegin, srcEnd, length());
1711         checkBoundsOffCount(dstBegin, srcEnd - srcBegin, dst.length);
1712         if (isLatin1()) {
1713             StringLatin1.getChars(value, srcBegin, srcEnd, dst, dstBegin);
1714         } else {
1715             StringUTF16.getChars(value, srcBegin, srcEnd, dst, dstBegin);
1716         }
1717     }
1718 
1719     /**
1720      * Copies characters from this string into the destination byte array. Each
1721      * byte receives the 8 low-order bits of the corresponding character. The
1722      * eight high-order bits of each character are not copied and do not
1723      * participate in the transfer in any way.
1724      *
1725      * <p> The first character to be copied is at index {@code srcBegin}; the
1726      * last character to be copied is at index {@code srcEnd-1}.  The total
1727      * number of characters to be copied is {@code srcEnd-srcBegin}. The
1728      * characters, converted to bytes, are copied into the subarray of {@code
1729      * dst} starting at index {@code dstBegin} and ending at index:
1730      *
1731      * <blockquote><pre>
1732      *     dstBegin + (srcEnd-srcBegin) - 1
1733      * </pre></blockquote>
1734      *
1735      * @deprecated  This method does not properly convert characters into
1736      * bytes.  As of JDK&nbsp;1.1, the preferred way to do this is via the
1737      * {@link #getBytes()} method, which uses the {@link Charset#defaultCharset()
1738      * default charset}.
1739      *
1740      * @param  srcBegin
1741      *         Index of the first character in the string to copy
1742      *
1743      * @param  srcEnd
1744      *         Index after the last character in the string to copy
1745      *
1746      * @param  dst
1747      *         The destination array
1748      *
1749      * @param  dstBegin
1750      *         The start offset in the destination array
1751      *
1752      * @throws  IndexOutOfBoundsException
1753      *          If any of the following is true:
1754      *          <ul>
1755      *            <li> {@code srcBegin} is negative
1756      *            <li> {@code srcBegin} is greater than {@code srcEnd}
1757      *            <li> {@code srcEnd} is greater than the length of this String
1758      *            <li> {@code dstBegin} is negative
1759      *            <li> {@code dstBegin+(srcEnd-srcBegin)} is larger than {@code
1760      *                 dst.length}
1761      *          </ul>
1762      */
1763     @Deprecated(since="1.1")
1764     public void getBytes(int srcBegin, int srcEnd, byte[] dst, int dstBegin) {
1765         checkBoundsBeginEnd(srcBegin, srcEnd, length());
1766         Objects.requireNonNull(dst);
1767         checkBoundsOffCount(dstBegin, srcEnd - srcBegin, dst.length);
1768         if (isLatin1()) {
1769             StringLatin1.getBytes(value, srcBegin, srcEnd, dst, dstBegin);
1770         } else {
1771             StringUTF16.getBytes(value, srcBegin, srcEnd, dst, dstBegin);
1772         }
1773     }
1774 
1775     /**
1776      * Encodes this {@code String} into a sequence of bytes using the named
1777      * charset, storing the result into a new byte array.
1778      *
1779      * <p> The behavior of this method when this string cannot be encoded in
1780      * the given charset is unspecified.  The {@link
1781      * java.nio.charset.CharsetEncoder} class should be used when more control
1782      * over the encoding process is required.
1783      *
1784      * @param  charsetName
1785      *         The name of a supported {@linkplain java.nio.charset.Charset
1786      *         charset}
1787      *
1788      * @return  The resultant byte array
1789      *
1790      * @throws  UnsupportedEncodingException
1791      *          If the named charset is not supported
1792      *
1793      * @since  1.1
1794      */
1795     public byte[] getBytes(String charsetName)
1796             throws UnsupportedEncodingException {
1797         return encode(lookupCharset(charsetName), coder(), value);
1798     }
1799 
1800     /**
1801      * Encodes this {@code String} into a sequence of bytes using the given
1802      * {@linkplain java.nio.charset.Charset charset}, storing the result into a
1803      * new byte array.
1804      *
1805      * <p> This method always replaces malformed-input and unmappable-character
1806      * sequences with this charset's default replacement byte array.  The
1807      * {@link java.nio.charset.CharsetEncoder} class should be used when more
1808      * control over the encoding process is required.
1809      *
1810      * @param  charset
1811      *         The {@linkplain java.nio.charset.Charset} to be used to encode
1812      *         the {@code String}
1813      *
1814      * @return  The resultant byte array
1815      *
1816      * @since  1.6
1817      */
1818     public byte[] getBytes(Charset charset) {
1819         if (charset == null) throw new NullPointerException();
1820         return encode(charset, coder(), value);
1821      }
1822 
1823     /**
1824      * Encodes this {@code String} into a sequence of bytes using the
1825      * {@link Charset#defaultCharset() default charset}, storing the result
1826      * into a new byte array.
1827      *
1828      * <p> The behavior of this method when this string cannot be encoded in
1829      * the default charset is unspecified.  The {@link
1830      * java.nio.charset.CharsetEncoder} class should be used when more control
1831      * over the encoding process is required.
1832      *
1833      * @return  The resultant byte array
1834      *
1835      * @since      1.1
1836      */
1837     public byte[] getBytes() {
1838         return encode(Charset.defaultCharset(), coder(), value);
1839     }
1840 
1841     boolean bytesCompatible(Charset charset) {
1842         if (isLatin1()) {
1843             if (charset == ISO_8859_1.INSTANCE) {
1844                 return true; // ok, same encoding
1845             } else if (charset == UTF_8.INSTANCE || charset == US_ASCII.INSTANCE) {
1846                 return !StringCoding.hasNegatives(value, 0, value.length); // ok, if ASCII-compatible
1847             }
1848         }
1849         return false;
1850     }
1851 
1852     void copyToSegmentRaw(MemorySegment segment, long offset) {
1853         MemorySegment.copy(value, 0, segment, ValueLayout.JAVA_BYTE, offset, value.length);
1854     }
1855 
1856     /**
1857      * Compares this string to the specified object.  The result is {@code
1858      * true} if and only if the argument is not {@code null} and is a {@code
1859      * String} object that represents the same sequence of characters as this
1860      * object.
1861      *
1862      * <p>For finer-grained String comparison, refer to
1863      * {@link java.text.Collator}.
1864      *
1865      * @param  anObject
1866      *         The object to compare this {@code String} against
1867      *
1868      * @return  {@code true} if the given object represents a {@code String}
1869      *          equivalent to this string, {@code false} otherwise
1870      *
1871      * @see  #compareTo(String)
1872      * @see  #equalsIgnoreCase(String)
1873      */
1874     public boolean equals(Object anObject) {
1875         if (this == anObject) {
1876             return true;
1877         }
1878         return (anObject instanceof String aString)
1879                 && (!COMPACT_STRINGS || this.coder == aString.coder)
1880                 && StringLatin1.equals(value, aString.value);
1881     }
1882 
1883     /**
1884      * Compares this string to the specified {@code StringBuffer}.  The result
1885      * is {@code true} if and only if this {@code String} represents the same
1886      * sequence of characters as the specified {@code StringBuffer}. This method
1887      * synchronizes on the {@code StringBuffer}.
1888      *
1889      * <p>For finer-grained String comparison, refer to
1890      * {@link java.text.Collator}.
1891      *
1892      * @param  sb
1893      *         The {@code StringBuffer} to compare this {@code String} against
1894      *
1895      * @return  {@code true} if this {@code String} represents the same
1896      *          sequence of characters as the specified {@code StringBuffer},
1897      *          {@code false} otherwise
1898      *
1899      * @since  1.4
1900      */
1901     public boolean contentEquals(StringBuffer sb) {
1902         return contentEquals((CharSequence)sb);
1903     }
1904 
1905     private boolean nonSyncContentEquals(AbstractStringBuilder sb) {
1906         int len = length();
1907         if (len != sb.length()) {
1908             return false;
1909         }
1910         byte[] v1 = value;
1911         byte[] v2 = sb.getValue();
1912         byte coder = coder();
1913         if (coder == sb.getCoder()) {
1914             return v1.length <= v2.length && ArraysSupport.mismatch(v1, v2, v1.length) < 0;
1915         } else {
1916             if (coder != LATIN1) {  // utf16 str and latin1 abs can never be "equal"
1917                 return false;
1918             }
1919             return StringUTF16.contentEquals(v1, v2, len);
1920         }
1921     }
1922 
1923     /**
1924      * Compares this string to the specified {@code CharSequence}.  The
1925      * result is {@code true} if and only if this {@code String} represents the
1926      * same sequence of char values as the specified sequence. Note that if the
1927      * {@code CharSequence} is a {@code StringBuffer} then the method
1928      * synchronizes on it.
1929      *
1930      * <p>For finer-grained String comparison, refer to
1931      * {@link java.text.Collator}.
1932      *
1933      * @param  cs
1934      *         The sequence to compare this {@code String} against
1935      *
1936      * @return  {@code true} if this {@code String} represents the same
1937      *          sequence of char values as the specified sequence, {@code
1938      *          false} otherwise
1939      *
1940      * @since  1.5
1941      */
1942     public boolean contentEquals(CharSequence cs) {
1943         // Argument is a StringBuffer, StringBuilder
1944         if (cs instanceof AbstractStringBuilder) {
1945             if (cs instanceof StringBuffer) {
1946                 synchronized(cs) {
1947                    return nonSyncContentEquals((AbstractStringBuilder)cs);
1948                 }
1949             } else {
1950                 return nonSyncContentEquals((AbstractStringBuilder)cs);
1951             }
1952         }
1953         // Argument is a String
1954         if (cs instanceof String) {
1955             return equals(cs);
1956         }
1957         // Argument is a generic CharSequence
1958         int n = cs.length();
1959         if (n != length()) {
1960             return false;
1961         }
1962         byte[] val = this.value;
1963         if (isLatin1()) {
1964             for (int i = 0; i < n; i++) {
1965                 if ((val[i] & 0xff) != cs.charAt(i)) {
1966                     return false;
1967                 }
1968             }
1969         } else {
1970             if (!StringUTF16.contentEquals(val, cs, n)) {
1971                 return false;
1972             }
1973         }
1974         return true;
1975     }
1976 
1977     /**
1978      * Compares this {@code String} to another {@code String}, ignoring case
1979      * considerations.  Two strings are considered equal ignoring case if they
1980      * are of the same length and corresponding Unicode code points in the two
1981      * strings are equal ignoring case.
1982      *
1983      * <p> Two Unicode code points are considered the same
1984      * ignoring case if at least one of the following is true:
1985      * <ul>
1986      *   <li> The two Unicode code points are the same (as compared by the
1987      *        {@code ==} operator)
1988      *   <li> Calling {@code Character.toLowerCase(Character.toUpperCase(int))}
1989      *        on each Unicode code point produces the same result
1990      * </ul>
1991      *
1992      * <p>Note that this method does <em>not</em> take locale into account, and
1993      * will result in unsatisfactory results for certain locales.  The
1994      * {@link java.text.Collator} class provides locale-sensitive comparison.
1995      *
1996      * @param  anotherString
1997      *         The {@code String} to compare this {@code String} against
1998      *
1999      * @return  {@code true} if the argument is not {@code null} and it
2000      *          represents an equivalent {@code String} ignoring case; {@code
2001      *          false} otherwise
2002      *
2003      * @see  #equals(Object)
2004      * @see  #codePoints()
2005      */
2006     public boolean equalsIgnoreCase(String anotherString) {
2007         return (this == anotherString) ? true
2008                 : (anotherString != null)
2009                 && (anotherString.length() == length())
2010                 && regionMatches(true, 0, anotherString, 0, length());
2011     }
2012 
2013     /**
2014      * Compares two strings lexicographically.
2015      * The comparison is based on the Unicode value of each character in
2016      * the strings. The character sequence represented by this
2017      * {@code String} object is compared lexicographically to the
2018      * character sequence represented by the argument string. The result is
2019      * a negative integer if this {@code String} object
2020      * lexicographically precedes the argument string. The result is a
2021      * positive integer if this {@code String} object lexicographically
2022      * follows the argument string. The result is zero if the strings
2023      * are equal; {@code compareTo} returns {@code 0} exactly when
2024      * the {@link #equals(Object)} method would return {@code true}.
2025      * <p>
2026      * This is the definition of lexicographic ordering. If two strings are
2027      * different, then either they have different characters at some index
2028      * that is a valid index for both strings, or their lengths are different,
2029      * or both. If they have different characters at one or more index
2030      * positions, let <i>k</i> be the smallest such index; then the string
2031      * whose character at position <i>k</i> has the smaller value, as
2032      * determined by using the {@code <} operator, lexicographically precedes the
2033      * other string. In this case, {@code compareTo} returns the
2034      * difference of the two character values at position {@code k} in
     * the two strings -- that is, the value:
2036      * <blockquote><pre>
2037      * this.charAt(k)-anotherString.charAt(k)
2038      * </pre></blockquote>
2039      * If there is no index position at which they differ, then the shorter
2040      * string lexicographically precedes the longer string. In this case,
2041      * {@code compareTo} returns the difference of the lengths of the
2042      * strings -- that is, the value:
2043      * <blockquote><pre>
2044      * this.length()-anotherString.length()
2045      * </pre></blockquote>
2046      *
2047      * <p>For finer-grained String comparison, refer to
2048      * {@link java.text.Collator}.
2049      *
2050      * @param   anotherString   the {@code String} to be compared.
2051      * @return  the value {@code 0} if the argument string is equal to
2052      *          this string; a value less than {@code 0} if this string
2053      *          is lexicographically less than the string argument; and a
2054      *          value greater than {@code 0} if this string is
2055      *          lexicographically greater than the string argument.
2056      */
2057     public int compareTo(String anotherString) {
2058         byte[] v1 = value;
2059         byte[] v2 = anotherString.value;
2060         byte coder = coder();
2061         if (coder == anotherString.coder()) {
2062             return coder == LATIN1 ? StringLatin1.compareTo(v1, v2)
2063                                    : StringUTF16.compareTo(v1, v2);
2064         }
2065         return coder == LATIN1 ? StringLatin1.compareToUTF16(v1, v2)
2066                                : StringUTF16.compareToLatin1(v1, v2);
2067      }
2068 
2069     /**
2070      * A Comparator that orders {@code String} objects as by
2071      * {@link #compareToIgnoreCase(String) compareToIgnoreCase}.
2072      * This comparator is serializable.
2073      * <p>
2074      * Note that this Comparator does <em>not</em> take locale into account,
2075      * and will result in an unsatisfactory ordering for certain locales.
2076      * The {@link java.text.Collator} class provides locale-sensitive comparison.
2077      *
2078      * @see     java.text.Collator
2079      * @since   1.2
2080      */
    // Instance of the private nested CaseInsensitiveComparator class. That
    // class's readResolve() returns this constant, so a deserialized
    // comparator resolves back to this very object.
    public static final Comparator<String> CASE_INSENSITIVE_ORDER
                                         = new CaseInsensitiveComparator();
2083 
2084     /**
2085      * CaseInsensitiveComparator for Strings.
2086      */
2087     private static class CaseInsensitiveComparator
2088             implements Comparator<String>, java.io.Serializable {
2089         // use serialVersionUID from JDK 1.2.2 for interoperability
2090         @java.io.Serial
2091         private static final long serialVersionUID = 8575799808933029326L;
2092 
2093         public int compare(String s1, String s2) {
2094             byte[] v1 = s1.value;
2095             byte[] v2 = s2.value;
2096             byte coder = s1.coder();
2097             if (coder == s2.coder()) {
2098                 return coder == LATIN1 ? StringLatin1.compareToCI(v1, v2)
2099                                        : StringUTF16.compareToCI(v1, v2);
2100             }
2101             return coder == LATIN1 ? StringLatin1.compareToCI_UTF16(v1, v2)
2102                                    : StringUTF16.compareToCI_Latin1(v1, v2);
2103         }
2104 
2105         /** Replaces the de-serialized object. */
2106         @java.io.Serial
2107         private Object readResolve() { return CASE_INSENSITIVE_ORDER; }
2108     }
2109 
2110     /**
2111      * Compares two strings lexicographically, ignoring case
2112      * differences. This method returns an integer whose sign is that of
2113      * calling {@code compareTo} with case folded versions of the strings
2114      * where case differences have been eliminated by calling
2115      * {@code Character.toLowerCase(Character.toUpperCase(int))} on
2116      * each Unicode code point.
2117      * <p>
2118      * Note that this method does <em>not</em> take locale into account,
2119      * and will result in an unsatisfactory ordering for certain locales.
2120      * The {@link java.text.Collator} class provides locale-sensitive comparison.
2121      *
2122      * @param   str   the {@code String} to be compared.
2123      * @return  a negative integer, zero, or a positive integer as the
2124      *          specified String is greater than, equal to, or less
2125      *          than this String, ignoring case considerations.
2126      * @see     java.text.Collator
2127      * @see     #codePoints()
2128      * @since   1.2
2129      */
2130     public int compareToIgnoreCase(String str) {
2131         return CASE_INSENSITIVE_ORDER.compare(this, str);
2132     }
2133 
2134     /**
2135      * Tests if two string regions are equal.
2136      * <p>
     * A substring of this {@code String} object is compared to a substring
     * of the argument {@code other}. The result is {@code true} if these
     * substrings represent identical character sequences. The substring of
     * this {@code String} object to be compared begins at index {@code toffset}
     * and has length {@code len}. The substring of {@code other} to be compared
     * begins at index {@code ooffset} and has length {@code len}. The
2143      * result is {@code false} if and only if at least one of the following
2144      * is true:
2145      * <ul><li>{@code toffset} is negative.
2146      * <li>{@code ooffset} is negative.
2147      * <li>{@code toffset+len} is greater than the length of this
2148      * {@code String} object.
2149      * <li>{@code ooffset+len} is greater than the length of the other
2150      * argument.
2151      * <li>There is some nonnegative integer <i>k</i> less than {@code len}
2152      * such that:
2153      * {@code this.charAt(toffset + }<i>k</i>{@code ) != other.charAt(ooffset + }
2154      * <i>k</i>{@code )}
2155      * </ul>
2156      *
2157      * <p>Note that this method does <em>not</em> take locale into account.  The
2158      * {@link java.text.Collator} class provides locale-sensitive comparison.
2159      *
2160      * @param   toffset   the starting offset of the subregion in this string.
2161      * @param   other     the string argument.
2162      * @param   ooffset   the starting offset of the subregion in the string
2163      *                    argument.
2164      * @param   len       the number of characters to compare.
2165      * @return  {@code true} if the specified subregion of this string
2166      *          exactly matches the specified subregion of the string argument;
2167      *          {@code false} otherwise.
2168      */
2169     public boolean regionMatches(int toffset, String other, int ooffset, int len) {
2170         // Note: toffset, ooffset, or len might be near -1>>>1.
2171         if ((ooffset < 0) || (toffset < 0) ||
2172              (toffset > (long)length() - len) ||
2173              (ooffset > (long)other.length() - len)) {
2174             return false;
2175         }
2176         byte[] tv = value;
2177         byte[] ov = other.value;
2178         byte coder = coder();
2179         if (coder == other.coder()) {
2180             if (coder == UTF16) {
2181                 toffset <<= UTF16;
2182                 ooffset <<= UTF16;
2183                 len <<= UTF16;
2184             }
2185             return ArraysSupport.mismatch(tv, toffset,
2186                     ov, ooffset, len) < 0;
2187         } else {
2188             if (coder == LATIN1) {
2189                 while (len-- > 0) {
2190                     if (StringLatin1.getChar(tv, toffset++) !=
2191                         StringUTF16.getChar(ov, ooffset++)) {
2192                         return false;
2193                     }
2194                 }
2195             } else {
2196                 while (len-- > 0) {
2197                     if (StringUTF16.getChar(tv, toffset++) !=
2198                         StringLatin1.getChar(ov, ooffset++)) {
2199                         return false;
2200                     }
2201                 }
2202             }
2203         }
2204         return true;
2205     }
2206 
2207     /**
2208      * Tests if two string regions are equal.
2209      * <p>
2210      * A substring of this {@code String} object is compared to a substring
2211      * of the argument {@code other}. The result is {@code true} if these
2212      * substrings represent Unicode code point sequences that are the same,
2213      * ignoring case if and only if {@code ignoreCase} is true.
2214      * The sequences {@code tsequence} and {@code osequence} are compared,
2215      * where {@code tsequence} is the sequence produced as if by calling
2216      * {@code this.substring(toffset, toffset + len).codePoints()} and
2217      * {@code osequence} is the sequence produced as if by calling
2218      * {@code other.substring(ooffset, ooffset + len).codePoints()}.
2219      * The result is {@code true} if and only if all of the following
2220      * are true:
2221      * <ul><li>{@code toffset} is non-negative.
2222      * <li>{@code ooffset} is non-negative.
2223      * <li>{@code toffset+len} is less than or equal to the length of this
2224      * {@code String} object.
2225      * <li>{@code ooffset+len} is less than or equal to the length of the other
2226      * argument.
2227      * <li>if {@code ignoreCase} is {@code false}, all pairs of corresponding Unicode
2228      * code points are equal integer values; or if {@code ignoreCase} is {@code true},
2229      * {@link Character#toLowerCase(int) Character.toLowerCase(}
2230      * {@link Character#toUpperCase(int)}{@code )} on all pairs of Unicode code points
2231      * results in equal integer values.
2232      * </ul>
2233      *
2234      * <p>Note that this method does <em>not</em> take locale into account,
2235      * and will result in unsatisfactory results for certain locales when
2236      * {@code ignoreCase} is {@code true}.  The {@link java.text.Collator} class
2237      * provides locale-sensitive comparison.
2238      *
2239      * @param   ignoreCase   if {@code true}, ignore case when comparing
2240      *                       characters.
2241      * @param   toffset      the starting offset of the subregion in this
2242      *                       string.
2243      * @param   other        the string argument.
2244      * @param   ooffset      the starting offset of the subregion in the string
2245      *                       argument.
     * @param   len          the number of characters (Unicode code units,
     *                       that is, 16-bit {@code char} values) to compare.
2248      * @return  {@code true} if the specified subregion of this string
2249      *          matches the specified subregion of the string argument;
2250      *          {@code false} otherwise. Whether the matching is exact
2251      *          or case insensitive depends on the {@code ignoreCase}
2252      *          argument.
2253      * @see     #codePoints()
2254      */
2255     public boolean regionMatches(boolean ignoreCase, int toffset,
2256             String other, int ooffset, int len) {
2257         if (!ignoreCase) {
2258             return regionMatches(toffset, other, ooffset, len);
2259         }
2260         // Note: toffset, ooffset, or len might be near -1>>>1.
2261         if ((ooffset < 0) || (toffset < 0)
2262                 || (toffset > (long)length() - len)
2263                 || (ooffset > (long)other.length() - len)) {
2264             return false;
2265         }
2266         byte[] tv = value;
2267         byte[] ov = other.value;
2268         byte coder = coder();
2269         if (coder == other.coder()) {
2270             return coder == LATIN1
2271               ? StringLatin1.regionMatchesCI(tv, toffset, ov, ooffset, len)
2272               : StringUTF16.regionMatchesCI(tv, toffset, ov, ooffset, len);
2273         }
2274         return coder == LATIN1
2275               ? StringLatin1.regionMatchesCI_UTF16(tv, toffset, ov, ooffset, len)
2276               : StringUTF16.regionMatchesCI_Latin1(tv, toffset, ov, ooffset, len);
2277     }
2278 
2279     /**
2280      * Tests if the substring of this string beginning at the
2281      * specified index starts with the specified prefix.
2282      *
2283      * @param   prefix    the prefix.
2284      * @param   toffset   where to begin looking in this string.
2285      * @return  {@code true} if the character sequence represented by the
2286      *          argument is a prefix of the substring of this object starting
2287      *          at index {@code toffset}; {@code false} otherwise.
2288      *          The result is {@code false} if {@code toffset} is
2289      *          negative or greater than the length of this
2290      *          {@code String} object; otherwise the result is the same
2291      *          as the result of the expression
2292      *          <pre>
2293      *          this.substring(toffset).startsWith(prefix)
2294      *          </pre>
2295      */
2296     public boolean startsWith(String prefix, int toffset) {
2297         // Note: toffset might be near -1>>>1.
2298         if (toffset < 0 || toffset > length() - prefix.length()) {
2299             return false;
2300         }
2301         byte[] ta = value;
2302         byte[] pa = prefix.value;
2303         int po = 0;
2304         int pc = pa.length;
2305         byte coder = coder();
2306         if (coder == prefix.coder()) {
2307             if (coder == UTF16) {
2308                 toffset <<= UTF16;
2309             }
2310             return ArraysSupport.mismatch(ta, toffset,
2311                     pa, 0, pc) < 0;
2312         } else {
2313             if (coder == LATIN1) {  // && pcoder == UTF16
2314                 return false;
2315             }
2316             // coder == UTF16 && pcoder == LATIN1)
2317             while (po < pc) {
2318                 if (StringUTF16.getChar(ta, toffset++) != (pa[po++] & 0xff)) {
2319                     return false;
2320                }
2321             }
2322         }
2323         return true;
2324     }
2325 
2326     /**
2327      * Tests if this string starts with the specified prefix.
2328      *
2329      * @param   prefix   the prefix.
2330      * @return  {@code true} if the character sequence represented by the
2331      *          argument is a prefix of the character sequence represented by
2332      *          this string; {@code false} otherwise.
2333      *          Note also that {@code true} will be returned if the
2334      *          argument is an empty string or is equal to this
2335      *          {@code String} object as determined by the
2336      *          {@link #equals(Object)} method.
2337      * @since   1.0
2338      */
2339     public boolean startsWith(String prefix) {
2340         return startsWith(prefix, 0);
2341     }
2342 
2343     /**
2344      * Tests if this string ends with the specified suffix.
2345      *
2346      * @param   suffix   the suffix.
2347      * @return  {@code true} if the character sequence represented by the
2348      *          argument is a suffix of the character sequence represented by
2349      *          this object; {@code false} otherwise. Note that the
2350      *          result will be {@code true} if the argument is the
2351      *          empty string or is equal to this {@code String} object
2352      *          as determined by the {@link #equals(Object)} method.
2353      */
2354     public boolean endsWith(String suffix) {
2355         return startsWith(suffix, length() - suffix.length());
2356     }
2357 
2358     /**
2359      * Returns a hash code for this string. The hash code for a
2360      * {@code String} object is computed as
2361      * <blockquote><pre>
2362      * s[0]*31^(n-1) + s[1]*31^(n-2) + ... + s[n-1]
2363      * </pre></blockquote>
2364      * using {@code int} arithmetic, where {@code s[i]} is the
2365      * <i>i</i>th character of the string, {@code n} is the length of
2366      * the string, and {@code ^} indicates exponentiation.
2367      * (The hash value of the empty string is zero.)
2368      *
2369      * @return  a hash code value for this object.
2370      */
2371     public int hashCode() {
2372         // The hash or hashIsZero fields are subject to a benign data race,
2373         // making it crucial to ensure that any observable result of the
2374         // calculation in this method stays correct under any possible read of
2375         // these fields. Necessary restrictions to allow this to be correct
2376         // without explicit memory fences or similar concurrency primitives is
2377         // that we can ever only write to one of these two fields for a given
2378         // String instance, and that the computation is idempotent and derived
2379         // from immutable state
2380         int h = hash;
2381         if (h == 0 && !hashIsZero) {
2382             h = isLatin1() ? StringLatin1.hashCode(value)
2383                            : StringUTF16.hashCode(value);
2384             if (h == 0) {
2385                 hashIsZero = true;
2386             } else {
2387                 hash = h;
2388             }
2389         }
2390         return h;
2391     }
2392 
2393     /**
2394      * Returns the index within this string of the first occurrence of
2395      * the specified character. If a character with value
2396      * {@code ch} occurs in the character sequence represented by
2397      * this {@code String} object, then the index (in Unicode
2398      * code units) of the first such occurrence is returned. For
2399      * values of {@code ch} in the range from 0 to 0xFFFF
2400      * (inclusive), this is the smallest value <i>k</i> such that:
2401      * <blockquote><pre>
2402      * this.charAt(<i>k</i>) == ch
2403      * </pre></blockquote>
2404      * is true. For other values of {@code ch}, it is the
2405      * smallest value <i>k</i> such that:
2406      * <blockquote><pre>
2407      * this.codePointAt(<i>k</i>) == ch
2408      * </pre></blockquote>
2409      * is true. In either case, if no such character occurs in this
2410      * string, then {@code -1} is returned.
2411      *
2412      * @param   ch   a character (Unicode code point).
2413      * @return  the index of the first occurrence of the character in the
2414      *          character sequence represented by this object, or
2415      *          {@code -1} if the character does not occur.
2416      */
2417     public int indexOf(int ch) {
2418         return indexOf(ch, 0);
2419     }
2420 
2421     /**
2422      * Returns the index within this string of the first occurrence of the
2423      * specified character, starting the search at the specified index.
2424      * <p>
2425      * If a character with value {@code ch} occurs in the
2426      * character sequence represented by this {@code String}
2427      * object at an index no smaller than {@code fromIndex}, then
2428      * the index of the first such occurrence is returned. For values
2429      * of {@code ch} in the range from 0 to 0xFFFF (inclusive),
2430      * this is the smallest value <i>k</i> such that:
2431      * <blockquote><pre>
2432      * (this.charAt(<i>k</i>) == ch) {@code &&} (<i>k</i> &gt;= fromIndex)
2433      * </pre></blockquote>
2434      * is true. For other values of {@code ch}, it is the
2435      * smallest value <i>k</i> such that:
2436      * <blockquote><pre>
2437      * (this.codePointAt(<i>k</i>) == ch) {@code &&} (<i>k</i> &gt;= fromIndex)
2438      * </pre></blockquote>
2439      * is true. In either case, if no such character occurs in this
2440      * string at or after position {@code fromIndex}, then
2441      * {@code -1} is returned.
2442      *
2443      * <p>
2444      * There is no restriction on the value of {@code fromIndex}. If it
2445      * is negative, it has the same effect as if it were zero: this entire
2446      * string may be searched. If it is greater than the length of this
2447      * string, it has the same effect as if it were equal to the length of
2448      * this string: {@code -1} is returned.
2449      *
2450      * <p>All indices are specified in {@code char} values
2451      * (Unicode code units).
2452      *
2453      * @param   ch          a character (Unicode code point).
2454      * @param   fromIndex   the index to start the search from.
2455      * @return  the index of the first occurrence of the character in the
2456      *          character sequence represented by this object that is greater
2457      *          than or equal to {@code fromIndex}, or {@code -1}
2458      *          if the character does not occur.
2459      *
2460      * @apiNote
2461      * Unlike {@link #substring(int)}, for example, this method does not throw
2462      * an exception when {@code fromIndex} is outside the valid range.
2463      * Rather, it returns -1 when {@code fromIndex} is larger than the length of
2464      * the string.
2465      * This result is, by itself, indistinguishable from a genuine absence of
2466      * {@code ch} in the string.
2467      * If stricter behavior is needed, {@link #indexOf(int, int, int)}
2468      * should be considered instead.
2469      * On a {@link String} {@code s}, for example,
2470      * {@code s.indexOf(ch, fromIndex, s.length())} would throw if
2471      * {@code fromIndex} were larger than the string length, or were negative.
2472      */
2473     public int indexOf(int ch, int fromIndex) {
2474         return isLatin1() ? StringLatin1.indexOf(value, ch, fromIndex, length())
2475                 : StringUTF16.indexOf(value, ch, fromIndex, length());
2476     }
2477 
2478     /**
2479      * Returns the index within this string of the first occurrence of the
2480      * specified character, starting the search at {@code beginIndex} and
2481      * stopping before {@code endIndex}.
2482      *
2483      * <p>If a character with value {@code ch} occurs in the
2484      * character sequence represented by this {@code String}
2485      * object at an index no smaller than {@code beginIndex} but smaller than
2486      * {@code endIndex}, then
2487      * the index of the first such occurrence is returned. For values
2488      * of {@code ch} in the range from 0 to 0xFFFF (inclusive),
2489      * this is the smallest value <i>k</i> such that:
2490      * <blockquote><pre>
2491      * (this.charAt(<i>k</i>) == ch) &amp;&amp; (beginIndex &lt;= <i>k</i> &lt; endIndex)
2492      * </pre></blockquote>
2493      * is true. For other values of {@code ch}, it is the
2494      * smallest value <i>k</i> such that:
2495      * <blockquote><pre>
2496      * (this.codePointAt(<i>k</i>) == ch) &amp;&amp; (beginIndex &lt;= <i>k</i> &lt; endIndex)
2497      * </pre></blockquote>
2498      * is true. In either case, if no such character occurs in this
2499      * string at or after position {@code beginIndex} and before position
2500      * {@code endIndex}, then {@code -1} is returned.
2501      *
2502      * <p>All indices are specified in {@code char} values
2503      * (Unicode code units).
2504      *
2505      * @param   ch          a character (Unicode code point).
2506      * @param   beginIndex  the index to start the search from (included).
2507      * @param   endIndex    the index to stop the search at (excluded).
2508      * @return  the index of the first occurrence of the character in the
2509      *          character sequence represented by this object that is greater
2510      *          than or equal to {@code beginIndex} and less than {@code endIndex},
2511      *          or {@code -1} if the character does not occur.
2512      * @throws  StringIndexOutOfBoundsException if {@code beginIndex}
2513      *          is negative, or {@code endIndex} is larger than the length of
2514      *          this {@code String} object, or {@code beginIndex} is larger than
2515      *          {@code endIndex}.
2516      * @since   21
2517      */
2518     public int indexOf(int ch, int beginIndex, int endIndex) {
2519         checkBoundsBeginEnd(beginIndex, endIndex, length());
2520         return isLatin1() ? StringLatin1.indexOf(value, ch, beginIndex, endIndex)
2521                 : StringUTF16.indexOf(value, ch, beginIndex, endIndex);
2522     }
2523 
2524     /**
2525      * Returns the index within this string of the last occurrence of
2526      * the specified character. For values of {@code ch} in the
2527      * range from 0 to 0xFFFF (inclusive), the index (in Unicode code
2528      * units) returned is the largest value <i>k</i> such that:
2529      * <blockquote><pre>
2530      * this.charAt(<i>k</i>) == ch
2531      * </pre></blockquote>
2532      * is true. For other values of {@code ch}, it is the
2533      * largest value <i>k</i> such that:
2534      * <blockquote><pre>
2535      * this.codePointAt(<i>k</i>) == ch
2536      * </pre></blockquote>
2537      * is true.  In either case, if no such character occurs in this
2538      * string, then {@code -1} is returned.  The
2539      * {@code String} is searched backwards starting at the last
2540      * character.
2541      *
2542      * @param   ch   a character (Unicode code point).
2543      * @return  the index of the last occurrence of the character in the
2544      *          character sequence represented by this object, or
2545      *          {@code -1} if the character does not occur.
2546      */
2547     public int lastIndexOf(int ch) {
2548         return lastIndexOf(ch, length() - 1);
2549     }
2550 
2551     /**
2552      * Returns the index within this string of the last occurrence of
2553      * the specified character, searching backward starting at the
2554      * specified index. For values of {@code ch} in the range
2555      * from 0 to 0xFFFF (inclusive), the index returned is the largest
2556      * value <i>k</i> such that:
2557      * <blockquote><pre>
2558      * (this.charAt(<i>k</i>) == ch) {@code &&} (<i>k</i> &lt;= fromIndex)
2559      * </pre></blockquote>
2560      * is true. For other values of {@code ch}, it is the
2561      * largest value <i>k</i> such that:
2562      * <blockquote><pre>
2563      * (this.codePointAt(<i>k</i>) == ch) {@code &&} (<i>k</i> &lt;= fromIndex)
2564      * </pre></blockquote>
2565      * is true. In either case, if no such character occurs in this
2566      * string at or before position {@code fromIndex}, then
2567      * {@code -1} is returned.
2568      *
2569      * <p>All indices are specified in {@code char} values
2570      * (Unicode code units).
2571      *
2572      * @param   ch          a character (Unicode code point).
2573      * @param   fromIndex   the index to start the search from. There is no
2574      *          restriction on the value of {@code fromIndex}. If it is
2575      *          greater than or equal to the length of this string, it has
2576      *          the same effect as if it were equal to one less than the
2577      *          length of this string: this entire string may be searched.
2578      *          If it is negative, it has the same effect as if it were -1:
2579      *          -1 is returned.
2580      * @return  the index of the last occurrence of the character in the
2581      *          character sequence represented by this object that is less
2582      *          than or equal to {@code fromIndex}, or {@code -1}
2583      *          if the character does not occur before that point.
2584      */
2585     public int lastIndexOf(int ch, int fromIndex) {
2586         return isLatin1() ? StringLatin1.lastIndexOf(value, ch, fromIndex)
2587                           : StringUTF16.lastIndexOf(value, ch, fromIndex);
2588     }
2589 
2590     /**
2591      * Returns the index within this string of the first occurrence of the
2592      * specified substring.
2593      *
2594      * <p>The returned index is the smallest value {@code k} for which:
2595      * <pre>{@code
2596      * this.startsWith(str, k)
2597      * }</pre>
2598      * If no such value of {@code k} exists, then {@code -1} is returned.
2599      *
2600      * @param   str   the substring to search for.
2601      * @return  the index of the first occurrence of the specified substring,
2602      *          or {@code -1} if there is no such occurrence.
2603      */
2604     public int indexOf(String str) {
2605         byte coder = coder();
2606         if (coder == str.coder()) {
2607             return isLatin1() ? StringLatin1.indexOf(value, str.value)
2608                               : StringUTF16.indexOf(value, str.value);
2609         }
2610         if (coder == LATIN1) {  // str.coder == UTF16
2611             return -1;
2612         }
2613         return StringUTF16.indexOfLatin1(value, str.value);
2614     }
2615 
2616     /**
2617      * Returns the index within this string of the first occurrence of the
2618      * specified substring, starting at the specified index.
2619      *
2620      * <p>The returned index is the smallest value {@code k} for which:
2621      * <pre>{@code
2622      *     k >= Math.min(fromIndex, this.length()) &&
2623      *                   this.startsWith(str, k)
2624      * }</pre>
2625      * If no such value of {@code k} exists, then {@code -1} is returned.
2626      *
2627      * @apiNote
2628      * Unlike {@link #substring(int)}, for example, this method does not throw
2629      * an exception when {@code fromIndex} is outside the valid range.
2630      * Rather, it returns -1 when {@code fromIndex} is larger than the length of
2631      * the string.
2632      * This result is, by itself, indistinguishable from a genuine absence of
2633      * {@code str} in the string.
2634      * If stricter behavior is needed, {@link #indexOf(String, int, int)}
2635      * should be considered instead.
2636      * On {@link String} {@code s} and a non-empty {@code str}, for example,
2637      * {@code s.indexOf(str, fromIndex, s.length())} would throw if
2638      * {@code fromIndex} were larger than the string length, or were negative.
2639      *
2640      * @param   str         the substring to search for.
2641      * @param   fromIndex   the index from which to start the search.
2642      * @return  the index of the first occurrence of the specified substring,
2643      *          starting at the specified index,
2644      *          or {@code -1} if there is no such occurrence.
2645      */
2646     public int indexOf(String str, int fromIndex) {
2647         return indexOf(value, coder(), length(), str, fromIndex);
2648     }
2649 
2650     /**
2651      * Returns the index of the first occurrence of the specified substring
2652      * within the specified index range of {@code this} string.
2653      *
     * <p>This method returns the same result as the expression
     * <pre>{@code
     *     this.substring(beginIndex, endIndex).indexOf(str) + beginIndex
2657      * }</pre>
2658      * if the index returned by {@link #indexOf(String)} is non-negative,
2659      * and returns -1 otherwise.
2660      * (No substring is instantiated, though.)
2661      *
2662      * @param   str         the substring to search for.
2663      * @param   beginIndex  the index to start the search from (included).
2664      * @param   endIndex    the index to stop the search at (excluded).
2665      * @return  the index of the first occurrence of the specified substring
2666      *          within the specified index range,
2667      *          or {@code -1} if there is no such occurrence.
2668      * @throws  StringIndexOutOfBoundsException if {@code beginIndex}
2669      *          is negative, or {@code endIndex} is larger than the length of
2670      *          this {@code String} object, or {@code beginIndex} is larger than
2671      *          {@code endIndex}.
2672      * @since   21
2673      */
2674     public int indexOf(String str, int beginIndex, int endIndex) {
2675         if (str.length() == 1) {
2676             /* Simple optimization, can be omitted without behavioral impact */
2677             return indexOf(str.charAt(0), beginIndex, endIndex);
2678         }
2679         checkBoundsBeginEnd(beginIndex, endIndex, length());
2680         return indexOf(value, coder(), endIndex, str, beginIndex);
2681     }
2682 
2683     /**
2684      * Code shared by String and AbstractStringBuilder to do searches. The
2685      * source is the character array being searched, and the target
2686      * is the string being searched for.
2687      *
2688      * @param   src       the characters being searched.
2689      * @param   srcCoder  the coder of the source string.
2690      * @param   srcCount  last index (exclusive) in the source string.
2691      * @param   tgtStr    the characters being searched for.
2692      * @param   fromIndex the index to begin searching from.
2693      */
2694     static int indexOf(byte[] src, byte srcCoder, int srcCount,
2695                        String tgtStr, int fromIndex) {
2696         fromIndex = Math.clamp(fromIndex, 0, srcCount);
2697         int tgtCount = tgtStr.length();
2698         if (tgtCount > srcCount - fromIndex) {
2699             return -1;
2700         }
2701         if (tgtCount == 0) {
2702             return fromIndex;
2703         }
2704 
2705         byte[] tgt = tgtStr.value;
2706         byte tgtCoder = tgtStr.coder();
2707         if (srcCoder == tgtCoder) {
2708             return srcCoder == LATIN1
2709                 ? StringLatin1.indexOf(src, srcCount, tgt, tgtCount, fromIndex)
2710                 : StringUTF16.indexOf(src, srcCount, tgt, tgtCount, fromIndex);
2711         }
2712         if (srcCoder == LATIN1) {    //  && tgtCoder == UTF16
2713             return -1;
2714         }
2715         // srcCoder == UTF16 && tgtCoder == LATIN1) {
2716         return StringUTF16.indexOfLatin1(src, srcCount, tgt, tgtCount, fromIndex);
2717     }
2718 
2719     /**
2720      * Returns the index within this string of the last occurrence of the
2721      * specified substring.  The last occurrence of the empty string ""
2722      * is considered to occur at the index value {@code this.length()}.
2723      *
2724      * <p>The returned index is the largest value {@code k} for which:
2725      * <pre>{@code
2726      * this.startsWith(str, k)
2727      * }</pre>
2728      * If no such value of {@code k} exists, then {@code -1} is returned.
2729      *
2730      * @param   str   the substring to search for.
2731      * @return  the index of the last occurrence of the specified substring,
2732      *          or {@code -1} if there is no such occurrence.
2733      */
2734     public int lastIndexOf(String str) {
2735         return lastIndexOf(str, length());
2736     }
2737 
2738     /**
2739      * Returns the index within this string of the last occurrence of the
2740      * specified substring, searching backward starting at the specified index.
2741      *
2742      * <p>The returned index is the largest value {@code k} for which:
2743      * <pre>{@code
2744      *     k <= Math.min(fromIndex, this.length()) &&
2745      *                   this.startsWith(str, k)
2746      * }</pre>
2747      * If no such value of {@code k} exists, then {@code -1} is returned.
2748      *
2749      * @param   str         the substring to search for.
2750      * @param   fromIndex   the index to start the search from.
2751      * @return  the index of the last occurrence of the specified substring,
2752      *          searching backward from the specified index,
2753      *          or {@code -1} if there is no such occurrence.
2754      */
2755     public int lastIndexOf(String str, int fromIndex) {
2756         return lastIndexOf(value, coder(), length(), str, fromIndex);
2757     }
2758 
2759     /**
2760      * Code shared by String and AbstractStringBuilder to do searches. The
2761      * source is the character array being searched, and the target
2762      * is the string being searched for.
2763      *
2764      * @param   src         the characters being searched.
2765      * @param   srcCoder    coder handles the mapping between bytes/chars
2766      * @param   srcCount    count of the source string.
2767      * @param   tgtStr      the characters being searched for.
2768      * @param   fromIndex   the index to begin searching from.
2769      */
2770     static int lastIndexOf(byte[] src, byte srcCoder, int srcCount,
2771                            String tgtStr, int fromIndex) {
2772         byte[] tgt = tgtStr.value;
2773         byte tgtCoder = tgtStr.coder();
2774         int tgtCount = tgtStr.length();
2775         /*
2776          * Check arguments; return immediately where possible. For
2777          * consistency, don't check for null str.
2778          */
2779         int rightIndex = srcCount - tgtCount;
2780         if (fromIndex > rightIndex) {
2781             fromIndex = rightIndex;
2782         }
2783         if (fromIndex < 0) {
2784             return -1;
2785         }
2786         /* Empty string always matches. */
2787         if (tgtCount == 0) {
2788             return fromIndex;
2789         }
2790         if (srcCoder == tgtCoder) {
2791             return srcCoder == LATIN1
2792                 ? StringLatin1.lastIndexOf(src, srcCount, tgt, tgtCount, fromIndex)
2793                 : StringUTF16.lastIndexOf(src, srcCount, tgt, tgtCount, fromIndex);
2794         }
2795         if (srcCoder == LATIN1) {    // && tgtCoder == UTF16
2796             return -1;
2797         }
2798         // srcCoder == UTF16 && tgtCoder == LATIN1
2799         return StringUTF16.lastIndexOfLatin1(src, srcCount, tgt, tgtCount, fromIndex);
2800     }
2801 
2802     /**
2803      * Returns a string that is a substring of this string. The
2804      * substring begins with the character at the specified index and
2805      * extends to the end of this string. <p>
2806      * Examples:
2807      * <blockquote><pre>
2808      * "unhappy".substring(2) returns "happy"
2809      * "Harbison".substring(3) returns "bison"
2810      * "emptiness".substring(9) returns "" (an empty string)
2811      * </pre></blockquote>
2812      *
2813      * @param      beginIndex   the beginning index, inclusive.
2814      * @return     the specified substring.
2815      * @throws     IndexOutOfBoundsException  if
2816      *             {@code beginIndex} is negative or larger than the
2817      *             length of this {@code String} object.
2818      */
2819     public String substring(int beginIndex) {
2820         return substring(beginIndex, length());
2821     }
2822 
2823     /**
2824      * Returns a string that is a substring of this string. The
2825      * substring begins at the specified {@code beginIndex} and
2826      * extends to the character at index {@code endIndex - 1}.
2827      * Thus the length of the substring is {@code endIndex-beginIndex}.
2828      * <p>
2829      * Examples:
2830      * <blockquote><pre>
2831      * "hamburger".substring(4, 8) returns "urge"
2832      * "smiles".substring(1, 5) returns "mile"
2833      * </pre></blockquote>
2834      *
2835      * @param      beginIndex   the beginning index, inclusive.
2836      * @param      endIndex     the ending index, exclusive.
2837      * @return     the specified substring.
2838      * @throws     IndexOutOfBoundsException  if the
2839      *             {@code beginIndex} is negative, or
2840      *             {@code endIndex} is larger than the length of
2841      *             this {@code String} object, or
2842      *             {@code beginIndex} is larger than
2843      *             {@code endIndex}.
2844      */
2845     public String substring(int beginIndex, int endIndex) {
2846         int length = length();
2847         checkBoundsBeginEnd(beginIndex, endIndex, length);
2848         if (beginIndex == 0 && endIndex == length) {
2849             return this;
2850         }
2851         int subLen = endIndex - beginIndex;
2852         return isLatin1() ? StringLatin1.newString(value, beginIndex, subLen)
2853                           : StringUTF16.newString(value, beginIndex, subLen);
2854     }
2855 
2856     /**
2857      * Returns a character sequence that is a subsequence of this sequence.
2858      *
2859      * <p> An invocation of this method of the form
2860      *
2861      * <blockquote><pre>
2862      * str.subSequence(begin,&nbsp;end)</pre></blockquote>
2863      *
2864      * behaves in exactly the same way as the invocation
2865      *
2866      * <blockquote><pre>
2867      * str.substring(begin,&nbsp;end)</pre></blockquote>
2868      *
2869      * @apiNote
2870      * This method is defined so that the {@code String} class can implement
2871      * the {@link CharSequence} interface.
2872      *
2873      * @param   beginIndex   the begin index, inclusive.
2874      * @param   endIndex     the end index, exclusive.
2875      * @return  the specified subsequence.
2876      *
2877      * @throws  IndexOutOfBoundsException
2878      *          if {@code beginIndex} or {@code endIndex} is negative,
2879      *          if {@code endIndex} is greater than {@code length()},
2880      *          or if {@code beginIndex} is greater than {@code endIndex}
2881      *
2882      * @since 1.4
2883      */
2884     public CharSequence subSequence(int beginIndex, int endIndex) {
2885         return this.substring(beginIndex, endIndex);
2886     }
2887 
2888     /**
2889      * Concatenates the specified string to the end of this string.
2890      * <p>
2891      * If the length of the argument string is {@code 0}, then this
2892      * {@code String} object is returned. Otherwise, a
2893      * {@code String} object is returned that represents a character
2894      * sequence that is the concatenation of the character sequence
2895      * represented by this {@code String} object and the character
2896      * sequence represented by the argument string.<p>
2897      * Examples:
2898      * <blockquote><pre>
2899      * "cares".concat("s") returns "caress"
2900      * "to".concat("get").concat("her") returns "together"
2901      * </pre></blockquote>
2902      *
2903      * @param   str   the {@code String} that is concatenated to the end
2904      *                of this {@code String}.
2905      * @return  a string that represents the concatenation of this object's
2906      *          characters followed by the string argument's characters.
2907      */
2908     public String concat(String str) {
2909         if (str.isEmpty()) {
2910             return this;
2911         }
2912         return StringConcatHelper.simpleConcat(this, str);
2913     }
2914 
2915     /**
2916      * Returns a string resulting from replacing all occurrences of
2917      * {@code oldChar} in this string with {@code newChar}.
2918      * <p>
2919      * If the character {@code oldChar} does not occur in the
2920      * character sequence represented by this {@code String} object,
2921      * then a reference to this {@code String} object is returned.
2922      * Otherwise, a {@code String} object is returned that
2923      * represents a character sequence identical to the character sequence
2924      * represented by this {@code String} object, except that every
2925      * occurrence of {@code oldChar} is replaced by an occurrence
2926      * of {@code newChar}.
2927      * <p>
2928      * Examples:
2929      * <blockquote><pre>
2930      * "mesquite in your cellar".replace('e', 'o')
2931      *         returns "mosquito in your collar"
2932      * "the war of baronets".replace('r', 'y')
2933      *         returns "the way of bayonets"
2934      * "sparring with a purple porpoise".replace('p', 't')
2935      *         returns "starring with a turtle tortoise"
2936      * "JonL".replace('q', 'x') returns "JonL" (no change)
2937      * </pre></blockquote>
2938      *
2939      * @param   oldChar   the old character.
2940      * @param   newChar   the new character.
2941      * @return  a string derived from this string by replacing every
2942      *          occurrence of {@code oldChar} with {@code newChar}.
2943      */
2944     public String replace(char oldChar, char newChar) {
2945         if (oldChar != newChar) {
2946             String ret = isLatin1() ? StringLatin1.replace(value, oldChar, newChar)
2947                                     : StringUTF16.replace(value, oldChar, newChar);
2948             if (ret != null) {
2949                 return ret;
2950             }
2951         }
2952         return this;
2953     }
2954 
2955     /**
2956      * Tells whether or not this string matches the given <a
2957      * href="../util/regex/Pattern.html#sum">regular expression</a>.
2958      *
2959      * <p> An invocation of this method of the form
2960      * <i>str</i>{@code .matches(}<i>regex</i>{@code )} yields exactly the
2961      * same result as the expression
2962      *
2963      * <blockquote>
2964      * {@link java.util.regex.Pattern}.{@link java.util.regex.Pattern#matches(String,CharSequence)
2965      * matches(<i>regex</i>, <i>str</i>)}
2966      * </blockquote>
2967      *
2968      * @param   regex
2969      *          the regular expression to which this string is to be matched
2970      *
2971      * @return  {@code true} if, and only if, this string matches the
2972      *          given regular expression
2973      *
2974      * @throws  PatternSyntaxException
2975      *          if the regular expression's syntax is invalid
2976      *
2977      * @see java.util.regex.Pattern
2978      *
2979      * @since 1.4
2980      */
2981     public boolean matches(String regex) {
2982         return Pattern.matches(regex, this);
2983     }
2984 
2985     /**
2986      * Returns true if and only if this string contains the specified
2987      * sequence of char values.
2988      *
2989      * @param s the sequence to search for
2990      * @return true if this string contains {@code s}, false otherwise
2991      * @since 1.5
2992      */
2993     public boolean contains(CharSequence s) {
2994         return indexOf(s.toString()) >= 0;
2995     }
2996 
2997     /**
2998      * Replaces the first substring of this string that matches the given <a
2999      * href="../util/regex/Pattern.html#sum">regular expression</a> with the
3000      * given replacement.
3001      *
3002      * <p> An invocation of this method of the form
3003      * <i>str</i>{@code .replaceFirst(}<i>regex</i>{@code ,} <i>repl</i>{@code )}
3004      * yields exactly the same result as the expression
3005      *
3006      * <blockquote>
3007      * <code>
3008      * {@link java.util.regex.Pattern}.{@link
3009      * java.util.regex.Pattern#compile(String) compile}(<i>regex</i>).{@link
3010      * java.util.regex.Pattern#matcher(java.lang.CharSequence) matcher}(<i>str</i>).{@link
3011      * java.util.regex.Matcher#replaceFirst(String) replaceFirst}(<i>repl</i>)
3012      * </code>
3013      * </blockquote>
3014      *
3015      *<p>
3016      * Note that backslashes ({@code \}) and dollar signs ({@code $}) in the
3017      * replacement string may cause the results to be different than if it were
3018      * being treated as a literal replacement string; see
3019      * {@link java.util.regex.Matcher#replaceFirst}.
3020      * Use {@link java.util.regex.Matcher#quoteReplacement} to suppress the special
3021      * meaning of these characters, if desired.
3022      *
3023      * @param   regex
3024      *          the regular expression to which this string is to be matched
3025      * @param   replacement
3026      *          the string to be substituted for the first match
3027      *
3028      * @return  The resulting {@code String}
3029      *
3030      * @throws  PatternSyntaxException
3031      *          if the regular expression's syntax is invalid
3032      *
3033      * @see java.util.regex.Pattern
3034      *
3035      * @since 1.4
3036      */
3037     public String replaceFirst(String regex, String replacement) {
3038         return Pattern.compile(regex).matcher(this).replaceFirst(replacement);
3039     }
3040 
3041     /**
3042      * Replaces each substring of this string that matches the given <a
3043      * href="../util/regex/Pattern.html#sum">regular expression</a> with the
3044      * given replacement.
3045      *
3046      * <p> An invocation of this method of the form
3047      * <i>str</i>{@code .replaceAll(}<i>regex</i>{@code ,} <i>repl</i>{@code )}
3048      * yields exactly the same result as the expression
3049      *
3050      * <blockquote>
3051      * <code>
3052      * {@link java.util.regex.Pattern}.{@link
3053      * java.util.regex.Pattern#compile(String) compile}(<i>regex</i>).{@link
3054      * java.util.regex.Pattern#matcher(java.lang.CharSequence) matcher}(<i>str</i>).{@link
3055      * java.util.regex.Matcher#replaceAll(String) replaceAll}(<i>repl</i>)
3056      * </code>
3057      * </blockquote>
3058      *
3059      *<p>
3060      * Note that backslashes ({@code \}) and dollar signs ({@code $}) in the
3061      * replacement string may cause the results to be different than if it were
3062      * being treated as a literal replacement string; see
3063      * {@link java.util.regex.Matcher#replaceAll Matcher.replaceAll}.
3064      * Use {@link java.util.regex.Matcher#quoteReplacement} to suppress the special
3065      * meaning of these characters, if desired.
3066      *
3067      * @param   regex
3068      *          the regular expression to which this string is to be matched
3069      * @param   replacement
3070      *          the string to be substituted for each match
3071      *
3072      * @return  The resulting {@code String}
3073      *
3074      * @throws  PatternSyntaxException
3075      *          if the regular expression's syntax is invalid
3076      *
3077      * @see java.util.regex.Pattern
3078      *
3079      * @since 1.4
3080      */
3081     public String replaceAll(String regex, String replacement) {
3082         return Pattern.compile(regex).matcher(this).replaceAll(replacement);
3083     }
3084 
3085     /**
3086      * Replaces each substring of this string that matches the literal target
3087      * sequence with the specified literal replacement sequence. The
3088      * replacement proceeds from the beginning of the string to the end, for
3089      * example, replacing "aa" with "b" in the string "aaa" will result in
3090      * "ba" rather than "ab".
3091      *
3092      * @param  target The sequence of char values to be replaced
3093      * @param  replacement The replacement sequence of char values
3094      * @return  The resulting string
3095      * @since 1.5
3096      */
3097     public String replace(CharSequence target, CharSequence replacement) {
3098         String trgtStr = target.toString();
3099         String replStr = replacement.toString();
3100         int thisLen = length();
3101         int trgtLen = trgtStr.length();
3102         int replLen = replStr.length();
3103 
3104         if (trgtLen > 0) {
3105             if (trgtLen == 1 && replLen == 1) {
3106                 return replace(trgtStr.charAt(0), replStr.charAt(0));
3107             }
3108 
3109             boolean thisIsLatin1 = this.isLatin1();
3110             boolean trgtIsLatin1 = trgtStr.isLatin1();
3111             boolean replIsLatin1 = replStr.isLatin1();
3112             String ret = (thisIsLatin1 && trgtIsLatin1 && replIsLatin1)
3113                     ? StringLatin1.replace(value, thisLen,
3114                                            trgtStr.value, trgtLen,
3115                                            replStr.value, replLen)
3116                     : StringUTF16.replace(value, thisLen, thisIsLatin1,
3117                                           trgtStr.value, trgtLen, trgtIsLatin1,
3118                                           replStr.value, replLen, replIsLatin1);
3119             if (ret != null) {
3120                 return ret;
3121             }
3122             return this;
3123 
3124         } else { // trgtLen == 0
3125             int resultLen;
3126             try {
3127                 resultLen = Math.addExact(thisLen, Math.multiplyExact(
3128                         Math.addExact(thisLen, 1), replLen));
3129             } catch (ArithmeticException ignored) {
3130                 throw new OutOfMemoryError("Required length exceeds implementation limit");
3131             }
3132 
3133             StringBuilder sb = new StringBuilder(resultLen);
3134             sb.append(replStr);
3135             for (int i = 0; i < thisLen; ++i) {
3136                 sb.append(charAt(i)).append(replStr);
3137             }
3138             return sb.toString();
3139         }
3140     }
3141 
3142     /**
3143      * Splits this string around matches of the given
3144      * <a href="../util/regex/Pattern.html#sum">regular expression</a>.
3145      *
3146      * <p> The array returned by this method contains each substring of this
3147      * string that is terminated by another substring that matches the given
3148      * expression or is terminated by the end of the string.  The substrings in
3149      * the array are in the order in which they occur in this string.  If the
3150      * expression does not match any part of the input then the resulting array
3151      * has just one element, namely this string.
3152      *
3153      * <p> When there is a positive-width match at the beginning of this
3154      * string then an empty leading substring is included at the beginning
3155      * of the resulting array. A zero-width match at the beginning however
3156      * never produces such empty leading substring.
3157      *
3158      * <p> The {@code limit} parameter controls the number of times the
3159      * pattern is applied and therefore affects the length of the resulting
3160      * array.
3161      * <ul>
3162      *    <li><p>
3163      *    If the <i>limit</i> is positive then the pattern will be applied
3164      *    at most <i>limit</i>&nbsp;-&nbsp;1 times, the array's length will be
3165      *    no greater than <i>limit</i>, and the array's last entry will contain
3166      *    all input beyond the last matched delimiter.</p></li>
3167      *
3168      *    <li><p>
3169      *    If the <i>limit</i> is zero then the pattern will be applied as
3170      *    many times as possible, the array can have any length, and trailing
3171      *    empty strings will be discarded.</p></li>
3172      *
3173      *    <li><p>
3174      *    If the <i>limit</i> is negative then the pattern will be applied
3175      *    as many times as possible and the array can have any length.</p></li>
3176      * </ul>
3177      *
3178      * <p> The string {@code "boo:and:foo"}, for example, yields the
3179      * following results with these parameters:
3180      *
3181      * <blockquote><table class="plain">
3182      * <caption style="display:none">Split example showing regex, limit, and result</caption>
3183      * <thead>
3184      * <tr>
3185      *     <th scope="col">Regex</th>
3186      *     <th scope="col">Limit</th>
3187      *     <th scope="col">Result</th>
3188      * </tr>
3189      * </thead>
3190      * <tbody>
3191      * <tr><th scope="row" rowspan="3" style="font-weight:normal">:</th>
3192      *     <th scope="row" style="font-weight:normal; text-align:right; padding-right:1em">2</th>
3193      *     <td>{@code { "boo", "and:foo" }}</td></tr>
3194      * <tr><!-- : -->
3195      *     <th scope="row" style="font-weight:normal; text-align:right; padding-right:1em">5</th>
3196      *     <td>{@code { "boo", "and", "foo" }}</td></tr>
3197      * <tr><!-- : -->
3198      *     <th scope="row" style="font-weight:normal; text-align:right; padding-right:1em">-2</th>
3199      *     <td>{@code { "boo", "and", "foo" }}</td></tr>
3200      * <tr><th scope="row" rowspan="3" style="font-weight:normal">o</th>
3201      *     <th scope="row" style="font-weight:normal; text-align:right; padding-right:1em">5</th>
3202      *     <td>{@code { "b", "", ":and:f", "", "" }}</td></tr>
3203      * <tr><!-- o -->
3204      *     <th scope="row" style="font-weight:normal; text-align:right; padding-right:1em">-2</th>
3205      *     <td>{@code { "b", "", ":and:f", "", "" }}</td></tr>
3206      * <tr><!-- o -->
3207      *     <th scope="row" style="font-weight:normal; text-align:right; padding-right:1em">0</th>
3208      *     <td>{@code { "b", "", ":and:f" }}</td></tr>
3209      * </tbody>
3210      * </table></blockquote>
3211      *
3212      * <p> An invocation of this method of the form
3213      * <i>str.</i>{@code split(}<i>regex</i>{@code ,}&nbsp;<i>n</i>{@code )}
3214      * yields the same result as the expression
3215      *
3216      * <blockquote>
3217      * <code>
3218      * {@link java.util.regex.Pattern}.{@link
3219      * java.util.regex.Pattern#compile(String) compile}(<i>regex</i>).{@link
3220      * java.util.regex.Pattern#split(java.lang.CharSequence,int) split}(<i>str</i>,&nbsp;<i>n</i>)
3221      * </code>
3222      * </blockquote>
3223      *
3224      *
3225      * @param  regex
3226      *         the delimiting regular expression
3227      *
3228      * @param  limit
3229      *         the result threshold, as described above
3230      *
3231      * @return  the array of strings computed by splitting this string
3232      *          around matches of the given regular expression
3233      *
3234      * @throws  PatternSyntaxException
3235      *          if the regular expression's syntax is invalid
3236      *
3237      * @see java.util.regex.Pattern
3238      *
3239      * @since 1.4
3240      */
3241     public String[] split(String regex, int limit) {
3242         return split(regex, limit, false);
3243     }
3244 
3245     /**
3246      * Splits this string around matches of the given regular expression and
3247      * returns both the strings and the matching delimiters.
3248      *
3249      * <p> The array returned by this method contains each substring of this
3250      * string that is terminated by another substring that matches the given
3251      * expression or is terminated by the end of the string.
3252      * Each substring is immediately followed by the subsequence (the delimiter)
3253      * that matches the given expression, <em>except</em> for the last
3254      * substring, which is not followed by anything.
3255      * The substrings in the array and the delimiters are in the order in which
3256      * they occur in the input.
3257      * If the expression does not match any part of the input then the resulting
3258      * array has just one element, namely this string.
3259      *
3260      * <p> When there is a positive-width match at the beginning of this
3261      * string then an empty leading substring is included at the beginning
3262      * of the resulting array. A zero-width match at the beginning however
3263      * never produces such empty leading substring nor the empty delimiter.
3264      *
3265      * <p> The {@code limit} parameter controls the number of times the
3266      * pattern is applied and therefore affects the length of the resulting
3267      * array.
3268      * <ul>
3269      *    <li> If the <i>limit</i> is positive then the pattern will be applied
3270      *    at most <i>limit</i>&nbsp;-&nbsp;1 times, the array's length will be
3271      *    no greater than 2 &times; <i>limit</i> - 1, and the array's last
3272      *    entry will contain all input beyond the last matched delimiter.</li>
3273      *
3274      *    <li> If the <i>limit</i> is zero then the pattern will be applied as
3275      *    many times as possible, the array can have any length, and trailing
3276      *    empty strings will be discarded.</li>
3277      *
3278      *    <li> If the <i>limit</i> is negative then the pattern will be applied
3279      *    as many times as possible and the array can have any length.</li>
3280      * </ul>
3281      *
3282      * <p> The input {@code "boo:::and::foo"}, for example, yields the following
3283      * results with these parameters:
3284      *
3285      * <table class="plain" style="margin-left:2em;">
3286      * <caption style="display:none">Split example showing regex, limit, and result</caption>
3287      * <thead>
3288      * <tr>
3289      *     <th scope="col">Regex</th>
3290      *     <th scope="col">Limit</th>
3291      *     <th scope="col">Result</th>
3292      * </tr>
3293      * </thead>
3294      * <tbody>
3295      * <tr><th scope="row" rowspan="3" style="font-weight:normal">:+</th>
3296      *     <th scope="row" style="font-weight:normal; text-align:right; padding-right:1em">2</th>
3297      *     <td>{@code { "boo", ":::", "and::foo" }}</td></tr>
3298      * <tr><!-- : -->
3299      *     <th scope="row" style="font-weight:normal; text-align:right; padding-right:1em">5</th>
3300      *     <td>{@code { "boo", ":::", "and", "::", "foo" }}</td></tr>
3301      * <tr><!-- : -->
3302      *     <th scope="row" style="font-weight:normal; text-align:right; padding-right:1em">-1</th>
3303      *     <td>{@code { "boo", ":::", "and", "::", "foo" }}</td></tr>
3304      * <tr><th scope="row" rowspan="3" style="font-weight:normal">o</th>
3305      *     <th scope="row" style="font-weight:normal; text-align:right; padding-right:1em">5</th>
3306      *     <td>{@code { "b", "o", "", "o", ":::and::f", "o", "", "o", "" }}</td></tr>
3307      * <tr><!-- o -->
3308      *     <th scope="row" style="font-weight:normal; text-align:right; padding-right:1em">-1</th>
3309      *     <td>{@code { "b", "o", "", "o", ":::and::f", "o", "", "o", "" }}</td></tr>
3310      * <tr><!-- o -->
3311      *     <th scope="row" style="font-weight:normal; text-align:right; padding-right:1em">0</th>
3312      *     <td>{@code { "b", "o", "", "o", ":::and::f", "o", "", "o" }}</td></tr>
3313      * </tbody>
3314      * </table>
3315      *
3316      * @apiNote An invocation of this method of the form
3317      * <i>str.</i>{@code splitWithDelimiters(}<i>regex</i>{@code ,}&nbsp;<i>n</i>{@code )}
3318      * yields the same result as the expression
3319      *
3320      * <blockquote>
3321      * <code>
3322      * {@link java.util.regex.Pattern}.{@link
3323      * java.util.regex.Pattern#compile(String) compile}(<i>regex</i>).{@link
3324      * java.util.regex.Pattern#splitWithDelimiters(CharSequence,int) splitWithDelimiters}(<i>str</i>,&nbsp;<i>n</i>)
3325      * </code>
3326      * </blockquote>
3327      *
3328      * @param  regex
3329      *         the delimiting regular expression
3330      *
3331      * @param  limit
3332      *         the result threshold, as described above
3333      *
3334      * @return  the array of strings computed by splitting this string
3335      *          around matches of the given regular expression, alternating
3336      *          substrings and matching delimiters
3337      *
3338      * @since   21
3339      */
3340     public String[] splitWithDelimiters(String regex, int limit) {
3341         return split(regex, limit, true);
3342     }
3343 
3344     private String[] split(String regex, int limit, boolean withDelimiters) {
3345         /* fastpath if the regex is a
3346          * (1) one-char String and this character is not one of the
3347          *     RegEx's meta characters ".$|()[{^?*+\\", or
3348          * (2) two-char String and the first char is the backslash and
3349          *     the second is not the ascii digit or ascii letter.
3350          */
3351         char ch = 0;
3352         if (((regex.length() == 1 &&
3353                 ".$|()[{^?*+\\".indexOf(ch = regex.charAt(0)) == -1) ||
3354                 (regex.length() == 2 &&
3355                         regex.charAt(0) == '\\' &&
3356                         (((ch = regex.charAt(1))-'0')|('9'-ch)) < 0 &&
3357                         ((ch-'a')|('z'-ch)) < 0 &&
3358                         ((ch-'A')|('Z'-ch)) < 0)) &&
3359                 (ch < Character.MIN_HIGH_SURROGATE ||
3360                         ch > Character.MAX_LOW_SURROGATE))
3361         {
3362             // All the checks above can potentially be constant folded by
3363             // a JIT/AOT compiler when the regex is a constant string.
3364             // That requires method inlining of the checks, which is only
3365             // possible when the actual split logic is in a separate method
3366             // because the large split loop can usually not be inlined.
3367             return split(ch, limit, withDelimiters);
3368         }
3369         Pattern pattern = Pattern.compile(regex);
3370         return withDelimiters
3371                 ? pattern.splitWithDelimiters(this, limit)
3372                 : pattern.split(this, limit);
3373     }
3374 
3375     private String[] split(char ch, int limit, boolean withDelimiters) {
3376         int matchCount = 0;
3377         int off = 0;
3378         int next;
3379         boolean limited = limit > 0;
3380         ArrayList<String> list = new ArrayList<>();
3381         String del = withDelimiters ? String.valueOf(ch) : null;
3382         while ((next = indexOf(ch, off)) != -1) {
3383             if (!limited || matchCount < limit - 1) {
3384                 list.add(substring(off, next));
3385                 if (withDelimiters) {
3386                     list.add(del);
3387                 }
3388                 off = next + 1;
3389                 ++matchCount;
3390             } else {    // last one
3391                 int last = length();
3392                 list.add(substring(off, last));
3393                 off = last;
3394                 ++matchCount;
3395                 break;
3396             }
3397         }
3398         // If no match was found, return this
3399         if (off == 0)
3400             return new String[] {this};
3401 
3402         // Add remaining segment
3403         if (!limited || matchCount < limit)
3404             list.add(substring(off, length()));
3405 
3406         // Construct result
3407         int resultSize = list.size();
3408         if (limit == 0) {
3409             while (resultSize > 0 && list.get(resultSize - 1).isEmpty()) {
3410                 resultSize--;
3411             }
3412         }
3413         String[] result = new String[resultSize];
3414         return list.subList(0, resultSize).toArray(result);
3415     }
3416 
3417     /**
3418      * Splits this string around matches of the given <a
3419      * href="../util/regex/Pattern.html#sum">regular expression</a>.
3420      *
3421      * <p> This method works as if by invoking the two-argument {@link
3422      * #split(String, int) split} method with the given expression and a limit
3423      * argument of zero.  Trailing empty strings are therefore not included in
3424      * the resulting array.
3425      *
3426      * <p> The string {@code "boo:and:foo"}, for example, yields the following
3427      * results with these expressions:
3428      *
3429      * <blockquote><table class="plain">
3430      * <caption style="display:none">Split examples showing regex and result</caption>
3431      * <thead>
3432      * <tr>
3433      *  <th scope="col">Regex</th>
3434      *  <th scope="col">Result</th>
3435      * </tr>
3436      * </thead>
3437      * <tbody>
3438      * <tr><th scope="row" style="font-weight:normal">:</th>
3439      *     <td>{@code { "boo", "and", "foo" }}</td></tr>
3440      * <tr><th scope="row" style="font-weight:normal">o</th>
3441      *     <td>{@code { "b", "", ":and:f" }}</td></tr>
3442      * </tbody>
3443      * </table></blockquote>
3444      *
3445      *
3446      * @param  regex
3447      *         the delimiting regular expression
3448      *
3449      * @return  the array of strings computed by splitting this string
3450      *          around matches of the given regular expression
3451      *
3452      * @throws  PatternSyntaxException
3453      *          if the regular expression's syntax is invalid
3454      *
3455      * @see java.util.regex.Pattern
3456      *
3457      * @since 1.4
3458      */
3459     public String[] split(String regex) {
3460         return split(regex, 0, false);
3461     }
3462 
3463     /**
3464      * Returns a new String composed of copies of the
3465      * {@code CharSequence elements} joined together with a copy of
3466      * the specified {@code delimiter}.
3467      *
3468      * <blockquote>For example,
3469      * <pre>{@code
3470      *     String message = String.join("-", "Java", "is", "cool");
3471      *     // message returned is: "Java-is-cool"
3472      * }</pre></blockquote>
3473      *
3474      * Note that if an element is null, then {@code "null"} is added.
3475      *
3476      * @param  delimiter the delimiter that separates each element
3477      * @param  elements the elements to join together.
3478      *
3479      * @return a new {@code String} that is composed of the {@code elements}
3480      *         separated by the {@code delimiter}
3481      *
3482      * @throws NullPointerException If {@code delimiter} or {@code elements}
3483      *         is {@code null}
3484      *
3485      * @see java.util.StringJoiner
3486      * @since 1.8
3487      */
3488     public static String join(CharSequence delimiter, CharSequence... elements) {
3489         var delim = delimiter.toString();
3490         var elems = new String[elements.length];
3491         for (int i = 0; i < elements.length; i++) {
3492             elems[i] = String.valueOf(elements[i]);
3493         }
3494         return join("", "", delim, elems, elems.length);
3495     }
3496 
3497     /**
3498      * Designated join routine.
3499      *
3500      * @param prefix the non-null prefix
3501      * @param suffix the non-null suffix
3502      * @param delimiter the non-null delimiter
3503      * @param elements the non-null array of non-null elements
3504      * @param size the number of elements in the array (<= elements.length)
3505      * @return the joined string
3506      */
3507     @ForceInline
3508     static String join(String prefix, String suffix, String delimiter, String[] elements, int size) {
3509         int icoder = prefix.coder() | suffix.coder();
3510         long len = (long) prefix.length() + suffix.length();
3511         if (size > 1) { // when there are more than one element, size - 1 delimiters will be emitted
3512             len += (long) (size - 1) * delimiter.length();
3513             icoder |= delimiter.coder();
3514         }
3515         // assert len > 0L; // max: (long) Integer.MAX_VALUE << 32
3516         // following loop will add max: (long) Integer.MAX_VALUE * Integer.MAX_VALUE to len
3517         // so len can overflow at most once
3518         for (int i = 0; i < size; i++) {
3519             var el = elements[i];
3520             len += el.length();
3521             icoder |= el.coder();
3522         }
3523         byte coder = (byte) icoder;
3524         // long len overflow check, char -> byte length, int len overflow check
3525         if (len < 0L || (len <<= coder) != (int) len) {
3526             throw new OutOfMemoryError("Requested string length exceeds VM limit");
3527         }
3528         byte[] value = StringConcatHelper.newArray(len);
3529 
3530         int off = 0;
3531         prefix.getBytes(value, off, coder); off += prefix.length();
3532         if (size > 0) {
3533             var el = elements[0];
3534             el.getBytes(value, off, coder); off += el.length();
3535             for (int i = 1; i < size; i++) {
3536                 delimiter.getBytes(value, off, coder); off += delimiter.length();
3537                 el = elements[i];
3538                 el.getBytes(value, off, coder); off += el.length();
3539             }
3540         }
3541         suffix.getBytes(value, off, coder);
3542         // assert off + suffix.length() == value.length >> coder;
3543 
3544         return new String(value, coder);
3545     }
3546 
3547     /**
3548      * Returns a new {@code String} composed of copies of the
3549      * {@code CharSequence elements} joined together with a copy of the
3550      * specified {@code delimiter}.
3551      *
3552      * <blockquote>For example,
3553      * <pre>{@code
3554      *     List<String> strings = List.of("Java", "is", "cool");
3555      *     String message = String.join(" ", strings);
3556      *     // message returned is: "Java is cool"
3557      *
3558      *     Set<String> strings =
3559      *         new LinkedHashSet<>(List.of("Java", "is", "very", "cool"));
3560      *     String message = String.join("-", strings);
3561      *     // message returned is: "Java-is-very-cool"
3562      * }</pre></blockquote>
3563      *
3564      * Note that if an individual element is {@code null}, then {@code "null"} is added.
3565      *
3566      * @param  delimiter a sequence of characters that is used to separate each
3567      *         of the {@code elements} in the resulting {@code String}
3568      * @param  elements an {@code Iterable} that will have its {@code elements}
3569      *         joined together.
3570      *
3571      * @return a new {@code String} that is composed from the {@code elements}
3572      *         argument
3573      *
3574      * @throws NullPointerException If {@code delimiter} or {@code elements}
3575      *         is {@code null}
3576      *
3577      * @see    #join(CharSequence,CharSequence...)
3578      * @see    java.util.StringJoiner
3579      * @since 1.8
3580      */
3581     public static String join(CharSequence delimiter,
3582             Iterable<? extends CharSequence> elements) {
3583         Objects.requireNonNull(delimiter);
3584         Objects.requireNonNull(elements);
3585         var delim = delimiter.toString();
3586         var elems = new String[8];
3587         int size = 0;
3588         for (CharSequence cs: elements) {
3589             if (size >= elems.length) {
3590                 elems = Arrays.copyOf(elems, elems.length << 1);
3591             }
3592             elems[size++] = String.valueOf(cs);
3593         }
3594         return join("", "", delim, elems, size);
3595     }
3596 
3597     /**
3598      * Converts all of the characters in this {@code String} to lower
3599      * case using the rules of the given {@code Locale}.  Case mapping is based
3600      * on the Unicode Standard version specified by the {@link java.lang.Character Character}
3601      * class. Since case mappings are not always 1:1 char mappings, the resulting {@code String}
3602      * and this {@code String} may differ in length.
3603      * <p>
3604      * Examples of lowercase mappings are in the following table:
3605      * <table class="plain">
3606      * <caption style="display:none">Lowercase mapping examples showing language code of locale, upper case, lower case, and description</caption>
3607      * <thead>
3608      * <tr>
3609      *   <th scope="col">Language Code of Locale</th>
3610      *   <th scope="col">Upper Case</th>
3611      *   <th scope="col">Lower Case</th>
3612      *   <th scope="col">Description</th>
3613      * </tr>
3614      * </thead>
3615      * <tbody>
3616      * <tr>
3617      *   <td>tr (Turkish)</td>
3618      *   <th scope="row" style="font-weight:normal; text-align:left">&#92;u0130</th>
3619      *   <td>&#92;u0069</td>
3620      *   <td>capital letter I with dot above -&gt; small letter i</td>
3621      * </tr>
3622      * <tr>
3623      *   <td>tr (Turkish)</td>
3624      *   <th scope="row" style="font-weight:normal; text-align:left">&#92;u0049</th>
3625      *   <td>&#92;u0131</td>
3626      *   <td>capital letter I -&gt; small letter dotless i </td>
3627      * </tr>
3628      * <tr>
3629      *   <td>(all)</td>
3630      *   <th scope="row" style="font-weight:normal; text-align:left">French Fries</th>
3631      *   <td>french fries</td>
3632      *   <td>lowercased all chars in String</td>
3633      * </tr>
3634      * <tr>
3635      *   <td>(all)</td>
3636      *   <th scope="row" style="font-weight:normal; text-align:left">
3637      *       &Iota;&Chi;&Theta;&Upsilon;&Sigma;</th>
3638      *   <td>&iota;&chi;&theta;&upsilon;&sigma;</td>
3639      *   <td>lowercased all chars in String</td>
3640      * </tr>
3641      * </tbody>
3642      * </table>
3643      *
3644      * @param locale use the case transformation rules for this locale
3645      * @return the {@code String}, converted to lowercase.
3646      * @see     java.lang.String#toLowerCase()
3647      * @see     java.lang.String#toUpperCase()
3648      * @see     java.lang.String#toUpperCase(Locale)
3649      * @since   1.1
3650      */
3651     public String toLowerCase(Locale locale) {
3652         return isLatin1() ? StringLatin1.toLowerCase(this, value, locale)
3653                           : StringUTF16.toLowerCase(this, value, locale);
3654     }
3655 
3656     /**
3657      * Converts all of the characters in this {@code String} to lower
3658      * case using the rules of the default locale. This method is equivalent to
3659      * {@code toLowerCase(Locale.getDefault())}.
3660      *
3661      * @apiNote This method is locale sensitive, and may produce unexpected
3662      * results if used for strings that are intended to be interpreted locale
3663      * independently.
3664      * Examples are programming language identifiers, protocol keys, and HTML
3665      * tags.
3666      * For instance, {@code "TITLE".toLowerCase()} in a Turkish locale
3667      * returns {@code "t\u005Cu0131tle"}, where '\u005Cu0131' is the
3668      * LATIN SMALL LETTER DOTLESS I character.
3669      * To obtain correct results for locale insensitive strings, use
3670      * {@code toLowerCase(Locale.ROOT)}.
3671      *
3672      * @return  the {@code String}, converted to lowercase.
3673      * @see     java.lang.String#toLowerCase(Locale)
3674      */
3675     public String toLowerCase() {
3676         return toLowerCase(Locale.getDefault());
3677     }
3678 
3679     /**
3680      * Converts all of the characters in this {@code String} to upper
3681      * case using the rules of the given {@code Locale}. Case mapping is based
3682      * on the Unicode Standard version specified by the {@link java.lang.Character Character}
3683      * class. Since case mappings are not always 1:1 char mappings, the resulting {@code String}
3684      * and this {@code String} may differ in length.
3685      * <p>
3686      * Examples of locale-sensitive and 1:M case mappings are in the following table:
3687      * <table class="plain">
3688      * <caption style="display:none">Examples of locale-sensitive and 1:M case mappings. Shows Language code of locale, lower case, upper case, and description.</caption>
3689      * <thead>
3690      * <tr>
3691      *   <th scope="col">Language Code of Locale</th>
3692      *   <th scope="col">Lower Case</th>
3693      *   <th scope="col">Upper Case</th>
3694      *   <th scope="col">Description</th>
3695      * </tr>
3696      * </thead>
3697      * <tbody>
3698      * <tr>
3699      *   <td>tr (Turkish)</td>
3700      *   <th scope="row" style="font-weight:normal; text-align:left">&#92;u0069</th>
3701      *   <td>&#92;u0130</td>
3702      *   <td>small letter i -&gt; capital letter I with dot above</td>
3703      * </tr>
3704      * <tr>
3705      *   <td>tr (Turkish)</td>
3706      *   <th scope="row" style="font-weight:normal; text-align:left">&#92;u0131</th>
3707      *   <td>&#92;u0049</td>
3708      *   <td>small letter dotless i -&gt; capital letter I</td>
3709      * </tr>
3710      * <tr>
3711      *   <td>(all)</td>
3712      *   <th scope="row" style="font-weight:normal; text-align:left">&#92;u00df</th>
3713      *   <td>&#92;u0053 &#92;u0053</td>
3714      *   <td>small letter sharp s -&gt; two letters: SS</td>
3715      * </tr>
3716      * <tr>
3717      *   <td>(all)</td>
3718      *   <th scope="row" style="font-weight:normal; text-align:left">Fahrvergn&uuml;gen</th>
3719      *   <td>FAHRVERGN&Uuml;GEN</td>
3720      *   <td></td>
3721      * </tr>
3722      * </tbody>
3723      * </table>
3724      * @param locale use the case transformation rules for this locale
3725      * @return the {@code String}, converted to uppercase.
3726      * @see     java.lang.String#toUpperCase()
3727      * @see     java.lang.String#toLowerCase()
3728      * @see     java.lang.String#toLowerCase(Locale)
3729      * @since   1.1
3730      */
3731     public String toUpperCase(Locale locale) {
3732         return isLatin1() ? StringLatin1.toUpperCase(this, value, locale)
3733                           : StringUTF16.toUpperCase(this, value, locale);
3734     }
3735 
3736     /**
3737      * Converts all of the characters in this {@code String} to upper
3738      * case using the rules of the default locale. This method is equivalent to
3739      * {@code toUpperCase(Locale.getDefault())}.
3740      *
3741      * @apiNote This method is locale sensitive, and may produce unexpected
3742      * results if used for strings that are intended to be interpreted locale
3743      * independently.
3744      * Examples are programming language identifiers, protocol keys, and HTML
3745      * tags.
3746      * For instance, {@code "title".toUpperCase()} in a Turkish locale
3747      * returns {@code "T\u005Cu0130TLE"}, where '\u005Cu0130' is the
3748      * LATIN CAPITAL LETTER I WITH DOT ABOVE character.
3749      * To obtain correct results for locale insensitive strings, use
3750      * {@code toUpperCase(Locale.ROOT)}.
3751      *
3752      * @return  the {@code String}, converted to uppercase.
3753      * @see     java.lang.String#toUpperCase(Locale)
3754      */
3755     public String toUpperCase() {
3756         return toUpperCase(Locale.getDefault());
3757     }
3758 
3759     /**
3760      * Returns a string whose value is this string, with all leading
3761      * and trailing space removed, where space is defined
3762      * as any character whose codepoint is less than or equal to
3763      * {@code 'U+0020'} (the space character).
3764      * <p>
3765      * If this {@code String} object represents an empty character
3766      * sequence, or the first and last characters of character sequence
3767      * represented by this {@code String} object both have codes
3768      * that are not space (as defined above), then a
3769      * reference to this {@code String} object is returned.
3770      * <p>
3771      * Otherwise, if all characters in this string are space (as
3772      * defined above), then a  {@code String} object representing an
3773      * empty string is returned.
3774      * <p>
3775      * Otherwise, let <i>k</i> be the index of the first character in the
3776      * string whose code is not a space (as defined above) and let
3777      * <i>m</i> be the index of the last character in the string whose code
3778      * is not a space (as defined above). A {@code String}
3779      * object is returned, representing the substring of this string that
3780      * begins with the character at index <i>k</i> and ends with the
     * character at index <i>m</i>&mdash;that is, the result of
3782      * {@code this.substring(k, m + 1)}.
3783      * <p>
3784      * This method may be used to trim space (as defined above) from
3785      * the beginning and end of a string.
3786      *
3787      * @return  a string whose value is this string, with all leading
3788      *          and trailing space removed, or this string if it
3789      *          has no leading or trailing space.
3790      */
3791     public String trim() {
3792         String ret = isLatin1() ? StringLatin1.trim(value)
3793                                 : StringUTF16.trim(value);
3794         return ret == null ? this : ret;
3795     }
3796 
3797     /**
3798      * Returns a string whose value is this string, with all leading
3799      * and trailing {@linkplain Character#isWhitespace(int) white space}
3800      * removed.
3801      * <p>
3802      * If this {@code String} object represents an empty string,
3803      * or if all code points in this string are
3804      * {@linkplain Character#isWhitespace(int) white space}, then an empty string
3805      * is returned.
3806      * <p>
3807      * Otherwise, returns a substring of this string beginning with the first
3808      * code point that is not a {@linkplain Character#isWhitespace(int) white space}
3809      * up to and including the last code point that is not a
3810      * {@linkplain Character#isWhitespace(int) white space}.
3811      * <p>
3812      * This method may be used to strip
3813      * {@linkplain Character#isWhitespace(int) white space} from
3814      * the beginning and end of a string.
3815      *
3816      * @return  a string whose value is this string, with all leading
3817      *          and trailing white space removed
3818      *
3819      * @see Character#isWhitespace(int)
3820      *
3821      * @since 11
3822      */
3823     public String strip() {
3824         String ret = isLatin1() ? StringLatin1.strip(value)
3825                                 : StringUTF16.strip(value);
3826         return ret == null ? this : ret;
3827     }
3828 
3829     /**
3830      * Returns a string whose value is this string, with all leading
3831      * {@linkplain Character#isWhitespace(int) white space} removed.
3832      * <p>
3833      * If this {@code String} object represents an empty string,
3834      * or if all code points in this string are
3835      * {@linkplain Character#isWhitespace(int) white space}, then an empty string
3836      * is returned.
3837      * <p>
3838      * Otherwise, returns a substring of this string beginning with the first
3839      * code point that is not a {@linkplain Character#isWhitespace(int) white space}
3840      * up to and including the last code point of this string.
3841      * <p>
3842      * This method may be used to trim
3843      * {@linkplain Character#isWhitespace(int) white space} from
3844      * the beginning of a string.
3845      *
3846      * @return  a string whose value is this string, with all leading white
3847      *          space removed
3848      *
3849      * @see Character#isWhitespace(int)
3850      *
3851      * @since 11
3852      */
3853     public String stripLeading() {
3854         String ret = isLatin1() ? StringLatin1.stripLeading(value)
3855                                 : StringUTF16.stripLeading(value);
3856         return ret == null ? this : ret;
3857     }
3858 
3859     /**
3860      * Returns a string whose value is this string, with all trailing
3861      * {@linkplain Character#isWhitespace(int) white space} removed.
3862      * <p>
3863      * If this {@code String} object represents an empty string,
3864      * or if all characters in this string are
3865      * {@linkplain Character#isWhitespace(int) white space}, then an empty string
3866      * is returned.
3867      * <p>
3868      * Otherwise, returns a substring of this string beginning with the first
3869      * code point of this string up to and including the last code point
3870      * that is not a {@linkplain Character#isWhitespace(int) white space}.
3871      * <p>
3872      * This method may be used to trim
3873      * {@linkplain Character#isWhitespace(int) white space} from
3874      * the end of a string.
3875      *
3876      * @return  a string whose value is this string, with all trailing white
3877      *          space removed
3878      *
3879      * @see Character#isWhitespace(int)
3880      *
3881      * @since 11
3882      */
3883     public String stripTrailing() {
3884         String ret = isLatin1() ? StringLatin1.stripTrailing(value)
3885                                 : StringUTF16.stripTrailing(value);
3886         return ret == null ? this : ret;
3887     }
3888 
3889     /**
3890      * Returns {@code true} if the string is empty or contains only
3891      * {@linkplain Character#isWhitespace(int) white space} codepoints,
3892      * otherwise {@code false}.
3893      *
3894      * @return {@code true} if the string is empty or contains only
3895      *         {@linkplain Character#isWhitespace(int) white space} codepoints,
3896      *         otherwise {@code false}
3897      *
3898      * @see Character#isWhitespace(int)
3899      *
3900      * @since 11
3901      */
3902     public boolean isBlank() {
3903         return indexOfNonWhitespace() == length();
3904     }
3905 
3906     /**
3907      * Returns a stream of lines extracted from this string,
3908      * separated by line terminators.
3909      * <p>
3910      * A <i>line terminator</i> is one of the following:
3911      * a line feed character {@code "\n"} (U+000A),
3912      * a carriage return character {@code "\r"} (U+000D),
3913      * or a carriage return followed immediately by a line feed
3914      * {@code "\r\n"} (U+000D U+000A).
3915      * <p>
3916      * A <i>line</i> is either a sequence of zero or more characters
3917      * followed by a line terminator, or it is a sequence of one or
3918      * more characters followed by the end of the string. A
3919      * line does not include the line terminator.
3920      * <p>
3921      * The stream returned by this method contains the lines from
3922      * this string in the order in which they occur.
3923      *
3924      * @apiNote This definition of <i>line</i> implies that an empty
3925      *          string has zero lines and that there is no empty line
3926      *          following a line terminator at the end of a string.
3927      *
3928      * @implNote This method provides better performance than
3929      *           split("\R") by supplying elements lazily and
3930      *           by faster search of new line terminators.
3931      *
3932      * @return  the stream of lines extracted from this string
3933      *
3934      * @since 11
3935      */
3936     public Stream<String> lines() {
3937         return isLatin1() ? StringLatin1.lines(value) : StringUTF16.lines(value);
3938     }
3939 
3940     /**
3941      * Adjusts the indentation of each line of this string based on the value of
3942      * {@code n}, and normalizes line termination characters.
3943      * <p>
3944      * This string is conceptually separated into lines using
3945      * {@link String#lines()}. Each line is then adjusted as described below
3946      * and then suffixed with a line feed {@code "\n"} (U+000A). The resulting
3947      * lines are then concatenated and returned.
3948      * <p>
3949      * If {@code n > 0} then {@code n} spaces (U+0020) are inserted at the
3950      * beginning of each line.
3951      * <p>
3952      * If {@code n < 0} then up to {@code n}
3953      * {@linkplain Character#isWhitespace(int) white space characters} are removed
3954      * from the beginning of each line. If a given line does not contain
3955      * sufficient white space then all leading
3956      * {@linkplain Character#isWhitespace(int) white space characters} are removed.
3957      * Each white space character is treated as a single character. In
3958      * particular, the tab character {@code "\t"} (U+0009) is considered a
3959      * single character; it is not expanded.
3960      * <p>
3961      * If {@code n == 0} then the line remains unchanged. However, line
3962      * terminators are still normalized.
3963      *
3964      * @param n  number of leading
3965      *           {@linkplain Character#isWhitespace(int) white space characters}
3966      *           to add or remove
3967      *
3968      * @return string with indentation adjusted and line endings normalized
3969      *
3970      * @see String#lines()
3971      * @see String#isBlank()
3972      * @see Character#isWhitespace(int)
3973      *
3974      * @since 12
3975      */
3976     public String indent(int n) {
3977         if (isEmpty()) {
3978             return "";
3979         }
3980         Stream<String> stream = lines();
3981         if (n > 0) {
3982             final String spaces = " ".repeat(n);
3983             stream = stream.map(s -> spaces + s);
3984         } else if (n == Integer.MIN_VALUE) {
3985             stream = stream.map(s -> s.stripLeading());
3986         } else if (n < 0) {
3987             stream = stream.map(s -> s.substring(Math.min(-n, s.indexOfNonWhitespace())));
3988         }
3989         return stream.collect(Collectors.joining("\n", "", "\n"));
3990     }
3991 
3992     private int indexOfNonWhitespace() {
3993         return isLatin1() ? StringLatin1.indexOfNonWhitespace(value)
3994                           : StringUTF16.indexOfNonWhitespace(value);
3995     }
3996 
3997     private int lastIndexOfNonWhitespace() {
3998         return isLatin1() ? StringLatin1.lastIndexOfNonWhitespace(value)
3999                           : StringUTF16.lastIndexOfNonWhitespace(value);
4000     }
4001 
4002     /**
4003      * Returns a string whose value is this string, with incidental
4004      * {@linkplain Character#isWhitespace(int) white space} removed from
4005      * the beginning and end of every line.
4006      * <p>
4007      * Incidental {@linkplain Character#isWhitespace(int) white space}
4008      * is often present in a text block to align the content with the opening
4009      * delimiter. For example, in the following code, dots represent incidental
4010      * {@linkplain Character#isWhitespace(int) white space}:
4011      * <blockquote><pre>
4012      * String html = """
4013      * ..............&lt;html&gt;
4014      * ..............    &lt;body&gt;
4015      * ..............        &lt;p&gt;Hello, world&lt;/p&gt;
4016      * ..............    &lt;/body&gt;
4017      * ..............&lt;/html&gt;
4018      * ..............""";
4019      * </pre></blockquote>
4020      * This method treats the incidental
4021      * {@linkplain Character#isWhitespace(int) white space} as indentation to be
4022      * stripped, producing a string that preserves the relative indentation of
4023      * the content. Using | to visualize the start of each line of the string:
4024      * <blockquote><pre>
4025      * |&lt;html&gt;
4026      * |    &lt;body&gt;
4027      * |        &lt;p&gt;Hello, world&lt;/p&gt;
4028      * |    &lt;/body&gt;
4029      * |&lt;/html&gt;
4030      * </pre></blockquote>
4031      * First, the individual lines of this string are extracted. A <i>line</i>
4032      * is a sequence of zero or more characters followed by either a line
4033      * terminator or the end of the string.
4034      * If the string has at least one line terminator, the last line consists
4035      * of the characters between the last terminator and the end of the string.
4036      * Otherwise, if the string has no terminators, the last line is the start
4037      * of the string to the end of the string, in other words, the entire
4038      * string.
4039      * A line does not include the line terminator.
4040      * <p>
4041      * Then, the <i>minimum indentation</i> (min) is determined as follows:
4042      * <ul>
4043      *   <li><p>For each non-blank line (as defined by {@link String#isBlank()}),
4044      *   the leading {@linkplain Character#isWhitespace(int) white space}
4045      *   characters are counted.</p>
4046      *   </li>
4047      *   <li><p>The leading {@linkplain Character#isWhitespace(int) white space}
4048      *   characters on the last line are also counted even if
4049      *   {@linkplain String#isBlank() blank}.</p>
4050      *   </li>
4051      * </ul>
4052      * <p>The <i>min</i> value is the smallest of these counts.
4053      * <p>
4054      * For each {@linkplain String#isBlank() non-blank} line, <i>min</i> leading
4055      * {@linkplain Character#isWhitespace(int) white space} characters are
4056      * removed, and any trailing {@linkplain Character#isWhitespace(int) white
4057      * space} characters are removed. {@linkplain String#isBlank() Blank} lines
4058      * are replaced with the empty string.
4059      *
4060      * <p>
4061      * Finally, the lines are joined into a new string, using the LF character
4062      * {@code "\n"} (U+000A) to separate lines.
4063      *
4064      * @apiNote
4065      * This method's primary purpose is to shift a block of lines as far as
4066      * possible to the left, while preserving relative indentation. Lines
4067      * that were indented the least will thus have no leading
4068      * {@linkplain Character#isWhitespace(int) white space}.
4069      * The result will have the same number of line terminators as this string.
4070      * If this string ends with a line terminator then the result will end
4071      * with a line terminator.
4072      *
4073      * @implSpec
4074      * This method treats all {@linkplain Character#isWhitespace(int) white space}
4075      * characters as having equal width. As long as the indentation on every
4076      * line is consistently composed of the same character sequences, then the
4077      * result will be as described above.
4078      *
4079      * @return string with incidental indentation removed and line
4080      *         terminators normalized
4081      *
4082      * @see String#lines()
4083      * @see String#isBlank()
4084      * @see String#indent(int)
4085      * @see Character#isWhitespace(int)
4086      *
4087      * @since 15
4088      *
4089      */
4090     public String stripIndent() {
4091         int length = length();
4092         if (length == 0) {
4093             return "";
4094         }
4095         char lastChar = charAt(length - 1);
4096         boolean optOut = lastChar == '\n' || lastChar == '\r';
4097         List<String> lines = lines().toList();
4098         final int outdent = optOut ? 0 : outdent(lines);
4099         return lines.stream()
4100             .map(line -> {
4101                 int firstNonWhitespace = line.indexOfNonWhitespace();
4102                 int lastNonWhitespace = line.lastIndexOfNonWhitespace();
4103                 int incidentalWhitespace = Math.min(outdent, firstNonWhitespace);
4104                 return firstNonWhitespace > lastNonWhitespace
4105                     ? "" : line.substring(incidentalWhitespace, lastNonWhitespace);
4106             })
4107             .collect(Collectors.joining("\n", "", optOut ? "\n" : ""));
4108     }
4109 
4110     private static int outdent(List<String> lines) {
4111         // Note: outdent is guaranteed to be zero or positive number.
4112         // If there isn't a non-blank line then the last must be blank
4113         int outdent = Integer.MAX_VALUE;
4114         for (String line : lines) {
4115             int leadingWhitespace = line.indexOfNonWhitespace();
4116             if (leadingWhitespace != line.length()) {
4117                 outdent = Integer.min(outdent, leadingWhitespace);
4118             }
4119         }
4120         String lastLine = lines.get(lines.size() - 1);
4121         if (lastLine.isBlank()) {
4122             outdent = Integer.min(outdent, lastLine.length());
4123         }
4124         return outdent;
4125     }
4126 
4127     /**
4128      * Returns a string whose value is this string, with escape sequences
4129      * translated as if in a string literal.
4130      * <p>
     * Escape sequences are translated as follows:
4132      * <table class="striped">
4133      *   <caption style="display:none">Translation</caption>
4134      *   <thead>
4135      *   <tr>
4136      *     <th scope="col">Escape</th>
4137      *     <th scope="col">Name</th>
4138      *     <th scope="col">Translation</th>
4139      *   </tr>
4140      *   </thead>
4141      *   <tbody>
4142      *   <tr>
4143      *     <th scope="row">{@code \u005Cb}</th>
4144      *     <td>backspace</td>
4145      *     <td>{@code U+0008}</td>
4146      *   </tr>
4147      *   <tr>
4148      *     <th scope="row">{@code \u005Ct}</th>
4149      *     <td>horizontal tab</td>
4150      *     <td>{@code U+0009}</td>
4151      *   </tr>
4152      *   <tr>
4153      *     <th scope="row">{@code \u005Cn}</th>
4154      *     <td>line feed</td>
4155      *     <td>{@code U+000A}</td>
4156      *   </tr>
4157      *   <tr>
4158      *     <th scope="row">{@code \u005Cf}</th>
4159      *     <td>form feed</td>
4160      *     <td>{@code U+000C}</td>
4161      *   </tr>
4162      *   <tr>
4163      *     <th scope="row">{@code \u005Cr}</th>
4164      *     <td>carriage return</td>
4165      *     <td>{@code U+000D}</td>
4166      *   </tr>
4167      *   <tr>
4168      *     <th scope="row">{@code \u005Cs}</th>
4169      *     <td>space</td>
4170      *     <td>{@code U+0020}</td>
4171      *   </tr>
4172      *   <tr>
4173      *     <th scope="row">{@code \u005C"}</th>
4174      *     <td>double quote</td>
4175      *     <td>{@code U+0022}</td>
4176      *   </tr>
4177      *   <tr>
4178      *     <th scope="row">{@code \u005C'}</th>
4179      *     <td>single quote</td>
4180      *     <td>{@code U+0027}</td>
4181      *   </tr>
4182      *   <tr>
4183      *     <th scope="row">{@code \u005C\u005C}</th>
4184      *     <td>backslash</td>
4185      *     <td>{@code U+005C}</td>
4186      *   </tr>
4187      *   <tr>
4188      *     <th scope="row">{@code \u005C0 - \u005C377}</th>
4189      *     <td>octal escape</td>
4190      *     <td>code point equivalents</td>
4191      *   </tr>
4192      *   <tr>
4193      *     <th scope="row">{@code \u005C<line-terminator>}</th>
4194      *     <td>continuation</td>
4195      *     <td>discard</td>
4196      *   </tr>
4197      *   </tbody>
4198      * </table>
4199      *
4200      * @implNote
4201      * This method does <em>not</em> translate Unicode escapes such as "{@code \u005cu2022}".
4202      * Unicode escapes are translated by the Java compiler when reading input characters and
4203      * are not part of the string literal specification.
4204      *
4205      * @throws IllegalArgumentException when an escape sequence is malformed.
4206      *
4207      * @return String with escape sequences translated.
4208      *
4209      * @jls 3.10.7 Escape Sequences
4210      *
4211      * @since 15
4212      */
4213     public String translateEscapes() {
4214         if (isEmpty()) {
4215             return "";
4216         }
4217         char[] chars = toCharArray();
4218         int length = chars.length;
4219         int from = 0;
4220         int to = 0;
4221         while (from < length) {
4222             char ch = chars[from++];
4223             if (ch == '\\') {
4224                 ch = from < length ? chars[from++] : '\0';
4225                 switch (ch) {
4226                 case 'b':
4227                     ch = '\b';
4228                     break;
4229                 case 'f':
4230                     ch = '\f';
4231                     break;
4232                 case 'n':
4233                     ch = '\n';
4234                     break;
4235                 case 'r':
4236                     ch = '\r';
4237                     break;
4238                 case 's':
4239                     ch = ' ';
4240                     break;
4241                 case 't':
4242                     ch = '\t';
4243                     break;
4244                 case '\'':
4245                 case '\"':
4246                 case '\\':
4247                     // as is
4248                     break;
4249                 case '0': case '1': case '2': case '3':
4250                 case '4': case '5': case '6': case '7':
4251                     int limit = Integer.min(from + (ch <= '3' ? 2 : 1), length);
4252                     int code = ch - '0';
4253                     while (from < limit) {
4254                         ch = chars[from];
4255                         if (ch < '0' || '7' < ch) {
4256                             break;
4257                         }
4258                         from++;
4259                         code = (code << 3) | (ch - '0');
4260                     }
4261                     ch = (char)code;
4262                     break;
4263                 case '\n':
4264                     continue;
4265                 case '\r':
4266                     if (from < length && chars[from] == '\n') {
4267                         from++;
4268                     }
4269                     continue;
4270                 default: {
4271                     String msg = String.format(
4272                         "Invalid escape sequence: \\%c \\\\u%04X",
4273                         ch, (int)ch);
4274                     throw new IllegalArgumentException(msg);
4275                 }
4276                 }
4277             }
4278 
4279             chars[to++] = ch;
4280         }
4281 
4282         return new String(chars, 0, to);
4283     }
4284 
4285     /**
4286      * This method allows the application of a function to {@code this}
4287      * string. The function should expect a single String argument
4288      * and produce an {@code R} result.
4289      * <p>
4290      * Any exception thrown by {@code f.apply()} will be propagated to the
4291      * caller.
4292      *
4293      * @param f    a function to apply
4294      *
4295      * @param <R>  the type of the result
4296      *
4297      * @return     the result of applying the function to this string
4298      *
4299      * @see java.util.function.Function
4300      *
4301      * @since 12
4302      */
4303     public <R> R transform(Function<? super String, ? extends R> f) {
4304         return f.apply(this);
4305     }
4306 
4307     /**
4308      * This object (which is already a string!) is itself returned.
4309      *
4310      * @return  the string itself.
4311      */
4312     public String toString() {
4313         return this;
4314     }
4315 
4316     /**
4317      * Returns a stream of {@code int} zero-extending the {@code char} values
4318      * from this sequence.  Any char which maps to a {@linkplain
4319      * Character##unicode surrogate code point} is passed through
4320      * uninterpreted.
4321      *
4322      * @return an IntStream of char values from this sequence
4323      * @since 9
4324      */
4325     @Override
4326     public IntStream chars() {
4327         return StreamSupport.intStream(
4328             isLatin1() ? new StringLatin1.CharsSpliterator(value, Spliterator.IMMUTABLE)
4329                        : new StringUTF16.CharsSpliterator(value, Spliterator.IMMUTABLE),
4330             false);
4331     }
4332 
4333 
4334     /**
4335      * Returns a stream of code point values from this sequence.  Any surrogate
4336      * pairs encountered in the sequence are combined as if by {@linkplain
4337      * Character#toCodePoint Character.toCodePoint} and the result is passed
4338      * to the stream. Any other code units, including ordinary BMP characters,
4339      * unpaired surrogates, and undefined code units, are zero-extended to
4340      * {@code int} values which are then passed to the stream.
4341      *
4342      * @return an IntStream of Unicode code points from this sequence
4343      * @since 9
4344      */
4345     @Override
4346     public IntStream codePoints() {
4347         return StreamSupport.intStream(
4348             isLatin1() ? new StringLatin1.CharsSpliterator(value, Spliterator.IMMUTABLE)
4349                        : new StringUTF16.CodePointsSpliterator(value, Spliterator.IMMUTABLE),
4350             false);
4351     }
4352 
4353     /**
4354      * Converts this string to a new character array.
4355      *
4356      * @return  a newly allocated character array whose length is the length
4357      *          of this string and whose contents are initialized to contain
4358      *          the character sequence represented by this string.
4359      */
4360     public char[] toCharArray() {
4361         return isLatin1() ? StringLatin1.toChars(value)
4362                           : StringUTF16.toChars(value);
4363     }
4364 
4365     /**
4366      * Returns a formatted string using the specified format string and
4367      * arguments.
4368      *
4369      * <p> The locale always used is the one returned by {@link
4370      * java.util.Locale#getDefault(java.util.Locale.Category)
4371      * Locale.getDefault(Locale.Category)} with
4372      * {@link java.util.Locale.Category#FORMAT FORMAT} category specified.
4373      *
4374      * @param  format
4375      *         A <a href="../util/Formatter.html#syntax">format string</a>
4376      *
4377      * @param  args
4378      *         Arguments referenced by the format specifiers in the format
4379      *         string.  If there are more arguments than format specifiers, the
4380      *         extra arguments are ignored.  The number of arguments is
4381      *         variable and may be zero.  The maximum number of arguments is
4382      *         limited by the maximum dimension of a Java array as defined by
4383      *         <cite>The Java Virtual Machine Specification</cite>.
4384      *         The behaviour on a
4385      *         {@code null} argument depends on the <a
4386      *         href="../util/Formatter.html#syntax">conversion</a>.
4387      *
4388      * @throws  java.util.IllegalFormatException
4389      *          If a format string contains an illegal syntax, a format
4390      *          specifier that is incompatible with the given arguments,
4391      *          insufficient arguments given the format string, or other
4392      *          illegal conditions.  For specification of all possible
4393      *          formatting errors, see the <a
4394      *          href="../util/Formatter.html#detail">Details</a> section of the
4395      *          formatter class specification.
4396      *
4397      * @return  A formatted string
4398      *
4399      * @see  java.util.Formatter
4400      * @since  1.5
4401      */
4402     public static String format(String format, Object... args) {
4403         return new Formatter().format(format, args).toString();
4404     }
4405 
4406     /**
4407      * Returns a formatted string using the specified locale, format string,
4408      * and arguments.
4409      *
4410      * @param  l
4411      *         The {@linkplain java.util.Locale locale} to apply during
4412      *         formatting.  If {@code l} is {@code null} then no localization
4413      *         is applied.
4414      *
4415      * @param  format
4416      *         A <a href="../util/Formatter.html#syntax">format string</a>
4417      *
4418      * @param  args
4419      *         Arguments referenced by the format specifiers in the format
4420      *         string.  If there are more arguments than format specifiers, the
4421      *         extra arguments are ignored.  The number of arguments is
4422      *         variable and may be zero.  The maximum number of arguments is
4423      *         limited by the maximum dimension of a Java array as defined by
4424      *         <cite>The Java Virtual Machine Specification</cite>.
4425      *         The behaviour on a
4426      *         {@code null} argument depends on the
4427      *         <a href="../util/Formatter.html#syntax">conversion</a>.
4428      *
4429      * @throws  java.util.IllegalFormatException
4430      *          If a format string contains an illegal syntax, a format
4431      *          specifier that is incompatible with the given arguments,
4432      *          insufficient arguments given the format string, or other
4433      *          illegal conditions.  For specification of all possible
4434      *          formatting errors, see the <a
4435      *          href="../util/Formatter.html#detail">Details</a> section of the
     *          formatter class specification.
4437      *
4438      * @return  A formatted string
4439      *
4440      * @see  java.util.Formatter
4441      * @since  1.5
4442      */
4443     public static String format(Locale l, String format, Object... args) {
4444         return new Formatter(l).format(format, args).toString();
4445     }
4446 
4447     /**
4448      * Formats using this string as the format string, and the supplied
4449      * arguments.
4450      *
4451      * @implSpec This method is equivalent to {@code String.format(this, args)}.
4452      *
4453      * @param  args
4454      *         Arguments referenced by the format specifiers in this string.
4455      *
4456      * @return  A formatted string
4457      *
4458      * @see  java.lang.String#format(String,Object...)
4459      * @see  java.util.Formatter
4460      *
4461      * @since 15
4462      *
4463      */
4464     public String formatted(Object... args) {
4465         return new Formatter().format(this, args).toString();
4466     }
4467 
4468     /**
4469      * Returns the string representation of the {@code Object} argument.
4470      *
4471      * @param   obj   an {@code Object}.
4472      * @return  if the argument is {@code null}, then a string equal to
4473      *          {@code "null"}; otherwise, the value of
4474      *          {@code obj.toString()} is returned.
4475      * @see     java.lang.Object#toString()
4476      */
4477     public static String valueOf(Object obj) {
4478         return (obj == null) ? "null" : obj.toString();
4479     }
4480 
4481     /**
4482      * Returns the string representation of the {@code char} array
4483      * argument. The contents of the character array are copied; subsequent
4484      * modification of the character array does not affect the returned
4485      * string.
4486      *
4487      * @param   data     the character array.
4488      * @return  a {@code String} that contains the characters of the
4489      *          character array.
4490      */
4491     public static String valueOf(char[] data) {
4492         return new String(data);
4493     }
4494 
4495     /**
4496      * Returns the string representation of a specific subarray of the
4497      * {@code char} array argument.
4498      * <p>
4499      * The {@code offset} argument is the index of the first
4500      * character of the subarray. The {@code count} argument
4501      * specifies the length of the subarray. The contents of the subarray
4502      * are copied; subsequent modification of the character array does not
4503      * affect the returned string.
4504      *
4505      * @param   data     the character array.
4506      * @param   offset   initial offset of the subarray.
4507      * @param   count    length of the subarray.
4508      * @return  a {@code String} that contains the characters of the
4509      *          specified subarray of the character array.
4510      * @throws    IndexOutOfBoundsException if {@code offset} is
4511      *          negative, or {@code count} is negative, or
4512      *          {@code offset+count} is larger than
4513      *          {@code data.length}.
4514      */
4515     public static String valueOf(char[] data, int offset, int count) {
4516         return new String(data, offset, count);
4517     }
4518 
4519     /**
4520      * Equivalent to {@link #valueOf(char[], int, int)}.
4521      *
4522      * @param   data     the character array.
4523      * @param   offset   initial offset of the subarray.
4524      * @param   count    length of the subarray.
4525      * @return  a {@code String} that contains the characters of the
4526      *          specified subarray of the character array.
4527      * @throws    IndexOutOfBoundsException if {@code offset} is
4528      *          negative, or {@code count} is negative, or
4529      *          {@code offset+count} is larger than
4530      *          {@code data.length}.
4531      */
4532     public static String copyValueOf(char[] data, int offset, int count) {
4533         return new String(data, offset, count);
4534     }
4535 
4536     /**
4537      * Equivalent to {@link #valueOf(char[])}.
4538      *
4539      * @param   data   the character array.
4540      * @return  a {@code String} that contains the characters of the
4541      *          character array.
4542      */
4543     public static String copyValueOf(char[] data) {
4544         return new String(data);
4545     }
4546 
4547     /**
4548      * Returns the string representation of the {@code boolean} argument.
4549      *
4550      * @param   b   a {@code boolean}.
4551      * @return  if the argument is {@code true}, a string equal to
4552      *          {@code "true"} is returned; otherwise, a string equal to
4553      *          {@code "false"} is returned.
4554      */
4555     public static String valueOf(boolean b) {
4556         return b ? "true" : "false";
4557     }
4558 
4559     /**
4560      * Returns the string representation of the {@code char}
4561      * argument.
4562      *
4563      * @param   c   a {@code char}.
4564      * @return  a string of length {@code 1} containing
4565      *          as its single character the argument {@code c}.
4566      */
4567     public static String valueOf(char c) {
4568         if (COMPACT_STRINGS && StringLatin1.canEncode(c)) {
4569             return new String(StringLatin1.toBytes(c), LATIN1);
4570         }
4571         return new String(StringUTF16.toBytes(c), UTF16);
4572     }
4573 
4574     /**
4575      * Returns the string representation of the {@code int} argument.
4576      * <p>
4577      * The representation is exactly the one returned by the
4578      * {@code Integer.toString} method of one argument.
4579      *
4580      * @param   i   an {@code int}.
4581      * @return  a string representation of the {@code int} argument.
     * @see     java.lang.Integer#toString(int)
4583      */
4584     public static String valueOf(int i) {
4585         return Integer.toString(i);
4586     }
4587 
4588     /**
4589      * Returns the string representation of the {@code long} argument.
4590      * <p>
4591      * The representation is exactly the one returned by the
4592      * {@code Long.toString} method of one argument.
4593      *
4594      * @param   l   a {@code long}.
4595      * @return  a string representation of the {@code long} argument.
4596      * @see     java.lang.Long#toString(long)
4597      */
4598     public static String valueOf(long l) {
4599         return Long.toString(l);
4600     }
4601 
4602     /**
4603      * Returns the string representation of the {@code float} argument.
4604      * <p>
4605      * The representation is exactly the one returned by the
4606      * {@code Float.toString} method of one argument.
4607      *
4608      * @param   f   a {@code float}.
4609      * @return  a string representation of the {@code float} argument.
4610      * @see     java.lang.Float#toString(float)
4611      */
4612     public static String valueOf(float f) {
4613         return Float.toString(f);
4614     }
4615 
4616     /**
4617      * Returns the string representation of the {@code double} argument.
4618      * <p>
4619      * The representation is exactly the one returned by the
4620      * {@code Double.toString} method of one argument.
4621      *
4622      * @param   d   a {@code double}.
     * @return  a string representation of the {@code double} argument.
4624      * @see     java.lang.Double#toString(double)
4625      */
4626     public static String valueOf(double d) {
4627         return Double.toString(d);
4628     }
4629 
4630     /**
4631      * Returns a canonical representation for the string object.
4632      * <p>
4633      * A pool of strings, initially empty, is maintained privately by the
4634      * class {@code String}.
4635      * <p>
4636      * When the intern method is invoked, if the pool already contains a
4637      * string equal to this {@code String} object as determined by
4638      * the {@link #equals(Object)} method, then the string from the pool is
4639      * returned. Otherwise, this {@code String} object is added to the
4640      * pool and a reference to this {@code String} object is returned.
4641      * <p>
4642      * It follows that for any two strings {@code s} and {@code t},
4643      * {@code s.intern() == t.intern()} is {@code true}
4644      * if and only if {@code s.equals(t)} is {@code true}.
4645      * <p>
4646      * All literal strings and string-valued constant expressions are
     * interned. String literals are defined in section {@jls 3.10.5} of
     * <cite>The Java Language Specification</cite>.
4649      *
4650      * @return  a string that has the same contents as this string, but is
4651      *          guaranteed to be from a pool of unique strings.
4652      */
    // Declared native: there is no Java body here; the implementation lives outside this file.
    public native String intern();
4654 
4655     /**
4656      * Returns a string whose value is the concatenation of this
4657      * string repeated {@code count} times.
4658      * <p>
4659      * If this string is empty or count is zero then the empty
4660      * string is returned.
4661      *
4662      * @param   count number of times to repeat
4663      *
4664      * @return  A string composed of this string repeated
4665      *          {@code count} times or the empty string if this
4666      *          string is empty or count is zero
4667      *
4668      * @throws  IllegalArgumentException if the {@code count} is
4669      *          negative.
4670      *
4671      * @since 11
4672      */
4673     public String repeat(int count) {
4674         if (count < 0) {
4675             throw new IllegalArgumentException("count is negative: " + count);
4676         }
4677         if (count == 1) {
4678             return this;
4679         }
4680         final int len = value.length;
4681         if (len == 0 || count == 0) {
4682             return "";
4683         }
4684         if (Integer.MAX_VALUE / count < len) {
4685             throw new OutOfMemoryError("Required length exceeds implementation limit");
4686         }
4687         if (len == 1) {
4688             final byte[] single = new byte[count];
4689             Arrays.fill(single, value[0]);
4690             return new String(single, coder);
4691         }
4692         final int limit = len * count;
4693         final byte[] multiple = new byte[limit];
4694         System.arraycopy(value, 0, multiple, 0, len);
4695         repeatCopyRest(multiple, 0, limit, len);
4696         return new String(multiple, coder);
4697     }
4698 
4699     /**
4700      * Used to perform copying after the initial insertion. Copying is optimized
4701      * by using power of two duplication. First pass duplicates original copy,
4702      * second pass then duplicates the original and the copy yielding four copies,
4703      * third pass duplicates four copies yielding eight copies, and so on.
4704      * Finally, the remainder is filled in with prior copies.
4705      *
4706      * @implNote The technique used here is significantly faster than hand-rolled
4707      * loops or special casing small numbers due to the intensive optimization
4708      * done by intrinsic {@code System.arraycopy}.
4709      *
4710      * @param buffer    destination buffer
4711      * @param offset    offset in the destination buffer
4712      * @param limit     total replicated including what is already in the buffer
     * @param copied    number of bytes that are already in the buffer
4714      */
4715     static void repeatCopyRest(byte[] buffer, int offset, int limit, int copied) {
4716         // Initial copy is in the buffer.
4717         for (; copied < limit - copied; copied <<= 1) {
4718             // Power of two duplicate.
4719             System.arraycopy(buffer, offset, buffer, offset + copied, copied);
4720         }
4721         // Duplicate remainder.
4722         System.arraycopy(buffer, offset, buffer, offset + copied, limit - copied);
4723     }
4724 
4725     ////////////////////////////////////////////////////////////////
4726 
4727     /**
4728      * Copy character bytes from this string into dst starting at dstBegin.
4729      * This method doesn't perform any range checking.
4730      *
4731      * Invoker guarantees: dst is in UTF16 (inflate itself for asb), if two
4732      * coders are different, and dst is big enough (range check)
4733      *
4734      * @param dstBegin  the char index, not offset of byte[]
4735      * @param coder     the coder of dst[]
4736      */
4737     void getBytes(byte[] dst, int dstBegin, byte coder) {
4738         if (coder() == coder) {
4739             System.arraycopy(value, 0, dst, dstBegin << coder, value.length);
4740         } else {    // this.coder == LATIN && coder == UTF16
4741             StringLatin1.inflate(value, 0, dst, dstBegin, value.length);
4742         }
4743     }
4744 
4745     /**
4746      * Copy character bytes from this string into dst starting at dstBegin.
4747      * This method doesn't perform any range checking.
4748      *
4749      * Invoker guarantees: dst is in UTF16 (inflate itself for asb), if two
4750      * coders are different, and dst is big enough (range check)
4751      *
4752      * @param srcPos    the char index, not offset of byte[]
4753      * @param dstBegin  the char index to start from
4754      * @param coder     the coder of dst[]
4755      * @param length    the amount of copied chars
4756      */
4757     void getBytes(byte[] dst, int srcPos, int dstBegin, byte coder, int length) {
4758         if (coder() == coder) {
4759             System.arraycopy(value, srcPos << coder, dst, dstBegin << coder, length << coder);
4760         } else {    // this.coder == LATIN && coder == UTF16
4761             StringLatin1.inflate(value, srcPos, dst, dstBegin, length);
4762         }
4763     }
4764 
4765     /*
4766      * Package private constructor. Trailing Void argument is there for
4767      * disambiguating it against other (public) constructors.
4768      *
     * Stores the char[] value into a byte[] in which each byte holds
     * the 8 low-order bits of the corresponding character, if the char[]
     * contains only latin1 characters. Otherwise stores a byte[] holding all
     * characters in their byte sequences defined by the {@code StringUTF16}.
4773      */
4774     String(char[] value, int off, int len, Void sig) {
4775         if (len == 0) {
4776             this.value = "".value;
4777             this.coder = "".coder;
4778             return;
4779         }
4780         if (COMPACT_STRINGS) {
4781             byte[] val = StringUTF16.compress(value, off, len);
4782             if (val != null) {
4783                 this.value = val;
4784                 this.coder = LATIN1;
4785                 return;
4786             }
4787         }
4788         this.coder = UTF16;
4789         this.value = StringUTF16.toBytes(value, off, len);
4790     }
4791 
4792     /*
4793      * Package private constructor. Trailing Void argument is there for
4794      * disambiguating it against other (public) constructors.
4795      */
4796     String(AbstractStringBuilder asb, Void sig) {
4797         byte[] val = asb.getValue();
4798         int length = asb.length();
4799         if (asb.isLatin1()) {
4800             this.coder = LATIN1;
4801             this.value = Arrays.copyOfRange(val, 0, length);
4802         } else {
4803             // only try to compress val if some characters were deleted.
4804             if (COMPACT_STRINGS && asb.maybeLatin1) {
4805                 byte[] buf = StringUTF16.compress(val, 0, length);
4806                 if (buf != null) {
4807                     this.coder = LATIN1;
4808                     this.value = buf;
4809                     return;
4810                 }
4811             }
4812             this.coder = UTF16;
4813             this.value = Arrays.copyOfRange(val, 0, length << 1);
4814         }
4815     }
4816 
4817    /*
4818     * Package private constructor which shares value array for speed.
4819     */
4820     String(byte[] value, byte coder) {
4821         this.value = value;
4822         this.coder = coder;
4823     }
4824 
4825     byte coder() {
4826         return COMPACT_STRINGS ? coder : UTF16;
4827     }
4828 
    byte[] value() {
        // Hands out the backing array itself, not a copy.
        return value;
    }
4832 
4833     boolean isLatin1() {
4834         return COMPACT_STRINGS && coder == LATIN1;
4835     }
4836 
    // Coder tag for content stored one byte per char.
    @Native static final byte LATIN1 = 0;
    // Coder tag for content stored two bytes per char; the value doubles as the
    // shift count that turns a char index into a byte offset.
    @Native static final byte UTF16  = 1;
4839 
4840     /*
4841      * StringIndexOutOfBoundsException  if {@code index} is
4842      * negative or greater than or equal to {@code length}.
4843      */
    static void checkIndex(int index, int length) {
        // Delegates to the shared Preconditions helper, supplying SIOOBE_FORMATTER
        // as the exception formatter; nothing is returned to the caller.
        Preconditions.checkIndex(index, length, Preconditions.SIOOBE_FORMATTER);
    }
4847 
4848     /*
4849      * StringIndexOutOfBoundsException  if {@code offset}
4850      * is negative or greater than {@code length}.
4851      */
    static void checkOffset(int offset, int length) {
        // Expressed as a from/to range check with both the "to" index and the
        // upper bound set to length, so offset == length is accepted.
        Preconditions.checkFromToIndex(offset, length, length, Preconditions.SIOOBE_FORMATTER);
    }
4855 
4856     /*
4857      * Check {@code offset}, {@code count} against {@code 0} and {@code length}
4858      * bounds.
4859      *
     * @return  {@code offset} if the sub-range is within bounds of the range
4861      * @throws  StringIndexOutOfBoundsException
4862      *          If {@code offset} is negative, {@code count} is negative,
4863      *          or {@code offset} is greater than {@code length - count}
4864      */
4865     static int checkBoundsOffCount(int offset, int count, int length) {
4866         return Preconditions.checkFromIndexSize(offset, count, length, Preconditions.SIOOBE_FORMATTER);
4867     }
4868 
4869     /*
4870      * Check {@code begin}, {@code end} against {@code 0} and {@code length}
4871      * bounds.
4872      *
4873      * @throws  StringIndexOutOfBoundsException
4874      *          If {@code begin} is negative, {@code begin} is greater than
4875      *          {@code end}, or {@code end} is greater than {@code length}.
4876      */
    static void checkBoundsBeginEnd(int begin, int end, int length) {
        // Delegates to the shared Preconditions helper, supplying SIOOBE_FORMATTER
        // as the exception formatter; nothing is returned to the caller.
        Preconditions.checkFromToIndex(begin, end, length, Preconditions.SIOOBE_FORMATTER);
    }
4880 
4881     /**
4882      * Returns the string representation of the {@code codePoint}
4883      * argument.
4884      *
4885      * @param   codePoint a {@code codePoint}.
4886      * @return  a string of length {@code 1} or {@code 2} containing
4887      *          as its single character the argument {@code codePoint}.
4888      * @throws IllegalArgumentException if the specified
4889      *          {@code codePoint} is not a {@linkplain Character#isValidCodePoint
4890      *          valid Unicode code point}.
4891      */
4892     static String valueOfCodePoint(int codePoint) {
4893         if (COMPACT_STRINGS && StringLatin1.canEncode(codePoint)) {
4894             return new String(StringLatin1.toBytes((char)codePoint), LATIN1);
4895         } else if (Character.isBmpCodePoint(codePoint)) {
4896             return new String(StringUTF16.toBytes((char)codePoint), UTF16);
4897         } else if (Character.isSupplementaryCodePoint(codePoint)) {
4898             return new String(StringUTF16.toBytesSupplementary(codePoint), UTF16);
4899         }
4900 
4901         throw new IllegalArgumentException(
4902             format("Not a valid Unicode code point: 0x%X", codePoint));
4903     }
4904 
4905     /**
4906      * Returns an {@link Optional} containing the nominal descriptor for this
4907      * instance, which is the instance itself.
4908      *
4909      * @return an {@link Optional} describing the {@linkplain String} instance
4910      * @since 12
4911      */
4912     @Override
4913     public Optional<String> describeConstable() {
4914         return Optional.of(this);
4915     }
4916 
4917     /**
4918      * Resolves this instance as a {@link ConstantDesc}, the result of which is
4919      * the instance itself.
4920      *
4921      * @param lookup ignored
4922      * @return the {@linkplain String} instance
4923      * @since 12
4924      */
    @Override
    public String resolveConstantDesc(MethodHandles.Lookup lookup) {
        // lookup is never read; this instance is returned unchanged.
        return this;
    }
4929 
4930 }