1 /*
   2  * Copyright (c) 1994, 2023, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package java.lang;
  27 
  28 import java.io.ObjectStreamField;
  29 import java.io.UnsupportedEncodingException;
  30 import java.lang.annotation.Native;
  31 import java.lang.invoke.MethodHandles;
  32 import java.lang.constant.Constable;
  33 import java.lang.constant.ConstantDesc;
  34 import java.nio.ByteBuffer;
  35 import java.nio.CharBuffer;
  36 import java.nio.charset.*;
  37 import java.util.ArrayList;
  38 import java.util.Arrays;
  39 import java.util.Comparator;
  40 import java.util.Formatter;
  41 import java.util.List;
  42 import java.util.Locale;
  43 import java.util.Objects;
  44 import java.util.Optional;
  45 import java.util.Spliterator;
  46 import java.util.function.Function;
  47 import java.util.regex.Pattern;
  48 import java.util.regex.PatternSyntaxException;
  49 import java.util.stream.Collectors;
  50 import java.util.stream.IntStream;
  51 import java.util.stream.Stream;
  52 import java.util.stream.StreamSupport;
  53 
  54 import jdk.internal.util.ArraysSupport;
  55 import jdk.internal.util.Preconditions;
  56 import jdk.internal.vm.annotation.ForceInline;
  57 import jdk.internal.vm.annotation.IntrinsicCandidate;
  58 import jdk.internal.vm.annotation.Stable;
  59 import sun.nio.cs.ArrayDecoder;
  60 import sun.nio.cs.ArrayEncoder;
  61 
  62 import sun.nio.cs.ISO_8859_1;
  63 import sun.nio.cs.US_ASCII;
  64 import sun.nio.cs.UTF_8;
  65 
  66 /**
  67  * The {@code String} class represents character strings. All
  68  * string literals in Java programs, such as {@code "abc"}, are
  69  * implemented as instances of this class.
  70  * <p>
  71  * Strings are constant; their values cannot be changed after they
  72  * are created. String buffers support mutable strings.
  73  * Because String objects are immutable they can be shared. For example:
  74  * <blockquote><pre>
  75  *     String str = "abc";
  76  * </pre></blockquote><p>
  77  * is equivalent to:
  78  * <blockquote><pre>
  79  *     char data[] = {'a', 'b', 'c'};
  80  *     String str = new String(data);
  81  * </pre></blockquote><p>
  82  * Here are some more examples of how strings can be used:
  83  * <blockquote><pre>
  84  *     System.out.println("abc");
  85  *     String cde = "cde";
  86  *     System.out.println("abc" + cde);
  87  *     String c = "abc".substring(2, 3);
  88  *     String d = cde.substring(1, 2);
  89  * </pre></blockquote>
  90  * <p>
  91  * The class {@code String} includes methods for examining
  92  * individual characters of the sequence, for comparing strings, for
  93  * searching strings, for extracting substrings, and for creating a
  94  * copy of a string with all characters translated to uppercase or to
  95  * lowercase. Case mapping is based on the Unicode Standard version
  96  * specified by the {@link java.lang.Character Character} class.
  97  * <p>
  98  * The Java language provides special support for the string
  99  * concatenation operator (&nbsp;+&nbsp;), and for conversion of
 100  * other objects to strings. For additional information on string
 101  * concatenation and conversion, see <i>The Java Language Specification</i>.
 102  *
 103  * <p> Unless otherwise noted, passing a {@code null} argument to a constructor
 104  * or method in this class will cause a {@link NullPointerException} to be
 105  * thrown.
 106  *
 107  * <p>A {@code String} represents a string in the UTF-16 format
 108  * in which <em>supplementary characters</em> are represented by <em>surrogate
 109  * pairs</em> (see the section <a href="Character.html#unicode">Unicode
 110  * Character Representations</a> in the {@code Character} class for
 111  * more information).
 112  * Index values refer to {@code char} code units, so a supplementary
 113  * character uses two positions in a {@code String}.
 114  * <p>The {@code String} class provides methods for dealing with
 115  * Unicode code points (i.e., characters), in addition to those for
 116  * dealing with Unicode code units (i.e., {@code char} values).
 117  *
 118  * <p>Unless otherwise noted, methods for comparing Strings do not take locale
 119  * into account.  The {@link java.text.Collator} class provides methods for
 120  * finer-grain, locale-sensitive String comparison.
 121  *
 122  * @implNote The implementation of the string concatenation operator is left to
 123  * the discretion of a Java compiler, as long as the compiler ultimately conforms
 124  * to <i>The Java Language Specification</i>. For example, the {@code javac} compiler
 125  * may implement the operator with {@code StringBuffer}, {@code StringBuilder},
 126  * or {@code java.lang.invoke.StringConcatFactory} depending on the JDK version. The
 127  * implementation of string conversion is typically through the method {@code toString},
 128  * defined by {@code Object} and inherited by all classes in Java.
 129  *
 130  * @author  Lee Boynton
 131  * @author  Arthur van Hoff
 132  * @author  Martin Buchholz
 133  * @author  Ulf Zibis
 134  * @see     java.lang.Object#toString()
 135  * @see     java.lang.StringBuffer
 136  * @see     java.lang.StringBuilder
 137  * @see     java.nio.charset.Charset
 138  * @since   1.0
 139  * @jls     15.18.1 String Concatenation Operator +
 140  */
 141 
 142 public final class String
 143     implements java.io.Serializable, Comparable<String>, CharSequence,
 144                Constable, ConstantDesc {
 145 
 146     /**
 147      * The bytes used for character storage (see {@code coder} for their encoding).
 148      *
 149      * @implNote This field is trusted by the VM, and is subject to
 150      * constant folding if the String instance is constant. Overwriting this
 151      * field after construction will cause problems.
 152      *
 153      * Additionally, it is marked with {@link Stable} to trust the contents
 154      * of the array. No other facility in the JDK provides this functionality (yet).
 155      * {@link Stable} is safe here, because value is never null.
 156      */
 157     @Stable
 158     private final byte[] value;
 159 
 160     /**
 161      * The identifier of the encoding used to encode the bytes in
 162      * {@code value}. The supported values in this implementation are:
 163      *
 164      * LATIN1
 165      * UTF16
 166      *
 167      * @implNote This field is trusted by the VM, and is subject to
 168      * constant folding if the String instance is constant. Overwriting this
 169      * field after construction will cause problems.
 170      */
 171     private final byte coder;
 172 
 173     /** Caches the hash code for the string. A value of 0 is ambiguous on its own; see {@code hashIsZero}. */
 174     private int hash; // Default to 0
 175 
 176     /**
 177      * Caches whether the hash has been calculated and is actually zero, enabling
 178      * us to avoid recalculating it when the cached {@code hash} value is 0.
 179      */
 180     private boolean hashIsZero; // Default to false
 181 
 182     /** Use the serialVersionUID from JDK 1.0.2 for interoperability. */
 183     @java.io.Serial
 184     private static final long serialVersionUID = -6849794470754667710L;
 185 
 186     /**
 187      * If String compaction is disabled, the bytes in {@code value} are
 188      * always encoded in UTF16.
 189      *
 190      * For methods with several possible implementation paths, when String
 191      * compaction is disabled, only one code path is taken.
 192      *
 193      * The instance field value is generally opaque to optimizing JIT
 194      * compilers. Therefore, in performance-sensitive places, an explicit
 195      * check of the static boolean {@code COMPACT_STRINGS} is done first
 196      * before checking the {@code coder} field since the static boolean
 197      * {@code COMPACT_STRINGS} would be constant folded away by an
 198      * optimizing JIT compiler. The idioms for these cases are as follows.
 199      *
 200      * Code such as:
 201      *
 202      *    if (coder == LATIN1) { ... }
 203      *
 204      * can be written more optimally as:
 205      *
 206      *    if (coder() == LATIN1) { ... }
 207      *
 208      * or:
 209      *
 210      *    if (COMPACT_STRINGS && coder == LATIN1) { ... }
 211      *
 212      * An optimizing JIT compiler can fold the above conditional as:
 213      *
 214      *    COMPACT_STRINGS == true  => if (coder == LATIN1) { ... }
 215      *    COMPACT_STRINGS == false => if (false)           { ... }
 216      *
 217      * @implNote
 218      * The actual value for this field is injected by the JVM. The static
 219      * initialization block is used to set the value here to communicate
 220      * that this static final field is not statically foldable, and to
 221      * avoid any possible circular dependency during VM initialization.
 222      */
 223     static final boolean COMPACT_STRINGS;
 224 
 225     static {
             // Deliberately assigned here rather than at the declaration; see the
             // @implNote above. Do not fold this into an initializer expression.
 226         COMPACT_STRINGS = true;
 227     }
 228 
 229     /**
 230      * Class String is special cased within the Serialization Stream Protocol,
 231      * so this array of serializable fields is declared empty.
 232      *
 233      * A String instance is written into an ObjectOutputStream according to
 233      * <a href="{@docRoot}/../specs/serialization/protocol.html#stream-elements">
 234      * <cite>Java Object Serialization Specification</cite>, Section 6.2, "Stream Elements"</a>.
 235      */
 236     @java.io.Serial
 237     private static final ObjectStreamField[] serialPersistentFields =
 238         new ObjectStreamField[0];
 239 
 240     /**
 241      * Initializes a newly created {@code String} object so that it represents
 242      * an empty character sequence.  Note that use of this constructor is
 243      * unnecessary since Strings are immutable.
 244      */
 245     public String() {
 246         this.value = "".value;
 247         this.coder = "".coder;
 248     }
 249 
 250     /**
 251      * Initializes a newly created {@code String} object so that it represents
 252      * the same sequence of characters as the argument; in other words, the
 253      * newly created string is a copy of the argument string. Unless an
 254      * explicit copy of {@code original} is needed, use of this constructor is
 255      * unnecessary since Strings are immutable.
 256      *
 257      * @param  original
 258      *         A {@code String}
 259      */
 260     @IntrinsicCandidate
 261     public String(String original) {
 262         this.value = original.value;
 263         this.coder = original.coder;
 264         this.hash = original.hash;
 265         this.hashIsZero = original.hashIsZero;
 266     }
 267 
 268     /**
 269      * Allocates a new {@code String} so that it represents the sequence of
 270      * characters currently contained in the character array argument. The
 271      * contents of the character array are copied; subsequent modification of
 272      * the character array does not affect the newly created string.
 273      *
 274      * @param  value
 275      *         The initial value of the string
 276      */
 277     public String(char[] value) {
             // Whole-array case: delegate with offset 0 and the full length. The last
             // argument is null here; the ranged (char[], int, int) constructor passes
             // the result of rangeCheck(...) in that position instead.
 278         this(value, 0, value.length, null);
 279     }
 280 
 281     /**
 282      * Allocates a new {@code String} that contains characters from a subarray
 283      * of the character array argument. The {@code offset} argument is the
 284      * index of the first character of the subarray and the {@code count}
 285      * argument specifies the length of the subarray. The contents of the
 286      * subarray are copied; subsequent modification of the character array does
 287      * not affect the newly created string.
 288      *
 289      * @param  value
 290      *         Array that is the source of characters
 291      *
 292      * @param  offset
 293      *         The initial offset
 294      *
 295      * @param  count
 296      *         The length
 297      *
 298      * @throws  IndexOutOfBoundsException
 299      *          If {@code offset} is negative, {@code count} is negative, or
 300      *          {@code offset} is greater than {@code value.length - count}
 301      */
 302     public String(char[] value, int offset, int count) {
             // rangeCheck(...) is evaluated as an argument expression, so the bounds
             // are validated before the delegated constructor's body runs. Its result
             // is always null.
 303         this(value, offset, count, rangeCheck(value, offset, count));
 304     }
 305 
 306     private static Void rangeCheck(char[] value, int offset, int count) {
             // Validates that [offset, offset + count) lies within value. Declared to
             // return Void (always null) so that the call can be used as an argument
             // expression inside a this(...) constructor invocation.
 307         checkBoundsOffCount(offset, count, value.length);
 308         return null;
 309     }
 310 
 311     /**
 312      * Allocates a new {@code String} that contains characters from a subarray
 313      * of the <a href="Character.html#unicode">Unicode code point</a> array
 314      * argument.  The {@code offset} argument is the index of the first code
 315      * point of the subarray and the {@code count} argument specifies the
 316      * length of the subarray.  The contents of the subarray are converted to
 317      * {@code char}s; subsequent modification of the {@code int} array does not
 318      * affect the newly created string.
 319      *
 320      * @param  codePoints
 321      *         Array that is the source of Unicode code points
 322      *
 323      * @param  offset
 324      *         The initial offset
 325      *
 326      * @param  count
 327      *         The length
 328      *
 329      * @throws  IllegalArgumentException
 330      *          If any invalid Unicode code point is found in {@code
 331      *          codePoints}
 332      *
 333      * @throws  IndexOutOfBoundsException
 334      *          If {@code offset} is negative, {@code count} is negative, or
 335      *          {@code offset} is greater than {@code codePoints.length - count}
 336      *
 337      * @since  1.5
 338      */
 339     public String(int[] codePoints, int offset, int count) {
 340         checkBoundsOffCount(offset, count, codePoints.length);
 341         if (count == 0) {
 342             this.value = "".value;
 343             this.coder = "".coder;
 344             return;
 345         }
 346         if (COMPACT_STRINGS) {
 347             byte[] val = StringLatin1.toBytes(codePoints, offset, count);
 348             if (val != null) {
 349                 this.coder = LATIN1;
 350                 this.value = val;
 351                 return;
 352             }
 353         }
 354         this.coder = UTF16;
 355         this.value = StringUTF16.toBytes(codePoints, offset, count);
 356     }
 357 
 358     /**
 359      * Allocates a new {@code String} constructed from a subarray of an array
 360      * of 8-bit integer values.
 361      *
 362      * <p> The {@code offset} argument is the index of the first byte of the
 363      * subarray, and the {@code count} argument specifies the length of the
 364      * subarray.
 365      *
 366      * <p> Each {@code byte} in the subarray is converted to a {@code char} as
 367      * specified in the {@link #String(byte[],int) String(byte[],int)} constructor.
 368      *
 369      * @deprecated This method does not properly convert bytes into characters.
 370      * As of JDK&nbsp;1.1, the preferred way to do this is via the
 371      * {@code String} constructors that take a {@link Charset}, charset name,
 372      * or that use the {@link Charset#defaultCharset() default charset}.
 373      *
 374      * @param  ascii
 375      *         The bytes to be converted to characters
 376      *
 377      * @param  hibyte
 378      *         The top 8 bits of each 16-bit Unicode code unit
 379      *
 380      * @param  offset
 381      *         The initial offset
 382      * @param  count
 383      *         The length
 384      *
 385      * @throws  IndexOutOfBoundsException
 386      *          If {@code offset} is negative, {@code count} is negative, or
 387      *          {@code offset} is greater than {@code ascii.length - count}
 388      *
 389      * @see  #String(byte[], int)
 390      * @see  #String(byte[], int, int, java.lang.String)
 391      * @see  #String(byte[], int, int, java.nio.charset.Charset)
 392      * @see  #String(byte[], int, int)
 393      * @see  #String(byte[], java.lang.String)
 394      * @see  #String(byte[], java.nio.charset.Charset)
 395      * @see  #String(byte[])
 396      */
 397     @Deprecated(since="1.1")
 398     public String(byte[] ascii, int hibyte, int offset, int count) {
 399         checkBoundsOffCount(offset, count, ascii.length);
 400         if (count == 0) {
 401             this.value = "".value;
 402             this.coder = "".coder;
 403             return;
 404         }
 405         if (COMPACT_STRINGS && (byte)hibyte == 0) {
 406             this.value = Arrays.copyOfRange(ascii, offset, offset + count);
 407             this.coder = LATIN1;
 408         } else {
 409             hibyte <<= 8;
 410             byte[] val = StringUTF16.newBytesFor(count);
 411             for (int i = 0; i < count; i++) {
 412                 StringUTF16.putChar(val, i, hibyte | (ascii[offset++] & 0xff));
 413             }
 414             this.value = val;
 415             this.coder = UTF16;
 416         }
 417     }
 418 
 419     /**
 420      * Allocates a new {@code String} containing characters constructed from
 421      * an array of 8-bit integer values. Each character <i>c</i> in the
 422      * resulting string is constructed from the corresponding component
 423      * <i>b</i> in the byte array such that:
 424      *
 425      * <blockquote><pre>
 426      *     <b><i>c</i></b> == (char)(((hibyte &amp; 0xff) &lt;&lt; 8)
 427      *                         | (<b><i>b</i></b> &amp; 0xff))
 428      * </pre></blockquote>
 429      *
 430      * @deprecated  This method does not properly convert bytes into
 431      * characters.  As of JDK&nbsp;1.1, the preferred way to do this is via the
 432      * {@code String} constructors that take a {@link Charset}, charset name,
 433      * or that use the {@link Charset#defaultCharset() default charset}.
 434      *
 435      * @param  ascii
 436      *         The bytes to be converted to characters
 437      *
 438      * @param  hibyte
 439      *         The top 8 bits of each 16-bit Unicode code unit
 440      *
 441      * @see  #String(byte[], int, int, java.lang.String)
 442      * @see  #String(byte[], int, int, java.nio.charset.Charset)
 443      * @see  #String(byte[], int, int)
 444      * @see  #String(byte[], java.lang.String)
 445      * @see  #String(byte[], java.nio.charset.Charset)
 446      * @see  #String(byte[])
 447      */
 448     @Deprecated(since="1.1")
 449     public String(byte[] ascii, int hibyte) {
             // Whole-array form of the deprecated (byte[], int, int, int) constructor.
 450         this(ascii, hibyte, 0, ascii.length);
 451     }
 452 
 453     /**
 454      * Constructs a new {@code String} by decoding the specified subarray of
 455      * bytes using the specified charset.  The length of the new {@code String}
 456      * is a function of the charset, and hence may not be equal to the length
 457      * of the subarray.
 458      *
 459      * <p> The behavior of this constructor when the given bytes are not valid
 460      * in the given charset is unspecified.  The {@link
 461      * java.nio.charset.CharsetDecoder} class should be used when more control
 462      * over the decoding process is required.
 463      *
 464      * @param  bytes
 465      *         The bytes to be decoded into characters
 466      *
 467      * @param  offset
 468      *         The index of the first byte to decode
 469      *
 470      * @param  length
 471      *         The number of bytes to decode
 472      *
 473      * @param  charsetName
 474      *         The name of a supported {@linkplain java.nio.charset.Charset
 475      *         charset}
 476      *
 477      * @throws  UnsupportedEncodingException
 478      *          If the named charset is not supported
 479      *
 480      * @throws  IndexOutOfBoundsException
 481      *          If {@code offset} is negative, {@code length} is negative, or
 482      *          {@code offset} is greater than {@code bytes.length - length}
 483      *
 484      * @since  1.1
 485      */
 486     public String(byte[] bytes, int offset, int length, String charsetName)
 487             throws UnsupportedEncodingException {
             // Arguments are evaluated left to right: the charset name is looked up
             // first, then the range is checked against bytes.length. The result of
             // checkBoundsOffCount is passed on as the offset -- presumably it returns
             // the validated offset; confirm against its definition.
 488         this(lookupCharset(charsetName), bytes, checkBoundsOffCount(offset, length, bytes.length), length);
 489     }
 490 
 491     /**
 492      * Constructs a new {@code String} by decoding the specified subarray of
 493      * bytes using the specified {@linkplain java.nio.charset.Charset charset}.
 494      * The length of the new {@code String} is a function of the charset, and
 495      * hence may not be equal to the length of the subarray.
 496      *
 497      * <p> This method always replaces malformed-input and unmappable-character
 498      * sequences with this charset's default replacement string.  The {@link
 499      * java.nio.charset.CharsetDecoder} class should be used when more control
 500      * over the decoding process is required.
 501      *
 502      * @param  bytes
 503      *         The bytes to be decoded into characters
 504      *
 505      * @param  offset
 506      *         The index of the first byte to decode
 507      *
 508      * @param  length
 509      *         The number of bytes to decode
 510      *
 511      * @param  charset
 512      *         The {@linkplain java.nio.charset.Charset charset} to be used to
 513      *         decode the {@code bytes}
 514      *
 515      * @throws  IndexOutOfBoundsException
 516      *          If {@code offset} is negative, {@code length} is negative, or
 517      *          {@code offset} is greater than {@code bytes.length - length}
 518      *
 519      * @since  1.6
 520      */
 521     public String(byte[] bytes, int offset, int length, Charset charset) {
             // Arguments are evaluated left to right: the charset is null-checked
             // first, then the range is checked against bytes.length. The result of
             // checkBoundsOffCount is passed on as the offset -- presumably it returns
             // the validated offset; confirm against its definition.
 522         this(Objects.requireNonNull(charset), bytes, checkBoundsOffCount(offset, length, bytes.length), length);
 523     }
 524 
 525     /**
          * Decodes {@code length} bytes of {@code bytes}, starting at {@code offset},
          * using {@code charset}. UTF-8, ISO-8859-1 and US-ASCII are handled by
          * dedicated branches; any other charset goes through its
          * {@code CharsetDecoder}.
          * <p>
 526      * This method does not do any precondition checks on its arguments.
 527      * <p>
 528      * Important: parameter order of this method is deliberately changed in order to
 529      * disambiguate it against other similar methods of this class.
 530      */
 531     @SuppressWarnings("removal") // presumably for System.getSecurityManager() below -- confirm
 532     private String(Charset charset, byte[] bytes, int offset, int length) {
 533         if (length == 0) { // nothing to decode: adopt the empty literal's storage
 534             this.value = "".value;
 535             this.coder = "".coder;
 536         } else if (charset == UTF_8.INSTANCE) {
 537             if (COMPACT_STRINGS) {
                     // dp doubles as "bytes accepted so far" and as the write index into dst.
                     // countPositives presumably returns the length of the leading run of
                     // non-negative bytes -- confirm against StringCoding.
 538                 int dp = StringCoding.countPositives(bytes, offset, length);
 539                 if (dp == length) { // every byte accepted: the input is the Latin-1 content
 540                     this.value = Arrays.copyOfRange(bytes, offset, offset + length);
 541                     this.coder = LATIN1;
 542                     return;
 543                 }
 544                 int sl = offset + length; // exclusive end index of the source range
 545                 byte[] dst = new byte[length];
 546                 if (dp > 0) { // carry over the prefix already accepted above
 547                     System.arraycopy(bytes, offset, dst, 0, dp);
 548                     offset += dp;
 549                 }
                     // Keep producing one output byte per char for as long as the input
                     // decodes to Latin-1.
 550                 while (offset < sl) {
 551                     int b1 = bytes[offset++];
 552                     if (b1 >= 0) { // single-byte sequence: copied through unchanged
 553                         dst[dp++] = (byte)b1;
 554                         continue;
 555                     }
 556                     if ((b1 & 0xfe) == 0xc2 && offset < sl) { // b1 either 0xc2 or 0xc3
 557                         int b2 = bytes[offset];
 558                         if (b2 < -64) { // continuation bytes are always negative values in the range -128 to -65
 559                             dst[dp++] = (byte)decode2(b1, b2); // two-byte sequence stored as one byte
 560                             offset++;
 561                             continue;
 562                         }
 563                     }
 564                     // Anything that is not Latin-1, including input that would decode to REPL:
 565                     // we have to go with UTF-16. Step back so b1 is decoded again below.
 566                     offset--;
 567                     break;
 568                 }
 569                 if (offset == sl) { // whole input consumed by the loop above
 570                     if (dp != dst.length) { // two-byte sequences made the result shorter than the input
 571                         dst = Arrays.copyOf(dst, dp);
 572                     }
 573                     this.value = dst;
 574                     this.coder = LATIN1;
 575                     return;
 576                 }
                     // Switch to UTF-16: widen the dp chars decoded so far into a buffer with
                     // two bytes per input byte, then decode the remainder from offset.
 577                 byte[] buf = new byte[length << 1];
 578                 StringLatin1.inflate(dst, 0, buf, 0, dp);
 579                 dst = buf;
                     // The trailing true presumably selects replacement of malformed input --
                     // confirm against decodeUTF8_UTF16.
 580                 dp = decodeUTF8_UTF16(bytes, offset, sl, dst, dp, true);
 581                 if (dp != length) { // trim to dp chars (two bytes each)
 582                     dst = Arrays.copyOf(dst, dp << 1);
 583                 }
 584                 this.value = dst;
 585                 this.coder = UTF16;
 586             } else { // !COMPACT_STRINGS
                     // Always UTF-16: decode straight into a buffer of two bytes per input byte.
 587                 byte[] dst = new byte[length << 1];
 588                 int dp = decodeUTF8_UTF16(bytes, offset, offset + length, dst, 0, true);
 589                 if (dp != length) { // trim to dp chars (two bytes each)
 590                     dst = Arrays.copyOf(dst, dp << 1);
 591                 }
 592                 this.value = dst;
 593                 this.coder = UTF16;
 594             }
 595         } else if (charset == ISO_8859_1.INSTANCE) {
                 // The input bytes are used as the Latin-1 content as-is, or inflated
                 // when compaction is disabled.
 596             if (COMPACT_STRINGS) {
 597                 this.value = Arrays.copyOfRange(bytes, offset, offset + length);
 598                 this.coder = LATIN1;
 599             } else {
 600                 this.value = StringLatin1.inflate(bytes, offset, length);
 601                 this.coder = UTF16;
 602             }
 603         } else if (charset == US_ASCII.INSTANCE) {
 604             if (COMPACT_STRINGS && !StringCoding.hasNegatives(bytes, offset, length)) {
 605                 this.value = Arrays.copyOfRange(bytes, offset, offset + length);
 606                 this.coder = LATIN1;
 607             } else {
                     // Two bytes per char; every negative byte is replaced by REPL.
 608                 byte[] dst = new byte[length << 1];
 609                 int dp = 0;
 610                 while (dp < length) {
 611                     int b = bytes[offset++];
 612                     StringUTF16.putChar(dst, dp++, (b >= 0) ? (char) b : REPL);
 613                 }
 614                 this.value = dst;
 615                 this.coder = UTF16;
 616             }
 617         } else {
 618             // (1)We never cache the "external" cs, the only benefit of creating
 619             // an additional StringDe/Encoder object to wrap it is to share the
 620             // de/encode() method. These SD/E objects are short-lived, the young-gen
 621             // gc should be able to take care of them well. But the best approach
 622             // is still not to generate them if not really necessary.
 623             // (2)The defensive copy of the input byte/char[] has a big performance
 624             // impact, as well as the outgoing result byte/char[]. Need to do the
 625             // optimization check of (sm==null && classLoader0==null) for both.
                 // NOTE(review): the code below copies only when BOTH the class loader and
                 // the security manager are non-null, which is not the exact complement of
                 // the condition quoted in (2) -- confirm which is intended.
 626             CharsetDecoder cd = charset.newDecoder();
 627             // ArrayDecoder fastpaths
 628             if (cd instanceof ArrayDecoder ad) {
 629                 // ASCII-compatible decoder and no negative bytes: the input is used as-is
 630                 if (ad.isASCIICompatible() && !StringCoding.hasNegatives(bytes, offset, length)) {
 631                     if (COMPACT_STRINGS) {
 632                         this.value = Arrays.copyOfRange(bytes, offset, offset + length);
 633                         this.coder = LATIN1;
 634                         return;
 635                     }
 636                     this.value = StringLatin1.inflate(bytes, offset, length);
 637                     this.coder = UTF16;
 638                     return;
 639                 }
 640 
 641                 // fastpath for always Latin1 decodable single byte
 642                 if (COMPACT_STRINGS && ad.isLatin1Decodable()) {
 643                     byte[] dst = new byte[length];
 644                     ad.decodeToLatin1(bytes, offset, length, dst);
 645                     this.value = dst;
 646                     this.coder = LATIN1;
 647                     return;
 648                 }
 649 
                     // General ArrayDecoder path: decode into a char[] sized by scale(...)
                     // (presumably an upper bound on the output length), replacing bad input.
 650                 int en = scale(length, cd.maxCharsPerByte());
 651                 cd.onMalformedInput(CodingErrorAction.REPLACE)
 652                         .onUnmappableCharacter(CodingErrorAction.REPLACE);
 653                 char[] ca = new char[en];
 654                 int clen = ad.decode(bytes, offset, length, ca);
 655                 if (COMPACT_STRINGS) {
 656                     byte[] bs = StringUTF16.compress(ca, 0, clen);
 657                     if (bs != null) { // null means the chars could not be compressed
 658                         value = bs;
 659                         coder = LATIN1;
 660                         return;
 661                     }
 662                 }
 663                 coder = UTF16;
 664                 value = StringUTF16.toBytes(ca, 0, clen);
 665                 return;
 666             }
 667 
 668             // decode using CharsetDecoder
 669             int en = scale(length, cd.maxCharsPerByte());
 670             cd.onMalformedInput(CodingErrorAction.REPLACE)
 671                     .onUnmappableCharacter(CodingErrorAction.REPLACE);
 672             char[] ca = new char[en];
                 // Hand the decoder a private copy of the range when the charset's class has
                 // a non-null class loader and a security manager is installed.
 673             if (charset.getClass().getClassLoader0() != null &&
 674                     System.getSecurityManager() != null) {
 675                 bytes = Arrays.copyOfRange(bytes, offset, offset + length);
 676                 offset = 0;
 677             }
 678 
 679             int caLen;
 680             try {
 681                 caLen = decodeWithDecoder(cd, ca, bytes, offset, length);
 682             } catch (CharacterCodingException x) {
 683                 // Substitution is enabled, so this shouldn't happen
 684                 throw new Error(x);
 685             }
 686             if (COMPACT_STRINGS) {
 687                 byte[] bs = StringUTF16.compress(ca, 0, caLen);
 688                 if (bs != null) { // null means the chars could not be compressed
 689                     value = bs;
 690                     coder = LATIN1;
 691                     return;
 692                 }
 693             }
 694             coder = UTF16;
 695             value = StringUTF16.toBytes(ca, 0, caLen);
 696         }
 697     }
 698 
 699     /*
 700      * Throws IllegalArgumentException, instead of replacing, if the input is malformed or unmappable.
 701      *
 702      * @param  noShare
 703      *         {@code true} if the resulting string MUST NOT share the byte array,
 704      *         {@code false} if the byte array can be exclusively used to construct
 705      *         the string and is not modified or used for any other purpose.
 706      */
 707     static String newStringUTF8NoRepl(byte[] bytes, int offset, int length, boolean noShare) {
 708         checkBoundsOffCount(offset, length, bytes.length);
 709         if (length == 0) {
 710             return "";
 711         }
 712         int dp;
 713         byte[] dst;
 714         if (COMPACT_STRINGS) {
 715             dp = StringCoding.countPositives(bytes, offset, length);
 716             int sl = offset + length;
 717             if (dp == length) {
 718                 if (noShare || length != bytes.length) {
 719                     return new String(Arrays.copyOfRange(bytes, offset, offset + length), LATIN1);
 720                 } else {
 721                     return new String(bytes, LATIN1);
 722                 }
 723             }
 724             dst = new byte[length];
 725             System.arraycopy(bytes, offset, dst, 0, dp);
 726             offset += dp;
 727             while (offset < sl) {
 728                 int b1 = bytes[offset++];
 729                 if (b1 >= 0) {
 730                     dst[dp++] = (byte)b1;
 731                     continue;
 732                 }
 733                 if ((b1 & 0xfe) == 0xc2 && offset < sl) { // b1 either 0xc2 or 0xc3
 734                     int b2 = bytes[offset];
 735                     if (b2 < -64) { // continuation bytes are always negative values in the range -128 to -65
 736                         dst[dp++] = (byte)decode2(b1, b2);
 737                         offset++;
 738                         continue;
 739                     }
 740                 }
 741                 // anything not a latin1, including the REPL
 742                 // we have to go with the utf16
 743                 offset--;
 744                 break;
 745             }
 746             if (offset == sl) {
 747                 if (dp != dst.length) {
 748                     dst = Arrays.copyOf(dst, dp);
 749                 }
 750                 return new String(dst, LATIN1);
 751             }
 752             if (dp == 0) {
 753                 dst = new byte[length << 1];
 754             } else {
 755                 byte[] buf = new byte[length << 1];
 756                 StringLatin1.inflate(dst, 0, buf, 0, dp);
 757                 dst = buf;
 758             }
 759             dp = decodeUTF8_UTF16(bytes, offset, sl, dst, dp, false);
 760         } else { // !COMPACT_STRINGS
 761             dst = new byte[length << 1];
 762             dp = decodeUTF8_UTF16(bytes, offset, offset + length, dst, 0, false);
 763         }
 764         if (dp != length) {
 765             dst = Arrays.copyOf(dst, dp << 1);
 766         }
 767         return new String(dst, UTF16);
 768     }
 769 
 770     static String newStringNoRepl(byte[] src, Charset cs) throws CharacterCodingException {
 771         try {
 772             return newStringNoRepl1(src, cs);
 773         } catch (IllegalArgumentException e) {
 774             //newStringNoRepl1 throws IAE with MalformedInputException or CCE as the cause
 775             Throwable cause = e.getCause();
 776             if (cause instanceof MalformedInputException mie) {
 777                 throw mie;
 778             }
 779             throw (CharacterCodingException)cause;
 780         }
 781     }
 782 
 783     @SuppressWarnings("removal")
 784     private static String newStringNoRepl1(byte[] src, Charset cs) {
 785         int len = src.length;
 786         if (len == 0) {
 787             return "";
 788         }
 789         if (cs == UTF_8.INSTANCE) {
 790             return newStringUTF8NoRepl(src, 0, src.length, false);
 791         }
 792         if (cs == ISO_8859_1.INSTANCE) {
 793             if (COMPACT_STRINGS)
 794                 return new String(src, LATIN1);
 795             return new String(StringLatin1.inflate(src, 0, src.length), UTF16);
 796         }
 797         if (cs == US_ASCII.INSTANCE) {
 798             if (!StringCoding.hasNegatives(src, 0, src.length)) {
 799                 if (COMPACT_STRINGS)
 800                     return new String(src, LATIN1);
 801                 return new String(StringLatin1.inflate(src, 0, src.length), UTF16);
 802             } else {
 803                 throwMalformed(src);
 804             }
 805         }
 806 
 807         CharsetDecoder cd = cs.newDecoder();
 808         // ascii fastpath
 809         if (cd instanceof ArrayDecoder ad &&
 810                 ad.isASCIICompatible() &&
 811                 !StringCoding.hasNegatives(src, 0, src.length)) {
 812             if (COMPACT_STRINGS)
 813                 return new String(src, LATIN1);
 814             return new String(src, 0, src.length, ISO_8859_1.INSTANCE);
 815         }
 816         int en = scale(len, cd.maxCharsPerByte());
 817         char[] ca = new char[en];
 818         if (cs.getClass().getClassLoader0() != null &&
 819                 System.getSecurityManager() != null) {
 820             src = Arrays.copyOf(src, len);
 821         }
 822         int caLen;
 823         try {
 824             caLen = decodeWithDecoder(cd, ca, src, 0, src.length);
 825         } catch (CharacterCodingException x) {
 826             // throw via IAE
 827             throw new IllegalArgumentException(x);
 828         }
 829         if (COMPACT_STRINGS) {
 830             byte[] bs = StringUTF16.compress(ca, 0, caLen);
 831             if (bs != null) {
 832                 return new String(bs, LATIN1);
 833             }
 834         }
 835         return new String(StringUTF16.toBytes(ca, 0, caLen), UTF16);
 836     }
 837 
    // Replacement character (U+FFFD) that decodeUTF8_UTF16 stores in place of
    // malformed input when replacement is enabled.
    private static final char REPL = '\ufffd';
 839 
 840     // Trim the given byte array to the given length
 841     @SuppressWarnings("removal")
 842     private static byte[] safeTrim(byte[] ba, int len, boolean isTrusted) {
 843         if (len == ba.length && (isTrusted || System.getSecurityManager() == null)) {
 844             return ba;
 845         } else {
 846             return Arrays.copyOf(ba, len);
 847         }
 848     }
 849 
 850     private static int scale(int len, float expansionFactor) {
 851         // We need to perform double, not float, arithmetic; otherwise
 852         // we lose low order bits when len is larger than 2**24.
 853         return (int)(len * (double)expansionFactor);
 854     }
 855 
 856     private static Charset lookupCharset(String csn) throws UnsupportedEncodingException {
 857         Objects.requireNonNull(csn);
 858         try {
 859             return Charset.forName(csn);
 860         } catch (UnsupportedCharsetException | IllegalCharsetNameException x) {
 861             throw new UnsupportedEncodingException(csn);
 862         }
 863     }
 864 
 865     private static byte[] encode(Charset cs, byte coder, byte[] val) {
 866         if (cs == UTF_8.INSTANCE) {
 867             return encodeUTF8(coder, val, true);
 868         }
 869         if (cs == ISO_8859_1.INSTANCE) {
 870             return encode8859_1(coder, val);
 871         }
 872         if (cs == US_ASCII.INSTANCE) {
 873             return encodeASCII(coder, val);
 874         }
 875         return encodeWithEncoder(cs, coder, val, true);
 876     }
 877 
 878     private static byte[] encodeWithEncoder(Charset cs, byte coder, byte[] val, boolean doReplace) {
 879         CharsetEncoder ce = cs.newEncoder();
 880         int len = val.length >> coder;  // assume LATIN1=0/UTF16=1;
 881         int en = scale(len, ce.maxBytesPerChar());
 882         // fastpath with ArrayEncoder implies `doReplace`.
 883         if (doReplace && ce instanceof ArrayEncoder ae) {
 884             // fastpath for ascii compatible
 885             if (coder == LATIN1 &&
 886                     ae.isASCIICompatible() &&
 887                     !StringCoding.hasNegatives(val, 0, val.length)) {
 888                 return val.clone();
 889             }
 890             byte[] ba = new byte[en];
 891             if (len == 0) {
 892                 return ba;
 893             }
 894 
 895             int blen = (coder == LATIN1) ? ae.encodeFromLatin1(val, 0, len, ba)
 896                     : ae.encodeFromUTF16(val, 0, len, ba);
 897             if (blen != -1) {
 898                 return safeTrim(ba, blen, true);
 899             }
 900         }
 901 
 902         byte[] ba = new byte[en];
 903         if (len == 0) {
 904             return ba;
 905         }
 906         if (doReplace) {
 907             ce.onMalformedInput(CodingErrorAction.REPLACE)
 908                     .onUnmappableCharacter(CodingErrorAction.REPLACE);
 909         }
 910         char[] ca = (coder == LATIN1 ) ? StringLatin1.toChars(val)
 911                 : StringUTF16.toChars(val);
 912         ByteBuffer bb = ByteBuffer.wrap(ba);
 913         CharBuffer cb = CharBuffer.wrap(ca, 0, len);
 914         try {
 915             CoderResult cr = ce.encode(cb, bb, true);
 916             if (!cr.isUnderflow())
 917                 cr.throwException();
 918             cr = ce.flush(bb);
 919             if (!cr.isUnderflow())
 920                 cr.throwException();
 921         } catch (CharacterCodingException x) {
 922             if (!doReplace) {
 923                 throw new IllegalArgumentException(x);
 924             } else {
 925                 throw new Error(x);
 926             }
 927         }
 928         return safeTrim(ba, bb.position(), cs.getClass().getClassLoader0() == null);
 929     }
 930 
 931     /*
 932      * Throws iae, instead of replacing, if unmappable.
 933      */
 934     static byte[] getBytesUTF8NoRepl(String s) {
 935         return encodeUTF8(s.coder(), s.value(), false);
 936     }
 937 
 938     private static boolean isASCII(byte[] src) {
 939         return !StringCoding.hasNegatives(src, 0, src.length);
 940     }
 941 
 942     /*
 943      * Throws CCE, instead of replacing, if unmappable.
 944      */
 945     static byte[] getBytesNoRepl(String s, Charset cs) throws CharacterCodingException {
 946         try {
 947             return getBytesNoRepl1(s, cs);
 948         } catch (IllegalArgumentException e) {
 949             //getBytesNoRepl1 throws IAE with UnmappableCharacterException or CCE as the cause
 950             Throwable cause = e.getCause();
 951             if (cause instanceof UnmappableCharacterException) {
 952                 throw (UnmappableCharacterException)cause;
 953             }
 954             throw (CharacterCodingException)cause;
 955         }
 956     }
 957 
 958     private static byte[] getBytesNoRepl1(String s, Charset cs) {
 959         byte[] val = s.value();
 960         byte coder = s.coder();
 961         if (cs == UTF_8.INSTANCE) {
 962             if (coder == LATIN1 && isASCII(val)) {
 963                 return val;
 964             }
 965             return encodeUTF8(coder, val, false);
 966         }
 967         if (cs == ISO_8859_1.INSTANCE) {
 968             if (coder == LATIN1) {
 969                 return val;
 970             }
 971             return encode8859_1(coder, val, false);
 972         }
 973         if (cs == US_ASCII.INSTANCE) {
 974             if (coder == LATIN1) {
 975                 if (isASCII(val)) {
 976                     return val;
 977                 } else {
 978                     throwUnmappable(val);
 979                 }
 980             }
 981         }
 982         return encodeWithEncoder(cs, coder, val, false);
 983     }
 984 
 985     private static byte[] encodeASCII(byte coder, byte[] val) {
 986         if (coder == LATIN1) {
 987             int positives = StringCoding.countPositives(val, 0, val.length);
 988             byte[] dst = val.clone();
 989             if (positives < dst.length) {
 990                 replaceNegatives(dst, positives);
 991             }
 992             return dst;
 993         }
 994         int len = val.length >> 1;
 995         byte[] dst = new byte[len];
 996         int dp = 0;
 997         for (int i = 0; i < len; i++) {
 998             char c = StringUTF16.getChar(val, i);
 999             if (c < 0x80) {
1000                 dst[dp++] = (byte)c;
1001                 continue;
1002             }
1003             if (Character.isHighSurrogate(c) && i + 1 < len &&
1004                     Character.isLowSurrogate(StringUTF16.getChar(val, i + 1))) {
1005                 i++;
1006             }
1007             dst[dp++] = '?';
1008         }
1009         if (len == dp) {
1010             return dst;
1011         }
1012         return Arrays.copyOf(dst, dp);
1013     }
1014 
1015     private static void replaceNegatives(byte[] val, int fromIndex) {
1016         for (int i = fromIndex; i < val.length; i++) {
1017             if (val[i] < 0) {
1018                 val[i] = '?';
1019             }
1020         }
1021     }
1022 
1023     private static byte[] encode8859_1(byte coder, byte[] val) {
1024         return encode8859_1(coder, val, true);
1025     }
1026 
1027     private static byte[] encode8859_1(byte coder, byte[] val, boolean doReplace) {
1028         if (coder == LATIN1) {
1029             return val.clone();
1030         }
1031         int len = val.length >> 1;
1032         byte[] dst = new byte[len];
1033         int dp = 0;
1034         int sp = 0;
1035         int sl = len;
1036         while (sp < sl) {
1037             int ret = StringCoding.implEncodeISOArray(val, sp, dst, dp, len);
1038             sp = sp + ret;
1039             dp = dp + ret;
1040             if (ret != len) {
1041                 if (!doReplace) {
1042                     throwUnmappable(sp);
1043                 }
1044                 char c = StringUTF16.getChar(val, sp++);
1045                 if (Character.isHighSurrogate(c) && sp < sl &&
1046                         Character.isLowSurrogate(StringUTF16.getChar(val, sp))) {
1047                     sp++;
1048                 }
1049                 dst[dp++] = '?';
1050                 len = sl - sp;
1051             }
1052         }
1053         if (dp == dst.length) {
1054             return dst;
1055         }
1056         return Arrays.copyOf(dst, dp);
1057     }
1058 
1059     //////////////////////////////// utf8 ////////////////////////////////////
1060 
1061     /**
1062      * Decodes ASCII from the source byte array into the destination
1063      * char array. Used via JavaLangAccess from UTF_8 and other charset
1064      * decoders.
1065      *
1066      * @return the number of bytes successfully decoded, at most len
1067      */
1068     /* package-private */
1069     static int decodeASCII(byte[] sa, int sp, char[] da, int dp, int len) {
1070         int count = StringCoding.countPositives(sa, sp, len);
1071         while (count < len) {
1072             if (sa[sp + count] < 0) {
1073                 break;
1074             }
1075             count++;
1076         }
1077         StringLatin1.inflate(sa, sp, da, dp, count);
1078         return count;
1079     }
1080 
1081     private static boolean isNotContinuation(int b) {
1082         return (b & 0xc0) != 0x80;
1083     }
1084 
1085     private static boolean isMalformed3(int b1, int b2, int b3) {
1086         return (b1 == (byte)0xe0 && (b2 & 0xe0) == 0x80) ||
1087                 (b2 & 0xc0) != 0x80 || (b3 & 0xc0) != 0x80;
1088     }
1089 
1090     private static boolean isMalformed3_2(int b1, int b2) {
1091         return (b1 == (byte)0xe0 && (b2 & 0xe0) == 0x80) ||
1092                 (b2 & 0xc0) != 0x80;
1093     }
1094 
1095     private static boolean isMalformed4(int b2, int b3, int b4) {
1096         return (b2 & 0xc0) != 0x80 || (b3 & 0xc0) != 0x80 ||
1097                 (b4 & 0xc0) != 0x80;
1098     }
1099 
1100     private static boolean isMalformed4_2(int b1, int b2) {
1101         return (b1 == 0xf0 && (b2 < 0x90 || b2 > 0xbf)) ||
1102                 (b1 == 0xf4 && (b2 & 0xf0) != 0x80) ||
1103                 (b2 & 0xc0) != 0x80;
1104     }
1105 
1106     private static boolean isMalformed4_3(int b3) {
1107         return (b3 & 0xc0) != 0x80;
1108     }
1109 
1110     private static char decode2(int b1, int b2) {
1111         return (char)(((b1 << 6) ^ b2) ^
1112                 (((byte) 0xC0 << 6) ^
1113                         ((byte) 0x80 << 0)));
1114     }
1115 
1116     private static char decode3(int b1, int b2, int b3) {
1117         return (char)((b1 << 12) ^
1118                 (b2 <<  6) ^
1119                 (b3 ^
1120                         (((byte) 0xE0 << 12) ^
1121                                 ((byte) 0x80 <<  6) ^
1122                                 ((byte) 0x80 <<  0))));
1123     }
1124 
1125     private static int decode4(int b1, int b2, int b3, int b4) {
1126         return ((b1 << 18) ^
1127                 (b2 << 12) ^
1128                 (b3 <<  6) ^
1129                 (b4 ^
1130                         (((byte) 0xF0 << 18) ^
1131                                 ((byte) 0x80 << 12) ^
1132                                 ((byte) 0x80 <<  6) ^
1133                                 ((byte) 0x80 <<  0))));
1134     }
1135 
    /*
     * Decodes UTF-8 bytes src[sp .. sl) into dst as UTF16 chars, starting at
     * char index dp (dst is written through StringUTF16.putChar).
     *
     * Each malformed sequence is either replaced by REPL (doReplace) or
     * reported through throwMalformed, which throws IllegalArgumentException.
     *
     * @param  src        the UTF-8 input
     * @param  sp         index of the first byte to decode
     * @param  sl         index one past the last byte to decode
     * @param  dst        destination UTF16 byte array
     * @param  dp         char index in dst at which to start writing
     * @param  doReplace  true to substitute REPL, false to throw
     * @return the char index in dst following the last char written
     */
    private static int decodeUTF8_UTF16(byte[] src, int sp, int sl, byte[] dst, int dp, boolean doReplace) {
        while (sp < sl) {
            // b1 is the sign-extended lead byte, hence the comparisons with -2 below.
            int b1 = src[sp++];
            if (b1 >= 0) {
                // single byte
                StringUTF16.putChar(dst, dp++, (char) b1);
            } else if ((b1 >> 5) == -2 && (b1 & 0x1e) != 0) {
                // lead byte 110xxxxx of a two-byte sequence; the second test
                // rejects the lead bytes 0xc0 and 0xc1
                if (sp < sl) {
                    int b2 = src[sp++];
                    if (isNotContinuation(b2)) {
                        if (!doReplace) {
                            throwMalformed(sp - 1, 1);
                        }
                        StringUTF16.putChar(dst, dp++, REPL);
                        // re-examine b2 as the start of the next sequence
                        sp--;
                    } else {
                        StringUTF16.putChar(dst, dp++, decode2(b1, b2));
                    }
                    continue;
                }
                // input ends after the lead byte
                if (!doReplace) {
                    throwMalformed(sp, 1);  // underflow()
                }
                StringUTF16.putChar(dst, dp++, REPL);
                break;
            } else if ((b1 >> 4) == -2) {
                // lead byte 1110xxxx of a three-byte sequence
                if (sp + 1 < sl) {
                    int b2 = src[sp++];
                    int b3 = src[sp++];
                    if (isMalformed3(b1, b2, b3)) {
                        if (!doReplace) {
                            throwMalformed(sp - 3, 3);
                        }
                        StringUTF16.putChar(dst, dp++, REPL);
                        // rewind to the lead byte, then skip only the bytes
                        // that malformed3 attributes to the bad sequence
                        sp -= 3;
                        sp += malformed3(src, sp);
                    } else {
                        char c = decode3(b1, b2, b3);
                        // a three-byte sequence must not encode a surrogate
                        if (Character.isSurrogate(c)) {
                            if (!doReplace) {
                                throwMalformed(sp - 3, 3);
                            }
                            StringUTF16.putChar(dst, dp++, REPL);
                        } else {
                            StringUTF16.putChar(dst, dp++, c);
                        }
                    }
                    continue;
                }
                // fewer than two bytes follow the lead byte
                if (sp < sl && isMalformed3_2(b1, src[sp])) {
                    if (!doReplace) {
                        throwMalformed(sp - 1, 2);
                    }
                    StringUTF16.putChar(dst, dp++, REPL);
                    continue;
                }
                if (!doReplace) {
                    throwMalformed(sp, 1);
                }
                StringUTF16.putChar(dst, dp++, REPL);
                break;
            } else if ((b1 >> 3) == -2) {
                // lead byte 11110xxx of a four-byte sequence
                if (sp + 2 < sl) {
                    int b2 = src[sp++];
                    int b3 = src[sp++];
                    int b4 = src[sp++];
                    int uc = decode4(b1, b2, b3, b4);
                    if (isMalformed4(b2, b3, b4) ||
                            !Character.isSupplementaryCodePoint(uc)) { // shortest form check
                        if (!doReplace) {
                            throwMalformed(sp - 4, 4);
                        }
                        StringUTF16.putChar(dst, dp++, REPL);
                        // rewind to the lead byte, then skip only the bytes
                        // that malformed4 attributes to the bad sequence
                        sp -= 4;
                        sp += malformed4(src, sp);
                    } else {
                        // supplementary code point: stored as a surrogate pair
                        StringUTF16.putChar(dst, dp++, Character.highSurrogate(uc));
                        StringUTF16.putChar(dst, dp++, Character.lowSurrogate(uc));
                    }
                    continue;
                }
                // fewer than three bytes follow the lead byte; switch b1 to
                // its unsigned value for the range checks below
                b1 &= 0xff;
                if (b1 > 0xf4 || sp < sl && isMalformed4_2(b1, src[sp] & 0xff)) {
                    if (!doReplace) {
                        throwMalformed(sp - 1, 1);  // or 2
                    }
                    StringUTF16.putChar(dst, dp++, REPL);
                    continue;
                }
                if (!doReplace) {
                    throwMalformed(sp - 1, 1);
                }
                sp++;
                StringUTF16.putChar(dst, dp++, REPL);
                if (sp < sl && isMalformed4_3(src[sp])) {
                    continue;
                }
                break;
            } else {
                // any other negative byte cannot start a sequence
                if (!doReplace) {
                    throwMalformed(sp - 1, 1);
                }
                StringUTF16.putChar(dst, dp++, REPL);
            }
        }
        return dp;
    }
1242 
1243     private static int decodeWithDecoder(CharsetDecoder cd, char[] dst, byte[] src, int offset, int length)
1244                                             throws CharacterCodingException {
1245         ByteBuffer bb = ByteBuffer.wrap(src, offset, length);
1246         CharBuffer cb = CharBuffer.wrap(dst, 0, dst.length);
1247         CoderResult cr = cd.decode(bb, cb, true);
1248         if (!cr.isUnderflow())
1249             cr.throwException();
1250         cr = cd.flush(cb);
1251         if (!cr.isUnderflow())
1252             cr.throwException();
1253         return cb.position();
1254     }
1255 
1256     private static int malformed3(byte[] src, int sp) {
1257         int b1 = src[sp++];
1258         int b2 = src[sp];    // no need to lookup b3
1259         return ((b1 == (byte)0xe0 && (b2 & 0xe0) == 0x80) ||
1260                 isNotContinuation(b2)) ? 1 : 2;
1261     }
1262 
1263     private static int malformed4(byte[] src, int sp) {
1264         // we don't care the speed here
1265         int b1 = src[sp++] & 0xff;
1266         int b2 = src[sp++] & 0xff;
1267         if (b1 > 0xf4 ||
1268                 (b1 == 0xf0 && (b2 < 0x90 || b2 > 0xbf)) ||
1269                 (b1 == 0xf4 && (b2 & 0xf0) != 0x80) ||
1270                 isNotContinuation(b2))
1271             return 1;
1272         if (isNotContinuation(src[sp]))
1273             return 2;
1274         return 3;
1275     }
1276 
1277     private static void throwMalformed(int off, int nb) {
1278         String msg = "malformed input off : " + off + ", length : " + nb;
1279         throw new IllegalArgumentException(msg, new MalformedInputException(nb));
1280     }
1281 
1282     private static void throwMalformed(byte[] val) {
1283         int dp = StringCoding.countPositives(val, 0, val.length);
1284         throwMalformed(dp, 1);
1285     }
1286 
1287     private static void throwUnmappable(int off) {
1288         String msg = "malformed input off : " + off + ", length : 1";
1289         throw new IllegalArgumentException(msg, new UnmappableCharacterException(1));
1290     }
1291 
1292     private static void throwUnmappable(byte[] val) {
1293         int dp = StringCoding.countPositives(val, 0, val.length);
1294         throwUnmappable(dp);
1295     }
1296 
1297     private static byte[] encodeUTF8(byte coder, byte[] val, boolean doReplace) {
1298         if (coder == UTF16) {
1299             return encodeUTF8_UTF16(val, doReplace);
1300         }
1301 
1302         if (!StringCoding.hasNegatives(val, 0, val.length)) {
1303             return val.clone();
1304         }
1305 
1306         int dp = 0;
1307         byte[] dst = new byte[val.length << 1];
1308         for (byte c : val) {
1309             if (c < 0) {
1310                 dst[dp++] = (byte) (0xc0 | ((c & 0xff) >> 6));
1311                 dst[dp++] = (byte) (0x80 | (c & 0x3f));
1312             } else {
1313                 dst[dp++] = c;
1314             }
1315         }
1316         if (dp == dst.length) {
1317             return dst;
1318         }
1319         return Arrays.copyOf(dst, dp);
1320     }
1321 
1322     private static byte[] encodeUTF8_UTF16(byte[] val, boolean doReplace) {
1323         int dp = 0;
1324         int sp = 0;
1325         int sl = val.length >> 1;
1326         byte[] dst = new byte[sl * 3];
1327         while (sp < sl) {
1328             // ascii fast loop;
1329             char c = StringUTF16.getChar(val, sp);
1330             if (c >= '\u0080') {
1331                 break;
1332             }
1333             dst[dp++] = (byte)c;
1334             sp++;
1335         }
1336         while (sp < sl) {
1337             char c = StringUTF16.getChar(val, sp++);
1338             if (c < 0x80) {
1339                 dst[dp++] = (byte)c;
1340             } else if (c < 0x800) {
1341                 dst[dp++] = (byte)(0xc0 | (c >> 6));
1342                 dst[dp++] = (byte)(0x80 | (c & 0x3f));
1343             } else if (Character.isSurrogate(c)) {
1344                 int uc = -1;
1345                 char c2;
1346                 if (Character.isHighSurrogate(c) && sp < sl &&
1347                         Character.isLowSurrogate(c2 = StringUTF16.getChar(val, sp))) {
1348                     uc = Character.toCodePoint(c, c2);
1349                 }
1350                 if (uc < 0) {
1351                     if (doReplace) {
1352                         dst[dp++] = '?';
1353                     } else {
1354                         throwUnmappable(sp - 1);
1355                     }
1356                 } else {
1357                     dst[dp++] = (byte)(0xf0 | ((uc >> 18)));
1358                     dst[dp++] = (byte)(0x80 | ((uc >> 12) & 0x3f));
1359                     dst[dp++] = (byte)(0x80 | ((uc >>  6) & 0x3f));
1360                     dst[dp++] = (byte)(0x80 | (uc & 0x3f));
1361                     sp++;  // 2 chars
1362                 }
1363             } else {
1364                 // 3 bytes, 16 bits
1365                 dst[dp++] = (byte)(0xe0 | ((c >> 12)));
1366                 dst[dp++] = (byte)(0x80 | ((c >>  6) & 0x3f));
1367                 dst[dp++] = (byte)(0x80 | (c & 0x3f));
1368             }
1369         }
1370         if (dp == dst.length) {
1371             return dst;
1372         }
1373         return Arrays.copyOf(dst, dp);
1374     }
1375 
    /**
     * Constructs a new {@code String} by decoding the specified array of bytes
     * using the specified {@linkplain java.nio.charset.Charset charset}.  The
     * length of the new {@code String} is a function of the charset, and hence
     * may not be equal to the length of the byte array.
     *
     * <p> The behavior of this constructor when the given bytes are not valid
     * in the given charset is unspecified.  The {@link
     * java.nio.charset.CharsetDecoder} class should be used when more control
     * over the decoding process is required.
     *
     * @param  bytes
     *         The bytes to be decoded into characters
     *
     * @param  charsetName
     *         The name of a supported {@linkplain java.nio.charset.Charset
     *         charset}
     *
     * @throws  UnsupportedEncodingException
     *          If the named charset is not supported
     *
     * @since  1.1
     */
    public String(byte[] bytes, String charsetName)
            throws UnsupportedEncodingException {
        // lookupCharset rejects a null name and converts an illegal or unknown
        // name into UnsupportedEncodingException; the whole array is decoded.
        this(lookupCharset(charsetName), bytes, 0, bytes.length);
    }
1403 
    /**
     * Constructs a new {@code String} by decoding the specified array of
     * bytes using the specified {@linkplain java.nio.charset.Charset charset}.
     * The length of the new {@code String} is a function of the charset, and
     * hence may not be equal to the length of the byte array.
     *
     * <p> This method always replaces malformed-input and unmappable-character
     * sequences with this charset's default replacement string.  The {@link
     * java.nio.charset.CharsetDecoder} class should be used when more control
     * over the decoding process is required.
     *
     * @param  bytes
     *         The bytes to be decoded into characters
     *
     * @param  charset
     *         The {@linkplain java.nio.charset.Charset charset} to be used to
     *         decode the {@code bytes}
     *
     * @since  1.6
     */
    public String(byte[] bytes, Charset charset) {
        // The charset is null-checked first; the whole array is decoded.
        this(Objects.requireNonNull(charset), bytes, 0, bytes.length);
    }
1427 
    /**
     * Constructs a new {@code String} by decoding the specified subarray of
     * bytes using the {@link Charset#defaultCharset() default charset}.
     * The length of the new {@code String} is a function of the charset,
     * and hence may not be equal to the length of the subarray.
     *
     * <p> The behavior of this constructor when the given bytes are not valid
     * in the default charset is unspecified.  The {@link
     * java.nio.charset.CharsetDecoder} class should be used when more control
     * over the decoding process is required.
     *
     * @param  bytes
     *         The bytes to be decoded into characters
     *
     * @param  offset
     *         The index of the first byte to decode
     *
     * @param  length
     *         The number of bytes to decode
     *
     * @throws  IndexOutOfBoundsException
     *          If {@code offset} is negative, {@code length} is negative, or
     *          {@code offset} is greater than {@code bytes.length - length}
     *
     * @since  1.1
     */
    public String(byte[] bytes, int offset, int length) {
        // The range check runs while the arguments are evaluated, i.e. before
        // delegating; its result is what gets passed on as the offset
        // (presumably the validated offset itself - see checkBoundsOffCount).
        this(Charset.defaultCharset(), bytes, checkBoundsOffCount(offset, length, bytes.length), length);
    }
1457 
    /**
     * Constructs a new {@code String} by decoding the specified array of bytes
     * using the {@link Charset#defaultCharset() default charset}. The length
     * of the new {@code String} is a function of the charset, and hence may not
     * be equal to the length of the byte array.
     *
     * <p> The behavior of this constructor when the given bytes are not valid
     * in the default charset is unspecified.  The {@link
     * java.nio.charset.CharsetDecoder} class should be used when more control
     * over the decoding process is required.
     *
     * @param  bytes
     *         The bytes to be decoded into characters
     *
     * @since  1.1
     */
    public String(byte[] bytes) {
        // Decodes the whole array with the default charset.
        this(Charset.defaultCharset(), bytes, 0, bytes.length);
    }
1477 
    /**
     * Allocates a new string that contains the sequence of characters
     * currently contained in the string buffer argument. The contents of the
     * string buffer are copied; subsequent modification of the string buffer
     * does not affect the newly created string.
     *
     * @param  buffer
     *         A {@code StringBuffer}
     */
    public String(StringBuffer buffer) {
        // Delegates to another constructor of this class, passing the
        // buffer's current contents as obtained from toString().
        this(buffer.toString());
    }
1490 
1491     /**
1492      * Allocates a new string that contains the sequence of characters
1493      * currently contained in the string builder argument. The contents of the
1494      * string builder are copied; subsequent modification of the string builder
1495      * does not affect the newly created string.
1496      *
1497      * <p> This constructor is provided to ease migration to {@code
1498      * StringBuilder}. Obtaining a string from a string builder via the {@code
1499      * toString} method is likely to run faster and is generally preferred.
1500      *
1501      * @param   builder
1502      *          A {@code StringBuilder}
1503      *
1504      * @since  1.5
1505      */
    public String(StringBuilder builder) {
        // NOTE(review): the null second argument presumably only serves to
        // select a non-public two-argument constructor overload -- confirm
        // against that constructor's declaration.
        this(builder, null);
    }
1509 
1510     /**
1511      * Returns the length of this string.
1512      * The length is equal to the number of <a href="Character.html#unicode">Unicode
1513      * code units</a> in the string.
1514      *
1515      * @return  the length of the sequence of characters represented by this
1516      *          object.
1517      */
    public int length() {
        // value.length counts bytes; shifting right by coder() turns that into
        // a count of chars. NOTE(review): this relies on coder() being 0 for
        // the one-byte encoding and 1 for the two-byte one -- confirm against
        // the LATIN1/UTF16 constant definitions.
        return value.length >> coder();
    }
1521 
1522     /**
1523      * Returns {@code true} if, and only if, {@link #length()} is {@code 0}.
1524      *
1525      * @return {@code true} if {@link #length()} is {@code 0}, otherwise
1526      * {@code false}
1527      *
1528      * @since 1.6
1529      */
    @Override
    public boolean isEmpty() {
        // Tests the backing byte array directly instead of going through
        // length(), so coder() is never consulted.
        return value.length == 0;
    }
1534 
1535     /**
1536      * Returns the {@code char} value at the
1537      * specified index. An index ranges from {@code 0} to
1538      * {@code length() - 1}. The first {@code char} value of the sequence
1539      * is at index {@code 0}, the next at index {@code 1},
1540      * and so on, as for array indexing.
1541      *
1542      * <p>If the {@code char} value specified by the index is a
1543      * <a href="Character.html#unicode">surrogate</a>, the surrogate
1544      * value is returned.
1545      *
1546      * @param      index   the index of the {@code char} value.
1547      * @return     the {@code char} value at the specified index of this string.
1548      *             The first {@code char} value is at index {@code 0}.
1549      * @throws     IndexOutOfBoundsException  if the {@code index}
1550      *             argument is negative or not less than the length of this
1551      *             string.
1552      */
1553     public char charAt(int index) {
1554         if (isLatin1()) {
1555             return StringLatin1.charAt(value, index);
1556         } else {
1557             return StringUTF16.charAt(value, index);
1558         }
1559     }
1560 
1561     /**
1562      * Returns the character (Unicode code point) at the specified
1563      * index. The index refers to {@code char} values
1564      * (Unicode code units) and ranges from {@code 0} to
1565      * {@link #length()}{@code  - 1}.
1566      *
1567      * <p> If the {@code char} value specified at the given index
1568      * is in the high-surrogate range, the following index is less
1569      * than the length of this {@code String}, and the
1570      * {@code char} value at the following index is in the
1571      * low-surrogate range, then the supplementary code point
1572      * corresponding to this surrogate pair is returned. Otherwise,
1573      * the {@code char} value at the given index is returned.
1574      *
1575      * @param      index the index to the {@code char} values
1576      * @return     the code point value of the character at the
1577      *             {@code index}
1578      * @throws     IndexOutOfBoundsException  if the {@code index}
1579      *             argument is negative or not less than the length of this
1580      *             string.
1581      * @since      1.5
1582      */
1583     public int codePointAt(int index) {
1584         if (isLatin1()) {
1585             checkIndex(index, value.length);
1586             return value[index] & 0xff;
1587         }
1588         int length = value.length >> 1;
1589         checkIndex(index, length);
1590         return StringUTF16.codePointAt(value, index, length);
1591     }
1592 
1593     /**
1594      * Returns the character (Unicode code point) before the specified
1595      * index. The index refers to {@code char} values
1596      * (Unicode code units) and ranges from {@code 1} to {@link
1597      * CharSequence#length() length}.
1598      *
1599      * <p> If the {@code char} value at {@code (index - 1)}
1600      * is in the low-surrogate range, {@code (index - 2)} is not
1601      * negative, and the {@code char} value at {@code (index -
1602      * 2)} is in the high-surrogate range, then the
1603      * supplementary code point value of the surrogate pair is
1604      * returned. If the {@code char} value at {@code index -
1605      * 1} is an unpaired low-surrogate or a high-surrogate, the
1606      * surrogate value is returned.
1607      *
1608      * @param     index the index following the code point that should be returned
1609      * @return    the Unicode code point value before the given index.
1610      * @throws    IndexOutOfBoundsException if the {@code index}
1611      *            argument is less than 1 or greater than the length
1612      *            of this string.
1613      * @since     1.5
1614      */
1615     public int codePointBefore(int index) {
1616         int i = index - 1;
1617         checkIndex(i, length());
1618         if (isLatin1()) {
1619             return (value[i] & 0xff);
1620         }
1621         return StringUTF16.codePointBefore(value, index);
1622     }
1623 
1624     /**
1625      * Returns the number of Unicode code points in the specified text
1626      * range of this {@code String}. The text range begins at the
1627      * specified {@code beginIndex} and extends to the
1628      * {@code char} at index {@code endIndex - 1}. Thus the
1629      * length (in {@code char}s) of the text range is
1630      * {@code endIndex-beginIndex}. Unpaired surrogates within
1631      * the text range count as one code point each.
1632      *
1633      * @param beginIndex the index to the first {@code char} of
1634      * the text range.
1635      * @param endIndex the index after the last {@code char} of
1636      * the text range.
1637      * @return the number of Unicode code points in the specified text
1638      * range
1639      * @throws    IndexOutOfBoundsException if the
1640      * {@code beginIndex} is negative, or {@code endIndex}
1641      * is larger than the length of this {@code String}, or
1642      * {@code beginIndex} is larger than {@code endIndex}.
1643      * @since  1.5
1644      */
1645     public int codePointCount(int beginIndex, int endIndex) {
1646         Objects.checkFromToIndex(beginIndex, endIndex, length());
1647         if (isLatin1()) {
1648             return endIndex - beginIndex;
1649         }
1650         return StringUTF16.codePointCount(value, beginIndex, endIndex);
1651     }
1652 
1653     /**
1654      * Returns the index within this {@code String} that is
1655      * offset from the given {@code index} by
1656      * {@code codePointOffset} code points. Unpaired surrogates
1657      * within the text range given by {@code index} and
1658      * {@code codePointOffset} count as one code point each.
1659      *
1660      * @param index the index to be offset
1661      * @param codePointOffset the offset in code points
1662      * @return the index within this {@code String}
1663      * @throws    IndexOutOfBoundsException if {@code index}
1664      *   is negative or larger than the length of this
1665      *   {@code String}, or if {@code codePointOffset} is positive
1666      *   and the substring starting with {@code index} has fewer
1667      *   than {@code codePointOffset} code points,
1668      *   or if {@code codePointOffset} is negative and the substring
1669      *   before {@code index} has fewer than the absolute value
1670      *   of {@code codePointOffset} code points.
1671      * @since 1.5
1672      */
    public int offsetByCodePoints(int index, int codePointOffset) {
        // No encoding-specific path here: this string is handed to the
        // Character helper as the sequence to walk.
        return Character.offsetByCodePoints(this, index, codePointOffset);
    }
1676 
1677     /**
1678      * Copies characters from this string into the destination character
1679      * array.
1680      * <p>
1681      * The first character to be copied is at index {@code srcBegin};
1682      * the last character to be copied is at index {@code srcEnd-1}
1683      * (thus the total number of characters to be copied is
1684      * {@code srcEnd-srcBegin}). The characters are copied into the
1685      * subarray of {@code dst} starting at index {@code dstBegin}
1686      * and ending at index:
1687      * <blockquote><pre>
1688      *     dstBegin + (srcEnd-srcBegin) - 1
1689      * </pre></blockquote>
1690      *
1691      * @param      srcBegin   index of the first character in the string
1692      *                        to copy.
1693      * @param      srcEnd     index after the last character in the string
1694      *                        to copy.
1695      * @param      dst        the destination array.
1696      * @param      dstBegin   the start offset in the destination array.
1697      * @throws    IndexOutOfBoundsException If any of the following
1698      *            is true:
1699      *            <ul><li>{@code srcBegin} is negative.
1700      *            <li>{@code srcBegin} is greater than {@code srcEnd}
1701      *            <li>{@code srcEnd} is greater than the length of this
1702      *                string
1703      *            <li>{@code dstBegin} is negative
1704      *            <li>{@code dstBegin+(srcEnd-srcBegin)} is larger than
1705      *                {@code dst.length}</ul>
1706      */
1707     public void getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin) {
1708         checkBoundsBeginEnd(srcBegin, srcEnd, length());
1709         checkBoundsOffCount(dstBegin, srcEnd - srcBegin, dst.length);
1710         if (isLatin1()) {
1711             StringLatin1.getChars(value, srcBegin, srcEnd, dst, dstBegin);
1712         } else {
1713             StringUTF16.getChars(value, srcBegin, srcEnd, dst, dstBegin);
1714         }
1715     }
1716 
1717     /**
1718      * Copies characters from this string into the destination byte array. Each
1719      * byte receives the 8 low-order bits of the corresponding character. The
1720      * eight high-order bits of each character are not copied and do not
1721      * participate in the transfer in any way.
1722      *
1723      * <p> The first character to be copied is at index {@code srcBegin}; the
1724      * last character to be copied is at index {@code srcEnd-1}.  The total
1725      * number of characters to be copied is {@code srcEnd-srcBegin}. The
1726      * characters, converted to bytes, are copied into the subarray of {@code
1727      * dst} starting at index {@code dstBegin} and ending at index:
1728      *
1729      * <blockquote><pre>
1730      *     dstBegin + (srcEnd-srcBegin) - 1
1731      * </pre></blockquote>
1732      *
1733      * @deprecated  This method does not properly convert characters into
1734      * bytes.  As of JDK&nbsp;1.1, the preferred way to do this is via the
1735      * {@link #getBytes()} method, which uses the {@link Charset#defaultCharset()
1736      * default charset}.
1737      *
1738      * @param  srcBegin
1739      *         Index of the first character in the string to copy
1740      *
1741      * @param  srcEnd
1742      *         Index after the last character in the string to copy
1743      *
1744      * @param  dst
1745      *         The destination array
1746      *
1747      * @param  dstBegin
1748      *         The start offset in the destination array
1749      *
1750      * @throws  IndexOutOfBoundsException
1751      *          If any of the following is true:
1752      *          <ul>
1753      *            <li> {@code srcBegin} is negative
1754      *            <li> {@code srcBegin} is greater than {@code srcEnd}
1755      *            <li> {@code srcEnd} is greater than the length of this String
1756      *            <li> {@code dstBegin} is negative
1757      *            <li> {@code dstBegin+(srcEnd-srcBegin)} is larger than {@code
1758      *                 dst.length}
1759      *          </ul>
1760      */
1761     @Deprecated(since="1.1")
1762     public void getBytes(int srcBegin, int srcEnd, byte[] dst, int dstBegin) {
1763         checkBoundsBeginEnd(srcBegin, srcEnd, length());
1764         Objects.requireNonNull(dst);
1765         checkBoundsOffCount(dstBegin, srcEnd - srcBegin, dst.length);
1766         if (isLatin1()) {
1767             StringLatin1.getBytes(value, srcBegin, srcEnd, dst, dstBegin);
1768         } else {
1769             StringUTF16.getBytes(value, srcBegin, srcEnd, dst, dstBegin);
1770         }
1771     }
1772 
1773     /**
1774      * Encodes this {@code String} into a sequence of bytes using the named
1775      * charset, storing the result into a new byte array.
1776      *
1777      * <p> The behavior of this method when this string cannot be encoded in
1778      * the given charset is unspecified.  The {@link
1779      * java.nio.charset.CharsetEncoder} class should be used when more control
1780      * over the encoding process is required.
1781      *
1782      * @param  charsetName
1783      *         The name of a supported {@linkplain java.nio.charset.Charset
1784      *         charset}
1785      *
1786      * @return  The resultant byte array
1787      *
1788      * @throws  UnsupportedEncodingException
1789      *          If the named charset is not supported
1790      *
1791      * @since  1.1
1792      */
    public byte[] getBytes(String charsetName)
            throws UnsupportedEncodingException {
        // Resolve the name first, then encode this string's bytes under its
        // current coder. NOTE(review): null handling and the translation of an
        // unknown name into UnsupportedEncodingException presumably live in
        // lookupCharset -- confirm there.
        return encode(lookupCharset(charsetName), coder(), value);
    }
1797 
1798     /**
1799      * Encodes this {@code String} into a sequence of bytes using the given
1800      * {@linkplain java.nio.charset.Charset charset}, storing the result into a
1801      * new byte array.
1802      *
1803      * <p> This method always replaces malformed-input and unmappable-character
1804      * sequences with this charset's default replacement byte array.  The
1805      * {@link java.nio.charset.CharsetEncoder} class should be used when more
1806      * control over the encoding process is required.
1807      *
1808      * @param  charset
1809      *         The {@linkplain java.nio.charset.Charset} to be used to encode
1810      *         the {@code String}
1811      *
1812      * @return  The resultant byte array
1813      *
1814      * @since  1.6
1815      */
1816     public byte[] getBytes(Charset charset) {
1817         if (charset == null) throw new NullPointerException();
1818         return encode(charset, coder(), value);
1819      }
1820 
1821     /**
1822      * Encodes this {@code String} into a sequence of bytes using the
1823      * {@link Charset#defaultCharset() default charset}, storing the result
1824      * into a new byte array.
1825      *
1826      * <p> The behavior of this method when this string cannot be encoded in
1827      * the default charset is unspecified.  The {@link
1828      * java.nio.charset.CharsetEncoder} class should be used when more control
1829      * over the encoding process is required.
1830      *
1831      * @return  The resultant byte array
1832      *
1833      * @since      1.1
1834      */
    public byte[] getBytes() {
        // Same encode path as the other getBytes overloads, with the charset
        // fixed to whatever Charset.defaultCharset() reports at call time.
        return encode(Charset.defaultCharset(), coder(), value);
    }
1838 
1839     /**
1840      * Compares this string to the specified object.  The result is {@code
1841      * true} if and only if the argument is not {@code null} and is a {@code
1842      * String} object that represents the same sequence of characters as this
1843      * object.
1844      *
1845      * <p>For finer-grained String comparison, refer to
1846      * {@link java.text.Collator}.
1847      *
1848      * @param  anObject
1849      *         The object to compare this {@code String} against
1850      *
1851      * @return  {@code true} if the given object represents a {@code String}
1852      *          equivalent to this string, {@code false} otherwise
1853      *
1854      * @see  #compareTo(String)
1855      * @see  #equalsIgnoreCase(String)
1856      */
1857     public boolean equals(Object anObject) {
1858         if (this == anObject) {
1859             return true;
1860         }
1861         return (anObject instanceof String aString)
1862                 && (!COMPACT_STRINGS || this.coder == aString.coder)
1863                 && StringLatin1.equals(value, aString.value);
1864     }
1865 
1866     /**
1867      * Compares this string to the specified {@code StringBuffer}.  The result
1868      * is {@code true} if and only if this {@code String} represents the same
1869      * sequence of characters as the specified {@code StringBuffer}. This method
1870      * synchronizes on the {@code StringBuffer}.
1871      *
1872      * <p>For finer-grained String comparison, refer to
1873      * {@link java.text.Collator}.
1874      *
1875      * @param  sb
1876      *         The {@code StringBuffer} to compare this {@code String} against
1877      *
1878      * @return  {@code true} if this {@code String} represents the same
1879      *          sequence of characters as the specified {@code StringBuffer},
1880      *          {@code false} otherwise
1881      *
1882      * @since  1.4
1883      */
    public boolean contentEquals(StringBuffer sb) {
        // The cast selects the CharSequence overload, which is where the
        // synchronization on a StringBuffer argument takes place.
        return contentEquals((CharSequence)sb);
    }
1887 
1888     private boolean nonSyncContentEquals(AbstractStringBuilder sb) {
1889         int len = length();
1890         if (len != sb.length()) {
1891             return false;
1892         }
1893         byte[] v1 = value;
1894         byte[] v2 = sb.getValue();
1895         byte coder = coder();
1896         if (coder == sb.getCoder()) {
1897             return v1.length <= v2.length && ArraysSupport.mismatch(v1, v2, v1.length) < 0;
1898         } else {
1899             if (coder != LATIN1) {  // utf16 str and latin1 abs can never be "equal"
1900                 return false;
1901             }
1902             return StringUTF16.contentEquals(v1, v2, len);
1903         }
1904     }
1905 
1906     /**
1907      * Compares this string to the specified {@code CharSequence}.  The
1908      * result is {@code true} if and only if this {@code String} represents the
1909      * same sequence of char values as the specified sequence. Note that if the
1910      * {@code CharSequence} is a {@code StringBuffer} then the method
1911      * synchronizes on it.
1912      *
1913      * <p>For finer-grained String comparison, refer to
1914      * {@link java.text.Collator}.
1915      *
1916      * @param  cs
1917      *         The sequence to compare this {@code String} against
1918      *
1919      * @return  {@code true} if this {@code String} represents the same
1920      *          sequence of char values as the specified sequence, {@code
1921      *          false} otherwise
1922      *
1923      * @since  1.5
1924      */
1925     public boolean contentEquals(CharSequence cs) {
1926         // Argument is a StringBuffer, StringBuilder
1927         if (cs instanceof AbstractStringBuilder) {
1928             if (cs instanceof StringBuffer) {
1929                 synchronized(cs) {
1930                    return nonSyncContentEquals((AbstractStringBuilder)cs);
1931                 }
1932             } else {
1933                 return nonSyncContentEquals((AbstractStringBuilder)cs);
1934             }
1935         }
1936         // Argument is a String
1937         if (cs instanceof String) {
1938             return equals(cs);
1939         }
1940         // Argument is a generic CharSequence
1941         int n = cs.length();
1942         if (n != length()) {
1943             return false;
1944         }
1945         byte[] val = this.value;
1946         if (isLatin1()) {
1947             for (int i = 0; i < n; i++) {
1948                 if ((val[i] & 0xff) != cs.charAt(i)) {
1949                     return false;
1950                 }
1951             }
1952         } else {
1953             if (!StringUTF16.contentEquals(val, cs, n)) {
1954                 return false;
1955             }
1956         }
1957         return true;
1958     }
1959 
1960     /**
1961      * Compares this {@code String} to another {@code String}, ignoring case
1962      * considerations.  Two strings are considered equal ignoring case if they
1963      * are of the same length and corresponding Unicode code points in the two
1964      * strings are equal ignoring case.
1965      *
1966      * <p> Two Unicode code points are considered the same
1967      * ignoring case if at least one of the following is true:
1968      * <ul>
1969      *   <li> The two Unicode code points are the same (as compared by the
1970      *        {@code ==} operator)
1971      *   <li> Calling {@code Character.toLowerCase(Character.toUpperCase(int))}
1972      *        on each Unicode code point produces the same result
1973      * </ul>
1974      *
1975      * <p>Note that this method does <em>not</em> take locale into account, and
1976      * will result in unsatisfactory results for certain locales.  The
1977      * {@link java.text.Collator} class provides locale-sensitive comparison.
1978      *
1979      * @param  anotherString
1980      *         The {@code String} to compare this {@code String} against
1981      *
1982      * @return  {@code true} if the argument is not {@code null} and it
1983      *          represents an equivalent {@code String} ignoring case; {@code
1984      *          false} otherwise
1985      *
1986      * @see  #equals(Object)
1987      * @see  #codePoints()
1988      */
1989     public boolean equalsIgnoreCase(String anotherString) {
1990         return (this == anotherString) ? true
1991                 : (anotherString != null)
1992                 && (anotherString.length() == length())
1993                 && regionMatches(true, 0, anotherString, 0, length());
1994     }
1995 
1996     /**
1997      * Compares two strings lexicographically.
1998      * The comparison is based on the Unicode value of each character in
1999      * the strings. The character sequence represented by this
2000      * {@code String} object is compared lexicographically to the
2001      * character sequence represented by the argument string. The result is
2002      * a negative integer if this {@code String} object
2003      * lexicographically precedes the argument string. The result is a
2004      * positive integer if this {@code String} object lexicographically
2005      * follows the argument string. The result is zero if the strings
2006      * are equal; {@code compareTo} returns {@code 0} exactly when
2007      * the {@link #equals(Object)} method would return {@code true}.
2008      * <p>
2009      * This is the definition of lexicographic ordering. If two strings are
2010      * different, then either they have different characters at some index
2011      * that is a valid index for both strings, or their lengths are different,
2012      * or both. If they have different characters at one or more index
2013      * positions, let <i>k</i> be the smallest such index; then the string
2014      * whose character at position <i>k</i> has the smaller value, as
2015      * determined by using the {@code <} operator, lexicographically precedes the
2016      * other string. In this case, {@code compareTo} returns the
2017      * difference of the two character values at position {@code k} in
     * the two strings -- that is, the value:
2019      * <blockquote><pre>
2020      * this.charAt(k)-anotherString.charAt(k)
2021      * </pre></blockquote>
2022      * If there is no index position at which they differ, then the shorter
2023      * string lexicographically precedes the longer string. In this case,
2024      * {@code compareTo} returns the difference of the lengths of the
2025      * strings -- that is, the value:
2026      * <blockquote><pre>
2027      * this.length()-anotherString.length()
2028      * </pre></blockquote>
2029      *
2030      * <p>For finer-grained String comparison, refer to
2031      * {@link java.text.Collator}.
2032      *
2033      * @param   anotherString   the {@code String} to be compared.
2034      * @return  the value {@code 0} if the argument string is equal to
2035      *          this string; a value less than {@code 0} if this string
2036      *          is lexicographically less than the string argument; and a
2037      *          value greater than {@code 0} if this string is
2038      *          lexicographically greater than the string argument.
2039      */
2040     public int compareTo(String anotherString) {
2041         byte[] v1 = value;
2042         byte[] v2 = anotherString.value;
2043         byte coder = coder();
2044         if (coder == anotherString.coder()) {
2045             return coder == LATIN1 ? StringLatin1.compareTo(v1, v2)
2046                                    : StringUTF16.compareTo(v1, v2);
2047         }
2048         return coder == LATIN1 ? StringLatin1.compareToUTF16(v1, v2)
2049                                : StringUTF16.compareToLatin1(v1, v2);
2050      }
2051 
2052     /**
2053      * A Comparator that orders {@code String} objects as by
2054      * {@link #compareToIgnoreCase(String) compareToIgnoreCase}.
2055      * This comparator is serializable.
2056      * <p>
2057      * Note that this Comparator does <em>not</em> take locale into account,
2058      * and will result in an unsatisfactory ordering for certain locales.
2059      * The {@link java.text.Collator} class provides locale-sensitive comparison.
2060      *
2061      * @see     java.text.Collator
2062      * @since   1.2
2063      */
    public static final Comparator<String> CASE_INSENSITIVE_ORDER
                                         // Shared instance; CaseInsensitiveComparator.readResolve()
                                         // returns this same object for deserialized copies.
                                         = new CaseInsensitiveComparator();
2066 
2067     /**
2068      * CaseInsensitiveComparator for Strings.
2069      */
2070     private static class CaseInsensitiveComparator
2071             implements Comparator<String>, java.io.Serializable {
2072         // use serialVersionUID from JDK 1.2.2 for interoperability
2073         @java.io.Serial
2074         private static final long serialVersionUID = 8575799808933029326L;
2075 
2076         public int compare(String s1, String s2) {
2077             byte[] v1 = s1.value;
2078             byte[] v2 = s2.value;
2079             byte coder = s1.coder();
2080             if (coder == s2.coder()) {
2081                 return coder == LATIN1 ? StringLatin1.compareToCI(v1, v2)
2082                                        : StringUTF16.compareToCI(v1, v2);
2083             }
2084             return coder == LATIN1 ? StringLatin1.compareToCI_UTF16(v1, v2)
2085                                    : StringUTF16.compareToCI_Latin1(v1, v2);
2086         }
2087 
2088         /** Replaces the de-serialized object. */
2089         @java.io.Serial
2090         private Object readResolve() { return CASE_INSENSITIVE_ORDER; }
2091     }
2092 
2093     /**
2094      * Compares two strings lexicographically, ignoring case
2095      * differences. This method returns an integer whose sign is that of
2096      * calling {@code compareTo} with case folded versions of the strings
2097      * where case differences have been eliminated by calling
2098      * {@code Character.toLowerCase(Character.toUpperCase(int))} on
2099      * each Unicode code point.
2100      * <p>
2101      * Note that this method does <em>not</em> take locale into account,
2102      * and will result in an unsatisfactory ordering for certain locales.
2103      * The {@link java.text.Collator} class provides locale-sensitive comparison.
2104      *
2105      * @param   str   the {@code String} to be compared.
2106      * @return  a negative integer, zero, or a positive integer as the
2107      *          specified String is greater than, equal to, or less
2108      *          than this String, ignoring case considerations.
2109      * @see     java.text.Collator
2110      * @see     #codePoints()
2111      * @since   1.2
2112      */
    public int compareToIgnoreCase(String str) {
        // Single source of truth: the shared comparator does the work, with
        // this string as the left-hand operand.
        return CASE_INSENSITIVE_ORDER.compare(this, str);
    }
2116 
2117     /**
2118      * Tests if two string regions are equal.
2119      * <p>
     * A substring of this {@code String} object is compared to a substring
     * of the argument {@code other}. The result is {@code true} if these
     * substrings represent identical character sequences. The substring of
     * this {@code String} object to be compared begins at index
     * {@code toffset} and has length {@code len}. The substring of
     * {@code other} to be compared begins at index {@code ooffset} and has
     * length {@code len}. The
2126      * result is {@code false} if and only if at least one of the following
2127      * is true:
2128      * <ul><li>{@code toffset} is negative.
2129      * <li>{@code ooffset} is negative.
2130      * <li>{@code toffset+len} is greater than the length of this
2131      * {@code String} object.
2132      * <li>{@code ooffset+len} is greater than the length of the other
2133      * argument.
2134      * <li>There is some nonnegative integer <i>k</i> less than {@code len}
2135      * such that:
2136      * {@code this.charAt(toffset + }<i>k</i>{@code ) != other.charAt(ooffset + }
2137      * <i>k</i>{@code )}
2138      * </ul>
2139      *
2140      * <p>Note that this method does <em>not</em> take locale into account.  The
2141      * {@link java.text.Collator} class provides locale-sensitive comparison.
2142      *
2143      * @param   toffset   the starting offset of the subregion in this string.
2144      * @param   other     the string argument.
2145      * @param   ooffset   the starting offset of the subregion in the string
2146      *                    argument.
2147      * @param   len       the number of characters to compare.
2148      * @return  {@code true} if the specified subregion of this string
2149      *          exactly matches the specified subregion of the string argument;
2150      *          {@code false} otherwise.
2151      */
2152     public boolean regionMatches(int toffset, String other, int ooffset, int len) {
2153         // Note: toffset, ooffset, or len might be near -1>>>1.
2154         if ((ooffset < 0) || (toffset < 0) ||
2155              (toffset > (long)length() - len) ||
2156              (ooffset > (long)other.length() - len)) {
2157             return false;
2158         }
2159         // Any strings match if len <= 0
2160         if (len <= 0) {
2161            return true;
2162         }
2163         byte[] tv = value;
2164         byte[] ov = other.value;
2165         byte coder = coder();
2166         if (coder == other.coder()) {
2167             if (coder == UTF16) {
2168                 toffset <<= UTF16;
2169                 ooffset <<= UTF16;
2170                 len <<= UTF16;
2171             }
2172             return ArraysSupport.mismatch(tv, toffset,
2173                     ov, ooffset, len) < 0;
2174         } else {
2175             if (coder == LATIN1) {
2176                 while (len-- > 0) {
2177                     if (StringLatin1.getChar(tv, toffset++) !=
2178                         StringUTF16.getChar(ov, ooffset++)) {
2179                         return false;
2180                     }
2181                 }
2182             } else {
2183                 while (len-- > 0) {
2184                     if (StringUTF16.getChar(tv, toffset++) !=
2185                         StringLatin1.getChar(ov, ooffset++)) {
2186                         return false;
2187                     }
2188                 }
2189             }
2190         }
2191         return true;
2192     }
2193 
2194     /**
2195      * Tests if two string regions are equal.
2196      * <p>
2197      * A substring of this {@code String} object is compared to a substring
2198      * of the argument {@code other}. The result is {@code true} if these
2199      * substrings represent Unicode code point sequences that are the same,
2200      * ignoring case if and only if {@code ignoreCase} is true.
2201      * The sequences {@code tsequence} and {@code osequence} are compared,
2202      * where {@code tsequence} is the sequence produced as if by calling
2203      * {@code this.substring(toffset, toffset + len).codePoints()} and
2204      * {@code osequence} is the sequence produced as if by calling
2205      * {@code other.substring(ooffset, ooffset + len).codePoints()}.
2206      * The result is {@code true} if and only if all of the following
2207      * are true:
2208      * <ul><li>{@code toffset} is non-negative.
2209      * <li>{@code ooffset} is non-negative.
2210      * <li>{@code toffset+len} is less than or equal to the length of this
2211      * {@code String} object.
2212      * <li>{@code ooffset+len} is less than or equal to the length of the other
2213      * argument.
2214      * <li>if {@code ignoreCase} is {@code false}, all pairs of corresponding Unicode
2215      * code points are equal integer values; or if {@code ignoreCase} is {@code true},
2216      * {@link Character#toLowerCase(int) Character.toLowerCase(}
2217      * {@link Character#toUpperCase(int)}{@code )} on all pairs of Unicode code points
2218      * results in equal integer values.
2219      * </ul>
2220      *
2221      * <p>Note that this method does <em>not</em> take locale into account,
2222      * and will result in unsatisfactory results for certain locales when
2223      * {@code ignoreCase} is {@code true}.  The {@link java.text.Collator} class
2224      * provides locale-sensitive comparison.
2225      *
2226      * @param   ignoreCase   if {@code true}, ignore case when comparing
2227      *                       characters.
2228      * @param   toffset      the starting offset of the subregion in this
2229      *                       string.
2230      * @param   other        the string argument.
2231      * @param   ooffset      the starting offset of the subregion in the string
2232      *                       argument.
2233      * @param   len          the number of characters (Unicode code units -
2234      *                       16bit {@code char} value) to compare.
2235      * @return  {@code true} if the specified subregion of this string
2236      *          matches the specified subregion of the string argument;
2237      *          {@code false} otherwise. Whether the matching is exact
2238      *          or case insensitive depends on the {@code ignoreCase}
2239      *          argument.
2240      * @see     #codePoints()
2241      */
2242     public boolean regionMatches(boolean ignoreCase, int toffset,
2243             String other, int ooffset, int len) {
2244         if (!ignoreCase) {
2245             return regionMatches(toffset, other, ooffset, len);
2246         }
2247         // Note: toffset, ooffset, or len might be near -1>>>1.
2248         if ((ooffset < 0) || (toffset < 0)
2249                 || (toffset > (long)length() - len)
2250                 || (ooffset > (long)other.length() - len)) {
2251             return false;
2252         }
2253         byte[] tv = value;
2254         byte[] ov = other.value;
2255         byte coder = coder();
2256         if (coder == other.coder()) {
2257             return coder == LATIN1
2258               ? StringLatin1.regionMatchesCI(tv, toffset, ov, ooffset, len)
2259               : StringUTF16.regionMatchesCI(tv, toffset, ov, ooffset, len);
2260         }
2261         return coder == LATIN1
2262               ? StringLatin1.regionMatchesCI_UTF16(tv, toffset, ov, ooffset, len)
2263               : StringUTF16.regionMatchesCI_Latin1(tv, toffset, ov, ooffset, len);
2264     }
2265 
2266     /**
2267      * Tests if the substring of this string beginning at the
2268      * specified index starts with the specified prefix.
2269      *
2270      * @param   prefix    the prefix.
2271      * @param   toffset   where to begin looking in this string.
2272      * @return  {@code true} if the character sequence represented by the
2273      *          argument is a prefix of the substring of this object starting
2274      *          at index {@code toffset}; {@code false} otherwise.
2275      *          The result is {@code false} if {@code toffset} is
2276      *          negative or greater than the length of this
2277      *          {@code String} object; otherwise the result is the same
2278      *          as the result of the expression
2279      *          <pre>
2280      *          this.substring(toffset).startsWith(prefix)
2281      *          </pre>
2282      */
2283     public boolean startsWith(String prefix, int toffset) {
2284         // Note: toffset might be near -1>>>1.
2285         if (toffset < 0 || toffset > length() - prefix.length()) {
2286             return false;
2287         }
2288         byte[] ta = value;
2289         byte[] pa = prefix.value;
2290         int po = 0;
2291         int pc = pa.length;
2292         byte coder = coder();
2293         if (coder == prefix.coder()) {
2294             if (coder == UTF16) {
2295                 toffset <<= UTF16;
2296             }
2297             return ArraysSupport.mismatch(ta, toffset,
2298                     pa, 0, pc) < 0;
2299         } else {
2300             if (coder == LATIN1) {  // && pcoder == UTF16
2301                 return false;
2302             }
2303             // coder == UTF16 && pcoder == LATIN1)
2304             while (po < pc) {
2305                 if (StringUTF16.getChar(ta, toffset++) != (pa[po++] & 0xff)) {
2306                     return false;
2307                }
2308             }
2309         }
2310         return true;
2311     }
2312 
2313     /**
2314      * Tests if this string starts with the specified prefix.
2315      *
2316      * @param   prefix   the prefix.
2317      * @return  {@code true} if the character sequence represented by the
2318      *          argument is a prefix of the character sequence represented by
2319      *          this string; {@code false} otherwise.
2320      *          Note also that {@code true} will be returned if the
2321      *          argument is an empty string or is equal to this
2322      *          {@code String} object as determined by the
2323      *          {@link #equals(Object)} method.
2324      * @since   1.0
2325      */
2326     public boolean startsWith(String prefix) {
2327         return startsWith(prefix, 0);
2328     }
2329 
2330     /**
2331      * Tests if this string ends with the specified suffix.
2332      *
2333      * @param   suffix   the suffix.
2334      * @return  {@code true} if the character sequence represented by the
2335      *          argument is a suffix of the character sequence represented by
2336      *          this object; {@code false} otherwise. Note that the
2337      *          result will be {@code true} if the argument is the
2338      *          empty string or is equal to this {@code String} object
2339      *          as determined by the {@link #equals(Object)} method.
2340      */
2341     public boolean endsWith(String suffix) {
2342         return startsWith(suffix, length() - suffix.length());
2343     }
2344 
2345     /**
2346      * Returns a hash code for this string. The hash code for a
2347      * {@code String} object is computed as
2348      * <blockquote><pre>
2349      * s[0]*31^(n-1) + s[1]*31^(n-2) + ... + s[n-1]
2350      * </pre></blockquote>
2351      * using {@code int} arithmetic, where {@code s[i]} is the
2352      * <i>i</i>th character of the string, {@code n} is the length of
2353      * the string, and {@code ^} indicates exponentiation.
2354      * (The hash value of the empty string is zero.)
2355      *
2356      * @return  a hash code value for this object.
2357      */
2358     public int hashCode() {
2359         // The hash or hashIsZero fields are subject to a benign data race,
2360         // making it crucial to ensure that any observable result of the
2361         // calculation in this method stays correct under any possible read of
2362         // these fields. Necessary restrictions to allow this to be correct
2363         // without explicit memory fences or similar concurrency primitives is
2364         // that we can ever only write to one of these two fields for a given
2365         // String instance, and that the computation is idempotent and derived
2366         // from immutable state
2367         int h = hash;
2368         if (h == 0 && !hashIsZero) {
2369             h = isLatin1() ? StringLatin1.hashCode(value)
2370                            : StringUTF16.hashCode(value);
2371             if (h == 0) {
2372                 hashIsZero = true;
2373             } else {
2374                 hash = h;
2375             }
2376         }
2377         return h;
2378     }
2379 
2380     /**
2381      * Returns the index within this string of the first occurrence of
2382      * the specified character. If a character with value
2383      * {@code ch} occurs in the character sequence represented by
2384      * this {@code String} object, then the index (in Unicode
2385      * code units) of the first such occurrence is returned. For
2386      * values of {@code ch} in the range from 0 to 0xFFFF
2387      * (inclusive), this is the smallest value <i>k</i> such that:
2388      * <blockquote><pre>
2389      * this.charAt(<i>k</i>) == ch
2390      * </pre></blockquote>
2391      * is true. For other values of {@code ch}, it is the
2392      * smallest value <i>k</i> such that:
2393      * <blockquote><pre>
2394      * this.codePointAt(<i>k</i>) == ch
2395      * </pre></blockquote>
2396      * is true. In either case, if no such character occurs in this
2397      * string, then {@code -1} is returned.
2398      *
2399      * @param   ch   a character (Unicode code point).
2400      * @return  the index of the first occurrence of the character in the
2401      *          character sequence represented by this object, or
2402      *          {@code -1} if the character does not occur.
2403      */
2404     public int indexOf(int ch) {
2405         return indexOf(ch, 0);
2406     }
2407 
2408     /**
2409      * Returns the index within this string of the first occurrence of the
2410      * specified character, starting the search at the specified index.
2411      * <p>
2412      * If a character with value {@code ch} occurs in the
2413      * character sequence represented by this {@code String}
2414      * object at an index no smaller than {@code fromIndex}, then
2415      * the index of the first such occurrence is returned. For values
2416      * of {@code ch} in the range from 0 to 0xFFFF (inclusive),
2417      * this is the smallest value <i>k</i> such that:
2418      * <blockquote><pre>
2419      * (this.charAt(<i>k</i>) == ch) {@code &&} (<i>k</i> &gt;= fromIndex)
2420      * </pre></blockquote>
2421      * is true. For other values of {@code ch}, it is the
2422      * smallest value <i>k</i> such that:
2423      * <blockquote><pre>
2424      * (this.codePointAt(<i>k</i>) == ch) {@code &&} (<i>k</i> &gt;= fromIndex)
2425      * </pre></blockquote>
2426      * is true. In either case, if no such character occurs in this
2427      * string at or after position {@code fromIndex}, then
2428      * {@code -1} is returned.
2429      *
2430      * <p>
2431      * There is no restriction on the value of {@code fromIndex}. If it
2432      * is negative, it has the same effect as if it were zero: this entire
2433      * string may be searched. If it is greater than the length of this
2434      * string, it has the same effect as if it were equal to the length of
2435      * this string: {@code -1} is returned.
2436      *
2437      * <p>All indices are specified in {@code char} values
2438      * (Unicode code units).
2439      *
2440      * @param   ch          a character (Unicode code point).
2441      * @param   fromIndex   the index to start the search from.
2442      * @return  the index of the first occurrence of the character in the
2443      *          character sequence represented by this object that is greater
2444      *          than or equal to {@code fromIndex}, or {@code -1}
2445      *          if the character does not occur.
2446      *
2447      * @apiNote
2448      * Unlike {@link #substring(int)}, for example, this method does not throw
2449      * an exception when {@code fromIndex} is outside the valid range.
2450      * Rather, it returns -1 when {@code fromIndex} is larger than the length of
2451      * the string.
2452      * This result is, by itself, indistinguishable from a genuine absence of
2453      * {@code ch} in the string.
2454      * If stricter behavior is needed, {@link #indexOf(int, int, int)}
2455      * should be considered instead.
2456      * On a {@link String} {@code s}, for example,
2457      * {@code s.indexOf(ch, fromIndex, s.length())} would throw if
2458      * {@code fromIndex} were larger than the string length, or were negative.
2459      */
2460     public int indexOf(int ch, int fromIndex) {
2461         return isLatin1() ? StringLatin1.indexOf(value, ch, fromIndex, length())
2462                 : StringUTF16.indexOf(value, ch, fromIndex, length());
2463     }
2464 
2465     /**
2466      * Returns the index within this string of the first occurrence of the
2467      * specified character, starting the search at {@code beginIndex} and
2468      * stopping before {@code endIndex}.
2469      *
2470      * <p>If a character with value {@code ch} occurs in the
2471      * character sequence represented by this {@code String}
2472      * object at an index no smaller than {@code beginIndex} but smaller than
2473      * {@code endIndex}, then
2474      * the index of the first such occurrence is returned. For values
2475      * of {@code ch} in the range from 0 to 0xFFFF (inclusive),
2476      * this is the smallest value <i>k</i> such that:
2477      * <blockquote><pre>
2478      * (this.charAt(<i>k</i>) == ch) &amp;&amp; (beginIndex &lt;= <i>k</i> &lt; endIndex)
2479      * </pre></blockquote>
2480      * is true. For other values of {@code ch}, it is the
2481      * smallest value <i>k</i> such that:
2482      * <blockquote><pre>
2483      * (this.codePointAt(<i>k</i>) == ch) &amp;&amp; (beginIndex &lt;= <i>k</i> &lt; endIndex)
2484      * </pre></blockquote>
2485      * is true. In either case, if no such character occurs in this
2486      * string at or after position {@code beginIndex} and before position
2487      * {@code endIndex}, then {@code -1} is returned.
2488      *
2489      * <p>All indices are specified in {@code char} values
2490      * (Unicode code units).
2491      *
2492      * @param   ch          a character (Unicode code point).
2493      * @param   beginIndex  the index to start the search from (included).
2494      * @param   endIndex    the index to stop the search at (excluded).
2495      * @return  the index of the first occurrence of the character in the
2496      *          character sequence represented by this object that is greater
2497      *          than or equal to {@code beginIndex} and less than {@code endIndex},
2498      *          or {@code -1} if the character does not occur.
2499      * @throws  StringIndexOutOfBoundsException if {@code beginIndex}
2500      *          is negative, or {@code endIndex} is larger than the length of
2501      *          this {@code String} object, or {@code beginIndex} is larger than
2502      *          {@code endIndex}.
2503      * @since   21
2504      */
2505     public int indexOf(int ch, int beginIndex, int endIndex) {
2506         checkBoundsBeginEnd(beginIndex, endIndex, length());
2507         return isLatin1() ? StringLatin1.indexOf(value, ch, beginIndex, endIndex)
2508                 : StringUTF16.indexOf(value, ch, beginIndex, endIndex);
2509     }
2510 
2511     /**
2512      * Returns the index within this string of the last occurrence of
2513      * the specified character. For values of {@code ch} in the
2514      * range from 0 to 0xFFFF (inclusive), the index (in Unicode code
2515      * units) returned is the largest value <i>k</i> such that:
2516      * <blockquote><pre>
2517      * this.charAt(<i>k</i>) == ch
2518      * </pre></blockquote>
2519      * is true. For other values of {@code ch}, it is the
2520      * largest value <i>k</i> such that:
2521      * <blockquote><pre>
2522      * this.codePointAt(<i>k</i>) == ch
2523      * </pre></blockquote>
2524      * is true.  In either case, if no such character occurs in this
2525      * string, then {@code -1} is returned.  The
2526      * {@code String} is searched backwards starting at the last
2527      * character.
2528      *
2529      * @param   ch   a character (Unicode code point).
2530      * @return  the index of the last occurrence of the character in the
2531      *          character sequence represented by this object, or
2532      *          {@code -1} if the character does not occur.
2533      */
2534     public int lastIndexOf(int ch) {
2535         return lastIndexOf(ch, length() - 1);
2536     }
2537 
2538     /**
2539      * Returns the index within this string of the last occurrence of
2540      * the specified character, searching backward starting at the
2541      * specified index. For values of {@code ch} in the range
2542      * from 0 to 0xFFFF (inclusive), the index returned is the largest
2543      * value <i>k</i> such that:
2544      * <blockquote><pre>
2545      * (this.charAt(<i>k</i>) == ch) {@code &&} (<i>k</i> &lt;= fromIndex)
2546      * </pre></blockquote>
2547      * is true. For other values of {@code ch}, it is the
2548      * largest value <i>k</i> such that:
2549      * <blockquote><pre>
2550      * (this.codePointAt(<i>k</i>) == ch) {@code &&} (<i>k</i> &lt;= fromIndex)
2551      * </pre></blockquote>
2552      * is true. In either case, if no such character occurs in this
2553      * string at or before position {@code fromIndex}, then
2554      * {@code -1} is returned.
2555      *
2556      * <p>All indices are specified in {@code char} values
2557      * (Unicode code units).
2558      *
2559      * @param   ch          a character (Unicode code point).
2560      * @param   fromIndex   the index to start the search from. There is no
2561      *          restriction on the value of {@code fromIndex}. If it is
2562      *          greater than or equal to the length of this string, it has
2563      *          the same effect as if it were equal to one less than the
2564      *          length of this string: this entire string may be searched.
2565      *          If it is negative, it has the same effect as if it were -1:
2566      *          -1 is returned.
2567      * @return  the index of the last occurrence of the character in the
2568      *          character sequence represented by this object that is less
2569      *          than or equal to {@code fromIndex}, or {@code -1}
2570      *          if the character does not occur before that point.
2571      */
2572     public int lastIndexOf(int ch, int fromIndex) {
2573         return isLatin1() ? StringLatin1.lastIndexOf(value, ch, fromIndex)
2574                           : StringUTF16.lastIndexOf(value, ch, fromIndex);
2575     }
2576 
2577     /**
2578      * Returns the index within this string of the first occurrence of the
2579      * specified substring.
2580      *
2581      * <p>The returned index is the smallest value {@code k} for which:
2582      * <pre>{@code
2583      * this.startsWith(str, k)
2584      * }</pre>
2585      * If no such value of {@code k} exists, then {@code -1} is returned.
2586      *
2587      * @param   str   the substring to search for.
2588      * @return  the index of the first occurrence of the specified substring,
2589      *          or {@code -1} if there is no such occurrence.
2590      */
2591     public int indexOf(String str) {
2592         byte coder = coder();
2593         if (coder == str.coder()) {
2594             return isLatin1() ? StringLatin1.indexOf(value, str.value)
2595                               : StringUTF16.indexOf(value, str.value);
2596         }
2597         if (coder == LATIN1) {  // str.coder == UTF16
2598             return -1;
2599         }
2600         return StringUTF16.indexOfLatin1(value, str.value);
2601     }
2602 
2603     /**
2604      * Returns the index within this string of the first occurrence of the
2605      * specified substring, starting at the specified index.
2606      *
2607      * <p>The returned index is the smallest value {@code k} for which:
2608      * <pre>{@code
2609      *     k >= Math.min(fromIndex, this.length()) &&
2610      *                   this.startsWith(str, k)
2611      * }</pre>
2612      * If no such value of {@code k} exists, then {@code -1} is returned.
2613      *
2614      * @apiNote
2615      * Unlike {@link #substring(int)}, for example, this method does not throw
2616      * an exception when {@code fromIndex} is outside the valid range.
2617      * Rather, it returns -1 when {@code fromIndex} is larger than the length of
2618      * the string.
2619      * This result is, by itself, indistinguishable from a genuine absence of
2620      * {@code str} in the string.
2621      * If stricter behavior is needed, {@link #indexOf(String, int, int)}
2622      * should be considered instead.
2623      * On {@link String} {@code s} and a non-empty {@code str}, for example,
2624      * {@code s.indexOf(str, fromIndex, s.length())} would throw if
2625      * {@code fromIndex} were larger than the string length, or were negative.
2626      *
2627      * @param   str         the substring to search for.
2628      * @param   fromIndex   the index from which to start the search.
2629      * @return  the index of the first occurrence of the specified substring,
2630      *          starting at the specified index,
2631      *          or {@code -1} if there is no such occurrence.
2632      */
2633     public int indexOf(String str, int fromIndex) {
2634         return indexOf(value, coder(), length(), str, fromIndex);
2635     }
2636 
2637     /**
2638      * Returns the index of the first occurrence of the specified substring
2639      * within the specified index range of {@code this} string.
2640      *
2641      * <p>This method returns the same result as the one of the invocation
2642      * <pre>{@code
2643      *     s.substring(beginIndex, endIndex).indexOf(str) + beginIndex
2644      * }</pre>
2645      * if the index returned by {@link #indexOf(String)} is non-negative,
2646      * and returns -1 otherwise.
2647      * (No substring is instantiated, though.)
2648      *
2649      * @param   str         the substring to search for.
2650      * @param   beginIndex  the index to start the search from (included).
2651      * @param   endIndex    the index to stop the search at (excluded).
2652      * @return  the index of the first occurrence of the specified substring
2653      *          within the specified index range,
2654      *          or {@code -1} if there is no such occurrence.
2655      * @throws  StringIndexOutOfBoundsException if {@code beginIndex}
2656      *          is negative, or {@code endIndex} is larger than the length of
2657      *          this {@code String} object, or {@code beginIndex} is larger than
2658      *          {@code endIndex}.
2659      * @since   21
2660      */
2661     public int indexOf(String str, int beginIndex, int endIndex) {
2662         if (str.length() == 1) {
2663             /* Simple optimization, can be omitted without behavioral impact */
2664             return indexOf(str.charAt(0), beginIndex, endIndex);
2665         }
2666         checkBoundsBeginEnd(beginIndex, endIndex, length());
2667         return indexOf(value, coder(), endIndex, str, beginIndex);
2668     }
2669 
2670     /**
2671      * Code shared by String and AbstractStringBuilder to do searches. The
2672      * source is the character array being searched, and the target
2673      * is the string being searched for.
2674      *
2675      * @param   src       the characters being searched.
2676      * @param   srcCoder  the coder of the source string.
2677      * @param   srcCount  last index (exclusive) in the source string.
2678      * @param   tgtStr    the characters being searched for.
2679      * @param   fromIndex the index to begin searching from.
2680      */
2681     static int indexOf(byte[] src, byte srcCoder, int srcCount,
2682                        String tgtStr, int fromIndex) {
2683         fromIndex = Math.clamp(fromIndex, 0, srcCount);
2684         int tgtCount = tgtStr.length();
2685         if (tgtCount > srcCount - fromIndex) {
2686             return -1;
2687         }
2688         if (tgtCount == 0) {
2689             return fromIndex;
2690         }
2691 
2692         byte[] tgt = tgtStr.value;
2693         byte tgtCoder = tgtStr.coder();
2694         if (srcCoder == tgtCoder) {
2695             return srcCoder == LATIN1
2696                 ? StringLatin1.indexOf(src, srcCount, tgt, tgtCount, fromIndex)
2697                 : StringUTF16.indexOf(src, srcCount, tgt, tgtCount, fromIndex);
2698         }
2699         if (srcCoder == LATIN1) {    //  && tgtCoder == UTF16
2700             return -1;
2701         }
2702         // srcCoder == UTF16 && tgtCoder == LATIN1) {
2703         return StringUTF16.indexOfLatin1(src, srcCount, tgt, tgtCount, fromIndex);
2704     }
2705 
2706     /**
2707      * Returns the index within this string of the last occurrence of the
2708      * specified substring.  The last occurrence of the empty string ""
2709      * is considered to occur at the index value {@code this.length()}.
2710      *
2711      * <p>The returned index is the largest value {@code k} for which:
2712      * <pre>{@code
2713      * this.startsWith(str, k)
2714      * }</pre>
2715      * If no such value of {@code k} exists, then {@code -1} is returned.
2716      *
2717      * @param   str   the substring to search for.
2718      * @return  the index of the last occurrence of the specified substring,
2719      *          or {@code -1} if there is no such occurrence.
2720      */
2721     public int lastIndexOf(String str) {
2722         return lastIndexOf(str, length());
2723     }
2724 
2725     /**
2726      * Returns the index within this string of the last occurrence of the
2727      * specified substring, searching backward starting at the specified index.
2728      *
2729      * <p>The returned index is the largest value {@code k} for which:
2730      * <pre>{@code
2731      *     k <= Math.min(fromIndex, this.length()) &&
2732      *                   this.startsWith(str, k)
2733      * }</pre>
2734      * If no such value of {@code k} exists, then {@code -1} is returned.
2735      *
2736      * @param   str         the substring to search for.
2737      * @param   fromIndex   the index to start the search from.
2738      * @return  the index of the last occurrence of the specified substring,
2739      *          searching backward from the specified index,
2740      *          or {@code -1} if there is no such occurrence.
2741      */
2742     public int lastIndexOf(String str, int fromIndex) {
2743         return lastIndexOf(value, coder(), length(), str, fromIndex);
2744     }
2745 
2746     /**
2747      * Code shared by String and AbstractStringBuilder to do searches. The
2748      * source is the character array being searched, and the target
2749      * is the string being searched for.
2750      *
2751      * @param   src         the characters being searched.
2752      * @param   srcCoder    coder handles the mapping between bytes/chars
2753      * @param   srcCount    count of the source string.
2754      * @param   tgtStr      the characters being searched for.
2755      * @param   fromIndex   the index to begin searching from.
2756      */
2757     static int lastIndexOf(byte[] src, byte srcCoder, int srcCount,
2758                            String tgtStr, int fromIndex) {
2759         byte[] tgt = tgtStr.value;
2760         byte tgtCoder = tgtStr.coder();
2761         int tgtCount = tgtStr.length();
2762         /*
2763          * Check arguments; return immediately where possible. For
2764          * consistency, don't check for null str.
2765          */
2766         int rightIndex = srcCount - tgtCount;
2767         if (fromIndex > rightIndex) {
2768             fromIndex = rightIndex;
2769         }
2770         if (fromIndex < 0) {
2771             return -1;
2772         }
2773         /* Empty string always matches. */
2774         if (tgtCount == 0) {
2775             return fromIndex;
2776         }
2777         if (srcCoder == tgtCoder) {
2778             return srcCoder == LATIN1
2779                 ? StringLatin1.lastIndexOf(src, srcCount, tgt, tgtCount, fromIndex)
2780                 : StringUTF16.lastIndexOf(src, srcCount, tgt, tgtCount, fromIndex);
2781         }
2782         if (srcCoder == LATIN1) {    // && tgtCoder == UTF16
2783             return -1;
2784         }
2785         // srcCoder == UTF16 && tgtCoder == LATIN1
2786         return StringUTF16.lastIndexOfLatin1(src, srcCount, tgt, tgtCount, fromIndex);
2787     }
2788 
2789     /**
2790      * Returns a string that is a substring of this string. The
2791      * substring begins with the character at the specified index and
2792      * extends to the end of this string. <p>
2793      * Examples:
2794      * <blockquote><pre>
2795      * "unhappy".substring(2) returns "happy"
2796      * "Harbison".substring(3) returns "bison"
2797      * "emptiness".substring(9) returns "" (an empty string)
2798      * </pre></blockquote>
2799      *
2800      * @param      beginIndex   the beginning index, inclusive.
2801      * @return     the specified substring.
2802      * @throws     IndexOutOfBoundsException  if
2803      *             {@code beginIndex} is negative or larger than the
2804      *             length of this {@code String} object.
2805      */
2806     public String substring(int beginIndex) {
2807         return substring(beginIndex, length());
2808     }
2809 
2810     /**
2811      * Returns a string that is a substring of this string. The
2812      * substring begins at the specified {@code beginIndex} and
2813      * extends to the character at index {@code endIndex - 1}.
2814      * Thus the length of the substring is {@code endIndex-beginIndex}.
2815      * <p>
2816      * Examples:
2817      * <blockquote><pre>
2818      * "hamburger".substring(4, 8) returns "urge"
2819      * "smiles".substring(1, 5) returns "mile"
2820      * </pre></blockquote>
2821      *
2822      * @param      beginIndex   the beginning index, inclusive.
2823      * @param      endIndex     the ending index, exclusive.
2824      * @return     the specified substring.
2825      * @throws     IndexOutOfBoundsException  if the
2826      *             {@code beginIndex} is negative, or
2827      *             {@code endIndex} is larger than the length of
2828      *             this {@code String} object, or
2829      *             {@code beginIndex} is larger than
2830      *             {@code endIndex}.
2831      */
2832     public String substring(int beginIndex, int endIndex) {
2833         int length = length();
2834         checkBoundsBeginEnd(beginIndex, endIndex, length);
2835         if (beginIndex == 0 && endIndex == length) {
2836             return this;
2837         }
2838         int subLen = endIndex - beginIndex;
2839         return isLatin1() ? StringLatin1.newString(value, beginIndex, subLen)
2840                           : StringUTF16.newString(value, beginIndex, subLen);
2841     }
2842 
2843     /**
2844      * Returns a character sequence that is a subsequence of this sequence.
2845      *
2846      * <p> An invocation of this method of the form
2847      *
2848      * <blockquote><pre>
2849      * str.subSequence(begin,&nbsp;end)</pre></blockquote>
2850      *
2851      * behaves in exactly the same way as the invocation
2852      *
2853      * <blockquote><pre>
2854      * str.substring(begin,&nbsp;end)</pre></blockquote>
2855      *
2856      * @apiNote
2857      * This method is defined so that the {@code String} class can implement
2858      * the {@link CharSequence} interface.
2859      *
2860      * @param   beginIndex   the begin index, inclusive.
2861      * @param   endIndex     the end index, exclusive.
2862      * @return  the specified subsequence.
2863      *
2864      * @throws  IndexOutOfBoundsException
2865      *          if {@code beginIndex} or {@code endIndex} is negative,
2866      *          if {@code endIndex} is greater than {@code length()},
2867      *          or if {@code beginIndex} is greater than {@code endIndex}
2868      *
2869      * @since 1.4
2870      */
2871     public CharSequence subSequence(int beginIndex, int endIndex) {
2872         return this.substring(beginIndex, endIndex);
2873     }
2874 
2875     /**
2876      * Concatenates the specified string to the end of this string.
2877      * <p>
2878      * If the length of the argument string is {@code 0}, then this
2879      * {@code String} object is returned. Otherwise, a
2880      * {@code String} object is returned that represents a character
2881      * sequence that is the concatenation of the character sequence
2882      * represented by this {@code String} object and the character
2883      * sequence represented by the argument string.<p>
2884      * Examples:
2885      * <blockquote><pre>
2886      * "cares".concat("s") returns "caress"
2887      * "to".concat("get").concat("her") returns "together"
2888      * </pre></blockquote>
2889      *
2890      * @param   str   the {@code String} that is concatenated to the end
2891      *                of this {@code String}.
2892      * @return  a string that represents the concatenation of this object's
2893      *          characters followed by the string argument's characters.
2894      */
2895     public String concat(String str) {
2896         if (str.isEmpty()) {
2897             return this;
2898         }
2899         return StringConcatHelper.simpleConcat(this, str);
2900     }
2901 
2902     /**
2903      * Returns a string resulting from replacing all occurrences of
2904      * {@code oldChar} in this string with {@code newChar}.
2905      * <p>
2906      * If the character {@code oldChar} does not occur in the
2907      * character sequence represented by this {@code String} object,
2908      * then a reference to this {@code String} object is returned.
2909      * Otherwise, a {@code String} object is returned that
2910      * represents a character sequence identical to the character sequence
2911      * represented by this {@code String} object, except that every
2912      * occurrence of {@code oldChar} is replaced by an occurrence
2913      * of {@code newChar}.
2914      * <p>
2915      * Examples:
2916      * <blockquote><pre>
2917      * "mesquite in your cellar".replace('e', 'o')
2918      *         returns "mosquito in your collar"
2919      * "the war of baronets".replace('r', 'y')
2920      *         returns "the way of bayonets"
2921      * "sparring with a purple porpoise".replace('p', 't')
2922      *         returns "starring with a turtle tortoise"
2923      * "JonL".replace('q', 'x') returns "JonL" (no change)
2924      * </pre></blockquote>
2925      *
2926      * @param   oldChar   the old character.
2927      * @param   newChar   the new character.
2928      * @return  a string derived from this string by replacing every
2929      *          occurrence of {@code oldChar} with {@code newChar}.
2930      */
2931     public String replace(char oldChar, char newChar) {
2932         if (oldChar != newChar) {
2933             String ret = isLatin1() ? StringLatin1.replace(value, oldChar, newChar)
2934                                     : StringUTF16.replace(value, oldChar, newChar);
2935             if (ret != null) {
2936                 return ret;
2937             }
2938         }
2939         return this;
2940     }
2941 
2942     /**
2943      * Tells whether or not this string matches the given <a
2944      * href="../util/regex/Pattern.html#sum">regular expression</a>.
2945      *
2946      * <p> An invocation of this method of the form
2947      * <i>str</i>{@code .matches(}<i>regex</i>{@code )} yields exactly the
2948      * same result as the expression
2949      *
2950      * <blockquote>
2951      * {@link java.util.regex.Pattern}.{@link java.util.regex.Pattern#matches(String,CharSequence)
2952      * matches(<i>regex</i>, <i>str</i>)}
2953      * </blockquote>
2954      *
2955      * @param   regex
2956      *          the regular expression to which this string is to be matched
2957      *
2958      * @return  {@code true} if, and only if, this string matches the
2959      *          given regular expression
2960      *
2961      * @throws  PatternSyntaxException
2962      *          if the regular expression's syntax is invalid
2963      *
2964      * @see java.util.regex.Pattern
2965      *
2966      * @since 1.4
2967      */
2968     public boolean matches(String regex) {
2969         return Pattern.matches(regex, this);
2970     }
2971 
2972     /**
2973      * Returns true if and only if this string contains the specified
2974      * sequence of char values.
2975      *
2976      * @param s the sequence to search for
2977      * @return true if this string contains {@code s}, false otherwise
2978      * @since 1.5
2979      */
2980     public boolean contains(CharSequence s) {
2981         return indexOf(s.toString()) >= 0;
2982     }
2983 
2984     /**
2985      * Replaces the first substring of this string that matches the given <a
2986      * href="../util/regex/Pattern.html#sum">regular expression</a> with the
2987      * given replacement.
2988      *
2989      * <p> An invocation of this method of the form
2990      * <i>str</i>{@code .replaceFirst(}<i>regex</i>{@code ,} <i>repl</i>{@code )}
2991      * yields exactly the same result as the expression
2992      *
2993      * <blockquote>
2994      * <code>
2995      * {@link java.util.regex.Pattern}.{@link
2996      * java.util.regex.Pattern#compile(String) compile}(<i>regex</i>).{@link
2997      * java.util.regex.Pattern#matcher(java.lang.CharSequence) matcher}(<i>str</i>).{@link
2998      * java.util.regex.Matcher#replaceFirst(String) replaceFirst}(<i>repl</i>)
2999      * </code>
3000      * </blockquote>
3001      *
3002      *<p>
3003      * Note that backslashes ({@code \}) and dollar signs ({@code $}) in the
3004      * replacement string may cause the results to be different than if it were
3005      * being treated as a literal replacement string; see
3006      * {@link java.util.regex.Matcher#replaceFirst}.
3007      * Use {@link java.util.regex.Matcher#quoteReplacement} to suppress the special
3008      * meaning of these characters, if desired.
3009      *
3010      * @param   regex
3011      *          the regular expression to which this string is to be matched
3012      * @param   replacement
3013      *          the string to be substituted for the first match
3014      *
3015      * @return  The resulting {@code String}
3016      *
3017      * @throws  PatternSyntaxException
3018      *          if the regular expression's syntax is invalid
3019      *
3020      * @see java.util.regex.Pattern
3021      *
3022      * @since 1.4
3023      */
3024     public String replaceFirst(String regex, String replacement) {
3025         return Pattern.compile(regex).matcher(this).replaceFirst(replacement);
3026     }
3027 
3028     /**
3029      * Replaces each substring of this string that matches the given <a
3030      * href="../util/regex/Pattern.html#sum">regular expression</a> with the
3031      * given replacement.
3032      *
3033      * <p> An invocation of this method of the form
3034      * <i>str</i>{@code .replaceAll(}<i>regex</i>{@code ,} <i>repl</i>{@code )}
3035      * yields exactly the same result as the expression
3036      *
3037      * <blockquote>
3038      * <code>
3039      * {@link java.util.regex.Pattern}.{@link
3040      * java.util.regex.Pattern#compile(String) compile}(<i>regex</i>).{@link
3041      * java.util.regex.Pattern#matcher(java.lang.CharSequence) matcher}(<i>str</i>).{@link
3042      * java.util.regex.Matcher#replaceAll(String) replaceAll}(<i>repl</i>)
3043      * </code>
3044      * </blockquote>
3045      *
3046      *<p>
3047      * Note that backslashes ({@code \}) and dollar signs ({@code $}) in the
3048      * replacement string may cause the results to be different than if it were
3049      * being treated as a literal replacement string; see
3050      * {@link java.util.regex.Matcher#replaceAll Matcher.replaceAll}.
3051      * Use {@link java.util.regex.Matcher#quoteReplacement} to suppress the special
3052      * meaning of these characters, if desired.
3053      *
3054      * @param   regex
3055      *          the regular expression to which this string is to be matched
3056      * @param   replacement
3057      *          the string to be substituted for each match
3058      *
3059      * @return  The resulting {@code String}
3060      *
3061      * @throws  PatternSyntaxException
3062      *          if the regular expression's syntax is invalid
3063      *
3064      * @see java.util.regex.Pattern
3065      *
3066      * @since 1.4
3067      */
3068     public String replaceAll(String regex, String replacement) {
3069         return Pattern.compile(regex).matcher(this).replaceAll(replacement);
3070     }
3071 
3072     /**
3073      * Replaces each substring of this string that matches the literal target
3074      * sequence with the specified literal replacement sequence. The
3075      * replacement proceeds from the beginning of the string to the end, for
3076      * example, replacing "aa" with "b" in the string "aaa" will result in
3077      * "ba" rather than "ab".
3078      *
3079      * @param  target The sequence of char values to be replaced
3080      * @param  replacement The replacement sequence of char values
3081      * @return  The resulting string
3082      * @since 1.5
3083      */
3084     public String replace(CharSequence target, CharSequence replacement) {
3085         String trgtStr = target.toString();
3086         String replStr = replacement.toString();
3087         int thisLen = length();
3088         int trgtLen = trgtStr.length();
3089         int replLen = replStr.length();
3090 
3091         if (trgtLen > 0) {
3092             if (trgtLen == 1 && replLen == 1) {
3093                 return replace(trgtStr.charAt(0), replStr.charAt(0));
3094             }
3095 
3096             boolean thisIsLatin1 = this.isLatin1();
3097             boolean trgtIsLatin1 = trgtStr.isLatin1();
3098             boolean replIsLatin1 = replStr.isLatin1();
3099             String ret = (thisIsLatin1 && trgtIsLatin1 && replIsLatin1)
3100                     ? StringLatin1.replace(value, thisLen,
3101                                            trgtStr.value, trgtLen,
3102                                            replStr.value, replLen)
3103                     : StringUTF16.replace(value, thisLen, thisIsLatin1,
3104                                           trgtStr.value, trgtLen, trgtIsLatin1,
3105                                           replStr.value, replLen, replIsLatin1);
3106             if (ret != null) {
3107                 return ret;
3108             }
3109             return this;
3110 
3111         } else { // trgtLen == 0
3112             int resultLen;
3113             try {
3114                 resultLen = Math.addExact(thisLen, Math.multiplyExact(
3115                         Math.addExact(thisLen, 1), replLen));
3116             } catch (ArithmeticException ignored) {
3117                 throw new OutOfMemoryError("Required length exceeds implementation limit");
3118             }
3119 
3120             StringBuilder sb = new StringBuilder(resultLen);
3121             sb.append(replStr);
3122             for (int i = 0; i < thisLen; ++i) {
3123                 sb.append(charAt(i)).append(replStr);
3124             }
3125             return sb.toString();
3126         }
3127     }
3128 
3129     /**
3130      * Splits this string around matches of the given
3131      * <a href="../util/regex/Pattern.html#sum">regular expression</a>.
3132      *
3133      * <p> The array returned by this method contains each substring of this
3134      * string that is terminated by another substring that matches the given
3135      * expression or is terminated by the end of the string.  The substrings in
3136      * the array are in the order in which they occur in this string.  If the
3137      * expression does not match any part of the input then the resulting array
3138      * has just one element, namely this string.
3139      *
3140      * <p> When there is a positive-width match at the beginning of this
3141      * string then an empty leading substring is included at the beginning
3142      * of the resulting array. A zero-width match at the beginning however
3143      * never produces such empty leading substring.
3144      *
3145      * <p> The {@code limit} parameter controls the number of times the
3146      * pattern is applied and therefore affects the length of the resulting
3147      * array.
3148      * <ul>
3149      *    <li><p>
3150      *    If the <i>limit</i> is positive then the pattern will be applied
3151      *    at most <i>limit</i>&nbsp;-&nbsp;1 times, the array's length will be
3152      *    no greater than <i>limit</i>, and the array's last entry will contain
3153      *    all input beyond the last matched delimiter.</p></li>
3154      *
3155      *    <li><p>
3156      *    If the <i>limit</i> is zero then the pattern will be applied as
3157      *    many times as possible, the array can have any length, and trailing
3158      *    empty strings will be discarded.</p></li>
3159      *
3160      *    <li><p>
3161      *    If the <i>limit</i> is negative then the pattern will be applied
3162      *    as many times as possible and the array can have any length.</p></li>
3163      * </ul>
3164      *
3165      * <p> The string {@code "boo:and:foo"}, for example, yields the
3166      * following results with these parameters:
3167      *
3168      * <blockquote><table class="plain">
3169      * <caption style="display:none">Split example showing regex, limit, and result</caption>
3170      * <thead>
3171      * <tr>
3172      *     <th scope="col">Regex</th>
3173      *     <th scope="col">Limit</th>
3174      *     <th scope="col">Result</th>
3175      * </tr>
3176      * </thead>
3177      * <tbody>
3178      * <tr><th scope="row" rowspan="3" style="font-weight:normal">:</th>
3179      *     <th scope="row" style="font-weight:normal; text-align:right; padding-right:1em">2</th>
3180      *     <td>{@code { "boo", "and:foo" }}</td></tr>
3181      * <tr><!-- : -->
3182      *     <th scope="row" style="font-weight:normal; text-align:right; padding-right:1em">5</th>
3183      *     <td>{@code { "boo", "and", "foo" }}</td></tr>
3184      * <tr><!-- : -->
3185      *     <th scope="row" style="font-weight:normal; text-align:right; padding-right:1em">-2</th>
3186      *     <td>{@code { "boo", "and", "foo" }}</td></tr>
3187      * <tr><th scope="row" rowspan="3" style="font-weight:normal">o</th>
3188      *     <th scope="row" style="font-weight:normal; text-align:right; padding-right:1em">5</th>
3189      *     <td>{@code { "b", "", ":and:f", "", "" }}</td></tr>
3190      * <tr><!-- o -->
3191      *     <th scope="row" style="font-weight:normal; text-align:right; padding-right:1em">-2</th>
3192      *     <td>{@code { "b", "", ":and:f", "", "" }}</td></tr>
3193      * <tr><!-- o -->
3194      *     <th scope="row" style="font-weight:normal; text-align:right; padding-right:1em">0</th>
3195      *     <td>{@code { "b", "", ":and:f" }}</td></tr>
3196      * </tbody>
3197      * </table></blockquote>
3198      *
3199      * <p> An invocation of this method of the form
3200      * <i>str.</i>{@code split(}<i>regex</i>{@code ,}&nbsp;<i>n</i>{@code )}
3201      * yields the same result as the expression
3202      *
3203      * <blockquote>
3204      * <code>
3205      * {@link java.util.regex.Pattern}.{@link
3206      * java.util.regex.Pattern#compile(String) compile}(<i>regex</i>).{@link
3207      * java.util.regex.Pattern#split(java.lang.CharSequence,int) split}(<i>str</i>,&nbsp;<i>n</i>)
3208      * </code>
3209      * </blockquote>
3210      *
3211      *
3212      * @param  regex
3213      *         the delimiting regular expression
3214      *
3215      * @param  limit
3216      *         the result threshold, as described above
3217      *
3218      * @return  the array of strings computed by splitting this string
3219      *          around matches of the given regular expression
3220      *
3221      * @throws  PatternSyntaxException
3222      *          if the regular expression's syntax is invalid
3223      *
3224      * @see java.util.regex.Pattern
3225      *
3226      * @since 1.4
3227      */
3228     public String[] split(String regex, int limit) {
3229         return split(regex, limit, false);
3230     }
3231 
3232     /**
3233      * Splits this string around matches of the given regular expression and
3234      * returns both the strings and the matching delimiters.
3235      *
3236      * <p> The array returned by this method contains each substring of this
3237      * string that is terminated by another substring that matches the given
3238      * expression or is terminated by the end of the string.
3239      * Each substring is immediately followed by the subsequence (the delimiter)
3240      * that matches the given expression, <em>except</em> for the last
3241      * substring, which is not followed by anything.
3242      * The substrings in the array and the delimiters are in the order in which
3243      * they occur in the input.
3244      * If the expression does not match any part of the input then the resulting
3245      * array has just one element, namely this string.
3246      *
3247      * <p> When there is a positive-width match at the beginning of this
3248      * string then an empty leading substring is included at the beginning
3249      * of the resulting array. A zero-width match at the beginning however
3250      * never produces such empty leading substring nor the empty delimiter.
3251      *
3252      * <p> The {@code limit} parameter controls the number of times the
3253      * pattern is applied and therefore affects the length of the resulting
3254      * array.
3255      * <ul>
3256      *    <li> If the <i>limit</i> is positive then the pattern will be applied
3257      *    at most <i>limit</i>&nbsp;-&nbsp;1 times, the array's length will be
3258      *    no greater than 2 &times; <i>limit</i> - 1, and the array's last
3259      *    entry will contain all input beyond the last matched delimiter.</li>
3260      *
3261      *    <li> If the <i>limit</i> is zero then the pattern will be applied as
3262      *    many times as possible, the array can have any length, and trailing
3263      *    empty strings will be discarded.</li>
3264      *
3265      *    <li> If the <i>limit</i> is negative then the pattern will be applied
3266      *    as many times as possible and the array can have any length.</li>
3267      * </ul>
3268      *
3269      * <p> The input {@code "boo:::and::foo"}, for example, yields the following
3270      * results with these parameters:
3271      *
3272      * <table class="plain" style="margin-left:2em;">
3273      * <caption style="display:none">Split example showing regex, limit, and result</caption>
3274      * <thead>
3275      * <tr>
3276      *     <th scope="col">Regex</th>
3277      *     <th scope="col">Limit</th>
3278      *     <th scope="col">Result</th>
3279      * </tr>
3280      * </thead>
3281      * <tbody>
3282      * <tr><th scope="row" rowspan="3" style="font-weight:normal">:+</th>
3283      *     <th scope="row" style="font-weight:normal; text-align:right; padding-right:1em">2</th>
3284      *     <td>{@code { "boo", ":::", "and::foo" }}</td></tr>
3285      * <tr><!-- : -->
3286      *     <th scope="row" style="font-weight:normal; text-align:right; padding-right:1em">5</th>
3287      *     <td>{@code { "boo", ":::", "and", "::", "foo" }}</td></tr>
3288      * <tr><!-- : -->
3289      *     <th scope="row" style="font-weight:normal; text-align:right; padding-right:1em">-1</th>
3290      *     <td>{@code { "boo", ":::", "and", "::", "foo" }}</td></tr>
3291      * <tr><th scope="row" rowspan="3" style="font-weight:normal">o</th>
3292      *     <th scope="row" style="font-weight:normal; text-align:right; padding-right:1em">5</th>
3293      *     <td>{@code { "b", "o", "", "o", ":::and::f", "o", "", "o", "" }}</td></tr>
3294      * <tr><!-- o -->
3295      *     <th scope="row" style="font-weight:normal; text-align:right; padding-right:1em">-1</th>
3296      *     <td>{@code { "b", "o", "", "o", ":::and::f", "o", "", "o", "" }}</td></tr>
3297      * <tr><!-- o -->
3298      *     <th scope="row" style="font-weight:normal; text-align:right; padding-right:1em">0</th>
3299      *     <td>{@code { "b", "o", "", "o", ":::and::f", "o", "", "o" }}</td></tr>
3300      * </tbody>
3301      * </table>
3302      *
3303      * @apiNote An invocation of this method of the form
3304      * <i>str.</i>{@code splitWithDelimiters(}<i>regex</i>{@code ,}&nbsp;<i>n</i>{@code )}
3305      * yields the same result as the expression
3306      *
3307      * <blockquote>
3308      * <code>
3309      * {@link java.util.regex.Pattern}.{@link
3310      * java.util.regex.Pattern#compile(String) compile}(<i>regex</i>).{@link
3311      * java.util.regex.Pattern#splitWithDelimiters(CharSequence,int) splitWithDelimiters}(<i>str</i>,&nbsp;<i>n</i>)
3312      * </code>
3313      * </blockquote>
3314      *
3315      * @param  regex
3316      *         the delimiting regular expression
3317      *
3318      * @param  limit
3319      *         the result threshold, as described above
3320      *
3321      * @return  the array of strings computed by splitting this string
3322      *          around matches of the given regular expression, alternating
3323      *          substrings and matching delimiters
     *
     * @throws  PatternSyntaxException
     *          if the regular expression's syntax is invalid
     *
     * @since   21
3326      */
3327     public String[] splitWithDelimiters(String regex, int limit) {
3328         return split(regex, limit, true);
3329     }
3330 
    /**
     * Shared implementation behind {@code split(String, int)},
     * {@code split(String)} and {@code splitWithDelimiters(String, int)}.
     * Picks between a single-character fast path and the general
     * {@code Pattern}-based path.
     *
     * @param regex          the delimiting regular expression
     * @param limit          the result threshold, passed through unchanged
     *                       to whichever path is taken
     * @param withDelimiters {@code true} to use the delimiter-preserving
     *                       variant, {@code false} for a plain split
     * @return the array produced by the selected split implementation
     */
    private String[] split(String regex, int limit, boolean withDelimiters) {
        /* fastpath if the regex is a
         * (1) one-char String and this character is not one of the
         *     RegEx's meta characters ".$|()[{^?*+\\", or
         * (2) two-char String and the first char is the backslash and
         *     the second is not the ascii digit or ascii letter.
         *
         * In both cases the candidate delimiter is stored into ch as a side
         * effect of evaluating the condition. The range tests are written as
         * ((ch - lo) | (hi - ch)) < 0: the OR of the two differences is
         * negative exactly when ch lies outside [lo, hi]. Finally, a ch in
         * the surrogate range (MIN_HIGH_SURROGATE..MAX_LOW_SURROGATE) is
         * excluded from the fastpath and falls through to Pattern.
         */
        char ch = 0;
        if (((regex.length() == 1 &&
                ".$|()[{^?*+\\".indexOf(ch = regex.charAt(0)) == -1) ||
                (regex.length() == 2 &&
                        regex.charAt(0) == '\\' &&
                        (((ch = regex.charAt(1))-'0')|('9'-ch)) < 0 &&
                        ((ch-'a')|('z'-ch)) < 0 &&
                        ((ch-'A')|('Z'-ch)) < 0)) &&
                (ch < Character.MIN_HIGH_SURROGATE ||
                        ch > Character.MAX_LOW_SURROGATE))
        {
            // All the checks above can potentially be constant folded by
            // a JIT/AOT compiler when the regex is a constant string.
            // That requires method inlining of the checks, which is only
            // possible when the actual split logic is in a separate method
            // because the large split loop can usually not be inlined.
            return split(ch, limit, withDelimiters);
        }
        // General case: compile the expression and let Pattern do the
        // splitting, choosing the variant that matches withDelimiters.
        Pattern pattern = Pattern.compile(regex);
        return withDelimiters
                ? pattern.splitWithDelimiters(this, limit)
                : pattern.split(this, limit);
    }
3361 
3362     private String[] split(char ch, int limit, boolean withDelimiters) {
3363         int matchCount = 0;
3364         int off = 0;
3365         int next;
3366         boolean limited = limit > 0;
3367         ArrayList<String> list = new ArrayList<>();
3368         String del = withDelimiters ? String.valueOf(ch) : null;
3369         while ((next = indexOf(ch, off)) != -1) {
3370             if (!limited || matchCount < limit - 1) {
3371                 list.add(substring(off, next));
3372                 if (withDelimiters) {
3373                     list.add(del);
3374                 }
3375                 off = next + 1;
3376                 ++matchCount;
3377             } else {    // last one
3378                 int last = length();
3379                 list.add(substring(off, last));
3380                 off = last;
3381                 ++matchCount;
3382                 break;
3383             }
3384         }
3385         // If no match was found, return this
3386         if (off == 0)
3387             return new String[] {this};
3388 
3389         // Add remaining segment
3390         if (!limited || matchCount < limit)
3391             list.add(substring(off, length()));
3392 
3393         // Construct result
3394         int resultSize = list.size();
3395         if (limit == 0) {
3396             while (resultSize > 0 && list.get(resultSize - 1).isEmpty()) {
3397                 resultSize--;
3398             }
3399         }
3400         String[] result = new String[resultSize];
3401         return list.subList(0, resultSize).toArray(result);
3402     }
3403 
3404     /**
3405      * Splits this string around matches of the given <a
3406      * href="../util/regex/Pattern.html#sum">regular expression</a>.
3407      *
3408      * <p> This method works as if by invoking the two-argument {@link
3409      * #split(String, int) split} method with the given expression and a limit
3410      * argument of zero.  Trailing empty strings are therefore not included in
3411      * the resulting array.
3412      *
3413      * <p> The string {@code "boo:and:foo"}, for example, yields the following
3414      * results with these expressions:
3415      *
3416      * <blockquote><table class="plain">
3417      * <caption style="display:none">Split examples showing regex and result</caption>
3418      * <thead>
3419      * <tr>
3420      *  <th scope="col">Regex</th>
3421      *  <th scope="col">Result</th>
3422      * </tr>
3423      * </thead>
3424      * <tbody>
     * <tr><th scope="row" style="font-weight:normal">:</th>
     *     <td>{@code { "boo", "and", "foo" }}</td></tr>
     * <tr><th scope="row" style="font-weight:normal">o</th>
3428      *     <td>{@code { "b", "", ":and:f" }}</td></tr>
3429      * </tbody>
3430      * </table></blockquote>
3431      *
3432      *
3433      * @param  regex
3434      *         the delimiting regular expression
3435      *
3436      * @return  the array of strings computed by splitting this string
3437      *          around matches of the given regular expression
3438      *
3439      * @throws  PatternSyntaxException
3440      *          if the regular expression's syntax is invalid
3441      *
3442      * @see java.util.regex.Pattern
3443      *
3444      * @since 1.4
3445      */
3446     public String[] split(String regex) {
3447         return split(regex, 0, false);
3448     }
3449 
3450     /**
3451      * Returns a new String composed of copies of the
3452      * {@code CharSequence elements} joined together with a copy of
3453      * the specified {@code delimiter}.
3454      *
3455      * <blockquote>For example,
3456      * <pre>{@code
3457      *     String message = String.join("-", "Java", "is", "cool");
3458      *     // message returned is: "Java-is-cool"
3459      * }</pre></blockquote>
3460      *
3461      * Note that if an element is null, then {@code "null"} is added.
3462      *
3463      * @param  delimiter the delimiter that separates each element
3464      * @param  elements the elements to join together.
3465      *
3466      * @return a new {@code String} that is composed of the {@code elements}
3467      *         separated by the {@code delimiter}
3468      *
3469      * @throws NullPointerException If {@code delimiter} or {@code elements}
3470      *         is {@code null}
3471      *
3472      * @see java.util.StringJoiner
3473      * @since 1.8
3474      */
3475     public static String join(CharSequence delimiter, CharSequence... elements) {
3476         var delim = delimiter.toString();
3477         var elems = new String[elements.length];
3478         for (int i = 0; i < elements.length; i++) {
3479             elems[i] = String.valueOf(elements[i]);
3480         }
3481         return join("", "", delim, elems, elems.length);
3482     }
3483 
3484     /**
3485      * Designated join routine.
3486      *
3487      * @param prefix the non-null prefix
3488      * @param suffix the non-null suffix
3489      * @param delimiter the non-null delimiter
3490      * @param elements the non-null array of non-null elements
3491      * @param size the number of elements in the array (<= elements.length)
3492      * @return the joined string
3493      */
3494     @ForceInline
3495     static String join(String prefix, String suffix, String delimiter, String[] elements, int size) {
3496         int icoder = prefix.coder() | suffix.coder();
3497         long len = (long) prefix.length() + suffix.length();
3498         if (size > 1) { // when there are more than one element, size - 1 delimiters will be emitted
3499             len += (long) (size - 1) * delimiter.length();
3500             icoder |= delimiter.coder();
3501         }
3502         // assert len > 0L; // max: (long) Integer.MAX_VALUE << 32
3503         // following loop will add max: (long) Integer.MAX_VALUE * Integer.MAX_VALUE to len
3504         // so len can overflow at most once
3505         for (int i = 0; i < size; i++) {
3506             var el = elements[i];
3507             len += el.length();
3508             icoder |= el.coder();
3509         }
3510         byte coder = (byte) icoder;
3511         // long len overflow check, char -> byte length, int len overflow check
3512         if (len < 0L || (len <<= coder) != (int) len) {
3513             throw new OutOfMemoryError("Requested string length exceeds VM limit");
3514         }
3515         byte[] value = StringConcatHelper.newArray(len);
3516 
3517         int off = 0;
3518         prefix.getBytes(value, off, coder); off += prefix.length();
3519         if (size > 0) {
3520             var el = elements[0];
3521             el.getBytes(value, off, coder); off += el.length();
3522             for (int i = 1; i < size; i++) {
3523                 delimiter.getBytes(value, off, coder); off += delimiter.length();
3524                 el = elements[i];
3525                 el.getBytes(value, off, coder); off += el.length();
3526             }
3527         }
3528         suffix.getBytes(value, off, coder);
3529         // assert off + suffix.length() == value.length >> coder;
3530 
3531         return new String(value, coder);
3532     }
3533 
3534     /**
3535      * Returns a new {@code String} composed of copies of the
3536      * {@code CharSequence elements} joined together with a copy of the
3537      * specified {@code delimiter}.
3538      *
3539      * <blockquote>For example,
3540      * <pre>{@code
3541      *     List<String> strings = List.of("Java", "is", "cool");
3542      *     String message = String.join(" ", strings);
3543      *     // message returned is: "Java is cool"
3544      *
3545      *     Set<String> strings =
3546      *         new LinkedHashSet<>(List.of("Java", "is", "very", "cool"));
3547      *     String message = String.join("-", strings);
3548      *     // message returned is: "Java-is-very-cool"
3549      * }</pre></blockquote>
3550      *
3551      * Note that if an individual element is {@code null}, then {@code "null"} is added.
3552      *
3553      * @param  delimiter a sequence of characters that is used to separate each
3554      *         of the {@code elements} in the resulting {@code String}
3555      * @param  elements an {@code Iterable} that will have its {@code elements}
3556      *         joined together.
3557      *
3558      * @return a new {@code String} that is composed from the {@code elements}
3559      *         argument
3560      *
3561      * @throws NullPointerException If {@code delimiter} or {@code elements}
3562      *         is {@code null}
3563      *
3564      * @see    #join(CharSequence,CharSequence...)
3565      * @see    java.util.StringJoiner
3566      * @since 1.8
3567      */
3568     public static String join(CharSequence delimiter,
3569             Iterable<? extends CharSequence> elements) {
3570         Objects.requireNonNull(delimiter);
3571         Objects.requireNonNull(elements);
3572         var delim = delimiter.toString();
3573         var elems = new String[8];
3574         int size = 0;
3575         for (CharSequence cs: elements) {
3576             if (size >= elems.length) {
3577                 elems = Arrays.copyOf(elems, elems.length << 1);
3578             }
3579             elems[size++] = String.valueOf(cs);
3580         }
3581         return join("", "", delim, elems, size);
3582     }
3583 
3584     /**
3585      * Converts all of the characters in this {@code String} to lower
3586      * case using the rules of the given {@code Locale}.  Case mapping is based
3587      * on the Unicode Standard version specified by the {@link java.lang.Character Character}
3588      * class. Since case mappings are not always 1:1 char mappings, the resulting {@code String}
3589      * and this {@code String} may differ in length.
3590      * <p>
3591      * Examples of lowercase mappings are in the following table:
3592      * <table class="plain">
3593      * <caption style="display:none">Lowercase mapping examples showing language code of locale, upper case, lower case, and description</caption>
3594      * <thead>
3595      * <tr>
3596      *   <th scope="col">Language Code of Locale</th>
3597      *   <th scope="col">Upper Case</th>
3598      *   <th scope="col">Lower Case</th>
3599      *   <th scope="col">Description</th>
3600      * </tr>
3601      * </thead>
3602      * <tbody>
3603      * <tr>
3604      *   <td>tr (Turkish)</td>
3605      *   <th scope="row" style="font-weight:normal; text-align:left">&#92;u0130</th>
3606      *   <td>&#92;u0069</td>
3607      *   <td>capital letter I with dot above -&gt; small letter i</td>
3608      * </tr>
3609      * <tr>
3610      *   <td>tr (Turkish)</td>
3611      *   <th scope="row" style="font-weight:normal; text-align:left">&#92;u0049</th>
3612      *   <td>&#92;u0131</td>
3613      *   <td>capital letter I -&gt; small letter dotless i </td>
3614      * </tr>
3615      * <tr>
3616      *   <td>(all)</td>
3617      *   <th scope="row" style="font-weight:normal; text-align:left">French Fries</th>
3618      *   <td>french fries</td>
3619      *   <td>lowercased all chars in String</td>
3620      * </tr>
3621      * <tr>
3622      *   <td>(all)</td>
3623      *   <th scope="row" style="font-weight:normal; text-align:left">
3624      *       &Iota;&Chi;&Theta;&Upsilon;&Sigma;</th>
3625      *   <td>&iota;&chi;&theta;&upsilon;&sigma;</td>
3626      *   <td>lowercased all chars in String</td>
3627      * </tr>
3628      * </tbody>
3629      * </table>
3630      *
3631      * @param locale use the case transformation rules for this locale
3632      * @return the {@code String}, converted to lowercase.
3633      * @see     java.lang.String#toLowerCase()
3634      * @see     java.lang.String#toUpperCase()
3635      * @see     java.lang.String#toUpperCase(Locale)
3636      * @since   1.1
3637      */
3638     public String toLowerCase(Locale locale) {
3639         return isLatin1() ? StringLatin1.toLowerCase(this, value, locale)
3640                           : StringUTF16.toLowerCase(this, value, locale);
3641     }
3642 
3643     /**
3644      * Converts all of the characters in this {@code String} to lower
3645      * case using the rules of the default locale. This method is equivalent to
3646      * {@code toLowerCase(Locale.getDefault())}.
3647      *
3648      * @apiNote This method is locale sensitive, and may produce unexpected
3649      * results if used for strings that are intended to be interpreted locale
3650      * independently.
3651      * Examples are programming language identifiers, protocol keys, and HTML
3652      * tags.
3653      * For instance, {@code "TITLE".toLowerCase()} in a Turkish locale
3654      * returns {@code "t\u005Cu0131tle"}, where '\u005Cu0131' is the
3655      * LATIN SMALL LETTER DOTLESS I character.
3656      * To obtain correct results for locale insensitive strings, use
3657      * {@code toLowerCase(Locale.ROOT)}.
3658      *
3659      * @return  the {@code String}, converted to lowercase.
3660      * @see     java.lang.String#toLowerCase(Locale)
3661      */
3662     public String toLowerCase() {
3663         return toLowerCase(Locale.getDefault());
3664     }
3665 
3666     /**
3667      * Converts all of the characters in this {@code String} to upper
3668      * case using the rules of the given {@code Locale}. Case mapping is based
3669      * on the Unicode Standard version specified by the {@link java.lang.Character Character}
3670      * class. Since case mappings are not always 1:1 char mappings, the resulting {@code String}
3671      * and this {@code String} may differ in length.
3672      * <p>
3673      * Examples of locale-sensitive and 1:M case mappings are in the following table:
3674      * <table class="plain">
3675      * <caption style="display:none">Examples of locale-sensitive and 1:M case mappings. Shows Language code of locale, lower case, upper case, and description.</caption>
3676      * <thead>
3677      * <tr>
3678      *   <th scope="col">Language Code of Locale</th>
3679      *   <th scope="col">Lower Case</th>
3680      *   <th scope="col">Upper Case</th>
3681      *   <th scope="col">Description</th>
3682      * </tr>
3683      * </thead>
3684      * <tbody>
3685      * <tr>
3686      *   <td>tr (Turkish)</td>
3687      *   <th scope="row" style="font-weight:normal; text-align:left">&#92;u0069</th>
3688      *   <td>&#92;u0130</td>
3689      *   <td>small letter i -&gt; capital letter I with dot above</td>
3690      * </tr>
3691      * <tr>
3692      *   <td>tr (Turkish)</td>
3693      *   <th scope="row" style="font-weight:normal; text-align:left">&#92;u0131</th>
3694      *   <td>&#92;u0049</td>
3695      *   <td>small letter dotless i -&gt; capital letter I</td>
3696      * </tr>
3697      * <tr>
3698      *   <td>(all)</td>
3699      *   <th scope="row" style="font-weight:normal; text-align:left">&#92;u00df</th>
3700      *   <td>&#92;u0053 &#92;u0053</td>
3701      *   <td>small letter sharp s -&gt; two letters: SS</td>
3702      * </tr>
3703      * <tr>
3704      *   <td>(all)</td>
3705      *   <th scope="row" style="font-weight:normal; text-align:left">Fahrvergn&uuml;gen</th>
3706      *   <td>FAHRVERGN&Uuml;GEN</td>
3707      *   <td></td>
3708      * </tr>
3709      * </tbody>
3710      * </table>
3711      * @param locale use the case transformation rules for this locale
3712      * @return the {@code String}, converted to uppercase.
3713      * @see     java.lang.String#toUpperCase()
3714      * @see     java.lang.String#toLowerCase()
3715      * @see     java.lang.String#toLowerCase(Locale)
3716      * @since   1.1
3717      */
3718     public String toUpperCase(Locale locale) {
3719         return isLatin1() ? StringLatin1.toUpperCase(this, value, locale)
3720                           : StringUTF16.toUpperCase(this, value, locale);
3721     }
3722 
3723     /**
3724      * Converts all of the characters in this {@code String} to upper
3725      * case using the rules of the default locale. This method is equivalent to
3726      * {@code toUpperCase(Locale.getDefault())}.
3727      *
3728      * @apiNote This method is locale sensitive, and may produce unexpected
3729      * results if used for strings that are intended to be interpreted locale
3730      * independently.
3731      * Examples are programming language identifiers, protocol keys, and HTML
3732      * tags.
3733      * For instance, {@code "title".toUpperCase()} in a Turkish locale
3734      * returns {@code "T\u005Cu0130TLE"}, where '\u005Cu0130' is the
3735      * LATIN CAPITAL LETTER I WITH DOT ABOVE character.
3736      * To obtain correct results for locale insensitive strings, use
3737      * {@code toUpperCase(Locale.ROOT)}.
3738      *
3739      * @return  the {@code String}, converted to uppercase.
3740      * @see     java.lang.String#toUpperCase(Locale)
3741      */
3742     public String toUpperCase() {
3743         return toUpperCase(Locale.getDefault());
3744     }
3745 
3746     /**
3747      * Returns a string whose value is this string, with all leading
3748      * and trailing space removed, where space is defined
3749      * as any character whose codepoint is less than or equal to
3750      * {@code 'U+0020'} (the space character).
3751      * <p>
3752      * If this {@code String} object represents an empty character
3753      * sequence, or the first and last characters of character sequence
3754      * represented by this {@code String} object both have codes
3755      * that are not space (as defined above), then a
3756      * reference to this {@code String} object is returned.
3757      * <p>
3758      * Otherwise, if all characters in this string are space (as
3759      * defined above), then a  {@code String} object representing an
3760      * empty string is returned.
3761      * <p>
3762      * Otherwise, let <i>k</i> be the index of the first character in the
3763      * string whose code is not a space (as defined above) and let
3764      * <i>m</i> be the index of the last character in the string whose code
3765      * is not a space (as defined above). A {@code String}
3766      * object is returned, representing the substring of this string that
3767      * begins with the character at index <i>k</i> and ends with the
3768      * character at index <i>m</i>-that is, the result of
3769      * {@code this.substring(k, m + 1)}.
3770      * <p>
3771      * This method may be used to trim space (as defined above) from
3772      * the beginning and end of a string.
3773      *
3774      * @return  a string whose value is this string, with all leading
3775      *          and trailing space removed, or this string if it
3776      *          has no leading or trailing space.
3777      */
3778     public String trim() {
3779         String ret = isLatin1() ? StringLatin1.trim(value)
3780                                 : StringUTF16.trim(value);
3781         return ret == null ? this : ret;
3782     }
3783 
3784     /**
3785      * Returns a string whose value is this string, with all leading
3786      * and trailing {@linkplain Character#isWhitespace(int) white space}
3787      * removed.
3788      * <p>
3789      * If this {@code String} object represents an empty string,
3790      * or if all code points in this string are
3791      * {@linkplain Character#isWhitespace(int) white space}, then an empty string
3792      * is returned.
3793      * <p>
3794      * Otherwise, returns a substring of this string beginning with the first
3795      * code point that is not a {@linkplain Character#isWhitespace(int) white space}
3796      * up to and including the last code point that is not a
3797      * {@linkplain Character#isWhitespace(int) white space}.
3798      * <p>
3799      * This method may be used to strip
3800      * {@linkplain Character#isWhitespace(int) white space} from
3801      * the beginning and end of a string.
3802      *
3803      * @return  a string whose value is this string, with all leading
3804      *          and trailing white space removed
3805      *
3806      * @see Character#isWhitespace(int)
3807      *
3808      * @since 11
3809      */
3810     public String strip() {
3811         String ret = isLatin1() ? StringLatin1.strip(value)
3812                                 : StringUTF16.strip(value);
3813         return ret == null ? this : ret;
3814     }
3815 
3816     /**
3817      * Returns a string whose value is this string, with all leading
3818      * {@linkplain Character#isWhitespace(int) white space} removed.
3819      * <p>
3820      * If this {@code String} object represents an empty string,
3821      * or if all code points in this string are
3822      * {@linkplain Character#isWhitespace(int) white space}, then an empty string
3823      * is returned.
3824      * <p>
3825      * Otherwise, returns a substring of this string beginning with the first
3826      * code point that is not a {@linkplain Character#isWhitespace(int) white space}
3827      * up to and including the last code point of this string.
3828      * <p>
3829      * This method may be used to trim
3830      * {@linkplain Character#isWhitespace(int) white space} from
3831      * the beginning of a string.
3832      *
3833      * @return  a string whose value is this string, with all leading white
3834      *          space removed
3835      *
3836      * @see Character#isWhitespace(int)
3837      *
3838      * @since 11
3839      */
3840     public String stripLeading() {
3841         String ret = isLatin1() ? StringLatin1.stripLeading(value)
3842                                 : StringUTF16.stripLeading(value);
3843         return ret == null ? this : ret;
3844     }
3845 
3846     /**
3847      * Returns a string whose value is this string, with all trailing
3848      * {@linkplain Character#isWhitespace(int) white space} removed.
3849      * <p>
3850      * If this {@code String} object represents an empty string,
3851      * or if all characters in this string are
3852      * {@linkplain Character#isWhitespace(int) white space}, then an empty string
3853      * is returned.
3854      * <p>
3855      * Otherwise, returns a substring of this string beginning with the first
3856      * code point of this string up to and including the last code point
3857      * that is not a {@linkplain Character#isWhitespace(int) white space}.
3858      * <p>
3859      * This method may be used to trim
3860      * {@linkplain Character#isWhitespace(int) white space} from
3861      * the end of a string.
3862      *
3863      * @return  a string whose value is this string, with all trailing white
3864      *          space removed
3865      *
3866      * @see Character#isWhitespace(int)
3867      *
3868      * @since 11
3869      */
3870     public String stripTrailing() {
3871         String ret = isLatin1() ? StringLatin1.stripTrailing(value)
3872                                 : StringUTF16.stripTrailing(value);
3873         return ret == null ? this : ret;
3874     }
3875 
3876     /**
3877      * Returns {@code true} if the string is empty or contains only
3878      * {@linkplain Character#isWhitespace(int) white space} codepoints,
3879      * otherwise {@code false}.
3880      *
3881      * @return {@code true} if the string is empty or contains only
3882      *         {@linkplain Character#isWhitespace(int) white space} codepoints,
3883      *         otherwise {@code false}
3884      *
3885      * @see Character#isWhitespace(int)
3886      *
3887      * @since 11
3888      */
3889     public boolean isBlank() {
3890         return indexOfNonWhitespace() == length();
3891     }
3892 
3893     /**
3894      * Returns a stream of lines extracted from this string,
3895      * separated by line terminators.
3896      * <p>
3897      * A <i>line terminator</i> is one of the following:
3898      * a line feed character {@code "\n"} (U+000A),
3899      * a carriage return character {@code "\r"} (U+000D),
3900      * or a carriage return followed immediately by a line feed
3901      * {@code "\r\n"} (U+000D U+000A).
3902      * <p>
3903      * A <i>line</i> is either a sequence of zero or more characters
3904      * followed by a line terminator, or it is a sequence of one or
3905      * more characters followed by the end of the string. A
3906      * line does not include the line terminator.
3907      * <p>
3908      * The stream returned by this method contains the lines from
3909      * this string in the order in which they occur.
3910      *
3911      * @apiNote This definition of <i>line</i> implies that an empty
3912      *          string has zero lines and that there is no empty line
3913      *          following a line terminator at the end of a string.
3914      *
3915      * @implNote This method provides better performance than
3916      *           split("\R") by supplying elements lazily and
3917      *           by faster search of new line terminators.
3918      *
3919      * @return  the stream of lines extracted from this string
3920      *
3921      * @since 11
3922      */
3923     public Stream<String> lines() {
3924         return isLatin1() ? StringLatin1.lines(value) : StringUTF16.lines(value);
3925     }
3926 
3927     /**
3928      * Adjusts the indentation of each line of this string based on the value of
3929      * {@code n}, and normalizes line termination characters.
3930      * <p>
3931      * This string is conceptually separated into lines using
3932      * {@link String#lines()}. Each line is then adjusted as described below
3933      * and then suffixed with a line feed {@code "\n"} (U+000A). The resulting
3934      * lines are then concatenated and returned.
3935      * <p>
3936      * If {@code n > 0} then {@code n} spaces (U+0020) are inserted at the
3937      * beginning of each line.
3938      * <p>
3939      * If {@code n < 0} then up to {@code n}
3940      * {@linkplain Character#isWhitespace(int) white space characters} are removed
3941      * from the beginning of each line. If a given line does not contain
3942      * sufficient white space then all leading
3943      * {@linkplain Character#isWhitespace(int) white space characters} are removed.
3944      * Each white space character is treated as a single character. In
3945      * particular, the tab character {@code "\t"} (U+0009) is considered a
3946      * single character; it is not expanded.
3947      * <p>
3948      * If {@code n == 0} then the line remains unchanged. However, line
3949      * terminators are still normalized.
3950      *
3951      * @param n  number of leading
3952      *           {@linkplain Character#isWhitespace(int) white space characters}
3953      *           to add or remove
3954      *
3955      * @return string with indentation adjusted and line endings normalized
3956      *
3957      * @see String#lines()
3958      * @see String#isBlank()
3959      * @see Character#isWhitespace(int)
3960      *
3961      * @since 12
3962      */
3963     public String indent(int n) {
3964         if (isEmpty()) {
3965             return "";
3966         }
3967         Stream<String> stream = lines();
3968         if (n > 0) {
3969             final String spaces = " ".repeat(n);
3970             stream = stream.map(s -> spaces + s);
3971         } else if (n == Integer.MIN_VALUE) {
3972             stream = stream.map(s -> s.stripLeading());
3973         } else if (n < 0) {
3974             stream = stream.map(s -> s.substring(Math.min(-n, s.indexOfNonWhitespace())));
3975         }
3976         return stream.collect(Collectors.joining("\n", "", "\n"));
3977     }
3978 
3979     private int indexOfNonWhitespace() {
3980         return isLatin1() ? StringLatin1.indexOfNonWhitespace(value)
3981                           : StringUTF16.indexOfNonWhitespace(value);
3982     }
3983 
3984     private int lastIndexOfNonWhitespace() {
3985         return isLatin1() ? StringLatin1.lastIndexOfNonWhitespace(value)
3986                           : StringUTF16.lastIndexOfNonWhitespace(value);
3987     }
3988 
3989     /**
3990      * Returns a string whose value is this string, with incidental
3991      * {@linkplain Character#isWhitespace(int) white space} removed from
3992      * the beginning and end of every line.
3993      * <p>
3994      * Incidental {@linkplain Character#isWhitespace(int) white space}
3995      * is often present in a text block to align the content with the opening
3996      * delimiter. For example, in the following code, dots represent incidental
3997      * {@linkplain Character#isWhitespace(int) white space}:
3998      * <blockquote><pre>
3999      * String html = """
4000      * ..............&lt;html&gt;
4001      * ..............    &lt;body&gt;
4002      * ..............        &lt;p&gt;Hello, world&lt;/p&gt;
4003      * ..............    &lt;/body&gt;
4004      * ..............&lt;/html&gt;
4005      * ..............""";
4006      * </pre></blockquote>
4007      * This method treats the incidental
4008      * {@linkplain Character#isWhitespace(int) white space} as indentation to be
4009      * stripped, producing a string that preserves the relative indentation of
4010      * the content. Using | to visualize the start of each line of the string:
4011      * <blockquote><pre>
4012      * |&lt;html&gt;
4013      * |    &lt;body&gt;
4014      * |        &lt;p&gt;Hello, world&lt;/p&gt;
4015      * |    &lt;/body&gt;
4016      * |&lt;/html&gt;
4017      * </pre></blockquote>
4018      * First, the individual lines of this string are extracted. A <i>line</i>
4019      * is a sequence of zero or more characters followed by either a line
4020      * terminator or the end of the string.
4021      * If the string has at least one line terminator, the last line consists
4022      * of the characters between the last terminator and the end of the string.
4023      * Otherwise, if the string has no terminators, the last line is the start
4024      * of the string to the end of the string, in other words, the entire
4025      * string.
4026      * A line does not include the line terminator.
4027      * <p>
4028      * Then, the <i>minimum indentation</i> (min) is determined as follows:
4029      * <ul>
4030      *   <li><p>For each non-blank line (as defined by {@link String#isBlank()}),
4031      *   the leading {@linkplain Character#isWhitespace(int) white space}
4032      *   characters are counted.</p>
4033      *   </li>
4034      *   <li><p>The leading {@linkplain Character#isWhitespace(int) white space}
4035      *   characters on the last line are also counted even if
4036      *   {@linkplain String#isBlank() blank}.</p>
4037      *   </li>
4038      * </ul>
4039      * <p>The <i>min</i> value is the smallest of these counts.
4040      * <p>
4041      * For each {@linkplain String#isBlank() non-blank} line, <i>min</i> leading
4042      * {@linkplain Character#isWhitespace(int) white space} characters are
4043      * removed, and any trailing {@linkplain Character#isWhitespace(int) white
4044      * space} characters are removed. {@linkplain String#isBlank() Blank} lines
4045      * are replaced with the empty string.
4046      *
4047      * <p>
4048      * Finally, the lines are joined into a new string, using the LF character
4049      * {@code "\n"} (U+000A) to separate lines.
4050      *
4051      * @apiNote
4052      * This method's primary purpose is to shift a block of lines as far as
4053      * possible to the left, while preserving relative indentation. Lines
4054      * that were indented the least will thus have no leading
4055      * {@linkplain Character#isWhitespace(int) white space}.
4056      * The result will have the same number of line terminators as this string.
4057      * If this string ends with a line terminator then the result will end
4058      * with a line terminator.
4059      *
4060      * @implSpec
4061      * This method treats all {@linkplain Character#isWhitespace(int) white space}
4062      * characters as having equal width. As long as the indentation on every
4063      * line is consistently composed of the same character sequences, then the
4064      * result will be as described above.
4065      *
4066      * @return string with incidental indentation removed and line
4067      *         terminators normalized
4068      *
4069      * @see String#lines()
4070      * @see String#isBlank()
4071      * @see String#indent(int)
4072      * @see Character#isWhitespace(int)
4073      *
4074      * @since 15
4075      *
4076      */
4077     public String stripIndent() {
4078         int length = length();
4079         if (length == 0) {
4080             return "";
4081         }
4082         char lastChar = charAt(length - 1);
4083         boolean optOut = lastChar == '\n' || lastChar == '\r';
4084         List<String> lines = lines().toList();
4085         final int outdent = optOut ? 0 : outdent(lines);
4086         return lines.stream()
4087             .map(line -> {
4088                 int firstNonWhitespace = line.indexOfNonWhitespace();
4089                 int lastNonWhitespace = line.lastIndexOfNonWhitespace();
4090                 int incidentalWhitespace = Math.min(outdent, firstNonWhitespace);
4091                 return firstNonWhitespace > lastNonWhitespace
4092                     ? "" : line.substring(incidentalWhitespace, lastNonWhitespace);
4093             })
4094             .collect(Collectors.joining("\n", "", optOut ? "\n" : ""));
4095     }
4096 
4097     private static int outdent(List<String> lines) {
4098         // Note: outdent is guaranteed to be zero or positive number.
4099         // If there isn't a non-blank line then the last must be blank
4100         int outdent = Integer.MAX_VALUE;
4101         for (String line : lines) {
4102             int leadingWhitespace = line.indexOfNonWhitespace();
4103             if (leadingWhitespace != line.length()) {
4104                 outdent = Integer.min(outdent, leadingWhitespace);
4105             }
4106         }
4107         String lastLine = lines.get(lines.size() - 1);
4108         if (lastLine.isBlank()) {
4109             outdent = Integer.min(outdent, lastLine.length());
4110         }
4111         return outdent;
4112     }
4113 
4114     /**
4115      * Returns a string whose value is this string, with escape sequences
4116      * translated as if in a string literal.
4117      * <p>
4118      * Escape sequences are translated as follows;
4119      * <table class="striped">
4120      *   <caption style="display:none">Translation</caption>
4121      *   <thead>
4122      *   <tr>
4123      *     <th scope="col">Escape</th>
4124      *     <th scope="col">Name</th>
4125      *     <th scope="col">Translation</th>
4126      *   </tr>
4127      *   </thead>
4128      *   <tbody>
4129      *   <tr>
4130      *     <th scope="row">{@code \u005Cb}</th>
4131      *     <td>backspace</td>
4132      *     <td>{@code U+0008}</td>
4133      *   </tr>
4134      *   <tr>
4135      *     <th scope="row">{@code \u005Ct}</th>
4136      *     <td>horizontal tab</td>
4137      *     <td>{@code U+0009}</td>
4138      *   </tr>
4139      *   <tr>
4140      *     <th scope="row">{@code \u005Cn}</th>
4141      *     <td>line feed</td>
4142      *     <td>{@code U+000A}</td>
4143      *   </tr>
4144      *   <tr>
4145      *     <th scope="row">{@code \u005Cf}</th>
4146      *     <td>form feed</td>
4147      *     <td>{@code U+000C}</td>
4148      *   </tr>
4149      *   <tr>
4150      *     <th scope="row">{@code \u005Cr}</th>
4151      *     <td>carriage return</td>
4152      *     <td>{@code U+000D}</td>
4153      *   </tr>
4154      *   <tr>
4155      *     <th scope="row">{@code \u005Cs}</th>
4156      *     <td>space</td>
4157      *     <td>{@code U+0020}</td>
4158      *   </tr>
4159      *   <tr>
4160      *     <th scope="row">{@code \u005C"}</th>
4161      *     <td>double quote</td>
4162      *     <td>{@code U+0022}</td>
4163      *   </tr>
4164      *   <tr>
4165      *     <th scope="row">{@code \u005C'}</th>
4166      *     <td>single quote</td>
4167      *     <td>{@code U+0027}</td>
4168      *   </tr>
4169      *   <tr>
4170      *     <th scope="row">{@code \u005C\u005C}</th>
4171      *     <td>backslash</td>
4172      *     <td>{@code U+005C}</td>
4173      *   </tr>
4174      *   <tr>
4175      *     <th scope="row">{@code \u005C0 - \u005C377}</th>
4176      *     <td>octal escape</td>
4177      *     <td>code point equivalents</td>
4178      *   </tr>
4179      *   <tr>
4180      *     <th scope="row">{@code \u005C<line-terminator>}</th>
4181      *     <td>continuation</td>
4182      *     <td>discard</td>
4183      *   </tr>
4184      *   </tbody>
4185      * </table>
4186      *
4187      * @implNote
4188      * This method does <em>not</em> translate Unicode escapes such as "{@code \u005cu2022}".
4189      * Unicode escapes are translated by the Java compiler when reading input characters and
4190      * are not part of the string literal specification.
4191      *
4192      * @throws IllegalArgumentException when an escape sequence is malformed.
4193      *
4194      * @return String with escape sequences translated.
4195      *
4196      * @jls 3.10.7 Escape Sequences
4197      *
4198      * @since 15
4199      */
4200     public String translateEscapes() {
4201         if (isEmpty()) {
4202             return "";
4203         }
4204         char[] chars = toCharArray();
4205         int length = chars.length;
4206         int from = 0;
4207         int to = 0;
4208         while (from < length) {
4209             char ch = chars[from++];
4210             if (ch == '\\') {
4211                 ch = from < length ? chars[from++] : '\0';
4212                 switch (ch) {
4213                 case 'b':
4214                     ch = '\b';
4215                     break;
4216                 case 'f':
4217                     ch = '\f';
4218                     break;
4219                 case 'n':
4220                     ch = '\n';
4221                     break;
4222                 case 'r':
4223                     ch = '\r';
4224                     break;
4225                 case 's':
4226                     ch = ' ';
4227                     break;
4228                 case 't':
4229                     ch = '\t';
4230                     break;
4231                 case '\'':
4232                 case '\"':
4233                 case '\\':
4234                     // as is
4235                     break;
4236                 case '0': case '1': case '2': case '3':
4237                 case '4': case '5': case '6': case '7':
4238                     int limit = Integer.min(from + (ch <= '3' ? 2 : 1), length);
4239                     int code = ch - '0';
4240                     while (from < limit) {
4241                         ch = chars[from];
4242                         if (ch < '0' || '7' < ch) {
4243                             break;
4244                         }
4245                         from++;
4246                         code = (code << 3) | (ch - '0');
4247                     }
4248                     ch = (char)code;
4249                     break;
4250                 case '\n':
4251                     continue;
4252                 case '\r':
4253                     if (from < length && chars[from] == '\n') {
4254                         from++;
4255                     }
4256                     continue;
4257                 default: {
4258                     String msg = String.format(
4259                         "Invalid escape sequence: \\%c \\\\u%04X",
4260                         ch, (int)ch);
4261                     throw new IllegalArgumentException(msg);
4262                 }
4263                 }
4264             }
4265 
4266             chars[to++] = ch;
4267         }
4268 
4269         return new String(chars, 0, to);
4270     }
4271 
4272     /**
4273      * This method allows the application of a function to {@code this}
4274      * string. The function should expect a single String argument
4275      * and produce an {@code R} result.
4276      * <p>
4277      * Any exception thrown by {@code f.apply()} will be propagated to the
4278      * caller.
4279      *
4280      * @param f    a function to apply
4281      *
4282      * @param <R>  the type of the result
4283      *
4284      * @return     the result of applying the function to this string
4285      *
4286      * @see java.util.function.Function
4287      *
4288      * @since 12
4289      */
    public <R> R transform(Function<? super String, ? extends R> f) {
        // Plain delegation: no null check is made here, so a null f fails
        // with the NullPointerException raised by the call itself.
        return f.apply(this);
    }
4293 
4294     /**
4295      * This object (which is already a string!) is itself returned.
4296      *
4297      * @return  the string itself.
4298      */
    public String toString() {
        // No copy is made; the receiver itself is the result.
        return this;
    }
4302 
4303     /**
4304      * Returns a stream of {@code int} zero-extending the {@code char} values
4305      * from this sequence.  Any char which maps to a {@linkplain
4306      * Character##unicode surrogate code point} is passed through
4307      * uninterpreted.
4308      *
4309      * @return an IntStream of char values from this sequence
4310      * @since 9
4311      */
4312     @Override
4313     public IntStream chars() {
4314         return StreamSupport.intStream(
4315             isLatin1() ? new StringLatin1.CharsSpliterator(value, Spliterator.IMMUTABLE)
4316                        : new StringUTF16.CharsSpliterator(value, Spliterator.IMMUTABLE),
4317             false);
4318     }
4319 
4320 
4321     /**
4322      * Returns a stream of code point values from this sequence.  Any surrogate
4323      * pairs encountered in the sequence are combined as if by {@linkplain
4324      * Character#toCodePoint Character.toCodePoint} and the result is passed
4325      * to the stream. Any other code units, including ordinary BMP characters,
4326      * unpaired surrogates, and undefined code units, are zero-extended to
4327      * {@code int} values which are then passed to the stream.
4328      *
4329      * @return an IntStream of Unicode code points from this sequence
4330      * @since 9
4331      */
4332     @Override
4333     public IntStream codePoints() {
4334         return StreamSupport.intStream(
4335             isLatin1() ? new StringLatin1.CharsSpliterator(value, Spliterator.IMMUTABLE)
4336                        : new StringUTF16.CodePointsSpliterator(value, Spliterator.IMMUTABLE),
4337             false);
4338     }
4339 
4340     /**
4341      * Converts this string to a new character array.
4342      *
4343      * @return  a newly allocated character array whose length is the length
4344      *          of this string and whose contents are initialized to contain
4345      *          the character sequence represented by this string.
4346      */
4347     public char[] toCharArray() {
4348         return isLatin1() ? StringLatin1.toChars(value)
4349                           : StringUTF16.toChars(value);
4350     }
4351 
4352     /**
4353      * Returns a formatted string using the specified format string and
4354      * arguments.
4355      *
4356      * <p> The locale always used is the one returned by {@link
4357      * java.util.Locale#getDefault(java.util.Locale.Category)
4358      * Locale.getDefault(Locale.Category)} with
4359      * {@link java.util.Locale.Category#FORMAT FORMAT} category specified.
4360      *
4361      * @param  format
4362      *         A <a href="../util/Formatter.html#syntax">format string</a>
4363      *
4364      * @param  args
4365      *         Arguments referenced by the format specifiers in the format
4366      *         string.  If there are more arguments than format specifiers, the
4367      *         extra arguments are ignored.  The number of arguments is
4368      *         variable and may be zero.  The maximum number of arguments is
4369      *         limited by the maximum dimension of a Java array as defined by
4370      *         <cite>The Java Virtual Machine Specification</cite>.
4371      *         The behaviour on a
4372      *         {@code null} argument depends on the <a
4373      *         href="../util/Formatter.html#syntax">conversion</a>.
4374      *
4375      * @throws  java.util.IllegalFormatException
4376      *          If a format string contains an illegal syntax, a format
4377      *          specifier that is incompatible with the given arguments,
4378      *          insufficient arguments given the format string, or other
4379      *          illegal conditions.  For specification of all possible
4380      *          formatting errors, see the <a
4381      *          href="../util/Formatter.html#detail">Details</a> section of the
4382      *          formatter class specification.
4383      *
4384      * @return  A formatted string
4385      *
4386      * @see  java.util.Formatter
4387      * @since  1.5
4388      */
4389     public static String format(String format, Object... args) {
4390         return new Formatter().format(format, args).toString();
4391     }
4392 
4393     /**
4394      * Returns a formatted string using the specified locale, format string,
4395      * and arguments.
4396      *
4397      * @param  l
4398      *         The {@linkplain java.util.Locale locale} to apply during
4399      *         formatting.  If {@code l} is {@code null} then no localization
4400      *         is applied.
4401      *
4402      * @param  format
4403      *         A <a href="../util/Formatter.html#syntax">format string</a>
4404      *
4405      * @param  args
4406      *         Arguments referenced by the format specifiers in the format
4407      *         string.  If there are more arguments than format specifiers, the
4408      *         extra arguments are ignored.  The number of arguments is
4409      *         variable and may be zero.  The maximum number of arguments is
4410      *         limited by the maximum dimension of a Java array as defined by
4411      *         <cite>The Java Virtual Machine Specification</cite>.
4412      *         The behaviour on a
4413      *         {@code null} argument depends on the
4414      *         <a href="../util/Formatter.html#syntax">conversion</a>.
4415      *
4416      * @throws  java.util.IllegalFormatException
4417      *          If a format string contains an illegal syntax, a format
4418      *          specifier that is incompatible with the given arguments,
4419      *          insufficient arguments given the format string, or other
4420      *          illegal conditions.  For specification of all possible
4421      *          formatting errors, see the <a
4422      *          href="../util/Formatter.html#detail">Details</a> section of the
     *          formatter class specification.
4424      *
4425      * @return  A formatted string
4426      *
4427      * @see  java.util.Formatter
4428      * @since  1.5
4429      */
4430     public static String format(Locale l, String format, Object... args) {
4431         return new Formatter(l).format(format, args).toString();
4432     }
4433 
4434     /**
4435      * Formats using this string as the format string, and the supplied
4436      * arguments.
4437      *
4438      * @implSpec This method is equivalent to {@code String.format(this, args)}.
4439      *
4440      * @param  args
4441      *         Arguments referenced by the format specifiers in this string.
4442      *
4443      * @return  A formatted string
4444      *
4445      * @see  java.lang.String#format(String,Object...)
4446      * @see  java.util.Formatter
4447      *
4448      * @since 15
4449      *
4450      */
4451     public String formatted(Object... args) {
4452         return new Formatter().format(this, args).toString();
4453     }
4454 
4455     /**
4456      * Returns the string representation of the {@code Object} argument.
4457      *
4458      * @param   obj   an {@code Object}.
4459      * @return  if the argument is {@code null}, then a string equal to
4460      *          {@code "null"}; otherwise, the value of
4461      *          {@code obj.toString()} is returned.
4462      * @see     java.lang.Object#toString()
4463      */
4464     public static String valueOf(Object obj) {
4465         return (obj == null) ? "null" : obj.toString();
4466     }
4467 
4468     /**
4469      * Returns the string representation of the {@code char} array
4470      * argument. The contents of the character array are copied; subsequent
4471      * modification of the character array does not affect the returned
4472      * string.
4473      *
4474      * @param   data     the character array.
4475      * @return  a {@code String} that contains the characters of the
4476      *          character array.
4477      */
    public static String valueOf(char[] data) {
        // Delegates to the String(char[]) constructor.
        return new String(data);
    }
4481 
4482     /**
4483      * Returns the string representation of a specific subarray of the
4484      * {@code char} array argument.
4485      * <p>
4486      * The {@code offset} argument is the index of the first
4487      * character of the subarray. The {@code count} argument
4488      * specifies the length of the subarray. The contents of the subarray
4489      * are copied; subsequent modification of the character array does not
4490      * affect the returned string.
4491      *
4492      * @param   data     the character array.
4493      * @param   offset   initial offset of the subarray.
4494      * @param   count    length of the subarray.
4495      * @return  a {@code String} that contains the characters of the
4496      *          specified subarray of the character array.
4497      * @throws    IndexOutOfBoundsException if {@code offset} is
4498      *          negative, or {@code count} is negative, or
4499      *          {@code offset+count} is larger than
4500      *          {@code data.length}.
4501      */
    public static String valueOf(char[] data, int offset, int count) {
        // Delegates to the String(char[], int, int) constructor; no range
        // checking is done in this method itself.
        return new String(data, offset, count);
    }
4505 
4506     /**
4507      * Equivalent to {@link #valueOf(char[], int, int)}.
4508      *
4509      * @param   data     the character array.
4510      * @param   offset   initial offset of the subarray.
4511      * @param   count    length of the subarray.
4512      * @return  a {@code String} that contains the characters of the
4513      *          specified subarray of the character array.
4514      * @throws    IndexOutOfBoundsException if {@code offset} is
4515      *          negative, or {@code count} is negative, or
4516      *          {@code offset+count} is larger than
4517      *          {@code data.length}.
4518      */
    public static String copyValueOf(char[] data, int offset, int count) {
        // Same construction as valueOf(char[], int, int).
        return new String(data, offset, count);
    }
4522 
4523     /**
4524      * Equivalent to {@link #valueOf(char[])}.
4525      *
4526      * @param   data   the character array.
4527      * @return  a {@code String} that contains the characters of the
4528      *          character array.
4529      */
    public static String copyValueOf(char[] data) {
        // Same construction as valueOf(char[]).
        return new String(data);
    }
4533 
4534     /**
4535      * Returns the string representation of the {@code boolean} argument.
4536      *
4537      * @param   b   a {@code boolean}.
4538      * @return  if the argument is {@code true}, a string equal to
4539      *          {@code "true"} is returned; otherwise, a string equal to
4540      *          {@code "false"} is returned.
4541      */
4542     public static String valueOf(boolean b) {
4543         return b ? "true" : "false";
4544     }
4545 
4546     /**
4547      * Returns the string representation of the {@code char}
4548      * argument.
4549      *
4550      * @param   c   a {@code char}.
4551      * @return  a string of length {@code 1} containing
4552      *          as its single character the argument {@code c}.
4553      */
4554     public static String valueOf(char c) {
4555         if (COMPACT_STRINGS && StringLatin1.canEncode(c)) {
4556             return new String(StringLatin1.toBytes(c), LATIN1);
4557         }
4558         return new String(StringUTF16.toBytes(c), UTF16);
4559     }
4560 
4561     /**
4562      * Returns the string representation of the {@code int} argument.
4563      * <p>
4564      * The representation is exactly the one returned by the
4565      * {@code Integer.toString} method of one argument.
4566      *
4567      * @param   i   an {@code int}.
4568      * @return  a string representation of the {@code int} argument.
     * @see     java.lang.Integer#toString(int)
4570      */
    public static String valueOf(int i) {
        // Pure delegation to the one-argument Integer.toString.
        return Integer.toString(i);
    }
4574 
4575     /**
4576      * Returns the string representation of the {@code long} argument.
4577      * <p>
4578      * The representation is exactly the one returned by the
4579      * {@code Long.toString} method of one argument.
4580      *
4581      * @param   l   a {@code long}.
4582      * @return  a string representation of the {@code long} argument.
4583      * @see     java.lang.Long#toString(long)
4584      */
    public static String valueOf(long l) {
        // Pure delegation to the one-argument Long.toString.
        return Long.toString(l);
    }
4588 
4589     /**
4590      * Returns the string representation of the {@code float} argument.
4591      * <p>
4592      * The representation is exactly the one returned by the
4593      * {@code Float.toString} method of one argument.
4594      *
4595      * @param   f   a {@code float}.
4596      * @return  a string representation of the {@code float} argument.
4597      * @see     java.lang.Float#toString(float)
4598      */
    public static String valueOf(float f) {
        // Pure delegation to the one-argument Float.toString.
        return Float.toString(f);
    }
4602 
4603     /**
4604      * Returns the string representation of the {@code double} argument.
4605      * <p>
4606      * The representation is exactly the one returned by the
4607      * {@code Double.toString} method of one argument.
4608      *
4609      * @param   d   a {@code double}.
4610      * @return  a  string representation of the {@code double} argument.
4611      * @see     java.lang.Double#toString(double)
4612      */
    public static String valueOf(double d) {
        // Pure delegation to the one-argument Double.toString.
        return Double.toString(d);
    }
4616 
4617     /**
4618      * Returns a canonical representation for the string object.
4619      * <p>
4620      * A pool of strings, initially empty, is maintained privately by the
4621      * class {@code String}.
4622      * <p>
4623      * When the intern method is invoked, if the pool already contains a
4624      * string equal to this {@code String} object as determined by
4625      * the {@link #equals(Object)} method, then the string from the pool is
4626      * returned. Otherwise, this {@code String} object is added to the
4627      * pool and a reference to this {@code String} object is returned.
4628      * <p>
4629      * It follows that for any two strings {@code s} and {@code t},
4630      * {@code s.intern() == t.intern()} is {@code true}
4631      * if and only if {@code s.equals(t)} is {@code true}.
4632      * <p>
4633      * All literal strings and string-valued constant expressions are
     * interned. String literals are defined in section {@jls 3.10.5} of
     * <cite>The Java Language Specification</cite>.
4636      *
4637      * @return  a string that has the same contents as this string, but is
4638      *          guaranteed to be from a pool of unique strings.
4639      */
    public native String intern(); // declared native: there is no Java body for this method
4641 
4642     /**
4643      * Returns a string whose value is the concatenation of this
4644      * string repeated {@code count} times.
4645      * <p>
4646      * If this string is empty or count is zero then the empty
4647      * string is returned.
4648      *
4649      * @param   count number of times to repeat
4650      *
4651      * @return  A string composed of this string repeated
4652      *          {@code count} times or the empty string if this
4653      *          string is empty or count is zero
4654      *
4655      * @throws  IllegalArgumentException if the {@code count} is
4656      *          negative.
4657      *
4658      * @since 11
4659      */
4660     public String repeat(int count) {
4661         if (count < 0) {
4662             throw new IllegalArgumentException("count is negative: " + count);
4663         }
4664         if (count == 1) {
4665             return this;
4666         }
4667         final int len = value.length;
4668         if (len == 0 || count == 0) {
4669             return "";
4670         }
4671         if (Integer.MAX_VALUE / count < len) {
4672             throw new OutOfMemoryError("Required length exceeds implementation limit");
4673         }
4674         if (len == 1) {
4675             final byte[] single = new byte[count];
4676             Arrays.fill(single, value[0]);
4677             return new String(single, coder);
4678         }
4679         final int limit = len * count;
4680         final byte[] multiple = new byte[limit];
4681         System.arraycopy(value, 0, multiple, 0, len);
4682         repeatCopyRest(multiple, 0, limit, len);
4683         return new String(multiple, coder);
4684     }
4685 
4686     /**
4687      * Used to perform copying after the initial insertion. Copying is optimized
4688      * by using power of two duplication. First pass duplicates original copy,
4689      * second pass then duplicates the original and the copy yielding four copies,
4690      * third pass duplicates four copies yielding eight copies, and so on.
4691      * Finally, the remainder is filled in with prior copies.
4692      *
4693      * @implNote The technique used here is significantly faster than hand-rolled
4694      * loops or special casing small numbers due to the intensive optimization
4695      * done by intrinsic {@code System.arraycopy}.
4696      *
4697      * @param buffer    destination buffer
4698      * @param offset    offset in the destination buffer
4699      * @param limit     total replicated including what is already in the buffer
     * @param copied    number of bytes that are already in the buffer
4701      */
4702     static void repeatCopyRest(byte[] buffer, int offset, int limit, int copied) {
4703         // Initial copy is in the buffer.
4704         for (; copied < limit - copied; copied <<= 1) {
4705             // Power of two duplicate.
4706             System.arraycopy(buffer, offset, buffer, offset + copied, copied);
4707         }
4708         // Duplicate remainder.
4709         System.arraycopy(buffer, offset, buffer, offset + copied, limit - copied);
4710     }
4711 
4712     ////////////////////////////////////////////////////////////////
4713 
4714     /**
4715      * Copy character bytes from this string into dst starting at dstBegin.
4716      * This method doesn't perform any range checking.
4717      *
4718      * Invoker guarantees: dst is in UTF16 (inflate itself for asb), if two
4719      * coders are different, and dst is big enough (range check)
4720      *
4721      * @param dstBegin  the char index, not offset of byte[]
4722      * @param coder     the coder of dst[]
4723      */
4724     void getBytes(byte[] dst, int dstBegin, byte coder) {
4725         if (coder() == coder) {
4726             System.arraycopy(value, 0, dst, dstBegin << coder, value.length);
4727         } else {    // this.coder == LATIN && coder == UTF16
4728             StringLatin1.inflate(value, 0, dst, dstBegin, value.length);
4729         }
4730     }
4731 
4732     /**
4733      * Copy character bytes from this string into dst starting at dstBegin.
4734      * This method doesn't perform any range checking.
4735      *
4736      * Invoker guarantees: dst is in UTF16 (inflate itself for asb), if two
4737      * coders are different, and dst is big enough (range check)
4738      *
4739      * @param srcPos    the char index, not offset of byte[]
4740      * @param dstBegin  the char index to start from
4741      * @param coder     the coder of dst[]
4742      * @param length    the amount of copied chars
4743      */
4744     void getBytes(byte[] dst, int srcPos, int dstBegin, byte coder, int length) {
4745         if (coder() == coder) {
4746             System.arraycopy(value, srcPos << coder, dst, dstBegin << coder, length << coder);
4747         } else {    // this.coder == LATIN && coder == UTF16
4748             StringLatin1.inflate(value, srcPos, dst, dstBegin, length);
4749         }
4750     }
4751 
4752     /*
4753      * Package private constructor. Trailing Void argument is there for
4754      * disambiguating it against other (public) constructors.
4755      *
     * Stores the char[] value into a byte[] in which each byte represents
     * the 8 low-order bits of the corresponding character, if the char[]
     * contains only latin1 characters. Otherwise, into a byte[] that stores
     * all characters in their byte sequences defined by the {@code StringUTF16}.
4760      */
4761     String(char[] value, int off, int len, Void sig) {
4762         if (len == 0) {
4763             this.value = "".value;
4764             this.coder = "".coder;
4765             return;
4766         }
4767         if (COMPACT_STRINGS) {
4768             byte[] val = StringUTF16.compress(value, off, len);
4769             if (val != null) {
4770                 this.value = val;
4771                 this.coder = LATIN1;
4772                 return;
4773             }
4774         }
4775         this.coder = UTF16;
4776         this.value = StringUTF16.toBytes(value, off, len);
4777     }
4778 
4779     /*
4780      * Package private constructor. Trailing Void argument is there for
4781      * disambiguating it against other (public) constructors.
4782      */
4783     String(AbstractStringBuilder asb, Void sig) {
4784         byte[] val = asb.getValue();
4785         int length = asb.length();
4786         if (asb.isLatin1()) {
4787             this.coder = LATIN1;
4788             this.value = Arrays.copyOfRange(val, 0, length);
4789         } else {
4790             // only try to compress val if some characters were deleted.
4791             if (COMPACT_STRINGS && asb.maybeLatin1) {
4792                 byte[] buf = StringUTF16.compress(val, 0, length);
4793                 if (buf != null) {
4794                     this.coder = LATIN1;
4795                     this.value = buf;
4796                     return;
4797                 }
4798             }
4799             this.coder = UTF16;
4800             this.value = Arrays.copyOfRange(val, 0, length << 1);
4801         }
4802     }
4803 
4804    /*
4805     * Package private constructor which shares value array for speed.
4806     */
    String(byte[] value, byte coder) {
        // The array is stored as-is, without a defensive copy, so any later
        // change the caller makes to it would be visible through this string.
        this.value = value;
        this.coder = coder;
    }
4811 
4812     byte coder() {
4813         return COMPACT_STRINGS ? coder : UTF16;
4814     }
4815 
    byte[] value() {
        // Returns the backing array itself, not a copy.
        return value;
    }
4819 
4820     boolean isLatin1() {
4821         return COMPACT_STRINGS && coder == LATIN1;
4822     }
4823 
    // Coder tags stored in the coder field. Their numeric values are also
    // used as shift amounts (index << coder) when turning a char index into
    // a byte offset: 0 for LATIN1, 1 for UTF16.
    @Native static final byte LATIN1 = 0;
    @Native static final byte UTF16  = 1;
4826 
4827     /*
4828      * StringIndexOutOfBoundsException  if {@code index} is
4829      * negative or greater than or equal to {@code length}.
4830      */
    static void checkIndex(int index, int length) {
        // Delegates to Preconditions.checkIndex, passing SIOOBE_FORMATTER as
        // the formatter used to build the exception on failure.
        Preconditions.checkIndex(index, length, Preconditions.SIOOBE_FORMATTER);
    }
4834 
4835     /*
4836      * StringIndexOutOfBoundsException  if {@code offset}
4837      * is negative or greater than {@code length}.
4838      */
    static void checkOffset(int offset, int length) {
        // Expressed as a from/to range check with 'length' as both the upper
        // index and the bound, so that offset == length is accepted.
        Preconditions.checkFromToIndex(offset, length, length, Preconditions.SIOOBE_FORMATTER);
    }
4842 
4843     /*
4844      * Check {@code offset}, {@code count} against {@code 0} and {@code length}
4845      * bounds.
4846      *
     * @return  {@code offset} if the sub-range is within bounds of the range
4848      * @throws  StringIndexOutOfBoundsException
4849      *          If {@code offset} is negative, {@code count} is negative,
4850      *          or {@code offset} is greater than {@code length - count}
4851      */
    static int checkBoundsOffCount(int offset, int count, int length) {
        // Delegates to Preconditions.checkFromIndexSize and passes its
        // return value straight back to the caller.
        return Preconditions.checkFromIndexSize(offset, count, length, Preconditions.SIOOBE_FORMATTER);
    }
4855 
4856     /*
4857      * Check {@code begin}, {@code end} against {@code 0} and {@code length}
4858      * bounds.
4859      *
4860      * @throws  StringIndexOutOfBoundsException
4861      *          If {@code begin} is negative, {@code begin} is greater than
4862      *          {@code end}, or {@code end} is greater than {@code length}.
4863      */
    static void checkBoundsBeginEnd(int begin, int end, int length) {
        // Delegates to Preconditions.checkFromToIndex; its return value is
        // deliberately ignored, only the check matters here.
        Preconditions.checkFromToIndex(begin, end, length, Preconditions.SIOOBE_FORMATTER);
    }
4867 
4868     /**
4869      * Returns the string representation of the {@code codePoint}
4870      * argument.
4871      *
4872      * @param   codePoint a {@code codePoint}.
4873      * @return  a string of length {@code 1} or {@code 2} containing
4874      *          as its single character the argument {@code codePoint}.
4875      * @throws IllegalArgumentException if the specified
4876      *          {@code codePoint} is not a {@linkplain Character#isValidCodePoint
4877      *          valid Unicode code point}.
4878      */
4879     static String valueOfCodePoint(int codePoint) {
4880         if (COMPACT_STRINGS && StringLatin1.canEncode(codePoint)) {
4881             return new String(StringLatin1.toBytes((char)codePoint), LATIN1);
4882         } else if (Character.isBmpCodePoint(codePoint)) {
4883             return new String(StringUTF16.toBytes((char)codePoint), UTF16);
4884         } else if (Character.isSupplementaryCodePoint(codePoint)) {
4885             return new String(StringUTF16.toBytesSupplementary(codePoint), UTF16);
4886         }
4887 
4888         throw new IllegalArgumentException(
4889             format("Not a valid Unicode code point: 0x%X", codePoint));
4890     }
4891 
4892     /**
4893      * Returns an {@link Optional} containing the nominal descriptor for this
4894      * instance, which is the instance itself.
4895      *
4896      * @return an {@link Optional} describing the {@linkplain String} instance
4897      * @since 12
4898      */
    @Override
    public Optional<String> describeConstable() {
        // Optional.of rather than ofNullable: the receiver is never null.
        return Optional.of(this);
    }
4903 
4904     /**
4905      * Resolves this instance as a {@link ConstantDesc}, the result of which is
4906      * the instance itself.
4907      *
4908      * @param lookup ignored
4909      * @return the {@linkplain String} instance
4910      * @since 12
4911      */
    @Override
    public String resolveConstantDesc(MethodHandles.Lookup lookup) {
        // The lookup argument is not consulted.
        return this;
    }
4916 
4917 }