1 /*
   2  * Copyright (c) 1994, 2023, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package java.lang;
  27 
  28 import java.io.ObjectStreamField;
  29 import java.io.UnsupportedEncodingException;
  30 import java.lang.annotation.Native;
  31 import java.lang.invoke.MethodHandles;
  32 import java.lang.constant.Constable;
  33 import java.lang.constant.ConstantDesc;
  34 import java.nio.ByteBuffer;
  35 import java.nio.CharBuffer;
  36 import java.nio.charset.*;
  37 import java.util.ArrayList;
  38 import java.util.Arrays;
  39 import java.util.Comparator;
  40 import java.util.Formatter;
  41 import java.util.List;
  42 import java.util.Locale;
  43 import java.util.Objects;
  44 import java.util.Optional;
  45 import java.util.Spliterator;
  46 import java.util.function.Function;
  47 import java.util.regex.Pattern;
  48 import java.util.regex.PatternSyntaxException;
  49 import java.util.stream.Collectors;
  50 import java.util.stream.IntStream;
  51 import java.util.stream.Stream;
  52 import java.util.stream.StreamSupport;
  53 
  54 import jdk.internal.util.ArraysSupport;
  55 import jdk.internal.util.Preconditions;
  56 import jdk.internal.vm.annotation.ForceInline;
  57 import jdk.internal.vm.annotation.IntrinsicCandidate;
  58 import jdk.internal.vm.annotation.Stable;
  59 import sun.nio.cs.ArrayDecoder;
  60 import sun.nio.cs.ArrayEncoder;
  61 
  62 import sun.nio.cs.ISO_8859_1;
  63 import sun.nio.cs.US_ASCII;
  64 import sun.nio.cs.UTF_8;
  65 
  66 /**
  67  * The {@code String} class represents character strings. All
  68  * string literals in Java programs, such as {@code "abc"}, are
  69  * implemented as instances of this class.
  70  * <p>
  71  * Strings are constant; their values cannot be changed after they
  72  * are created. String buffers support mutable strings.
  73  * Because String objects are immutable they can be shared. For example:
  74  * <blockquote><pre>
  75  *     String str = "abc";
  76  * </pre></blockquote><p>
  77  * is equivalent to:
  78  * <blockquote><pre>
  79  *     char data[] = {'a', 'b', 'c'};
  80  *     String str = new String(data);
  81  * </pre></blockquote><p>
  82  * Here are some more examples of how strings can be used:
  83  * <blockquote><pre>
  84  *     System.out.println("abc");
  85  *     String cde = "cde";
  86  *     System.out.println("abc" + cde);
  87  *     String c = "abc".substring(2, 3);
  88  *     String d = cde.substring(1, 2);
  89  * </pre></blockquote>
  90  * <p>
  91  * The class {@code String} includes methods for examining
  92  * individual characters of the sequence, for comparing strings, for
  93  * searching strings, for extracting substrings, and for creating a
  94  * copy of a string with all characters translated to uppercase or to
  95  * lowercase. Case mapping is based on the Unicode Standard version
  96  * specified by the {@link java.lang.Character Character} class.
  97  * <p>
  98  * The Java language provides special support for the string
  99  * concatenation operator (&nbsp;+&nbsp;), and for conversion of
 100  * other objects to strings. For additional information on string
 101  * concatenation and conversion, see <i>The Java Language Specification</i>.
 102  *
 103  * <p> Unless otherwise noted, passing a {@code null} argument to a constructor
 104  * or method in this class will cause a {@link NullPointerException} to be
 105  * thrown.
 106  *
 107  * <p>A {@code String} represents a string in the UTF-16 format
 108  * in which <em>supplementary characters</em> are represented by <em>surrogate
 109  * pairs</em> (see the section <a href="Character.html#unicode">Unicode
 110  * Character Representations</a> in the {@code Character} class for
 111  * more information).
 112  * Index values refer to {@code char} code units, so a supplementary
 113  * character uses two positions in a {@code String}.
 114  * <p>The {@code String} class provides methods for dealing with
 115  * Unicode code points (i.e., characters), in addition to those for
 116  * dealing with Unicode code units (i.e., {@code char} values).
 117  *
 118  * <p>Unless otherwise noted, methods for comparing Strings do not take locale
 119  * into account.  The {@link java.text.Collator} class provides methods for
 120  * finer-grain, locale-sensitive String comparison.
 121  *
 122  * @implNote The implementation of the string concatenation operator is left to
 123  * the discretion of a Java compiler, as long as the compiler ultimately conforms
 124  * to <i>The Java Language Specification</i>. For example, the {@code javac} compiler
 125  * may implement the operator with {@code StringBuffer}, {@code StringBuilder},
 126  * or {@code java.lang.invoke.StringConcatFactory} depending on the JDK version. The
 127  * implementation of string conversion is typically through the method {@code toString},
 128  * defined by {@code Object} and inherited by all classes in Java.
 129  *
 130  * @author  Lee Boynton
 131  * @author  Arthur van Hoff
 132  * @author  Martin Buchholz
 133  * @author  Ulf Zibis
 134  * @see     java.lang.Object#toString()
 135  * @see     java.lang.StringBuffer
 136  * @see     java.lang.StringBuilder
 137  * @see     java.nio.charset.Charset
 138  * @since   1.0
 139  * @jls     15.18.1 String Concatenation Operator +
 140  */
 141 
 142 public final class String
 143     implements java.io.Serializable, Comparable<String>, CharSequence,
 144                Constable, ConstantDesc {
 145 
 146     /**
 147      * The value is used for character storage.
 148      *
     * @implNote This field is trusted by the VM, and is subject to
     * constant folding if the String instance is constant. Overwriting this
     * field after construction will cause problems.
 152      *
 153      * Additionally, it is marked with {@link Stable} to trust the contents
 154      * of the array. No other facility in JDK provides this functionality (yet).
 155      * {@link Stable} is safe here, because value is never null.
 156      */
 157     @Stable
 158     private final byte[] value;
 159 
 160     /**
 161      * The identifier of the encoding used to encode the bytes in
 162      * {@code value}. The supported values in this implementation are
 163      *
 164      * LATIN1
 165      * UTF16
 166      *
     * @implNote This field is trusted by the VM, and is subject to
     * constant folding if the String instance is constant. Overwriting this
     * field after construction will cause problems.
 170      */
 171     private final byte coder;
 172 
    /** Cached hash code for the string. */
    private int hash; // Defaults to 0

    /**
     * Records that the hash has been calculated and is actually zero, enabling
     * us to avoid recalculating it.
     */
    private boolean hashIsZero; // Defaults to false
 181 
 182     /** use serialVersionUID from JDK 1.0.2 for interoperability */
 183     @java.io.Serial
 184     private static final long serialVersionUID = -6849794470754667710L;
 185 
 186     /**
 187      * If String compaction is disabled, the bytes in {@code value} are
 188      * always encoded in UTF16.
 189      *
 190      * For methods with several possible implementation paths, when String
 191      * compaction is disabled, only one code path is taken.
 192      *
     * The instance field value is generally opaque to optimizing JIT
     * compilers. Therefore, in performance-sensitive places, an explicit
     * check of the static boolean {@code COMPACT_STRINGS} is done first
     * before checking the {@code coder} field since the static boolean
     * {@code COMPACT_STRINGS} would be constant folded away by an
     * optimizing JIT compiler. The idioms for these cases are as follows.
 199      *
     * Code such as:
 201      *
 202      *    if (coder == LATIN1) { ... }
 203      *
 204      * can be written more optimally as
 205      *
 206      *    if (coder() == LATIN1) { ... }
 207      *
 208      * or:
 209      *
 210      *    if (COMPACT_STRINGS && coder == LATIN1) { ... }
 211      *
 212      * An optimizing JIT compiler can fold the above conditional as:
 213      *
 214      *    COMPACT_STRINGS == true  => if (coder == LATIN1) { ... }
 215      *    COMPACT_STRINGS == false => if (false)           { ... }
 216      *
 217      * @implNote
 218      * The actual value for this field is injected by JVM. The static
 219      * initialization block is used to set the value here to communicate
 220      * that this static final field is not statically foldable, and to
 221      * avoid any possible circular dependency during vm initialization.
 222      */
    static final boolean COMPACT_STRINGS;

    static {
        // Placeholder assignment only: per the field's @implNote, the JVM
        // injects the actual value. Assigning in a static initializer block
        // (instead of in the declaration) communicates that the field is not
        // statically foldable.
        COMPACT_STRINGS = true;
    }
 228 
 229     /**
 230      * Class String is special cased within the Serialization Stream Protocol.
 231      *
 232      * A String instance is written into an ObjectOutputStream according to
 233      * <a href="{@docRoot}/../specs/serialization/protocol.html#stream-elements">
 234      * <cite>Java Object Serialization Specification</cite>, Section 6.2, "Stream Elements"</a>
 235      */
 236     @java.io.Serial
 237     private static final ObjectStreamField[] serialPersistentFields =
 238         new ObjectStreamField[0];
 239 
 240     /**
 241      * Initializes a newly created {@code String} object so that it represents
 242      * an empty character sequence.  Note that use of this constructor is
 243      * unnecessary since Strings are immutable.
 244      */
 245     public String() {
 246         this.value = "".value;
 247         this.coder = "".coder;
 248     }
 249 
 250     /**
 251      * Initializes a newly created {@code String} object so that it represents
 252      * the same sequence of characters as the argument; in other words, the
 253      * newly created string is a copy of the argument string. Unless an
 254      * explicit copy of {@code original} is needed, use of this constructor is
 255      * unnecessary since Strings are immutable.
 256      *
 257      * @param  original
 258      *         A {@code String}
 259      */
 260     @IntrinsicCandidate
 261     public String(String original) {
 262         this.value = original.value;
 263         this.coder = original.coder;
 264         this.hash = original.hash;
 265         this.hashIsZero = original.hashIsZero;
 266     }
 267 
 268     /**
 269      * Allocates a new {@code String} so that it represents the sequence of
 270      * characters currently contained in the character array argument. The
 271      * contents of the character array are copied; subsequent modification of
 272      * the character array does not affect the newly created string.
 273      *
 274      * @param  value
 275      *         The initial value of the string
 276      */
    public String(char[] value) {
        // The whole array is used (offset 0, count value.length). The trailing
        // null occupies the fourth parameter of the delegated constructor --
        // presumably the same Void placeholder that rangeCheck(...) supplies
        // for the (char[], int, int) variant; confirm against that constructor.
        this(value, 0, value.length, null);
    }
 280 
 281     /**
 282      * Allocates a new {@code String} that contains characters from a subarray
 283      * of the character array argument. The {@code offset} argument is the
 284      * index of the first character of the subarray and the {@code count}
 285      * argument specifies the length of the subarray. The contents of the
 286      * subarray are copied; subsequent modification of the character array does
 287      * not affect the newly created string.
 288      *
 289      * @param  value
 290      *         Array that is the source of characters
 291      *
 292      * @param  offset
 293      *         The initial offset
 294      *
 295      * @param  count
 296      *         The length
 297      *
 298      * @throws  IndexOutOfBoundsException
 299      *          If {@code offset} is negative, {@code count} is negative, or
 300      *          {@code offset} is greater than {@code value.length - count}
 301      */
    public String(char[] value, int offset, int count) {
        // rangeCheck(...) is evaluated as an argument expression, so the bounds
        // are validated before the delegated constructor body runs; it always
        // returns null, which only serves to fill the fourth parameter.
        this(value, offset, count, rangeCheck(value, offset, count));
    }
 305 
 306     private static Void rangeCheck(char[] value, int offset, int count) {
 307         checkBoundsOffCount(offset, count, value.length);
 308         return null;
 309     }
 310 
 311     /**
 312      * Allocates a new {@code String} that contains characters from a subarray
 313      * of the <a href="Character.html#unicode">Unicode code point</a> array
 314      * argument.  The {@code offset} argument is the index of the first code
 315      * point of the subarray and the {@code count} argument specifies the
 316      * length of the subarray.  The contents of the subarray are converted to
 317      * {@code char}s; subsequent modification of the {@code int} array does not
 318      * affect the newly created string.
 319      *
 320      * @param  codePoints
 321      *         Array that is the source of Unicode code points
 322      *
 323      * @param  offset
 324      *         The initial offset
 325      *
 326      * @param  count
 327      *         The length
 328      *
 329      * @throws  IllegalArgumentException
 330      *          If any invalid Unicode code point is found in {@code
 331      *          codePoints}
 332      *
 333      * @throws  IndexOutOfBoundsException
 334      *          If {@code offset} is negative, {@code count} is negative, or
 335      *          {@code offset} is greater than {@code codePoints.length - count}
 336      *
 337      * @since  1.5
 338      */
 339     public String(int[] codePoints, int offset, int count) {
 340         checkBoundsOffCount(offset, count, codePoints.length);
 341         if (count == 0) {
 342             this.value = "".value;
 343             this.coder = "".coder;
 344             return;
 345         }
 346         if (COMPACT_STRINGS) {
 347             byte[] val = StringLatin1.toBytes(codePoints, offset, count);
 348             if (val != null) {
 349                 this.coder = LATIN1;
 350                 this.value = val;
 351                 return;
 352             }
 353         }
 354         this.coder = UTF16;
 355         this.value = StringUTF16.toBytes(codePoints, offset, count);
 356     }
 357 
 358     /**
 359      * Allocates a new {@code String} constructed from a subarray of an array
 360      * of 8-bit integer values.
 361      *
 362      * <p> The {@code offset} argument is the index of the first byte of the
 363      * subarray, and the {@code count} argument specifies the length of the
 364      * subarray.
 365      *
 366      * <p> Each {@code byte} in the subarray is converted to a {@code char} as
 367      * specified in the {@link #String(byte[],int) String(byte[],int)} constructor.
 368      *
 369      * @deprecated This method does not properly convert bytes into characters.
 370      * As of JDK&nbsp;1.1, the preferred way to do this is via the
 371      * {@code String} constructors that take a {@link Charset}, charset name,
 372      * or that use the {@link Charset#defaultCharset() default charset}.
 373      *
 374      * @param  ascii
 375      *         The bytes to be converted to characters
 376      *
 377      * @param  hibyte
 378      *         The top 8 bits of each 16-bit Unicode code unit
 379      *
 380      * @param  offset
 381      *         The initial offset
 382      * @param  count
 383      *         The length
 384      *
 385      * @throws  IndexOutOfBoundsException
 386      *          If {@code offset} is negative, {@code count} is negative, or
 387      *          {@code offset} is greater than {@code ascii.length - count}
 388      *
 389      * @see  #String(byte[], int)
 390      * @see  #String(byte[], int, int, java.lang.String)
 391      * @see  #String(byte[], int, int, java.nio.charset.Charset)
 392      * @see  #String(byte[], int, int)
 393      * @see  #String(byte[], java.lang.String)
 394      * @see  #String(byte[], java.nio.charset.Charset)
 395      * @see  #String(byte[])
 396      */
 397     @Deprecated(since="1.1")
 398     public String(byte[] ascii, int hibyte, int offset, int count) {
 399         checkBoundsOffCount(offset, count, ascii.length);
 400         if (count == 0) {
 401             this.value = "".value;
 402             this.coder = "".coder;
 403             return;
 404         }
 405         if (COMPACT_STRINGS && (byte)hibyte == 0) {
 406             this.value = Arrays.copyOfRange(ascii, offset, offset + count);
 407             this.coder = LATIN1;
 408         } else {
 409             hibyte <<= 8;
 410             byte[] val = StringUTF16.newBytesFor(count);
 411             for (int i = 0; i < count; i++) {
 412                 StringUTF16.putChar(val, i, hibyte | (ascii[offset++] & 0xff));
 413             }
 414             this.value = val;
 415             this.coder = UTF16;
 416         }
 417     }
 418 
 419     /**
 420      * Allocates a new {@code String} containing characters constructed from
 421      * an array of 8-bit integer values. Each character <i>c</i> in the
 422      * resulting string is constructed from the corresponding component
 423      * <i>b</i> in the byte array such that:
 424      *
 425      * <blockquote><pre>
 426      *     <b><i>c</i></b> == (char)(((hibyte &amp; 0xff) &lt;&lt; 8)
 427      *                         | (<b><i>b</i></b> &amp; 0xff))
 428      * </pre></blockquote>
 429      *
 430      * @deprecated  This method does not properly convert bytes into
 431      * characters.  As of JDK&nbsp;1.1, the preferred way to do this is via the
 432      * {@code String} constructors that take a {@link Charset}, charset name,
 433      * or that use the {@link Charset#defaultCharset() default charset}.
 434      *
 435      * @param  ascii
 436      *         The bytes to be converted to characters
 437      *
 438      * @param  hibyte
 439      *         The top 8 bits of each 16-bit Unicode code unit
 440      *
 441      * @see  #String(byte[], int, int, java.lang.String)
 442      * @see  #String(byte[], int, int, java.nio.charset.Charset)
 443      * @see  #String(byte[], int, int)
 444      * @see  #String(byte[], java.lang.String)
 445      * @see  #String(byte[], java.nio.charset.Charset)
 446      * @see  #String(byte[])
 447      */
    @Deprecated(since="1.1")
    public String(byte[] ascii, int hibyte) {
        // Delegates to the subarray variant, covering the entire array
        // (offset 0, count ascii.length).
        this(ascii, hibyte, 0, ascii.length);
    }
 452 
 453     /**
 454      * Constructs a new {@code String} by decoding the specified subarray of
 455      * bytes using the specified charset.  The length of the new {@code String}
 456      * is a function of the charset, and hence may not be equal to the length
 457      * of the subarray.
 458      *
 459      * <p> The behavior of this constructor when the given bytes are not valid
 460      * in the given charset is unspecified.  The {@link
 461      * java.nio.charset.CharsetDecoder} class should be used when more control
 462      * over the decoding process is required.
 463      *
 464      * @param  bytes
 465      *         The bytes to be decoded into characters
 466      *
 467      * @param  offset
 468      *         The index of the first byte to decode
 469      *
 470      * @param  length
 471      *         The number of bytes to decode
 472      *
 473      * @param  charsetName
 474      *         The name of a supported {@linkplain java.nio.charset.Charset
 475      *         charset}
 476      *
 477      * @throws  UnsupportedEncodingException
 478      *          If the named charset is not supported
 479      *
 480      * @throws  IndexOutOfBoundsException
 481      *          If {@code offset} is negative, {@code length} is negative, or
 482      *          {@code offset} is greater than {@code bytes.length - length}
 483      *
 484      * @since  1.1
 485      */
    public String(byte[] bytes, int offset, int length, String charsetName)
            throws UnsupportedEncodingException {
        // Argument expressions are evaluated left to right, so
        // lookupCharset(charsetName) runs before the bounds check. The value
        // returned by checkBoundsOffCount is what gets passed on as the offset
        // argument of the private decoding constructor.
        this(lookupCharset(charsetName), bytes, checkBoundsOffCount(offset, length, bytes.length), length);
    }
 490 
 491     /**
 492      * Constructs a new {@code String} by decoding the specified subarray of
 493      * bytes using the specified {@linkplain java.nio.charset.Charset charset}.
 494      * The length of the new {@code String} is a function of the charset, and
 495      * hence may not be equal to the length of the subarray.
 496      *
 497      * <p> This method always replaces malformed-input and unmappable-character
 498      * sequences with this charset's default replacement string.  The {@link
 499      * java.nio.charset.CharsetDecoder} class should be used when more control
 500      * over the decoding process is required.
 501      *
 502      * @param  bytes
 503      *         The bytes to be decoded into characters
 504      *
 505      * @param  offset
 506      *         The index of the first byte to decode
 507      *
 508      * @param  length
 509      *         The number of bytes to decode
 510      *
 511      * @param  charset
 512      *         The {@linkplain java.nio.charset.Charset charset} to be used to
 513      *         decode the {@code bytes}
 514      *
 515      * @throws  IndexOutOfBoundsException
 516      *          If {@code offset} is negative, {@code length} is negative, or
 517      *          {@code offset} is greater than {@code bytes.length - length}
 518      *
 519      * @since  1.6
 520      */
    public String(byte[] bytes, int offset, int length, Charset charset) {
        // Argument expressions are evaluated left to right, so the null check
        // on charset runs before the bounds check. The value returned by
        // checkBoundsOffCount is what gets passed on as the offset argument of
        // the private decoding constructor.
        this(Objects.requireNonNull(charset), bytes, checkBoundsOffCount(offset, length, bytes.length), length);
    }
 524 
 525     /**
 526      * This method does not do any precondition checks on its arguments.
 527      * <p>
 528      * Important: parameter order of this method is deliberately changed in order to
 529      * disambiguate it against other similar methods of this class.
 530      */
 531     @SuppressWarnings("removal")
 532     private String(Charset charset, byte[] bytes, int offset, int length) {
 533         if (length == 0) {
 534             this.value = "".value;
 535             this.coder = "".coder;
 536         } else if (charset == UTF_8.INSTANCE) {
 537             if (COMPACT_STRINGS) {
 538                 int dp = StringCoding.countPositives(bytes, offset, length);
 539                 if (dp == length) {
 540                     this.value = Arrays.copyOfRange(bytes, offset, offset + length);
 541                     this.coder = LATIN1;
 542                     return;
 543                 }
 544                 int sl = offset + length;
 545                 byte[] dst = new byte[length];
 546                 if (dp > 0) {
 547                     System.arraycopy(bytes, offset, dst, 0, dp);
 548                     offset += dp;
 549                 }
 550                 while (offset < sl) {
 551                     int b1 = bytes[offset++];
 552                     if (b1 >= 0) {
 553                         dst[dp++] = (byte)b1;
 554                         continue;
 555                     }
 556                     if ((b1 & 0xfe) == 0xc2 && offset < sl) { // b1 either 0xc2 or 0xc3
 557                         int b2 = bytes[offset];
 558                         if (b2 < -64) { // continuation bytes are always negative values in the range -128 to -65
 559                             dst[dp++] = (byte)decode2(b1, b2);
 560                             offset++;
 561                             continue;
 562                         }
 563                     }
 564                     // anything not a latin1, including the REPL
 565                     // we have to go with the utf16
 566                     offset--;
 567                     break;
 568                 }
 569                 if (offset == sl) {
 570                     if (dp != dst.length) {
 571                         dst = Arrays.copyOf(dst, dp);
 572                     }
 573                     this.value = dst;
 574                     this.coder = LATIN1;
 575                     return;
 576                 }
 577                 byte[] buf = new byte[length << 1];
 578                 StringLatin1.inflate(dst, 0, buf, 0, dp);
 579                 dst = buf;
 580                 dp = decodeUTF8_UTF16(bytes, offset, sl, dst, dp, true);
 581                 if (dp != length) {
 582                     dst = Arrays.copyOf(dst, dp << 1);
 583                 }
 584                 this.value = dst;
 585                 this.coder = UTF16;
 586             } else { // !COMPACT_STRINGS
 587                 byte[] dst = new byte[length << 1];
 588                 int dp = decodeUTF8_UTF16(bytes, offset, offset + length, dst, 0, true);
 589                 if (dp != length) {
 590                     dst = Arrays.copyOf(dst, dp << 1);
 591                 }
 592                 this.value = dst;
 593                 this.coder = UTF16;
 594             }
 595         } else if (charset == ISO_8859_1.INSTANCE) {
 596             if (COMPACT_STRINGS) {
 597                 this.value = Arrays.copyOfRange(bytes, offset, offset + length);
 598                 this.coder = LATIN1;
 599             } else {
 600                 this.value = StringLatin1.inflate(bytes, offset, length);
 601                 this.coder = UTF16;
 602             }
 603         } else if (charset == US_ASCII.INSTANCE) {
 604             if (COMPACT_STRINGS && !StringCoding.hasNegatives(bytes, offset, length)) {
 605                 this.value = Arrays.copyOfRange(bytes, offset, offset + length);
 606                 this.coder = LATIN1;
 607             } else {
 608                 byte[] dst = new byte[length << 1];
 609                 int dp = 0;
 610                 while (dp < length) {
 611                     int b = bytes[offset++];
 612                     StringUTF16.putChar(dst, dp++, (b >= 0) ? (char) b : REPL);
 613                 }
 614                 this.value = dst;
 615                 this.coder = UTF16;
 616             }
 617         } else {
 618             // (1)We never cache the "external" cs, the only benefit of creating
 619             // an additional StringDe/Encoder object to wrap it is to share the
 620             // de/encode() method. These SD/E objects are short-lived, the young-gen
 621             // gc should be able to take care of them well. But the best approach
 622             // is still not to generate them if not really necessary.
 623             // (2)The defensive copy of the input byte/char[] has a big performance
 624             // impact, as well as the outgoing result byte/char[]. Need to do the
 625             // optimization check of (sm==null && classLoader0==null) for both.
 626             CharsetDecoder cd = charset.newDecoder();
 627             // ArrayDecoder fastpaths
 628             if (cd instanceof ArrayDecoder ad) {
 629                 // ascii
 630                 if (ad.isASCIICompatible() && !StringCoding.hasNegatives(bytes, offset, length)) {
 631                     if (COMPACT_STRINGS) {
 632                         this.value = Arrays.copyOfRange(bytes, offset, offset + length);
 633                         this.coder = LATIN1;
 634                         return;
 635                     }
 636                     this.value = StringLatin1.inflate(bytes, offset, length);
 637                     this.coder = UTF16;
 638                     return;
 639                 }
 640 
 641                 // fastpath for always Latin1 decodable single byte
 642                 if (COMPACT_STRINGS && ad.isLatin1Decodable()) {
 643                     byte[] dst = new byte[length];
 644                     ad.decodeToLatin1(bytes, offset, length, dst);
 645                     this.value = dst;
 646                     this.coder = LATIN1;
 647                     return;
 648                 }
 649 
 650                 int en = scale(length, cd.maxCharsPerByte());
 651                 cd.onMalformedInput(CodingErrorAction.REPLACE)
 652                         .onUnmappableCharacter(CodingErrorAction.REPLACE);
 653                 char[] ca = new char[en];
 654                 int clen = ad.decode(bytes, offset, length, ca);
 655                 if (COMPACT_STRINGS) {
 656                     byte[] bs = StringUTF16.compress(ca, 0, clen);
 657                     if (bs != null) {
 658                         value = bs;
 659                         coder = LATIN1;
 660                         return;
 661                     }
 662                 }
 663                 coder = UTF16;
 664                 value = StringUTF16.toBytes(ca, 0, clen);
 665                 return;
 666             }
 667 
 668             // decode using CharsetDecoder
 669             int en = scale(length, cd.maxCharsPerByte());
 670             cd.onMalformedInput(CodingErrorAction.REPLACE)
 671                     .onUnmappableCharacter(CodingErrorAction.REPLACE);
 672             char[] ca = new char[en];
 673             if (charset.getClass().getClassLoader0() != null &&
 674                     System.getSecurityManager() != null) {
 675                 bytes = Arrays.copyOfRange(bytes, offset, offset + length);
 676                 offset = 0;
 677             }
 678 
 679             int caLen;
 680             try {
 681                 caLen = decodeWithDecoder(cd, ca, bytes, offset, length);
 682             } catch (CharacterCodingException x) {
 683                 // Substitution is enabled, so this shouldn't happen
 684                 throw new Error(x);
 685             }
 686             if (COMPACT_STRINGS) {
 687                 byte[] bs = StringUTF16.compress(ca, 0, caLen);
 688                 if (bs != null) {
 689                     value = bs;
 690                     coder = LATIN1;
 691                     return;
 692                 }
 693             }
 694             coder = UTF16;
 695             value = StringUTF16.toBytes(ca, 0, caLen);
 696         }
 697     }
 698 
 699     /*
 700      * Throws iae, instead of replacing, if malformed or unmappable.
 701      *
 702      * @param  noShare
 703      *         {@code true} if the resulting string MUST NOT share the byte array,
 704      *         {@code false} if the byte array can be exclusively used to construct
 705      *         the string and is not modified or used for any other purpose.
 706      */
 707     static String newStringUTF8NoRepl(byte[] bytes, int offset, int length, boolean noShare) {
 708         checkBoundsOffCount(offset, length, bytes.length);
 709         if (length == 0) {
 710             return "";
 711         }
 712         int dp;
 713         byte[] dst;
 714         if (COMPACT_STRINGS) {
 715             dp = StringCoding.countPositives(bytes, offset, length);
 716             int sl = offset + length;
 717             if (dp == length) {
 718                 if (noShare || length != bytes.length) {
 719                     return new String(Arrays.copyOfRange(bytes, offset, offset + length), LATIN1);
 720                 } else {
 721                     return new String(bytes, LATIN1);
 722                 }
 723             }
 724             dst = new byte[length];
 725             System.arraycopy(bytes, offset, dst, 0, dp);
 726             offset += dp;
 727             while (offset < sl) {
 728                 int b1 = bytes[offset++];
 729                 if (b1 >= 0) {
 730                     dst[dp++] = (byte)b1;
 731                     continue;
 732                 }
 733                 if ((b1 & 0xfe) == 0xc2 && offset < sl) { // b1 either 0xc2 or 0xc3
 734                     int b2 = bytes[offset];
 735                     if (b2 < -64) { // continuation bytes are always negative values in the range -128 to -65
 736                         dst[dp++] = (byte)decode2(b1, b2);
 737                         offset++;
 738                         continue;
 739                     }
 740                 }
 741                 // anything not a latin1, including the REPL
 742                 // we have to go with the utf16
 743                 offset--;
 744                 break;
 745             }
 746             if (offset == sl) {
 747                 if (dp != dst.length) {
 748                     dst = Arrays.copyOf(dst, dp);
 749                 }
 750                 return new String(dst, LATIN1);
 751             }
 752             if (dp == 0) {
 753                 dst = new byte[length << 1];
 754             } else {
 755                 byte[] buf = new byte[length << 1];
 756                 StringLatin1.inflate(dst, 0, buf, 0, dp);
 757                 dst = buf;
 758             }
 759             dp = decodeUTF8_UTF16(bytes, offset, sl, dst, dp, false);
 760         } else { // !COMPACT_STRINGS
 761             dst = new byte[length << 1];
 762             dp = decodeUTF8_UTF16(bytes, offset, offset + length, dst, 0, false);
 763         }
 764         if (dp != length) {
 765             dst = Arrays.copyOf(dst, dp << 1);
 766         }
 767         return new String(dst, UTF16);
 768     }
 769 
 770     static String newStringNoRepl(byte[] src, Charset cs) throws CharacterCodingException {
 771         try {
 772             return newStringNoRepl1(src, cs);
 773         } catch (IllegalArgumentException e) {
 774             //newStringNoRepl1 throws IAE with MalformedInputException or CCE as the cause
 775             Throwable cause = e.getCause();
 776             if (cause instanceof MalformedInputException mie) {
 777                 throw mie;
 778             }
 779             throw (CharacterCodingException)cause;
 780         }
 781     }
 782 
 783     @SuppressWarnings("removal")
 784     private static String newStringNoRepl1(byte[] src, Charset cs) {
 785         int len = src.length;
 786         if (len == 0) {
 787             return "";
 788         }
 789         if (cs == UTF_8.INSTANCE) {
 790             return newStringUTF8NoRepl(src, 0, src.length, false);
 791         }
 792         if (cs == ISO_8859_1.INSTANCE) {
 793             if (COMPACT_STRINGS)
 794                 return new String(src, LATIN1);
 795             return new String(StringLatin1.inflate(src, 0, src.length), UTF16);
 796         }
 797         if (cs == US_ASCII.INSTANCE) {
 798             if (!StringCoding.hasNegatives(src, 0, src.length)) {
 799                 if (COMPACT_STRINGS)
 800                     return new String(src, LATIN1);
 801                 return new String(StringLatin1.inflate(src, 0, src.length), UTF16);
 802             } else {
 803                 throwMalformed(src);
 804             }
 805         }
 806 
 807         CharsetDecoder cd = cs.newDecoder();
 808         // ascii fastpath
 809         if (cd instanceof ArrayDecoder ad &&
 810                 ad.isASCIICompatible() &&
 811                 !StringCoding.hasNegatives(src, 0, src.length)) {
 812             if (COMPACT_STRINGS)
 813                 return new String(src, LATIN1);
 814             return new String(src, 0, src.length, ISO_8859_1.INSTANCE);
 815         }
 816         int en = scale(len, cd.maxCharsPerByte());
 817         char[] ca = new char[en];
 818         if (cs.getClass().getClassLoader0() != null &&
 819                 System.getSecurityManager() != null) {
 820             src = Arrays.copyOf(src, len);
 821         }
 822         int caLen;
 823         try {
 824             caLen = decodeWithDecoder(cd, ca, src, 0, src.length);
 825         } catch (CharacterCodingException x) {
 826             // throw via IAE
 827             throw new IllegalArgumentException(x);
 828         }
 829         if (COMPACT_STRINGS) {
 830             byte[] bs = StringUTF16.compress(ca, 0, caLen);
 831             if (bs != null) {
 832                 return new String(bs, LATIN1);
 833             }
 834         }
 835         return new String(StringUTF16.toBytes(ca, 0, caLen), UTF16);
 836     }
 837 
    // Replacement character U+FFFD; decodeUTF8_UTF16 writes it in place of
    // malformed input when it is called with doReplace == true.
    private static final char REPL = '\ufffd';
 839 
 840     // Trim the given byte array to the given length
 841     @SuppressWarnings("removal")
 842     private static byte[] safeTrim(byte[] ba, int len, boolean isTrusted) {
 843         if (len == ba.length && (isTrusted || System.getSecurityManager() == null)) {
 844             return ba;
 845         } else {
 846             return Arrays.copyOf(ba, len);
 847         }
 848     }
 849 
 850     private static int scale(int len, float expansionFactor) {
 851         // We need to perform double, not float, arithmetic; otherwise
 852         // we lose low order bits when len is larger than 2**24.
 853         return (int)(len * (double)expansionFactor);
 854     }
 855 
 856     private static Charset lookupCharset(String csn) throws UnsupportedEncodingException {
 857         Objects.requireNonNull(csn);
 858         try {
 859             return Charset.forName(csn);
 860         } catch (UnsupportedCharsetException | IllegalCharsetNameException x) {
 861             throw new UnsupportedEncodingException(csn);
 862         }
 863     }
 864 
 865     private static byte[] encode(Charset cs, byte coder, byte[] val) {
 866         if (cs == UTF_8.INSTANCE) {
 867             return encodeUTF8(coder, val, true);
 868         }
 869         if (cs == ISO_8859_1.INSTANCE) {
 870             return encode8859_1(coder, val);
 871         }
 872         if (cs == US_ASCII.INSTANCE) {
 873             return encodeASCII(coder, val);
 874         }
 875         return encodeWithEncoder(cs, coder, val, true);
 876     }
 877 
 878     private static byte[] encodeWithEncoder(Charset cs, byte coder, byte[] val, boolean doReplace) {
 879         CharsetEncoder ce = cs.newEncoder();
 880         int len = val.length >> coder;  // assume LATIN1=0/UTF16=1;
 881         int en = scale(len, ce.maxBytesPerChar());
 882         // fastpath with ArrayEncoder implies `doReplace`.
 883         if (doReplace && ce instanceof ArrayEncoder ae) {
 884             // fastpath for ascii compatible
 885             if (coder == LATIN1 &&
 886                     ae.isASCIICompatible() &&
 887                     !StringCoding.hasNegatives(val, 0, val.length)) {
 888                 return val.clone();
 889             }
 890             byte[] ba = new byte[en];
 891             if (len == 0) {
 892                 return ba;
 893             }
 894 
 895             int blen = (coder == LATIN1) ? ae.encodeFromLatin1(val, 0, len, ba)
 896                     : ae.encodeFromUTF16(val, 0, len, ba);
 897             if (blen != -1) {
 898                 return safeTrim(ba, blen, true);
 899             }
 900         }
 901 
 902         byte[] ba = new byte[en];
 903         if (len == 0) {
 904             return ba;
 905         }
 906         if (doReplace) {
 907             ce.onMalformedInput(CodingErrorAction.REPLACE)
 908                     .onUnmappableCharacter(CodingErrorAction.REPLACE);
 909         }
 910         char[] ca = (coder == LATIN1 ) ? StringLatin1.toChars(val)
 911                 : StringUTF16.toChars(val);
 912         ByteBuffer bb = ByteBuffer.wrap(ba);
 913         CharBuffer cb = CharBuffer.wrap(ca, 0, len);
 914         try {
 915             CoderResult cr = ce.encode(cb, bb, true);
 916             if (!cr.isUnderflow())
 917                 cr.throwException();
 918             cr = ce.flush(bb);
 919             if (!cr.isUnderflow())
 920                 cr.throwException();
 921         } catch (CharacterCodingException x) {
 922             if (!doReplace) {
 923                 throw new IllegalArgumentException(x);
 924             } else {
 925                 throw new Error(x);
 926             }
 927         }
 928         return safeTrim(ba, bb.position(), cs.getClass().getClassLoader0() == null);
 929     }
 930 
 931     /*
 932      * Throws iae, instead of replacing, if unmappable.
 933      */
 934     static byte[] getBytesUTF8NoRepl(String s) {
 935         return encodeUTF8(s.coder(), s.value(), false);
 936     }
 937 
 938     private static boolean isASCII(byte[] src) {
 939         return !StringCoding.hasNegatives(src, 0, src.length);
 940     }
 941 
 942     /*
 943      * Throws CCE, instead of replacing, if unmappable.
 944      */
 945     static byte[] getBytesNoRepl(String s, Charset cs) throws CharacterCodingException {
 946         try {
 947             return getBytesNoRepl1(s, cs);
 948         } catch (IllegalArgumentException e) {
 949             //getBytesNoRepl1 throws IAE with UnmappableCharacterException or CCE as the cause
 950             Throwable cause = e.getCause();
 951             if (cause instanceof UnmappableCharacterException) {
 952                 throw (UnmappableCharacterException)cause;
 953             }
 954             throw (CharacterCodingException)cause;
 955         }
 956     }
 957 
 958     private static byte[] getBytesNoRepl1(String s, Charset cs) {
 959         byte[] val = s.value();
 960         byte coder = s.coder();
 961         if (cs == UTF_8.INSTANCE) {
 962             if (coder == LATIN1 && isASCII(val)) {
 963                 return val;
 964             }
 965             return encodeUTF8(coder, val, false);
 966         }
 967         if (cs == ISO_8859_1.INSTANCE) {
 968             if (coder == LATIN1) {
 969                 return val;
 970             }
 971             return encode8859_1(coder, val, false);
 972         }
 973         if (cs == US_ASCII.INSTANCE) {
 974             if (coder == LATIN1) {
 975                 if (isASCII(val)) {
 976                     return val;
 977                 } else {
 978                     throwUnmappable(val);
 979                 }
 980             }
 981         }
 982         return encodeWithEncoder(cs, coder, val, false);
 983     }
 984 
 985     private static byte[] encodeASCII(byte coder, byte[] val) {
 986         if (coder == LATIN1) {
 987             int positives = StringCoding.countPositives(val, 0, val.length);
 988             byte[] dst = val.clone();
 989             if (positives < dst.length) {
 990                 replaceNegatives(dst, positives);
 991             }
 992             return dst;
 993         }
 994         int len = val.length >> 1;
 995         byte[] dst = new byte[len];
 996         int dp = 0;
 997         for (int i = 0; i < len; i++) {
 998             char c = StringUTF16.getChar(val, i);
 999             if (c < 0x80) {
1000                 dst[dp++] = (byte)c;
1001                 continue;
1002             }
1003             if (Character.isHighSurrogate(c) && i + 1 < len &&
1004                     Character.isLowSurrogate(StringUTF16.getChar(val, i + 1))) {
1005                 i++;
1006             }
1007             dst[dp++] = '?';
1008         }
1009         if (len == dp) {
1010             return dst;
1011         }
1012         return Arrays.copyOf(dst, dp);
1013     }
1014 
1015     private static void replaceNegatives(byte[] val, int fromIndex) {
1016         for (int i = fromIndex; i < val.length; i++) {
1017             if (val[i] < 0) {
1018                 val[i] = '?';
1019             }
1020         }
1021     }
1022 
1023     private static byte[] encode8859_1(byte coder, byte[] val) {
1024         return encode8859_1(coder, val, true);
1025     }
1026 
1027     private static byte[] encode8859_1(byte coder, byte[] val, boolean doReplace) {
1028         if (coder == LATIN1) {
1029             return val.clone();
1030         }
1031         int len = val.length >> 1;
1032         byte[] dst = new byte[len];
1033         int dp = 0;
1034         int sp = 0;
1035         int sl = len;
1036         while (sp < sl) {
1037             int ret = StringCoding.implEncodeISOArray(val, sp, dst, dp, len);
1038             sp = sp + ret;
1039             dp = dp + ret;
1040             if (ret != len) {
1041                 if (!doReplace) {
1042                     throwUnmappable(sp);
1043                 }
1044                 char c = StringUTF16.getChar(val, sp++);
1045                 if (Character.isHighSurrogate(c) && sp < sl &&
1046                         Character.isLowSurrogate(StringUTF16.getChar(val, sp))) {
1047                     sp++;
1048                 }
1049                 dst[dp++] = '?';
1050                 len = sl - sp;
1051             }
1052         }
1053         if (dp == dst.length) {
1054             return dst;
1055         }
1056         return Arrays.copyOf(dst, dp);
1057     }
1058 
1059     //////////////////////////////// utf8 ////////////////////////////////////
1060 
1061     /**
1062      * Decodes ASCII from the source byte array into the destination
1063      * char array. Used via JavaLangAccess from UTF_8 and other charset
1064      * decoders.
1065      *
1066      * @return the number of bytes successfully decoded, at most len
1067      */
1068     /* package-private */
1069     static int decodeASCII(byte[] sa, int sp, char[] da, int dp, int len) {
1070         int count = StringCoding.countPositives(sa, sp, len);
1071         while (count < len) {
1072             if (sa[sp + count] < 0) {
1073                 break;
1074             }
1075             count++;
1076         }
1077         StringLatin1.inflate(sa, sp, da, dp, count);
1078         return count;
1079     }
1080 
1081     private static boolean isNotContinuation(int b) {
1082         return (b & 0xc0) != 0x80;
1083     }
1084 
1085     private static boolean isMalformed3(int b1, int b2, int b3) {
1086         return (b1 == (byte)0xe0 && (b2 & 0xe0) == 0x80) ||
1087                 (b2 & 0xc0) != 0x80 || (b3 & 0xc0) != 0x80;
1088     }
1089 
1090     private static boolean isMalformed3_2(int b1, int b2) {
1091         return (b1 == (byte)0xe0 && (b2 & 0xe0) == 0x80) ||
1092                 (b2 & 0xc0) != 0x80;
1093     }
1094 
1095     private static boolean isMalformed4(int b2, int b3, int b4) {
1096         return (b2 & 0xc0) != 0x80 || (b3 & 0xc0) != 0x80 ||
1097                 (b4 & 0xc0) != 0x80;
1098     }
1099 
1100     private static boolean isMalformed4_2(int b1, int b2) {
1101         return (b1 == 0xf0 && (b2 < 0x90 || b2 > 0xbf)) ||
1102                 (b1 == 0xf4 && (b2 & 0xf0) != 0x80) ||
1103                 (b2 & 0xc0) != 0x80;
1104     }
1105 
1106     private static boolean isMalformed4_3(int b3) {
1107         return (b3 & 0xc0) != 0x80;
1108     }
1109 
1110     private static char decode2(int b1, int b2) {
1111         return (char)(((b1 << 6) ^ b2) ^
1112                 (((byte) 0xC0 << 6) ^
1113                         ((byte) 0x80 << 0)));
1114     }
1115 
1116     private static char decode3(int b1, int b2, int b3) {
1117         return (char)((b1 << 12) ^
1118                 (b2 <<  6) ^
1119                 (b3 ^
1120                         (((byte) 0xE0 << 12) ^
1121                                 ((byte) 0x80 <<  6) ^
1122                                 ((byte) 0x80 <<  0))));
1123     }
1124 
1125     private static int decode4(int b1, int b2, int b3, int b4) {
1126         return ((b1 << 18) ^
1127                 (b2 << 12) ^
1128                 (b3 <<  6) ^
1129                 (b4 ^
1130                         (((byte) 0xF0 << 18) ^
1131                                 ((byte) 0x80 << 12) ^
1132                                 ((byte) 0x80 <<  6) ^
1133                                 ((byte) 0x80 <<  0))));
1134     }
1135 
    /*
     * Decodes the UTF-8 bytes src[sp, sl) into dst, writing UTF-16 chars
     * through StringUTF16.putChar starting at char index dp.
     *
     * When doReplace is true each malformed sequence is written as REPL;
     * when it is false the first malformed sequence is reported through
     * throwMalformed, which throws IllegalArgumentException.
     *
     * Returns the updated dp, i.e. the char index following the last char
     * written.
     */
    private static int decodeUTF8_UTF16(byte[] src, int sp, int sl, byte[] dst, int dp, boolean doReplace) {
        while (sp < sl) {
            // b1 is sign-extended, so any byte >= 0x80 shows up negative here.
            int b1 = src[sp++];
            if (b1 >= 0) {
                // Single-byte (ASCII) character.
                StringUTF16.putChar(dst, dp++, (char) b1);
            } else if ((b1 >> 5) == -2 && (b1 & 0x1e) != 0) {
                // Two-byte lead 110xxxxx; the 0x1e test rejects 0xc0 and 0xc1.
                if (sp < sl) {
                    int b2 = src[sp++];
                    if (isNotContinuation(b2)) {
                        if (!doReplace) {
                            throwMalformed(sp - 1, 1);
                        }
                        StringUTF16.putChar(dst, dp++, REPL);
                        // Step back so b2 is decoded again as the start of a new sequence.
                        sp--;
                    } else {
                        StringUTF16.putChar(dst, dp++, decode2(b1, b2));
                    }
                    continue;
                }
                // The lead byte was the last byte of the input.
                if (!doReplace) {
                    throwMalformed(sp, 1);  // underflow()
                }
                StringUTF16.putChar(dst, dp++, REPL);
                break;
            } else if ((b1 >> 4) == -2) {
                // Three-byte lead 1110xxxx.
                if (sp + 1 < sl) {
                    int b2 = src[sp++];
                    int b3 = src[sp++];
                    if (isMalformed3(b1, b2, b3)) {
                        if (!doReplace) {
                            throwMalformed(sp - 3, 3);
                        }
                        StringUTF16.putChar(dst, dp++, REPL);
                        // Rewind to the lead byte and skip only as many bytes as
                        // malformed3 attributes to the bad sequence (1 or 2).
                        sp -= 3;
                        sp += malformed3(src, sp);
                    } else {
                        char c = decode3(b1, b2, b3);
                        // A three-byte sequence that decodes to a surrogate is rejected.
                        if (Character.isSurrogate(c)) {
                            if (!doReplace) {
                                throwMalformed(sp - 3, 3);
                            }
                            StringUTF16.putChar(dst, dp++, REPL);
                        } else {
                            StringUTF16.putChar(dst, dp++, c);
                        }
                    }
                    continue;
                }
                // Fewer than two bytes follow the lead. If exactly one follows and
                // it already makes the sequence malformed, replace the lead only and
                // let the loop decode that byte on its own.
                if (sp < sl && isMalformed3_2(b1, src[sp])) {
                    if (!doReplace) {
                        throwMalformed(sp - 1, 2);
                    }
                    StringUTF16.putChar(dst, dp++, REPL);
                    continue;
                }
                // Otherwise the input ends inside the sequence: one REPL, then stop.
                if (!doReplace) {
                    throwMalformed(sp, 1);
                }
                StringUTF16.putChar(dst, dp++, REPL);
                break;
            } else if ((b1 >> 3) == -2) {
                // Four-byte lead 11110xxx.
                if (sp + 2 < sl) {
                    int b2 = src[sp++];
                    int b3 = src[sp++];
                    int b4 = src[sp++];
                    int uc = decode4(b1, b2, b3, b4);
                    if (isMalformed4(b2, b3, b4) ||
                            !Character.isSupplementaryCodePoint(uc)) { // shortest form check
                        if (!doReplace) {
                            throwMalformed(sp - 4, 4);
                        }
                        StringUTF16.putChar(dst, dp++, REPL);
                        // Rewind to the lead byte and skip only as many bytes as
                        // malformed4 attributes to the bad sequence (1, 2 or 3).
                        sp -= 4;
                        sp += malformed4(src, sp);
                    } else {
                        // A supplementary code point is stored as a surrogate pair.
                        StringUTF16.putChar(dst, dp++, Character.highSurrogate(uc));
                        StringUTF16.putChar(dst, dp++, Character.lowSurrogate(uc));
                    }
                    continue;
                }
                // Fewer than three bytes follow the lead; from here on b1 is
                // handled as an unsigned value.
                b1 &= 0xff;
                if (b1 > 0xf4 || sp < sl && isMalformed4_2(b1, src[sp] & 0xff)) {
                    if (!doReplace) {
                        throwMalformed(sp - 1, 1);  // or 2
                    }
                    StringUTF16.putChar(dst, dp++, REPL);
                    continue;
                }
                if (!doReplace) {
                    throwMalformed(sp - 1, 1);
                }
                // Skip the second byte, emit one REPL, and keep decoding only if
                // the byte after it is not a continuation byte.
                sp++;
                StringUTF16.putChar(dst, dp++, REPL);
                if (sp < sl && isMalformed4_3(src[sp])) {
                    continue;
                }
                break;
            } else {
                // Any other negative byte cannot start a sequence here.
                if (!doReplace) {
                    throwMalformed(sp - 1, 1);
                }
                StringUTF16.putChar(dst, dp++, REPL);
            }
        }
        return dp;
    }
1242 
1243     private static int decodeWithDecoder(CharsetDecoder cd, char[] dst, byte[] src, int offset, int length)
1244                                             throws CharacterCodingException {
1245         ByteBuffer bb = ByteBuffer.wrap(src, offset, length);
1246         CharBuffer cb = CharBuffer.wrap(dst, 0, dst.length);
1247         CoderResult cr = cd.decode(bb, cb, true);
1248         if (!cr.isUnderflow())
1249             cr.throwException();
1250         cr = cd.flush(cb);
1251         if (!cr.isUnderflow())
1252             cr.throwException();
1253         return cb.position();
1254     }
1255 
1256     private static int malformed3(byte[] src, int sp) {
1257         int b1 = src[sp++];
1258         int b2 = src[sp];    // no need to lookup b3
1259         return ((b1 == (byte)0xe0 && (b2 & 0xe0) == 0x80) ||
1260                 isNotContinuation(b2)) ? 1 : 2;
1261     }
1262 
1263     private static int malformed4(byte[] src, int sp) {
1264         // we don't care the speed here
1265         int b1 = src[sp++] & 0xff;
1266         int b2 = src[sp++] & 0xff;
1267         if (b1 > 0xf4 ||
1268                 (b1 == 0xf0 && (b2 < 0x90 || b2 > 0xbf)) ||
1269                 (b1 == 0xf4 && (b2 & 0xf0) != 0x80) ||
1270                 isNotContinuation(b2))
1271             return 1;
1272         if (isNotContinuation(src[sp]))
1273             return 2;
1274         return 3;
1275     }
1276 
1277     private static void throwMalformed(int off, int nb) {
1278         String msg = "malformed input off : " + off + ", length : " + nb;
1279         throw new IllegalArgumentException(msg, new MalformedInputException(nb));
1280     }
1281 
1282     private static void throwMalformed(byte[] val) {
1283         int dp = StringCoding.countPositives(val, 0, val.length);
1284         throwMalformed(dp, 1);
1285     }
1286 
1287     private static void throwUnmappable(int off) {
1288         String msg = "malformed input off : " + off + ", length : 1";
1289         throw new IllegalArgumentException(msg, new UnmappableCharacterException(1));
1290     }
1291 
1292     private static void throwUnmappable(byte[] val) {
1293         int dp = StringCoding.countPositives(val, 0, val.length);
1294         throwUnmappable(dp);
1295     }
1296 
1297     private static byte[] encodeUTF8(byte coder, byte[] val, boolean doReplace) {
1298         if (coder == UTF16) {
1299             return encodeUTF8_UTF16(val, doReplace);
1300         }
1301 
1302         if (!StringCoding.hasNegatives(val, 0, val.length)) {
1303             return val.clone();
1304         }
1305 
1306         int dp = 0;
1307         byte[] dst = new byte[val.length << 1];
1308         for (byte c : val) {
1309             if (c < 0) {
1310                 dst[dp++] = (byte) (0xc0 | ((c & 0xff) >> 6));
1311                 dst[dp++] = (byte) (0x80 | (c & 0x3f));
1312             } else {
1313                 dst[dp++] = c;
1314             }
1315         }
1316         if (dp == dst.length) {
1317             return dst;
1318         }
1319         return Arrays.copyOf(dst, dp);
1320     }
1321 
1322     private static byte[] encodeUTF8_UTF16(byte[] val, boolean doReplace) {
1323         int dp = 0;
1324         int sp = 0;
1325         int sl = val.length >> 1;
1326         byte[] dst = new byte[sl * 3];
1327         while (sp < sl) {
1328             // ascii fast loop;
1329             char c = StringUTF16.getChar(val, sp);
1330             if (c >= '\u0080') {
1331                 break;
1332             }
1333             dst[dp++] = (byte)c;
1334             sp++;
1335         }
1336         while (sp < sl) {
1337             char c = StringUTF16.getChar(val, sp++);
1338             if (c < 0x80) {
1339                 dst[dp++] = (byte)c;
1340             } else if (c < 0x800) {
1341                 dst[dp++] = (byte)(0xc0 | (c >> 6));
1342                 dst[dp++] = (byte)(0x80 | (c & 0x3f));
1343             } else if (Character.isSurrogate(c)) {
1344                 int uc = -1;
1345                 char c2;
1346                 if (Character.isHighSurrogate(c) && sp < sl &&
1347                         Character.isLowSurrogate(c2 = StringUTF16.getChar(val, sp))) {
1348                     uc = Character.toCodePoint(c, c2);
1349                 }
1350                 if (uc < 0) {
1351                     if (doReplace) {
1352                         dst[dp++] = '?';
1353                     } else {
1354                         throwUnmappable(sp - 1);
1355                     }
1356                 } else {
1357                     dst[dp++] = (byte)(0xf0 | ((uc >> 18)));
1358                     dst[dp++] = (byte)(0x80 | ((uc >> 12) & 0x3f));
1359                     dst[dp++] = (byte)(0x80 | ((uc >>  6) & 0x3f));
1360                     dst[dp++] = (byte)(0x80 | (uc & 0x3f));
1361                     sp++;  // 2 chars
1362                 }
1363             } else {
1364                 // 3 bytes, 16 bits
1365                 dst[dp++] = (byte)(0xe0 | ((c >> 12)));
1366                 dst[dp++] = (byte)(0x80 | ((c >>  6) & 0x3f));
1367                 dst[dp++] = (byte)(0x80 | (c & 0x3f));
1368             }
1369         }
1370         if (dp == dst.length) {
1371             return dst;
1372         }
1373         return Arrays.copyOf(dst, dp);
1374     }
1375 
1376     /**
1377      * Constructs a new {@code String} by decoding the specified array of bytes
1378      * using the specified {@linkplain java.nio.charset.Charset charset}.  The
1379      * length of the new {@code String} is a function of the charset, and hence
1380      * may not be equal to the length of the byte array.
1381      *
1382      * <p> The behavior of this constructor when the given bytes are not valid
1383      * in the given charset is unspecified.  The {@link
1384      * java.nio.charset.CharsetDecoder} class should be used when more control
1385      * over the decoding process is required.
1386      *
1387      * @param  bytes
1388      *         The bytes to be decoded into characters
1389      *
1390      * @param  charsetName
1391      *         The name of a supported {@linkplain java.nio.charset.Charset
1392      *         charset}
1393      *
1394      * @throws  UnsupportedEncodingException
1395      *          If the named charset is not supported
1396      *
1397      * @since  1.1
1398      */
    public String(byte[] bytes, String charsetName)
            throws UnsupportedEncodingException {
        // The charset name is resolved first: lookupCharset throws
        // NullPointerException for a null name and UnsupportedEncodingException
        // for an unknown or illegal one. Then the whole array is decoded.
        this(lookupCharset(charsetName), bytes, 0, bytes.length);
    }
1403 
1404     /**
1405      * Constructs a new {@code String} by decoding the specified array of
1406      * bytes using the specified {@linkplain java.nio.charset.Charset charset}.
1407      * The length of the new {@code String} is a function of the charset, and
1408      * hence may not be equal to the length of the byte array.
1409      *
1410      * <p> This method always replaces malformed-input and unmappable-character
1411      * sequences with this charset's default replacement string.  The {@link
1412      * java.nio.charset.CharsetDecoder} class should be used when more control
1413      * over the decoding process is required.
1414      *
1415      * @param  bytes
1416      *         The bytes to be decoded into characters
1417      *
1418      * @param  charset
1419      *         The {@linkplain java.nio.charset.Charset charset} to be used to
1420      *         decode the {@code bytes}
1421      *
1422      * @since  1.6
1423      */
    public String(byte[] bytes, Charset charset) {
        // requireNonNull is evaluated before bytes.length, so a null charset is
        // reported first. Then the whole array is decoded.
        this(Objects.requireNonNull(charset), bytes, 0, bytes.length);
    }
1427 
1428     /**
1429      * Constructs a new {@code String} by decoding the specified subarray of
1430      * bytes using the {@link Charset#defaultCharset() default charset}.
1431      * The length of the new {@code String} is a function of the charset,
1432      * and hence may not be equal to the length of the subarray.
1433      *
1434      * <p> The behavior of this constructor when the given bytes are not valid
1435      * in the default charset is unspecified.  The {@link
1436      * java.nio.charset.CharsetDecoder} class should be used when more control
1437      * over the decoding process is required.
1438      *
1439      * @param  bytes
1440      *         The bytes to be decoded into characters
1441      *
1442      * @param  offset
1443      *         The index of the first byte to decode
1444      *
1445      * @param  length
1446      *         The number of bytes to decode
1447      *
1448      * @throws  IndexOutOfBoundsException
1449      *          If {@code offset} is negative, {@code length} is negative, or
1450      *          {@code offset} is greater than {@code bytes.length - length}
1451      *
1452      * @since  1.1
1453      */
    public String(byte[] bytes, int offset, int length) {
        // checkBoundsOffCount validates (offset, length) against bytes.length and
        // its return value is passed on as the offset argument
        // (presumably offset unchanged; confirm against its definition).
        this(Charset.defaultCharset(), bytes, checkBoundsOffCount(offset, length, bytes.length), length);
    }
1457 
1458     /**
1459      * Constructs a new {@code String} by decoding the specified array of bytes
1460      * using the {@link Charset#defaultCharset() default charset}. The length
1461      * of the new {@code String} is a function of the charset, and hence may not
1462      * be equal to the length of the byte array.
1463      *
1464      * <p> The behavior of this constructor when the given bytes are not valid
1465      * in the default charset is unspecified.  The {@link
1466      * java.nio.charset.CharsetDecoder} class should be used when more control
1467      * over the decoding process is required.
1468      *
1469      * @param  bytes
1470      *         The bytes to be decoded into characters
1471      *
1472      * @since  1.1
1473      */
    public String(byte[] bytes) {
        // Decodes the whole array with the default charset.
        this(Charset.defaultCharset(), bytes, 0, bytes.length);
    }
1477 
1478     /**
1479      * Allocates a new string that contains the sequence of characters
1480      * currently contained in the string buffer argument. The contents of the
1481      * string buffer are copied; subsequent modification of the string buffer
1482      * does not affect the newly created string.
1483      *
1484      * @param  buffer
1485      *         A {@code StringBuffer}
1486      */
    public String(StringBuffer buffer) {
        // Delegates to another constructor with the buffer's toString() result.
        this(buffer.toString());
    }
1490 
1491     /**
1492      * Allocates a new string that contains the sequence of characters
1493      * currently contained in the string builder argument. The contents of the
1494      * string builder are copied; subsequent modification of the string builder
1495      * does not affect the newly created string.
1496      *
1497      * <p> This constructor is provided to ease migration to {@code
1498      * StringBuilder}. Obtaining a string from a string builder via the {@code
1499      * toString} method is likely to run faster and is generally preferred.
1500      *
1501      * @param   builder
1502      *          A {@code StringBuilder}
1503      *
1504      * @since  1.5
1505      */
    public String(StringBuilder builder) {
        // NOTE(review): the trailing null presumably only serves to select a
        // two-argument overload declared elsewhere in this file -- confirm
        // against that constructor before changing this call.
        this(builder, null);
    }
1509 
1510     /**
1511      * Returns the length of this string.
1512      * The length is equal to the number of <a href="Character.html#unicode">Unicode
1513      * code units</a> in the string.
1514      *
1515      * @return  the length of the sequence of characters represented by this
1516      *          object.
1517      */
1518     public int length() {
1519         return value.length >> coder();
1520     }
1521 
1522     /**
1523      * Returns {@code true} if, and only if, {@link #length()} is {@code 0}.
1524      *
1525      * @return {@code true} if {@link #length()} is {@code 0}, otherwise
1526      * {@code false}
1527      *
1528      * @since 1.6
1529      */
1530     @Override
1531     public boolean isEmpty() {
1532         return value.length == 0;
1533     }
1534 
1535     /**
1536      * Returns the {@code char} value at the
1537      * specified index. An index ranges from {@code 0} to
1538      * {@code length() - 1}. The first {@code char} value of the sequence
1539      * is at index {@code 0}, the next at index {@code 1},
1540      * and so on, as for array indexing.
1541      *
1542      * <p>If the {@code char} value specified by the index is a
1543      * <a href="Character.html#unicode">surrogate</a>, the surrogate
1544      * value is returned.
1545      *
1546      * @param      index   the index of the {@code char} value.
1547      * @return     the {@code char} value at the specified index of this string.
1548      *             The first {@code char} value is at index {@code 0}.
1549      * @throws     IndexOutOfBoundsException  if the {@code index}
1550      *             argument is negative or not less than the length of this
1551      *             string.
1552      */
1553     public char charAt(int index) {
1554         if (isLatin1()) {
1555             return StringLatin1.charAt(value, index);
1556         } else {
1557             return StringUTF16.charAt(value, index);
1558         }
1559     }
1560 
1561     /**
1562      * Returns the character (Unicode code point) at the specified
1563      * index. The index refers to {@code char} values
1564      * (Unicode code units) and ranges from {@code 0} to
1565      * {@link #length()}{@code  - 1}.
1566      *
1567      * <p> If the {@code char} value specified at the given index
1568      * is in the high-surrogate range, the following index is less
1569      * than the length of this {@code String}, and the
1570      * {@code char} value at the following index is in the
1571      * low-surrogate range, then the supplementary code point
1572      * corresponding to this surrogate pair is returned. Otherwise,
1573      * the {@code char} value at the given index is returned.
1574      *
1575      * @param      index the index to the {@code char} values
1576      * @return     the code point value of the character at the
1577      *             {@code index}
1578      * @throws     IndexOutOfBoundsException  if the {@code index}
1579      *             argument is negative or not less than the length of this
1580      *             string.
1581      * @since      1.5
1582      */
1583     public int codePointAt(int index) {
1584         if (isLatin1()) {
1585             checkIndex(index, value.length);
1586             return value[index] & 0xff;
1587         }
1588         int length = value.length >> 1;
1589         checkIndex(index, length);
1590         return StringUTF16.codePointAt(value, index, length);
1591     }
1592 
1593     /**
1594      * Returns the character (Unicode code point) before the specified
1595      * index. The index refers to {@code char} values
1596      * (Unicode code units) and ranges from {@code 1} to {@link
1597      * CharSequence#length() length}.
1598      *
1599      * <p> If the {@code char} value at {@code (index - 1)}
1600      * is in the low-surrogate range, {@code (index - 2)} is not
1601      * negative, and the {@code char} value at {@code (index -
1602      * 2)} is in the high-surrogate range, then the
1603      * supplementary code point value of the surrogate pair is
1604      * returned. If the {@code char} value at {@code index -
1605      * 1} is an unpaired low-surrogate or a high-surrogate, the
1606      * surrogate value is returned.
1607      *
1608      * @param     index the index following the code point that should be returned
1609      * @return    the Unicode code point value before the given index.
1610      * @throws    IndexOutOfBoundsException if the {@code index}
1611      *            argument is less than 1 or greater than the length
1612      *            of this string.
1613      * @since     1.5
1614      */
1615     public int codePointBefore(int index) {
1616         int i = index - 1;
1617         checkIndex(i, length());
1618         if (isLatin1()) {
1619             return (value[i] & 0xff);
1620         }
1621         return StringUTF16.codePointBefore(value, index);
1622     }
1623 
1624     /**
1625      * Returns the number of Unicode code points in the specified text
1626      * range of this {@code String}. The text range begins at the
1627      * specified {@code beginIndex} and extends to the
1628      * {@code char} at index {@code endIndex - 1}. Thus the
1629      * length (in {@code char}s) of the text range is
1630      * {@code endIndex-beginIndex}. Unpaired surrogates within
1631      * the text range count as one code point each.
1632      *
1633      * @param beginIndex the index to the first {@code char} of
1634      * the text range.
1635      * @param endIndex the index after the last {@code char} of
1636      * the text range.
1637      * @return the number of Unicode code points in the specified text
1638      * range
1639      * @throws    IndexOutOfBoundsException if the
1640      * {@code beginIndex} is negative, or {@code endIndex}
1641      * is larger than the length of this {@code String}, or
1642      * {@code beginIndex} is larger than {@code endIndex}.
1643      * @since  1.5
1644      */
1645     public int codePointCount(int beginIndex, int endIndex) {
1646         Objects.checkFromToIndex(beginIndex, endIndex, length());
1647         if (isLatin1()) {
1648             return endIndex - beginIndex;
1649         }
1650         return StringUTF16.codePointCount(value, beginIndex, endIndex);
1651     }
1652 
1653     /**
1654      * Returns the index within this {@code String} that is
1655      * offset from the given {@code index} by
1656      * {@code codePointOffset} code points. Unpaired surrogates
1657      * within the text range given by {@code index} and
1658      * {@code codePointOffset} count as one code point each.
1659      *
1660      * @param index the index to be offset
1661      * @param codePointOffset the offset in code points
1662      * @return the index within this {@code String}
1663      * @throws    IndexOutOfBoundsException if {@code index}
1664      *   is negative or larger than the length of this
1665      *   {@code String}, or if {@code codePointOffset} is positive
1666      *   and the substring starting with {@code index} has fewer
1667      *   than {@code codePointOffset} code points,
1668      *   or if {@code codePointOffset} is negative and the substring
1669      *   before {@code index} has fewer than the absolute value
1670      *   of {@code codePointOffset} code points.
1671      * @since 1.5
1672      */
1673     public int offsetByCodePoints(int index, int codePointOffset) {
1674         return Character.offsetByCodePoints(this, index, codePointOffset);
1675     }
1676 
1677     /**
1678      * Copies characters from this string into the destination character
1679      * array.
1680      * <p>
1681      * The first character to be copied is at index {@code srcBegin};
1682      * the last character to be copied is at index {@code srcEnd-1}
1683      * (thus the total number of characters to be copied is
1684      * {@code srcEnd-srcBegin}). The characters are copied into the
1685      * subarray of {@code dst} starting at index {@code dstBegin}
1686      * and ending at index:
1687      * <blockquote><pre>
1688      *     dstBegin + (srcEnd-srcBegin) - 1
1689      * </pre></blockquote>
1690      *
1691      * @param      srcBegin   index of the first character in the string
1692      *                        to copy.
1693      * @param      srcEnd     index after the last character in the string
1694      *                        to copy.
1695      * @param      dst        the destination array.
1696      * @param      dstBegin   the start offset in the destination array.
1697      * @throws    IndexOutOfBoundsException If any of the following
1698      *            is true:
1699      *            <ul><li>{@code srcBegin} is negative.
1700      *            <li>{@code srcBegin} is greater than {@code srcEnd}
1701      *            <li>{@code srcEnd} is greater than the length of this
1702      *                string
1703      *            <li>{@code dstBegin} is negative
1704      *            <li>{@code dstBegin+(srcEnd-srcBegin)} is larger than
1705      *                {@code dst.length}</ul>
1706      */
1707     public void getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin) {
1708         checkBoundsBeginEnd(srcBegin, srcEnd, length());
1709         checkBoundsOffCount(dstBegin, srcEnd - srcBegin, dst.length);
1710         if (isLatin1()) {
1711             StringLatin1.getChars(value, srcBegin, srcEnd, dst, dstBegin);
1712         } else {
1713             StringUTF16.getChars(value, srcBegin, srcEnd, dst, dstBegin);
1714         }
1715     }
1716 
1717     /**
1718      * Copies characters from this string into the destination byte array. Each
1719      * byte receives the 8 low-order bits of the corresponding character. The
1720      * eight high-order bits of each character are not copied and do not
1721      * participate in the transfer in any way.
1722      *
1723      * <p> The first character to be copied is at index {@code srcBegin}; the
1724      * last character to be copied is at index {@code srcEnd-1}.  The total
1725      * number of characters to be copied is {@code srcEnd-srcBegin}. The
1726      * characters, converted to bytes, are copied into the subarray of {@code
1727      * dst} starting at index {@code dstBegin} and ending at index:
1728      *
1729      * <blockquote><pre>
1730      *     dstBegin + (srcEnd-srcBegin) - 1
1731      * </pre></blockquote>
1732      *
1733      * @deprecated  This method does not properly convert characters into
1734      * bytes.  As of JDK&nbsp;1.1, the preferred way to do this is via the
1735      * {@link #getBytes()} method, which uses the {@link Charset#defaultCharset()
1736      * default charset}.
1737      *
1738      * @param  srcBegin
1739      *         Index of the first character in the string to copy
1740      *
1741      * @param  srcEnd
1742      *         Index after the last character in the string to copy
1743      *
1744      * @param  dst
1745      *         The destination array
1746      *
1747      * @param  dstBegin
1748      *         The start offset in the destination array
1749      *
1750      * @throws  IndexOutOfBoundsException
1751      *          If any of the following is true:
1752      *          <ul>
1753      *            <li> {@code srcBegin} is negative
1754      *            <li> {@code srcBegin} is greater than {@code srcEnd}
1755      *            <li> {@code srcEnd} is greater than the length of this String
1756      *            <li> {@code dstBegin} is negative
1757      *            <li> {@code dstBegin+(srcEnd-srcBegin)} is larger than {@code
1758      *                 dst.length}
1759      *          </ul>
1760      */
1761     @Deprecated(since="1.1")
1762     public void getBytes(int srcBegin, int srcEnd, byte[] dst, int dstBegin) {
1763         checkBoundsBeginEnd(srcBegin, srcEnd, length());
1764         Objects.requireNonNull(dst);
1765         checkBoundsOffCount(dstBegin, srcEnd - srcBegin, dst.length);
1766         if (isLatin1()) {
1767             StringLatin1.getBytes(value, srcBegin, srcEnd, dst, dstBegin);
1768         } else {
1769             StringUTF16.getBytes(value, srcBegin, srcEnd, dst, dstBegin);
1770         }
1771     }
1772 
1773     /**
1774      * Encodes this {@code String} into a sequence of bytes using the named
1775      * charset, storing the result into a new byte array.
1776      *
1777      * <p> The behavior of this method when this string cannot be encoded in
1778      * the given charset is unspecified.  The {@link
1779      * java.nio.charset.CharsetEncoder} class should be used when more control
1780      * over the encoding process is required.
1781      *
1782      * @param  charsetName
1783      *         The name of a supported {@linkplain java.nio.charset.Charset
1784      *         charset}
1785      *
1786      * @return  The resultant byte array
1787      *
1788      * @throws  UnsupportedEncodingException
1789      *          If the named charset is not supported
1790      *
1791      * @since  1.1
1792      */
1793     public byte[] getBytes(String charsetName)
1794             throws UnsupportedEncodingException {
1795         return encode(lookupCharset(charsetName), coder(), value);
1796     }
1797 
1798     /**
1799      * Encodes this {@code String} into a sequence of bytes using the given
1800      * {@linkplain java.nio.charset.Charset charset}, storing the result into a
1801      * new byte array.
1802      *
1803      * <p> This method always replaces malformed-input and unmappable-character
1804      * sequences with this charset's default replacement byte array.  The
1805      * {@link java.nio.charset.CharsetEncoder} class should be used when more
1806      * control over the encoding process is required.
1807      *
1808      * @param  charset
1809      *         The {@linkplain java.nio.charset.Charset} to be used to encode
1810      *         the {@code String}
1811      *
1812      * @return  The resultant byte array
1813      *
1814      * @since  1.6
1815      */
1816     public byte[] getBytes(Charset charset) {
1817         if (charset == null) throw new NullPointerException();
1818         return encode(charset, coder(), value);
1819      }
1820 
1821     /**
1822      * Encodes this {@code String} into a sequence of bytes using the
1823      * {@link Charset#defaultCharset() default charset}, storing the result
1824      * into a new byte array.
1825      *
1826      * <p> The behavior of this method when this string cannot be encoded in
1827      * the default charset is unspecified.  The {@link
1828      * java.nio.charset.CharsetEncoder} class should be used when more control
1829      * over the encoding process is required.
1830      *
1831      * @return  The resultant byte array
1832      *
1833      * @since      1.1
1834      */
1835     public byte[] getBytes() {
1836         return encode(Charset.defaultCharset(), coder(), value);
1837     }
1838 
1839     /**
1840      * Compares this string to the specified object.  The result is {@code
1841      * true} if and only if the argument is not {@code null} and is a {@code
1842      * String} object that represents the same sequence of characters as this
1843      * object.
1844      *
1845      * <p>For finer-grained String comparison, refer to
1846      * {@link java.text.Collator}.
1847      *
1848      * @param  anObject
1849      *         The object to compare this {@code String} against
1850      *
1851      * @return  {@code true} if the given object represents a {@code String}
1852      *          equivalent to this string, {@code false} otherwise
1853      *
1854      * @see  #compareTo(String)
1855      * @see  #equalsIgnoreCase(String)
1856      */
1857     public boolean equals(Object anObject) {
1858         if (this == anObject) {
1859             return true;
1860         }
1861         return (anObject instanceof String aString)
1862                 && (!COMPACT_STRINGS || this.coder == aString.coder)
1863                 && StringLatin1.equals(value, aString.value);
1864     }
1865 
1866     /**
1867      * Compares this string to the specified {@code StringBuffer}.  The result
1868      * is {@code true} if and only if this {@code String} represents the same
1869      * sequence of characters as the specified {@code StringBuffer}. This method
1870      * synchronizes on the {@code StringBuffer}.
1871      *
1872      * <p>For finer-grained String comparison, refer to
1873      * {@link java.text.Collator}.
1874      *
1875      * @param  sb
1876      *         The {@code StringBuffer} to compare this {@code String} against
1877      *
1878      * @return  {@code true} if this {@code String} represents the same
1879      *          sequence of characters as the specified {@code StringBuffer},
1880      *          {@code false} otherwise
1881      *
1882      * @since  1.4
1883      */
1884     public boolean contentEquals(StringBuffer sb) {
1885         return contentEquals((CharSequence)sb);
1886     }
1887 
1888     private boolean nonSyncContentEquals(AbstractStringBuilder sb) {
1889         int len = length();
1890         if (len != sb.length()) {
1891             return false;
1892         }
1893         byte[] v1 = value;
1894         byte[] v2 = sb.getValue();
1895         byte coder = coder();
1896         if (coder == sb.getCoder()) {
1897             return v1.length <= v2.length && ArraysSupport.mismatch(v1, v2, v1.length) < 0;
1898         } else {
1899             if (coder != LATIN1) {  // utf16 str and latin1 abs can never be "equal"
1900                 return false;
1901             }
1902             return StringUTF16.contentEquals(v1, v2, len);
1903         }
1904     }
1905 
1906     /**
1907      * Compares this string to the specified {@code CharSequence}.  The
1908      * result is {@code true} if and only if this {@code String} represents the
1909      * same sequence of char values as the specified sequence. Note that if the
1910      * {@code CharSequence} is a {@code StringBuffer} then the method
1911      * synchronizes on it.
1912      *
1913      * <p>For finer-grained String comparison, refer to
1914      * {@link java.text.Collator}.
1915      *
1916      * @param  cs
1917      *         The sequence to compare this {@code String} against
1918      *
1919      * @return  {@code true} if this {@code String} represents the same
1920      *          sequence of char values as the specified sequence, {@code
1921      *          false} otherwise
1922      *
1923      * @since  1.5
1924      */
1925     public boolean contentEquals(CharSequence cs) {
1926         // Argument is a StringBuffer, StringBuilder
1927         if (cs instanceof AbstractStringBuilder) {
1928             if (cs instanceof StringBuffer) {
1929                 synchronized(cs) {
1930                    return nonSyncContentEquals((AbstractStringBuilder)cs);
1931                 }
1932             } else {
1933                 return nonSyncContentEquals((AbstractStringBuilder)cs);
1934             }
1935         }
1936         // Argument is a String
1937         if (cs instanceof String) {
1938             return equals(cs);
1939         }
1940         // Argument is a generic CharSequence
1941         int n = cs.length();
1942         if (n != length()) {
1943             return false;
1944         }
1945         byte[] val = this.value;
1946         if (isLatin1()) {
1947             for (int i = 0; i < n; i++) {
1948                 if ((val[i] & 0xff) != cs.charAt(i)) {
1949                     return false;
1950                 }
1951             }
1952         } else {
1953             if (!StringUTF16.contentEquals(val, cs, n)) {
1954                 return false;
1955             }
1956         }
1957         return true;
1958     }
1959 
1960     /**
1961      * Compares this {@code String} to another {@code String}, ignoring case
1962      * considerations.  Two strings are considered equal ignoring case if they
1963      * are of the same length and corresponding Unicode code points in the two
1964      * strings are equal ignoring case.
1965      *
1966      * <p> Two Unicode code points are considered the same
1967      * ignoring case if at least one of the following is true:
1968      * <ul>
1969      *   <li> The two Unicode code points are the same (as compared by the
1970      *        {@code ==} operator)
1971      *   <li> Calling {@code Character.toLowerCase(Character.toUpperCase(int))}
1972      *        on each Unicode code point produces the same result
1973      * </ul>
1974      *
1975      * <p>Note that this method does <em>not</em> take locale into account, and
1976      * will result in unsatisfactory results for certain locales.  The
1977      * {@link java.text.Collator} class provides locale-sensitive comparison.
1978      *
1979      * @param  anotherString
1980      *         The {@code String} to compare this {@code String} against
1981      *
1982      * @return  {@code true} if the argument is not {@code null} and it
1983      *          represents an equivalent {@code String} ignoring case; {@code
1984      *          false} otherwise
1985      *
1986      * @see  #equals(Object)
1987      * @see  #codePoints()
1988      */
1989     public boolean equalsIgnoreCase(String anotherString) {
1990         return (this == anotherString) ? true
1991                 : (anotherString != null)
1992                 && (anotherString.length() == length())
1993                 && regionMatches(true, 0, anotherString, 0, length());
1994     }
1995 
1996     /**
1997      * Compares two strings lexicographically.
1998      * The comparison is based on the Unicode value of each character in
1999      * the strings. The character sequence represented by this
2000      * {@code String} object is compared lexicographically to the
2001      * character sequence represented by the argument string. The result is
2002      * a negative integer if this {@code String} object
2003      * lexicographically precedes the argument string. The result is a
2004      * positive integer if this {@code String} object lexicographically
2005      * follows the argument string. The result is zero if the strings
2006      * are equal; {@code compareTo} returns {@code 0} exactly when
2007      * the {@link #equals(Object)} method would return {@code true}.
2008      * <p>
2009      * This is the definition of lexicographic ordering. If two strings are
2010      * different, then either they have different characters at some index
2011      * that is a valid index for both strings, or their lengths are different,
2012      * or both. If they have different characters at one or more index
2013      * positions, let <i>k</i> be the smallest such index; then the string
2014      * whose character at position <i>k</i> has the smaller value, as
2015      * determined by using the {@code <} operator, lexicographically precedes the
2016      * other string. In this case, {@code compareTo} returns the
2017      * difference of the two character values at position {@code k} in
     * the two strings -- that is, the value:
2019      * <blockquote><pre>
2020      * this.charAt(k)-anotherString.charAt(k)
2021      * </pre></blockquote>
2022      * If there is no index position at which they differ, then the shorter
2023      * string lexicographically precedes the longer string. In this case,
2024      * {@code compareTo} returns the difference of the lengths of the
2025      * strings -- that is, the value:
2026      * <blockquote><pre>
2027      * this.length()-anotherString.length()
2028      * </pre></blockquote>
2029      *
2030      * <p>For finer-grained String comparison, refer to
2031      * {@link java.text.Collator}.
2032      *
2033      * @param   anotherString   the {@code String} to be compared.
2034      * @return  the value {@code 0} if the argument string is equal to
2035      *          this string; a value less than {@code 0} if this string
2036      *          is lexicographically less than the string argument; and a
2037      *          value greater than {@code 0} if this string is
2038      *          lexicographically greater than the string argument.
2039      */
2040     public int compareTo(String anotherString) {
2041         byte[] v1 = value;
2042         byte[] v2 = anotherString.value;
2043         byte coder = coder();
2044         if (coder == anotherString.coder()) {
2045             return coder == LATIN1 ? StringLatin1.compareTo(v1, v2)
2046                                    : StringUTF16.compareTo(v1, v2);
2047         }
2048         return coder == LATIN1 ? StringLatin1.compareToUTF16(v1, v2)
2049                                : StringUTF16.compareToLatin1(v1, v2);
2050      }
2051 
2052     /**
2053      * A Comparator that orders {@code String} objects as by
2054      * {@link #compareToIgnoreCase(String) compareToIgnoreCase}.
2055      * This comparator is serializable.
2056      * <p>
2057      * Note that this Comparator does <em>not</em> take locale into account,
2058      * and will result in an unsatisfactory ordering for certain locales.
2059      * The {@link java.text.Collator} class provides locale-sensitive comparison.
2060      *
2061      * @see     java.text.Collator
2062      * @since   1.2
2063      */
2064     public static final Comparator<String> CASE_INSENSITIVE_ORDER
2065                                          = new CaseInsensitiveComparator();
2066 
2067     /**
2068      * CaseInsensitiveComparator for Strings.
2069      */
2070     private static class CaseInsensitiveComparator
2071             implements Comparator<String>, java.io.Serializable {
2072         // use serialVersionUID from JDK 1.2.2 for interoperability
2073         @java.io.Serial
2074         private static final long serialVersionUID = 8575799808933029326L;
2075 
2076         public int compare(String s1, String s2) {
2077             byte[] v1 = s1.value;
2078             byte[] v2 = s2.value;
2079             byte coder = s1.coder();
2080             if (coder == s2.coder()) {
2081                 return coder == LATIN1 ? StringLatin1.compareToCI(v1, v2)
2082                                        : StringUTF16.compareToCI(v1, v2);
2083             }
2084             return coder == LATIN1 ? StringLatin1.compareToCI_UTF16(v1, v2)
2085                                    : StringUTF16.compareToCI_Latin1(v1, v2);
2086         }
2087 
2088         /** Replaces the de-serialized object. */
2089         @java.io.Serial
2090         private Object readResolve() { return CASE_INSENSITIVE_ORDER; }
2091     }
2092 
2093     /**
2094      * Compares two strings lexicographically, ignoring case
2095      * differences. This method returns an integer whose sign is that of
2096      * calling {@code compareTo} with case folded versions of the strings
2097      * where case differences have been eliminated by calling
2098      * {@code Character.toLowerCase(Character.toUpperCase(int))} on
2099      * each Unicode code point.
2100      * <p>
2101      * Note that this method does <em>not</em> take locale into account,
2102      * and will result in an unsatisfactory ordering for certain locales.
2103      * The {@link java.text.Collator} class provides locale-sensitive comparison.
2104      *
2105      * @param   str   the {@code String} to be compared.
2106      * @return  a negative integer, zero, or a positive integer as the
2107      *          specified String is greater than, equal to, or less
2108      *          than this String, ignoring case considerations.
2109      * @see     java.text.Collator
2110      * @see     #codePoints()
2111      * @since   1.2
2112      */
2113     public int compareToIgnoreCase(String str) {
2114         return CASE_INSENSITIVE_ORDER.compare(this, str);
2115     }
2116 
2117     /**
2118      * Tests if two string regions are equal.
2119      * <p>
2120      * A substring of this {@code String} object is compared to a substring
2121      * of the argument other. The result is true if these substrings
2122      * represent identical character sequences. The substring of this
2123      * {@code String} object to be compared begins at index {@code toffset}
2124      * and has length {@code len}. The substring of other to be compared
2125      * begins at index {@code ooffset} and has length {@code len}. The
2126      * result is {@code false} if and only if at least one of the following
2127      * is true:
2128      * <ul><li>{@code toffset} is negative.
2129      * <li>{@code ooffset} is negative.
2130      * <li>{@code toffset+len} is greater than the length of this
2131      * {@code String} object.
2132      * <li>{@code ooffset+len} is greater than the length of the other
2133      * argument.
2134      * <li>There is some nonnegative integer <i>k</i> less than {@code len}
2135      * such that:
2136      * {@code this.charAt(toffset + }<i>k</i>{@code ) != other.charAt(ooffset + }
2137      * <i>k</i>{@code )}
2138      * </ul>
2139      *
2140      * <p>Note that this method does <em>not</em> take locale into account.  The
2141      * {@link java.text.Collator} class provides locale-sensitive comparison.
2142      *
2143      * @param   toffset   the starting offset of the subregion in this string.
2144      * @param   other     the string argument.
2145      * @param   ooffset   the starting offset of the subregion in the string
2146      *                    argument.
2147      * @param   len       the number of characters to compare.
2148      * @return  {@code true} if the specified subregion of this string
2149      *          exactly matches the specified subregion of the string argument;
2150      *          {@code false} otherwise.
2151      */
2152     public boolean regionMatches(int toffset, String other, int ooffset, int len) {
2153         // Note: toffset, ooffset, or len might be near -1>>>1.
2154         if ((ooffset < 0) || (toffset < 0) ||
2155              (toffset > (long)length() - len) ||
2156              (ooffset > (long)other.length() - len)) {
2157             return false;
2158         }
2159         byte[] tv = value;
2160         byte[] ov = other.value;
2161         byte coder = coder();
2162         if (coder == other.coder()) {
2163             if (coder == UTF16) {
2164                 toffset <<= UTF16;
2165                 ooffset <<= UTF16;
2166                 len <<= UTF16;
2167             }
2168             return ArraysSupport.mismatch(tv, toffset,
2169                     ov, ooffset, len) < 0;
2170         } else {
2171             if (coder == LATIN1) {
2172                 while (len-- > 0) {
2173                     if (StringLatin1.getChar(tv, toffset++) !=
2174                         StringUTF16.getChar(ov, ooffset++)) {
2175                         return false;
2176                     }
2177                 }
2178             } else {
2179                 while (len-- > 0) {
2180                     if (StringUTF16.getChar(tv, toffset++) !=
2181                         StringLatin1.getChar(ov, ooffset++)) {
2182                         return false;
2183                     }
2184                 }
2185             }
2186         }
2187         return true;
2188     }
2189 
2190     /**
2191      * Tests if two string regions are equal.
2192      * <p>
2193      * A substring of this {@code String} object is compared to a substring
2194      * of the argument {@code other}. The result is {@code true} if these
2195      * substrings represent Unicode code point sequences that are the same,
2196      * ignoring case if and only if {@code ignoreCase} is true.
2197      * The sequences {@code tsequence} and {@code osequence} are compared,
2198      * where {@code tsequence} is the sequence produced as if by calling
2199      * {@code this.substring(toffset, toffset + len).codePoints()} and
2200      * {@code osequence} is the sequence produced as if by calling
2201      * {@code other.substring(ooffset, ooffset + len).codePoints()}.
2202      * The result is {@code true} if and only if all of the following
2203      * are true:
2204      * <ul><li>{@code toffset} is non-negative.
2205      * <li>{@code ooffset} is non-negative.
2206      * <li>{@code toffset+len} is less than or equal to the length of this
2207      * {@code String} object.
2208      * <li>{@code ooffset+len} is less than or equal to the length of the other
2209      * argument.
2210      * <li>if {@code ignoreCase} is {@code false}, all pairs of corresponding Unicode
2211      * code points are equal integer values; or if {@code ignoreCase} is {@code true},
2212      * {@link Character#toLowerCase(int) Character.toLowerCase(}
2213      * {@link Character#toUpperCase(int)}{@code )} on all pairs of Unicode code points
2214      * results in equal integer values.
2215      * </ul>
2216      *
2217      * <p>Note that this method does <em>not</em> take locale into account,
2218      * and will result in unsatisfactory results for certain locales when
2219      * {@code ignoreCase} is {@code true}.  The {@link java.text.Collator} class
2220      * provides locale-sensitive comparison.
2221      *
2222      * @param   ignoreCase   if {@code true}, ignore case when comparing
2223      *                       characters.
2224      * @param   toffset      the starting offset of the subregion in this
2225      *                       string.
2226      * @param   other        the string argument.
2227      * @param   ooffset      the starting offset of the subregion in the string
2228      *                       argument.
2229      * @param   len          the number of characters (Unicode code units -
2230      *                       16bit {@code char} value) to compare.
2231      * @return  {@code true} if the specified subregion of this string
2232      *          matches the specified subregion of the string argument;
2233      *          {@code false} otherwise. Whether the matching is exact
2234      *          or case insensitive depends on the {@code ignoreCase}
2235      *          argument.
2236      * @see     #codePoints()
2237      */
2238     public boolean regionMatches(boolean ignoreCase, int toffset,
2239             String other, int ooffset, int len) {
2240         if (!ignoreCase) {
2241             return regionMatches(toffset, other, ooffset, len);
2242         }
2243         // Note: toffset, ooffset, or len might be near -1>>>1.
2244         if ((ooffset < 0) || (toffset < 0)
2245                 || (toffset > (long)length() - len)
2246                 || (ooffset > (long)other.length() - len)) {
2247             return false;
2248         }
2249         byte[] tv = value;
2250         byte[] ov = other.value;
2251         byte coder = coder();
2252         if (coder == other.coder()) {
2253             return coder == LATIN1
2254               ? StringLatin1.regionMatchesCI(tv, toffset, ov, ooffset, len)
2255               : StringUTF16.regionMatchesCI(tv, toffset, ov, ooffset, len);
2256         }
2257         return coder == LATIN1
2258               ? StringLatin1.regionMatchesCI_UTF16(tv, toffset, ov, ooffset, len)
2259               : StringUTF16.regionMatchesCI_Latin1(tv, toffset, ov, ooffset, len);
2260     }
2261 
2262     /**
2263      * Tests if the substring of this string beginning at the
2264      * specified index starts with the specified prefix.
2265      *
2266      * @param   prefix    the prefix.
2267      * @param   toffset   where to begin looking in this string.
2268      * @return  {@code true} if the character sequence represented by the
2269      *          argument is a prefix of the substring of this object starting
2270      *          at index {@code toffset}; {@code false} otherwise.
2271      *          The result is {@code false} if {@code toffset} is
2272      *          negative or greater than the length of this
2273      *          {@code String} object; otherwise the result is the same
2274      *          as the result of the expression
2275      *          <pre>
2276      *          this.substring(toffset).startsWith(prefix)
2277      *          </pre>
2278      */
2279     public boolean startsWith(String prefix, int toffset) {
2280         // Note: toffset might be near -1>>>1.
2281         if (toffset < 0 || toffset > length() - prefix.length()) {
2282             return false;
2283         }
2284         byte[] ta = value;
2285         byte[] pa = prefix.value;
2286         int po = 0;
2287         int pc = pa.length;
2288         byte coder = coder();
2289         if (coder == prefix.coder()) {
2290             if (coder == UTF16) {
2291                 toffset <<= UTF16;
2292             }
2293             return ArraysSupport.mismatch(ta, toffset,
2294                     pa, 0, pc) < 0;
2295         } else {
2296             if (coder == LATIN1) {  // && pcoder == UTF16
2297                 return false;
2298             }
2299             // coder == UTF16 && pcoder == LATIN1)
2300             while (po < pc) {
2301                 if (StringUTF16.getChar(ta, toffset++) != (pa[po++] & 0xff)) {
2302                     return false;
2303                }
2304             }
2305         }
2306         return true;
2307     }
2308 
2309     /**
2310      * Tests if this string starts with the specified prefix.
2311      *
2312      * @param   prefix   the prefix.
2313      * @return  {@code true} if the character sequence represented by the
2314      *          argument is a prefix of the character sequence represented by
2315      *          this string; {@code false} otherwise.
2316      *          Note also that {@code true} will be returned if the
2317      *          argument is an empty string or is equal to this
2318      *          {@code String} object as determined by the
2319      *          {@link #equals(Object)} method.
2320      * @since   1.0
2321      */
2322     public boolean startsWith(String prefix) {
2323         return startsWith(prefix, 0);
2324     }
2325 
2326     /**
2327      * Tests if this string ends with the specified suffix.
2328      *
2329      * @param   suffix   the suffix.
2330      * @return  {@code true} if the character sequence represented by the
2331      *          argument is a suffix of the character sequence represented by
2332      *          this object; {@code false} otherwise. Note that the
2333      *          result will be {@code true} if the argument is the
2334      *          empty string or is equal to this {@code String} object
2335      *          as determined by the {@link #equals(Object)} method.
2336      */
2337     public boolean endsWith(String suffix) {
2338         return startsWith(suffix, length() - suffix.length());
2339     }
2340 
2341     /**
2342      * Returns a hash code for this string. The hash code for a
2343      * {@code String} object is computed as
2344      * <blockquote><pre>
2345      * s[0]*31^(n-1) + s[1]*31^(n-2) + ... + s[n-1]
2346      * </pre></blockquote>
2347      * using {@code int} arithmetic, where {@code s[i]} is the
2348      * <i>i</i>th character of the string, {@code n} is the length of
2349      * the string, and {@code ^} indicates exponentiation.
2350      * (The hash value of the empty string is zero.)
2351      *
2352      * @return  a hash code value for this object.
2353      */
2354     public int hashCode() {
2355         // The hash or hashIsZero fields are subject to a benign data race,
2356         // making it crucial to ensure that any observable result of the
2357         // calculation in this method stays correct under any possible read of
2358         // these fields. Necessary restrictions to allow this to be correct
2359         // without explicit memory fences or similar concurrency primitives is
2360         // that we can ever only write to one of these two fields for a given
2361         // String instance, and that the computation is idempotent and derived
2362         // from immutable state
2363         int h = hash;
2364         if (h == 0 && !hashIsZero) {
2365             h = isLatin1() ? StringLatin1.hashCode(value)
2366                            : StringUTF16.hashCode(value);
2367             if (h == 0) {
2368                 hashIsZero = true;
2369             } else {
2370                 hash = h;
2371             }
2372         }
2373         return h;
2374     }
2375 
2376     /**
2377      * Returns the index within this string of the first occurrence of
2378      * the specified character. If a character with value
2379      * {@code ch} occurs in the character sequence represented by
2380      * this {@code String} object, then the index (in Unicode
2381      * code units) of the first such occurrence is returned. For
2382      * values of {@code ch} in the range from 0 to 0xFFFF
2383      * (inclusive), this is the smallest value <i>k</i> such that:
2384      * <blockquote><pre>
2385      * this.charAt(<i>k</i>) == ch
2386      * </pre></blockquote>
2387      * is true. For other values of {@code ch}, it is the
2388      * smallest value <i>k</i> such that:
2389      * <blockquote><pre>
2390      * this.codePointAt(<i>k</i>) == ch
2391      * </pre></blockquote>
2392      * is true. In either case, if no such character occurs in this
2393      * string, then {@code -1} is returned.
2394      *
2395      * @param   ch   a character (Unicode code point).
2396      * @return  the index of the first occurrence of the character in the
2397      *          character sequence represented by this object, or
2398      *          {@code -1} if the character does not occur.
2399      */
2400     public int indexOf(int ch) {
2401         return indexOf(ch, 0);
2402     }
2403 
2404     /**
2405      * Returns the index within this string of the first occurrence of the
2406      * specified character, starting the search at the specified index.
2407      * <p>
2408      * If a character with value {@code ch} occurs in the
2409      * character sequence represented by this {@code String}
2410      * object at an index no smaller than {@code fromIndex}, then
2411      * the index of the first such occurrence is returned. For values
2412      * of {@code ch} in the range from 0 to 0xFFFF (inclusive),
2413      * this is the smallest value <i>k</i> such that:
2414      * <blockquote><pre>
2415      * (this.charAt(<i>k</i>) == ch) {@code &&} (<i>k</i> &gt;= fromIndex)
2416      * </pre></blockquote>
2417      * is true. For other values of {@code ch}, it is the
2418      * smallest value <i>k</i> such that:
2419      * <blockquote><pre>
2420      * (this.codePointAt(<i>k</i>) == ch) {@code &&} (<i>k</i> &gt;= fromIndex)
2421      * </pre></blockquote>
2422      * is true. In either case, if no such character occurs in this
2423      * string at or after position {@code fromIndex}, then
2424      * {@code -1} is returned.
2425      *
2426      * <p>
2427      * There is no restriction on the value of {@code fromIndex}. If it
2428      * is negative, it has the same effect as if it were zero: this entire
2429      * string may be searched. If it is greater than the length of this
2430      * string, it has the same effect as if it were equal to the length of
2431      * this string: {@code -1} is returned.
2432      *
2433      * <p>All indices are specified in {@code char} values
2434      * (Unicode code units).
2435      *
2436      * @param   ch          a character (Unicode code point).
2437      * @param   fromIndex   the index to start the search from.
2438      * @return  the index of the first occurrence of the character in the
2439      *          character sequence represented by this object that is greater
2440      *          than or equal to {@code fromIndex}, or {@code -1}
2441      *          if the character does not occur.
2442      *
2443      * @apiNote
2444      * Unlike {@link #substring(int)}, for example, this method does not throw
2445      * an exception when {@code fromIndex} is outside the valid range.
2446      * Rather, it returns -1 when {@code fromIndex} is larger than the length of
2447      * the string.
2448      * This result is, by itself, indistinguishable from a genuine absence of
2449      * {@code ch} in the string.
2450      * If stricter behavior is needed, {@link #indexOf(int, int, int)}
2451      * should be considered instead.
2452      * On a {@link String} {@code s}, for example,
2453      * {@code s.indexOf(ch, fromIndex, s.length())} would throw if
2454      * {@code fromIndex} were larger than the string length, or were negative.
2455      */
2456     public int indexOf(int ch, int fromIndex) {
2457         return isLatin1() ? StringLatin1.indexOf(value, ch, fromIndex, length())
2458                 : StringUTF16.indexOf(value, ch, fromIndex, length());
2459     }
2460 
2461     /**
2462      * Returns the index within this string of the first occurrence of the
2463      * specified character, starting the search at {@code beginIndex} and
2464      * stopping before {@code endIndex}.
2465      *
2466      * <p>If a character with value {@code ch} occurs in the
2467      * character sequence represented by this {@code String}
2468      * object at an index no smaller than {@code beginIndex} but smaller than
2469      * {@code endIndex}, then
2470      * the index of the first such occurrence is returned. For values
2471      * of {@code ch} in the range from 0 to 0xFFFF (inclusive),
2472      * this is the smallest value <i>k</i> such that:
2473      * <blockquote><pre>
2474      * (this.charAt(<i>k</i>) == ch) &amp;&amp; (beginIndex &lt;= <i>k</i> &lt; endIndex)
2475      * </pre></blockquote>
2476      * is true. For other values of {@code ch}, it is the
2477      * smallest value <i>k</i> such that:
2478      * <blockquote><pre>
2479      * (this.codePointAt(<i>k</i>) == ch) &amp;&amp; (beginIndex &lt;= <i>k</i> &lt; endIndex)
2480      * </pre></blockquote>
2481      * is true. In either case, if no such character occurs in this
2482      * string at or after position {@code beginIndex} and before position
2483      * {@code endIndex}, then {@code -1} is returned.
2484      *
2485      * <p>All indices are specified in {@code char} values
2486      * (Unicode code units).
2487      *
2488      * @param   ch          a character (Unicode code point).
2489      * @param   beginIndex  the index to start the search from (included).
2490      * @param   endIndex    the index to stop the search at (excluded).
2491      * @return  the index of the first occurrence of the character in the
2492      *          character sequence represented by this object that is greater
2493      *          than or equal to {@code beginIndex} and less than {@code endIndex},
2494      *          or {@code -1} if the character does not occur.
2495      * @throws  StringIndexOutOfBoundsException if {@code beginIndex}
2496      *          is negative, or {@code endIndex} is larger than the length of
2497      *          this {@code String} object, or {@code beginIndex} is larger than
2498      *          {@code endIndex}.
2499      * @since   21
2500      */
2501     public int indexOf(int ch, int beginIndex, int endIndex) {
2502         checkBoundsBeginEnd(beginIndex, endIndex, length());
2503         return isLatin1() ? StringLatin1.indexOf(value, ch, beginIndex, endIndex)
2504                 : StringUTF16.indexOf(value, ch, beginIndex, endIndex);
2505     }
2506 
2507     /**
2508      * Returns the index within this string of the last occurrence of
2509      * the specified character. For values of {@code ch} in the
2510      * range from 0 to 0xFFFF (inclusive), the index (in Unicode code
2511      * units) returned is the largest value <i>k</i> such that:
2512      * <blockquote><pre>
2513      * this.charAt(<i>k</i>) == ch
2514      * </pre></blockquote>
2515      * is true. For other values of {@code ch}, it is the
2516      * largest value <i>k</i> such that:
2517      * <blockquote><pre>
2518      * this.codePointAt(<i>k</i>) == ch
2519      * </pre></blockquote>
2520      * is true.  In either case, if no such character occurs in this
2521      * string, then {@code -1} is returned.  The
2522      * {@code String} is searched backwards starting at the last
2523      * character.
2524      *
2525      * @param   ch   a character (Unicode code point).
2526      * @return  the index of the last occurrence of the character in the
2527      *          character sequence represented by this object, or
2528      *          {@code -1} if the character does not occur.
2529      */
2530     public int lastIndexOf(int ch) {
2531         return lastIndexOf(ch, length() - 1);
2532     }
2533 
2534     /**
2535      * Returns the index within this string of the last occurrence of
2536      * the specified character, searching backward starting at the
2537      * specified index. For values of {@code ch} in the range
2538      * from 0 to 0xFFFF (inclusive), the index returned is the largest
2539      * value <i>k</i> such that:
2540      * <blockquote><pre>
2541      * (this.charAt(<i>k</i>) == ch) {@code &&} (<i>k</i> &lt;= fromIndex)
2542      * </pre></blockquote>
2543      * is true. For other values of {@code ch}, it is the
2544      * largest value <i>k</i> such that:
2545      * <blockquote><pre>
2546      * (this.codePointAt(<i>k</i>) == ch) {@code &&} (<i>k</i> &lt;= fromIndex)
2547      * </pre></blockquote>
2548      * is true. In either case, if no such character occurs in this
2549      * string at or before position {@code fromIndex}, then
2550      * {@code -1} is returned.
2551      *
2552      * <p>All indices are specified in {@code char} values
2553      * (Unicode code units).
2554      *
2555      * @param   ch          a character (Unicode code point).
2556      * @param   fromIndex   the index to start the search from. There is no
2557      *          restriction on the value of {@code fromIndex}. If it is
2558      *          greater than or equal to the length of this string, it has
2559      *          the same effect as if it were equal to one less than the
2560      *          length of this string: this entire string may be searched.
2561      *          If it is negative, it has the same effect as if it were -1:
2562      *          -1 is returned.
2563      * @return  the index of the last occurrence of the character in the
2564      *          character sequence represented by this object that is less
2565      *          than or equal to {@code fromIndex}, or {@code -1}
2566      *          if the character does not occur before that point.
2567      */
2568     public int lastIndexOf(int ch, int fromIndex) {
2569         return isLatin1() ? StringLatin1.lastIndexOf(value, ch, fromIndex)
2570                           : StringUTF16.lastIndexOf(value, ch, fromIndex);
2571     }
2572 
2573     /**
2574      * Returns the index within this string of the first occurrence of the
2575      * specified substring.
2576      *
2577      * <p>The returned index is the smallest value {@code k} for which:
2578      * <pre>{@code
2579      * this.startsWith(str, k)
2580      * }</pre>
2581      * If no such value of {@code k} exists, then {@code -1} is returned.
2582      *
2583      * @param   str   the substring to search for.
2584      * @return  the index of the first occurrence of the specified substring,
2585      *          or {@code -1} if there is no such occurrence.
2586      */
2587     public int indexOf(String str) {
2588         byte coder = coder();
2589         if (coder == str.coder()) {
2590             return isLatin1() ? StringLatin1.indexOf(value, str.value)
2591                               : StringUTF16.indexOf(value, str.value);
2592         }
2593         if (coder == LATIN1) {  // str.coder == UTF16
2594             return -1;
2595         }
2596         return StringUTF16.indexOfLatin1(value, str.value);
2597     }
2598 
2599     /**
2600      * Returns the index within this string of the first occurrence of the
2601      * specified substring, starting at the specified index.
2602      *
2603      * <p>The returned index is the smallest value {@code k} for which:
2604      * <pre>{@code
2605      *     k >= Math.min(fromIndex, this.length()) &&
2606      *                   this.startsWith(str, k)
2607      * }</pre>
2608      * If no such value of {@code k} exists, then {@code -1} is returned.
2609      *
2610      * @apiNote
2611      * Unlike {@link #substring(int)}, for example, this method does not throw
2612      * an exception when {@code fromIndex} is outside the valid range.
2613      * Rather, for a non-empty {@code str} it returns -1 when {@code fromIndex} is
2614      * larger than the length of the string (an empty {@code str} is found at that length).
2615      * This result is, by itself, indistinguishable from a genuine absence of
2616      * {@code str} in the string.
2617      * If stricter behavior is needed, {@link #indexOf(String, int, int)}
2618      * should be considered instead.
2619      * On {@link String} {@code s} and a non-empty {@code str}, for example,
2620      * {@code s.indexOf(str, fromIndex, s.length())} would throw if
2621      * {@code fromIndex} were larger than the string length, or were negative.
2622      *
2623      * @param   str         the substring to search for.
2624      * @param   fromIndex   the index from which to start the search.
2625      * @return  the index of the first occurrence of the specified substring,
2626      *          starting at the specified index,
2627      *          or {@code -1} if there is no such occurrence.
2628      */
2629     public int indexOf(String str, int fromIndex) {
2630         return indexOf(value, coder(), length(), str, fromIndex);
2631     }
2632 
2633     /**
2634      * Returns the index of the first occurrence of the specified substring
2635      * within the specified index range of {@code this} string.
2636      *
2637      * <p>This method returns the same result as the one of the invocation
2638      * <pre>{@code
2639      *     s.substring(beginIndex, endIndex).indexOf(str) + beginIndex
2640      * }</pre>
2641      * if the index returned by {@link #indexOf(String)} is non-negative,
2642      * and returns -1 otherwise.
2643      * (No substring is instantiated, though.)
2644      *
2645      * @param   str         the substring to search for.
2646      * @param   beginIndex  the index to start the search from (included).
2647      * @param   endIndex    the index to stop the search at (excluded).
2648      * @return  the index of the first occurrence of the specified substring
2649      *          within the specified index range,
2650      *          or {@code -1} if there is no such occurrence.
2651      * @throws  StringIndexOutOfBoundsException if {@code beginIndex}
2652      *          is negative, or {@code endIndex} is larger than the length of
2653      *          this {@code String} object, or {@code beginIndex} is larger than
2654      *          {@code endIndex}.
2655      * @since   21
2656      */
2657     public int indexOf(String str, int beginIndex, int endIndex) {
2658         if (str.length() == 1) {
2659             /* Simple optimization, can be omitted without behavioral impact */
2660             return indexOf(str.charAt(0), beginIndex, endIndex);
2661         }
2662         checkBoundsBeginEnd(beginIndex, endIndex, length());
2663         return indexOf(value, coder(), endIndex, str, beginIndex);
2664     }
2665 
2666     /**
2667      * Code shared by String and AbstractStringBuilder to do searches. The
2668      * source is the character array being searched, and the target
2669      * is the string being searched for.
2670      *
2671      * @param   src       the characters being searched.
2672      * @param   srcCoder  the coder of the source string.
2673      * @param   srcCount  last index (exclusive) in the source string.
2674      * @param   tgtStr    the characters being searched for.
2675      * @param   fromIndex the index to begin searching from.
2676      */
2677     static int indexOf(byte[] src, byte srcCoder, int srcCount,
2678                        String tgtStr, int fromIndex) {
2679         fromIndex = Math.clamp(fromIndex, 0, srcCount);
2680         int tgtCount = tgtStr.length();
2681         if (tgtCount > srcCount - fromIndex) {
2682             return -1;
2683         }
2684         if (tgtCount == 0) {
2685             return fromIndex;
2686         }
2687 
2688         byte[] tgt = tgtStr.value;
2689         byte tgtCoder = tgtStr.coder();
2690         if (srcCoder == tgtCoder) {
2691             return srcCoder == LATIN1
2692                 ? StringLatin1.indexOf(src, srcCount, tgt, tgtCount, fromIndex)
2693                 : StringUTF16.indexOf(src, srcCount, tgt, tgtCount, fromIndex);
2694         }
2695         if (srcCoder == LATIN1) {    //  && tgtCoder == UTF16
2696             return -1;
2697         }
2698         // srcCoder == UTF16 && tgtCoder == LATIN1) {
2699         return StringUTF16.indexOfLatin1(src, srcCount, tgt, tgtCount, fromIndex);
2700     }
2701 
2702     /**
2703      * Returns the index within this string of the last occurrence of the
2704      * specified substring.  The last occurrence of the empty string ""
2705      * is considered to occur at the index value {@code this.length()}.
2706      *
2707      * <p>The returned index is the largest value {@code k} for which:
2708      * <pre>{@code
2709      * this.startsWith(str, k)
2710      * }</pre>
2711      * If no such value of {@code k} exists, then {@code -1} is returned.
2712      *
2713      * @param   str   the substring to search for.
2714      * @return  the index of the last occurrence of the specified substring,
2715      *          or {@code -1} if there is no such occurrence.
2716      */
2717     public int lastIndexOf(String str) {
2718         return lastIndexOf(str, length());
2719     }
2720 
2721     /**
2722      * Returns the index within this string of the last occurrence of the
2723      * specified substring, searching backward starting at the specified index.
2724      *
2725      * <p>The returned index is the largest value {@code k} for which:
2726      * <pre>{@code
2727      *     k <= Math.min(fromIndex, this.length()) &&
2728      *                   this.startsWith(str, k)
2729      * }</pre>
2730      * If no such value of {@code k} exists, then {@code -1} is returned.
2731      *
2732      * @param   str         the substring to search for.
2733      * @param   fromIndex   the index to start the search from.
2734      * @return  the index of the last occurrence of the specified substring,
2735      *          searching backward from the specified index,
2736      *          or {@code -1} if there is no such occurrence.
2737      */
2738     public int lastIndexOf(String str, int fromIndex) {
2739         return lastIndexOf(value, coder(), length(), str, fromIndex);
2740     }
2741 
2742     /**
2743      * Code shared by String and AbstractStringBuilder to do searches. The
2744      * source is the character array being searched, and the target
2745      * is the string being searched for.
2746      *
2747      * @param   src         the characters being searched.
2748      * @param   srcCoder    coder handles the mapping between bytes/chars
2749      * @param   srcCount    count of the source string.
2750      * @param   tgtStr      the characters being searched for.
2751      * @param   fromIndex   the index to begin searching from.
2752      */
2753     static int lastIndexOf(byte[] src, byte srcCoder, int srcCount,
2754                            String tgtStr, int fromIndex) {
2755         byte[] tgt = tgtStr.value;
2756         byte tgtCoder = tgtStr.coder();
2757         int tgtCount = tgtStr.length();
2758         /*
2759          * Check arguments; return immediately where possible. For
2760          * consistency, don't check for null str.
2761          */
2762         int rightIndex = srcCount - tgtCount;
2763         if (fromIndex > rightIndex) {
2764             fromIndex = rightIndex;
2765         }
2766         if (fromIndex < 0) {
2767             return -1;
2768         }
2769         /* Empty string always matches. */
2770         if (tgtCount == 0) {
2771             return fromIndex;
2772         }
2773         if (srcCoder == tgtCoder) {
2774             return srcCoder == LATIN1
2775                 ? StringLatin1.lastIndexOf(src, srcCount, tgt, tgtCount, fromIndex)
2776                 : StringUTF16.lastIndexOf(src, srcCount, tgt, tgtCount, fromIndex);
2777         }
2778         if (srcCoder == LATIN1) {    // && tgtCoder == UTF16
2779             return -1;
2780         }
2781         // srcCoder == UTF16 && tgtCoder == LATIN1
2782         return StringUTF16.lastIndexOfLatin1(src, srcCount, tgt, tgtCount, fromIndex);
2783     }
2784 
2785     /**
2786      * Returns a string that is a substring of this string. The
2787      * substring begins with the character at the specified index and
2788      * extends to the end of this string. <p>
2789      * Examples:
2790      * <blockquote><pre>
2791      * "unhappy".substring(2) returns "happy"
2792      * "Harbison".substring(3) returns "bison"
2793      * "emptiness".substring(9) returns "" (an empty string)
2794      * </pre></blockquote>
2795      *
2796      * @param      beginIndex   the beginning index, inclusive.
2797      * @return     the specified substring.
2798      * @throws     IndexOutOfBoundsException  if
2799      *             {@code beginIndex} is negative or larger than the
2800      *             length of this {@code String} object.
2801      */
2802     public String substring(int beginIndex) {
2803         return substring(beginIndex, length());
2804     }
2805 
2806     /**
2807      * Returns a string that is a substring of this string. The
2808      * substring begins at the specified {@code beginIndex} and
2809      * extends to the character at index {@code endIndex - 1}.
2810      * Thus the length of the substring is {@code endIndex-beginIndex}.
2811      * <p>
2812      * Examples:
2813      * <blockquote><pre>
2814      * "hamburger".substring(4, 8) returns "urge"
2815      * "smiles".substring(1, 5) returns "mile"
2816      * </pre></blockquote>
2817      *
2818      * @param      beginIndex   the beginning index, inclusive.
2819      * @param      endIndex     the ending index, exclusive.
2820      * @return     the specified substring.
2821      * @throws     IndexOutOfBoundsException  if the
2822      *             {@code beginIndex} is negative, or
2823      *             {@code endIndex} is larger than the length of
2824      *             this {@code String} object, or
2825      *             {@code beginIndex} is larger than
2826      *             {@code endIndex}.
2827      */
2828     public String substring(int beginIndex, int endIndex) {
2829         int length = length();
2830         checkBoundsBeginEnd(beginIndex, endIndex, length);
2831         if (beginIndex == 0 && endIndex == length) {
2832             return this;
2833         }
2834         int subLen = endIndex - beginIndex;
2835         return isLatin1() ? StringLatin1.newString(value, beginIndex, subLen)
2836                           : StringUTF16.newString(value, beginIndex, subLen);
2837     }
2838 
2839     /**
2840      * Returns a character sequence that is a subsequence of this sequence.
2841      *
2842      * <p> An invocation of this method of the form
2843      *
2844      * <blockquote><pre>
2845      * str.subSequence(begin,&nbsp;end)</pre></blockquote>
2846      *
2847      * behaves in exactly the same way as the invocation
2848      *
2849      * <blockquote><pre>
2850      * str.substring(begin,&nbsp;end)</pre></blockquote>
2851      *
2852      * @apiNote
2853      * This method is defined so that the {@code String} class can implement
2854      * the {@link CharSequence} interface.
2855      *
2856      * @param   beginIndex   the begin index, inclusive.
2857      * @param   endIndex     the end index, exclusive.
2858      * @return  the specified subsequence.
2859      *
2860      * @throws  IndexOutOfBoundsException
2861      *          if {@code beginIndex} or {@code endIndex} is negative,
2862      *          if {@code endIndex} is greater than {@code length()},
2863      *          or if {@code beginIndex} is greater than {@code endIndex}
2864      *
2865      * @since 1.4
2866      */
2867     public CharSequence subSequence(int beginIndex, int endIndex) {
2868         return this.substring(beginIndex, endIndex);
2869     }
2870 
2871     /**
2872      * Concatenates the specified string to the end of this string.
2873      * <p>
2874      * If the length of the argument string is {@code 0}, then this
2875      * {@code String} object is returned. Otherwise, a
2876      * {@code String} object is returned that represents a character
2877      * sequence that is the concatenation of the character sequence
2878      * represented by this {@code String} object and the character
2879      * sequence represented by the argument string.<p>
2880      * Examples:
2881      * <blockquote><pre>
2882      * "cares".concat("s") returns "caress"
2883      * "to".concat("get").concat("her") returns "together"
2884      * </pre></blockquote>
2885      *
2886      * @param   str   the {@code String} that is concatenated to the end
2887      *                of this {@code String}.
2888      * @return  a string that represents the concatenation of this object's
2889      *          characters followed by the string argument's characters.
2890      */
2891     public String concat(String str) {
2892         if (str.isEmpty()) {
2893             return this;
2894         }
2895         return StringConcatHelper.simpleConcat(this, str);
2896     }
2897 
2898     /**
2899      * Returns a string resulting from replacing all occurrences of
2900      * {@code oldChar} in this string with {@code newChar}.
2901      * <p>
2902      * If the character {@code oldChar} does not occur in the
2903      * character sequence represented by this {@code String} object,
2904      * then a reference to this {@code String} object is returned.
2905      * Otherwise, a {@code String} object is returned that
2906      * represents a character sequence identical to the character sequence
2907      * represented by this {@code String} object, except that every
2908      * occurrence of {@code oldChar} is replaced by an occurrence
2909      * of {@code newChar}.
2910      * <p>
2911      * Examples:
2912      * <blockquote><pre>
2913      * "mesquite in your cellar".replace('e', 'o')
2914      *         returns "mosquito in your collar"
2915      * "the war of baronets".replace('r', 'y')
2916      *         returns "the way of bayonets"
2917      * "sparring with a purple porpoise".replace('p', 't')
2918      *         returns "starring with a turtle tortoise"
2919      * "JonL".replace('q', 'x') returns "JonL" (no change)
2920      * </pre></blockquote>
2921      *
2922      * @param   oldChar   the old character.
2923      * @param   newChar   the new character.
2924      * @return  a string derived from this string by replacing every
2925      *          occurrence of {@code oldChar} with {@code newChar}.
2926      */
2927     public String replace(char oldChar, char newChar) {
2928         if (oldChar != newChar) {
2929             String ret = isLatin1() ? StringLatin1.replace(value, oldChar, newChar)
2930                                     : StringUTF16.replace(value, oldChar, newChar);
2931             if (ret != null) {
2932                 return ret;
2933             }
2934         }
2935         return this;
2936     }
2937 
2938     /**
2939      * Tells whether or not this string matches the given <a
2940      * href="../util/regex/Pattern.html#sum">regular expression</a>.
2941      *
2942      * <p> An invocation of this method of the form
2943      * <i>str</i>{@code .matches(}<i>regex</i>{@code )} yields exactly the
2944      * same result as the expression
2945      *
2946      * <blockquote>
2947      * {@link java.util.regex.Pattern}.{@link java.util.regex.Pattern#matches(String,CharSequence)
2948      * matches(<i>regex</i>, <i>str</i>)}
2949      * </blockquote>
2950      *
2951      * @param   regex
2952      *          the regular expression to which this string is to be matched
2953      *
2954      * @return  {@code true} if, and only if, this string matches the
2955      *          given regular expression
2956      *
2957      * @throws  PatternSyntaxException
2958      *          if the regular expression's syntax is invalid
2959      *
2960      * @see java.util.regex.Pattern
2961      *
2962      * @since 1.4
2963      */
2964     public boolean matches(String regex) {
2965         return Pattern.matches(regex, this);
2966     }
2967 
2968     /**
2969      * Returns true if and only if this string contains the specified
2970      * sequence of char values.
2971      *
2972      * @param s the sequence to search for
2973      * @return true if this string contains {@code s}, false otherwise
2974      * @since 1.5
2975      */
2976     public boolean contains(CharSequence s) {
2977         return indexOf(s.toString()) >= 0;
2978     }
2979 
2980     /**
2981      * Replaces the first substring of this string that matches the given <a
2982      * href="../util/regex/Pattern.html#sum">regular expression</a> with the
2983      * given replacement.
2984      *
2985      * <p> An invocation of this method of the form
2986      * <i>str</i>{@code .replaceFirst(}<i>regex</i>{@code ,} <i>repl</i>{@code )}
2987      * yields exactly the same result as the expression
2988      *
2989      * <blockquote>
2990      * <code>
2991      * {@link java.util.regex.Pattern}.{@link
2992      * java.util.regex.Pattern#compile(String) compile}(<i>regex</i>).{@link
2993      * java.util.regex.Pattern#matcher(java.lang.CharSequence) matcher}(<i>str</i>).{@link
2994      * java.util.regex.Matcher#replaceFirst(String) replaceFirst}(<i>repl</i>)
2995      * </code>
2996      * </blockquote>
2997      *
2998      *<p>
2999      * Note that backslashes ({@code \}) and dollar signs ({@code $}) in the
3000      * replacement string may cause the results to be different than if it were
3001      * being treated as a literal replacement string; see
3002      * {@link java.util.regex.Matcher#replaceFirst}.
3003      * Use {@link java.util.regex.Matcher#quoteReplacement} to suppress the special
3004      * meaning of these characters, if desired.
3005      *
3006      * @param   regex
3007      *          the regular expression to which this string is to be matched
3008      * @param   replacement
3009      *          the string to be substituted for the first match
3010      *
3011      * @return  The resulting {@code String}
3012      *
3013      * @throws  PatternSyntaxException
3014      *          if the regular expression's syntax is invalid
3015      *
3016      * @see java.util.regex.Pattern
3017      *
3018      * @since 1.4
3019      */
3020     public String replaceFirst(String regex, String replacement) {
3021         return Pattern.compile(regex).matcher(this).replaceFirst(replacement);
3022     }
3023 
3024     /**
3025      * Replaces each substring of this string that matches the given <a
3026      * href="../util/regex/Pattern.html#sum">regular expression</a> with the
3027      * given replacement.
3028      *
3029      * <p> An invocation of this method of the form
3030      * <i>str</i>{@code .replaceAll(}<i>regex</i>{@code ,} <i>repl</i>{@code )}
3031      * yields exactly the same result as the expression
3032      *
3033      * <blockquote>
3034      * <code>
3035      * {@link java.util.regex.Pattern}.{@link
3036      * java.util.regex.Pattern#compile(String) compile}(<i>regex</i>).{@link
3037      * java.util.regex.Pattern#matcher(java.lang.CharSequence) matcher}(<i>str</i>).{@link
3038      * java.util.regex.Matcher#replaceAll(String) replaceAll}(<i>repl</i>)
3039      * </code>
3040      * </blockquote>
3041      *
3042      *<p>
3043      * Note that backslashes ({@code \}) and dollar signs ({@code $}) in the
3044      * replacement string may cause the results to be different than if it were
3045      * being treated as a literal replacement string; see
3046      * {@link java.util.regex.Matcher#replaceAll Matcher.replaceAll}.
3047      * Use {@link java.util.regex.Matcher#quoteReplacement} to suppress the special
3048      * meaning of these characters, if desired.
3049      *
3050      * @param   regex
3051      *          the regular expression to which this string is to be matched
3052      * @param   replacement
3053      *          the string to be substituted for each match
3054      *
3055      * @return  The resulting {@code String}
3056      *
3057      * @throws  PatternSyntaxException
3058      *          if the regular expression's syntax is invalid
3059      *
3060      * @see java.util.regex.Pattern
3061      *
3062      * @since 1.4
3063      */
3064     public String replaceAll(String regex, String replacement) {
3065         return Pattern.compile(regex).matcher(this).replaceAll(replacement);
3066     }
3067 
3068     /**
3069      * Replaces each substring of this string that matches the literal target
3070      * sequence with the specified literal replacement sequence. The
3071      * replacement proceeds from the beginning of the string to the end, for
3072      * example, replacing "aa" with "b" in the string "aaa" will result in
3073      * "ba" rather than "ab".
3074      *
3075      * @param  target The sequence of char values to be replaced
3076      * @param  replacement The replacement sequence of char values
3077      * @return  The resulting string
3078      * @since 1.5
3079      */
3080     public String replace(CharSequence target, CharSequence replacement) {
3081         String trgtStr = target.toString();
3082         String replStr = replacement.toString();
3083         int thisLen = length();
3084         int trgtLen = trgtStr.length();
3085         int replLen = replStr.length();
3086 
3087         if (trgtLen > 0) {
3088             if (trgtLen == 1 && replLen == 1) {
3089                 return replace(trgtStr.charAt(0), replStr.charAt(0));
3090             }
3091 
3092             boolean thisIsLatin1 = this.isLatin1();
3093             boolean trgtIsLatin1 = trgtStr.isLatin1();
3094             boolean replIsLatin1 = replStr.isLatin1();
3095             String ret = (thisIsLatin1 && trgtIsLatin1 && replIsLatin1)
3096                     ? StringLatin1.replace(value, thisLen,
3097                                            trgtStr.value, trgtLen,
3098                                            replStr.value, replLen)
3099                     : StringUTF16.replace(value, thisLen, thisIsLatin1,
3100                                           trgtStr.value, trgtLen, trgtIsLatin1,
3101                                           replStr.value, replLen, replIsLatin1);
3102             if (ret != null) {
3103                 return ret;
3104             }
3105             return this;
3106 
3107         } else { // trgtLen == 0
3108             int resultLen;
3109             try {
3110                 resultLen = Math.addExact(thisLen, Math.multiplyExact(
3111                         Math.addExact(thisLen, 1), replLen));
3112             } catch (ArithmeticException ignored) {
3113                 throw new OutOfMemoryError("Required length exceeds implementation limit");
3114             }
3115 
3116             StringBuilder sb = new StringBuilder(resultLen);
3117             sb.append(replStr);
3118             for (int i = 0; i < thisLen; ++i) {
3119                 sb.append(charAt(i)).append(replStr);
3120             }
3121             return sb.toString();
3122         }
3123     }
3124 
3125     /**
3126      * Splits this string around matches of the given
3127      * <a href="../util/regex/Pattern.html#sum">regular expression</a>.
3128      *
3129      * <p> The array returned by this method contains each substring of this
3130      * string that is terminated by another substring that matches the given
3131      * expression or is terminated by the end of the string.  The substrings in
3132      * the array are in the order in which they occur in this string.  If the
3133      * expression does not match any part of the input then the resulting array
3134      * has just one element, namely this string.
3135      *
3136      * <p> When there is a positive-width match at the beginning of this
3137      * string then an empty leading substring is included at the beginning
3138      * of the resulting array. A zero-width match at the beginning however
3139      * never produces such empty leading substring.
3140      *
3141      * <p> The {@code limit} parameter controls the number of times the
3142      * pattern is applied and therefore affects the length of the resulting
3143      * array.
3144      * <ul>
3145      *    <li><p>
3146      *    If the <i>limit</i> is positive then the pattern will be applied
3147      *    at most <i>limit</i>&nbsp;-&nbsp;1 times, the array's length will be
3148      *    no greater than <i>limit</i>, and the array's last entry will contain
3149      *    all input beyond the last matched delimiter.</p></li>
3150      *
3151      *    <li><p>
3152      *    If the <i>limit</i> is zero then the pattern will be applied as
3153      *    many times as possible, the array can have any length, and trailing
3154      *    empty strings will be discarded.</p></li>
3155      *
3156      *    <li><p>
3157      *    If the <i>limit</i> is negative then the pattern will be applied
3158      *    as many times as possible and the array can have any length.</p></li>
3159      * </ul>
3160      *
3161      * <p> The string {@code "boo:and:foo"}, for example, yields the
3162      * following results with these parameters:
3163      *
3164      * <blockquote><table class="plain">
3165      * <caption style="display:none">Split example showing regex, limit, and result</caption>
3166      * <thead>
3167      * <tr>
3168      *     <th scope="col">Regex</th>
3169      *     <th scope="col">Limit</th>
3170      *     <th scope="col">Result</th>
3171      * </tr>
3172      * </thead>
3173      * <tbody>
3174      * <tr><th scope="row" rowspan="3" style="font-weight:normal">:</th>
3175      *     <th scope="row" style="font-weight:normal; text-align:right; padding-right:1em">2</th>
3176      *     <td>{@code { "boo", "and:foo" }}</td></tr>
3177      * <tr><!-- : -->
3178      *     <th scope="row" style="font-weight:normal; text-align:right; padding-right:1em">5</th>
3179      *     <td>{@code { "boo", "and", "foo" }}</td></tr>
3180      * <tr><!-- : -->
3181      *     <th scope="row" style="font-weight:normal; text-align:right; padding-right:1em">-2</th>
3182      *     <td>{@code { "boo", "and", "foo" }}</td></tr>
3183      * <tr><th scope="row" rowspan="3" style="font-weight:normal">o</th>
3184      *     <th scope="row" style="font-weight:normal; text-align:right; padding-right:1em">5</th>
3185      *     <td>{@code { "b", "", ":and:f", "", "" }}</td></tr>
3186      * <tr><!-- o -->
3187      *     <th scope="row" style="font-weight:normal; text-align:right; padding-right:1em">-2</th>
3188      *     <td>{@code { "b", "", ":and:f", "", "" }}</td></tr>
3189      * <tr><!-- o -->
3190      *     <th scope="row" style="font-weight:normal; text-align:right; padding-right:1em">0</th>
3191      *     <td>{@code { "b", "", ":and:f" }}</td></tr>
3192      * </tbody>
3193      * </table></blockquote>
3194      *
3195      * <p> An invocation of this method of the form
3196      * <i>str.</i>{@code split(}<i>regex</i>{@code ,}&nbsp;<i>n</i>{@code )}
3197      * yields the same result as the expression
3198      *
3199      * <blockquote>
3200      * <code>
3201      * {@link java.util.regex.Pattern}.{@link
3202      * java.util.regex.Pattern#compile(String) compile}(<i>regex</i>).{@link
3203      * java.util.regex.Pattern#split(java.lang.CharSequence,int) split}(<i>str</i>,&nbsp;<i>n</i>)
3204      * </code>
3205      * </blockquote>
3206      *
3207      *
3208      * @param  regex
3209      *         the delimiting regular expression
3210      *
3211      * @param  limit
3212      *         the result threshold, as described above
3213      *
3214      * @return  the array of strings computed by splitting this string
3215      *          around matches of the given regular expression
3216      *
3217      * @throws  PatternSyntaxException
3218      *          if the regular expression's syntax is invalid
3219      *
3220      * @see java.util.regex.Pattern
3221      *
3222      * @since 1.4
3223      */
3224     public String[] split(String regex, int limit) {
3225         return split(regex, limit, false);
3226     }
3227 
3228     /**
3229      * Splits this string around matches of the given regular expression and
3230      * returns both the strings and the matching delimiters.
3231      *
3232      * <p> The array returned by this method contains each substring of this
3233      * string that is terminated by another substring that matches the given
3234      * expression or is terminated by the end of the string.
3235      * Each substring is immediately followed by the subsequence (the delimiter)
3236      * that matches the given expression, <em>except</em> for the last
3237      * substring, which is not followed by anything.
3238      * The substrings in the array and the delimiters are in the order in which
3239      * they occur in the input.
3240      * If the expression does not match any part of the input then the resulting
3241      * array has just one element, namely this string.
3242      *
3243      * <p> When there is a positive-width match at the beginning of this
3244      * string then an empty leading substring is included at the beginning
3245      * of the resulting array. A zero-width match at the beginning however
3246      * never produces such empty leading substring nor the empty delimiter.
3247      *
3248      * <p> The {@code limit} parameter controls the number of times the
3249      * pattern is applied and therefore affects the length of the resulting
3250      * array.
3251      * <ul>
3252      *    <li> If the <i>limit</i> is positive then the pattern will be applied
3253      *    at most <i>limit</i>&nbsp;-&nbsp;1 times, the array's length will be
3254      *    no greater than 2 &times; <i>limit</i> - 1, and the array's last
3255      *    entry will contain all input beyond the last matched delimiter.</li>
3256      *
3257      *    <li> If the <i>limit</i> is zero then the pattern will be applied as
3258      *    many times as possible, the array can have any length, and trailing
3259      *    empty strings will be discarded.</li>
3260      *
3261      *    <li> If the <i>limit</i> is negative then the pattern will be applied
3262      *    as many times as possible and the array can have any length.</li>
3263      * </ul>
3264      *
3265      * <p> The input {@code "boo:::and::foo"}, for example, yields the following
3266      * results with these parameters:
3267      *
3268      * <table class="plain" style="margin-left:2em;">
3269      * <caption style="display:none">Split example showing regex, limit, and result</caption>
3270      * <thead>
3271      * <tr>
3272      *     <th scope="col">Regex</th>
3273      *     <th scope="col">Limit</th>
3274      *     <th scope="col">Result</th>
3275      * </tr>
3276      * </thead>
3277      * <tbody>
3278      * <tr><th scope="row" rowspan="3" style="font-weight:normal">:+</th>
3279      *     <th scope="row" style="font-weight:normal; text-align:right; padding-right:1em">2</th>
3280      *     <td>{@code { "boo", ":::", "and::foo" }}</td></tr>
3281      * <tr><!-- : -->
3282      *     <th scope="row" style="font-weight:normal; text-align:right; padding-right:1em">5</th>
3283      *     <td>{@code { "boo", ":::", "and", "::", "foo" }}</td></tr>
3284      * <tr><!-- : -->
3285      *     <th scope="row" style="font-weight:normal; text-align:right; padding-right:1em">-1</th>
3286      *     <td>{@code { "boo", ":::", "and", "::", "foo" }}</td></tr>
3287      * <tr><th scope="row" rowspan="3" style="font-weight:normal">o</th>
3288      *     <th scope="row" style="font-weight:normal; text-align:right; padding-right:1em">5</th>
3289      *     <td>{@code { "b", "o", "", "o", ":::and::f", "o", "", "o", "" }}</td></tr>
3290      * <tr><!-- o -->
3291      *     <th scope="row" style="font-weight:normal; text-align:right; padding-right:1em">-1</th>
3292      *     <td>{@code { "b", "o", "", "o", ":::and::f", "o", "", "o", "" }}</td></tr>
3293      * <tr><!-- o -->
3294      *     <th scope="row" style="font-weight:normal; text-align:right; padding-right:1em">0</th>
3295      *     <td>{@code { "b", "o", "", "o", ":::and::f", "o", "", "o" }}</td></tr>
3296      * </tbody>
3297      * </table>
3298      *
3299      * @apiNote An invocation of this method of the form
3300      * <i>str.</i>{@code splitWithDelimiters(}<i>regex</i>{@code ,}&nbsp;<i>n</i>{@code )}
3301      * yields the same result as the expression
3302      *
3303      * <blockquote>
3304      * <code>
3305      * {@link java.util.regex.Pattern}.{@link
3306      * java.util.regex.Pattern#compile(String) compile}(<i>regex</i>).{@link
3307      * java.util.regex.Pattern#splitWithDelimiters(CharSequence,int) splitWithDelimiters}(<i>str</i>,&nbsp;<i>n</i>)
3308      * </code>
3309      * </blockquote>
3310      *
3311      * @param  regex
3312      *         the delimiting regular expression
3313      *
3314      * @param  limit
3315      *         the result threshold, as described above
3316      *
3317      * @return  the array of strings computed by splitting this string
3318      *          around matches of the given regular expression, alternating
3319      *          substrings and matching delimiters
3320      *
3321      * @since   21
3322      */
3323     public String[] splitWithDelimiters(String regex, int limit) {
3324         return split(regex, limit, true);
3325     }
3326 
3327     private String[] split(String regex, int limit, boolean withDelimiters) {
3328         /* fastpath if the regex is a
3329          * (1) one-char String and this character is not one of the
3330          *     RegEx's meta characters ".$|()[{^?*+\\", or
3331          * (2) two-char String and the first char is the backslash and
3332          *     the second is not the ascii digit or ascii letter.
3333          */
3334         char ch = 0;
3335         if (((regex.length() == 1 &&
3336                 ".$|()[{^?*+\\".indexOf(ch = regex.charAt(0)) == -1) ||
3337                 (regex.length() == 2 &&
3338                         regex.charAt(0) == '\\' &&
3339                         (((ch = regex.charAt(1))-'0')|('9'-ch)) < 0 &&
3340                         ((ch-'a')|('z'-ch)) < 0 &&
3341                         ((ch-'A')|('Z'-ch)) < 0)) &&
3342                 (ch < Character.MIN_HIGH_SURROGATE ||
3343                         ch > Character.MAX_LOW_SURROGATE))
3344         {
3345             // All the checks above can potentially be constant folded by
3346             // a JIT/AOT compiler when the regex is a constant string.
3347             // That requires method inlining of the checks, which is only
3348             // possible when the actual split logic is in a separate method
3349             // because the large split loop can usually not be inlined.
3350             return split(ch, limit, withDelimiters);
3351         }
3352         Pattern pattern = Pattern.compile(regex);
3353         return withDelimiters
3354                 ? pattern.splitWithDelimiters(this, limit)
3355                 : pattern.split(this, limit);
3356     }
3357 
3358     private String[] split(char ch, int limit, boolean withDelimiters) {
3359         int matchCount = 0;
3360         int off = 0;
3361         int next;
3362         boolean limited = limit > 0;
3363         ArrayList<String> list = new ArrayList<>();
3364         String del = withDelimiters ? String.valueOf(ch) : null;
3365         while ((next = indexOf(ch, off)) != -1) {
3366             if (!limited || matchCount < limit - 1) {
3367                 list.add(substring(off, next));
3368                 if (withDelimiters) {
3369                     list.add(del);
3370                 }
3371                 off = next + 1;
3372                 ++matchCount;
3373             } else {    // last one
3374                 int last = length();
3375                 list.add(substring(off, last));
3376                 off = last;
3377                 ++matchCount;
3378                 break;
3379             }
3380         }
3381         // If no match was found, return this
3382         if (off == 0)
3383             return new String[] {this};
3384 
3385         // Add remaining segment
3386         if (!limited || matchCount < limit)
3387             list.add(substring(off, length()));
3388 
3389         // Construct result
3390         int resultSize = list.size();
3391         if (limit == 0) {
3392             while (resultSize > 0 && list.get(resultSize - 1).isEmpty()) {
3393                 resultSize--;
3394             }
3395         }
3396         String[] result = new String[resultSize];
3397         return list.subList(0, resultSize).toArray(result);
3398     }
3399 
3400     /**
3401      * Splits this string around matches of the given <a
3402      * href="../util/regex/Pattern.html#sum">regular expression</a>.
3403      *
3404      * <p> This method works as if by invoking the two-argument {@link
3405      * #split(String, int) split} method with the given expression and a limit
3406      * argument of zero.  Trailing empty strings are therefore not included in
3407      * the resulting array.
3408      *
3409      * <p> The string {@code "boo:and:foo"}, for example, yields the following
3410      * results with these expressions:
3411      *
3412      * <blockquote><table class="plain">
3413      * <caption style="display:none">Split examples showing regex and result</caption>
3414      * <thead>
3415      * <tr>
3416      *  <th scope="col">Regex</th>
3417      *  <th scope="col">Result</th>
3418      * </tr>
3419      * </thead>
3420      * <tbody>
     * <tr><th scope="row" style="font-weight:normal">:</th>
     *     <td>{@code { "boo", "and", "foo" }}</td></tr>
     * <tr><th scope="row" style="font-weight:normal">o</th>
     *     <td>{@code { "b", "", ":and:f" }}</td></tr>
3425      * </tbody>
3426      * </table></blockquote>
3427      *
3428      *
3429      * @param  regex
3430      *         the delimiting regular expression
3431      *
3432      * @return  the array of strings computed by splitting this string
3433      *          around matches of the given regular expression
3434      *
3435      * @throws  PatternSyntaxException
3436      *          if the regular expression's syntax is invalid
3437      *
3438      * @see java.util.regex.Pattern
3439      *
3440      * @since 1.4
3441      */
3442     public String[] split(String regex) {
3443         return split(regex, 0, false);
3444     }
3445 
3446     /**
3447      * Returns a new String composed of copies of the
3448      * {@code CharSequence elements} joined together with a copy of
3449      * the specified {@code delimiter}.
3450      *
3451      * <blockquote>For example,
3452      * <pre>{@code
3453      *     String message = String.join("-", "Java", "is", "cool");
3454      *     // message returned is: "Java-is-cool"
3455      * }</pre></blockquote>
3456      *
3457      * Note that if an element is null, then {@code "null"} is added.
3458      *
3459      * @param  delimiter the delimiter that separates each element
3460      * @param  elements the elements to join together.
3461      *
3462      * @return a new {@code String} that is composed of the {@code elements}
3463      *         separated by the {@code delimiter}
3464      *
3465      * @throws NullPointerException If {@code delimiter} or {@code elements}
3466      *         is {@code null}
3467      *
3468      * @see java.util.StringJoiner
3469      * @since 1.8
3470      */
3471     public static String join(CharSequence delimiter, CharSequence... elements) {
3472         var delim = delimiter.toString();
3473         var elems = new String[elements.length];
3474         for (int i = 0; i < elements.length; i++) {
3475             elems[i] = String.valueOf(elements[i]);
3476         }
3477         return join("", "", delim, elems, elems.length);
3478     }
3479 
3480     /**
3481      * Designated join routine.
3482      *
3483      * @param prefix the non-null prefix
3484      * @param suffix the non-null suffix
3485      * @param delimiter the non-null delimiter
3486      * @param elements the non-null array of non-null elements
3487      * @param size the number of elements in the array (<= elements.length)
3488      * @return the joined string
3489      */
3490     @ForceInline
3491     static String join(String prefix, String suffix, String delimiter, String[] elements, int size) {
3492         int icoder = prefix.coder() | suffix.coder();
3493         long len = (long) prefix.length() + suffix.length();
3494         if (size > 1) { // when there are more than one element, size - 1 delimiters will be emitted
3495             len += (long) (size - 1) * delimiter.length();
3496             icoder |= delimiter.coder();
3497         }
3498         // assert len > 0L; // max: (long) Integer.MAX_VALUE << 32
3499         // following loop will add max: (long) Integer.MAX_VALUE * Integer.MAX_VALUE to len
3500         // so len can overflow at most once
3501         for (int i = 0; i < size; i++) {
3502             var el = elements[i];
3503             len += el.length();
3504             icoder |= el.coder();
3505         }
3506         byte coder = (byte) icoder;
3507         // long len overflow check, char -> byte length, int len overflow check
3508         if (len < 0L || (len <<= coder) != (int) len) {
3509             throw new OutOfMemoryError("Requested string length exceeds VM limit");
3510         }
3511         byte[] value = StringConcatHelper.newArray(len);
3512 
3513         int off = 0;
3514         prefix.getBytes(value, off, coder); off += prefix.length();
3515         if (size > 0) {
3516             var el = elements[0];
3517             el.getBytes(value, off, coder); off += el.length();
3518             for (int i = 1; i < size; i++) {
3519                 delimiter.getBytes(value, off, coder); off += delimiter.length();
3520                 el = elements[i];
3521                 el.getBytes(value, off, coder); off += el.length();
3522             }
3523         }
3524         suffix.getBytes(value, off, coder);
3525         // assert off + suffix.length() == value.length >> coder;
3526 
3527         return new String(value, coder);
3528     }
3529 
3530     /**
3531      * Returns a new {@code String} composed of copies of the
3532      * {@code CharSequence elements} joined together with a copy of the
3533      * specified {@code delimiter}.
3534      *
3535      * <blockquote>For example,
3536      * <pre>{@code
3537      *     List<String> strings = List.of("Java", "is", "cool");
3538      *     String message = String.join(" ", strings);
3539      *     // message returned is: "Java is cool"
3540      *
3541      *     Set<String> strings =
3542      *         new LinkedHashSet<>(List.of("Java", "is", "very", "cool"));
3543      *     String message = String.join("-", strings);
3544      *     // message returned is: "Java-is-very-cool"
3545      * }</pre></blockquote>
3546      *
3547      * Note that if an individual element is {@code null}, then {@code "null"} is added.
3548      *
3549      * @param  delimiter a sequence of characters that is used to separate each
3550      *         of the {@code elements} in the resulting {@code String}
3551      * @param  elements an {@code Iterable} that will have its {@code elements}
3552      *         joined together.
3553      *
3554      * @return a new {@code String} that is composed from the {@code elements}
3555      *         argument
3556      *
3557      * @throws NullPointerException If {@code delimiter} or {@code elements}
3558      *         is {@code null}
3559      *
3560      * @see    #join(CharSequence,CharSequence...)
3561      * @see    java.util.StringJoiner
3562      * @since 1.8
3563      */
3564     public static String join(CharSequence delimiter,
3565             Iterable<? extends CharSequence> elements) {
3566         Objects.requireNonNull(delimiter);
3567         Objects.requireNonNull(elements);
3568         var delim = delimiter.toString();
3569         var elems = new String[8];
3570         int size = 0;
3571         for (CharSequence cs: elements) {
3572             if (size >= elems.length) {
3573                 elems = Arrays.copyOf(elems, elems.length << 1);
3574             }
3575             elems[size++] = String.valueOf(cs);
3576         }
3577         return join("", "", delim, elems, size);
3578     }
3579 
3580     /**
3581      * Converts all of the characters in this {@code String} to lower
3582      * case using the rules of the given {@code Locale}.  Case mapping is based
3583      * on the Unicode Standard version specified by the {@link java.lang.Character Character}
3584      * class. Since case mappings are not always 1:1 char mappings, the resulting {@code String}
3585      * and this {@code String} may differ in length.
3586      * <p>
3587      * Examples of lowercase mappings are in the following table:
3588      * <table class="plain">
3589      * <caption style="display:none">Lowercase mapping examples showing language code of locale, upper case, lower case, and description</caption>
3590      * <thead>
3591      * <tr>
3592      *   <th scope="col">Language Code of Locale</th>
3593      *   <th scope="col">Upper Case</th>
3594      *   <th scope="col">Lower Case</th>
3595      *   <th scope="col">Description</th>
3596      * </tr>
3597      * </thead>
3598      * <tbody>
3599      * <tr>
3600      *   <td>tr (Turkish)</td>
3601      *   <th scope="row" style="font-weight:normal; text-align:left">&#92;u0130</th>
3602      *   <td>&#92;u0069</td>
3603      *   <td>capital letter I with dot above -&gt; small letter i</td>
3604      * </tr>
3605      * <tr>
3606      *   <td>tr (Turkish)</td>
3607      *   <th scope="row" style="font-weight:normal; text-align:left">&#92;u0049</th>
3608      *   <td>&#92;u0131</td>
3609      *   <td>capital letter I -&gt; small letter dotless i </td>
3610      * </tr>
3611      * <tr>
3612      *   <td>(all)</td>
3613      *   <th scope="row" style="font-weight:normal; text-align:left">French Fries</th>
3614      *   <td>french fries</td>
3615      *   <td>lowercased all chars in String</td>
3616      * </tr>
3617      * <tr>
3618      *   <td>(all)</td>
3619      *   <th scope="row" style="font-weight:normal; text-align:left">
3620      *       &Iota;&Chi;&Theta;&Upsilon;&Sigma;</th>
3621      *   <td>&iota;&chi;&theta;&upsilon;&sigma;</td>
3622      *   <td>lowercased all chars in String</td>
3623      * </tr>
3624      * </tbody>
3625      * </table>
3626      *
3627      * @param locale use the case transformation rules for this locale
3628      * @return the {@code String}, converted to lowercase.
3629      * @see     java.lang.String#toLowerCase()
3630      * @see     java.lang.String#toUpperCase()
3631      * @see     java.lang.String#toUpperCase(Locale)
3632      * @since   1.1
3633      */
3634     public String toLowerCase(Locale locale) {
3635         return isLatin1() ? StringLatin1.toLowerCase(this, value, locale)
3636                           : StringUTF16.toLowerCase(this, value, locale);
3637     }
3638 
3639     /**
3640      * Converts all of the characters in this {@code String} to lower
3641      * case using the rules of the default locale. This method is equivalent to
3642      * {@code toLowerCase(Locale.getDefault())}.
3643      *
3644      * @apiNote This method is locale sensitive, and may produce unexpected
3645      * results if used for strings that are intended to be interpreted locale
3646      * independently.
3647      * Examples are programming language identifiers, protocol keys, and HTML
3648      * tags.
3649      * For instance, {@code "TITLE".toLowerCase()} in a Turkish locale
3650      * returns {@code "t\u005Cu0131tle"}, where '\u005Cu0131' is the
3651      * LATIN SMALL LETTER DOTLESS I character.
3652      * To obtain correct results for locale insensitive strings, use
3653      * {@code toLowerCase(Locale.ROOT)}.
3654      *
3655      * @return  the {@code String}, converted to lowercase.
3656      * @see     java.lang.String#toLowerCase(Locale)
3657      */
3658     public String toLowerCase() {
3659         return toLowerCase(Locale.getDefault());
3660     }
3661 
3662     /**
3663      * Converts all of the characters in this {@code String} to upper
3664      * case using the rules of the given {@code Locale}. Case mapping is based
3665      * on the Unicode Standard version specified by the {@link java.lang.Character Character}
3666      * class. Since case mappings are not always 1:1 char mappings, the resulting {@code String}
3667      * and this {@code String} may differ in length.
3668      * <p>
3669      * Examples of locale-sensitive and 1:M case mappings are in the following table:
3670      * <table class="plain">
3671      * <caption style="display:none">Examples of locale-sensitive and 1:M case mappings. Shows Language code of locale, lower case, upper case, and description.</caption>
3672      * <thead>
3673      * <tr>
3674      *   <th scope="col">Language Code of Locale</th>
3675      *   <th scope="col">Lower Case</th>
3676      *   <th scope="col">Upper Case</th>
3677      *   <th scope="col">Description</th>
3678      * </tr>
3679      * </thead>
3680      * <tbody>
3681      * <tr>
3682      *   <td>tr (Turkish)</td>
3683      *   <th scope="row" style="font-weight:normal; text-align:left">&#92;u0069</th>
3684      *   <td>&#92;u0130</td>
3685      *   <td>small letter i -&gt; capital letter I with dot above</td>
3686      * </tr>
3687      * <tr>
3688      *   <td>tr (Turkish)</td>
3689      *   <th scope="row" style="font-weight:normal; text-align:left">&#92;u0131</th>
3690      *   <td>&#92;u0049</td>
3691      *   <td>small letter dotless i -&gt; capital letter I</td>
3692      * </tr>
3693      * <tr>
3694      *   <td>(all)</td>
3695      *   <th scope="row" style="font-weight:normal; text-align:left">&#92;u00df</th>
3696      *   <td>&#92;u0053 &#92;u0053</td>
3697      *   <td>small letter sharp s -&gt; two letters: SS</td>
3698      * </tr>
3699      * <tr>
3700      *   <td>(all)</td>
3701      *   <th scope="row" style="font-weight:normal; text-align:left">Fahrvergn&uuml;gen</th>
3702      *   <td>FAHRVERGN&Uuml;GEN</td>
3703      *   <td></td>
3704      * </tr>
3705      * </tbody>
3706      * </table>
3707      * @param locale use the case transformation rules for this locale
3708      * @return the {@code String}, converted to uppercase.
3709      * @see     java.lang.String#toUpperCase()
3710      * @see     java.lang.String#toLowerCase()
3711      * @see     java.lang.String#toLowerCase(Locale)
3712      * @since   1.1
3713      */
3714     public String toUpperCase(Locale locale) {
3715         return isLatin1() ? StringLatin1.toUpperCase(this, value, locale)
3716                           : StringUTF16.toUpperCase(this, value, locale);
3717     }
3718 
3719     /**
3720      * Converts all of the characters in this {@code String} to upper
3721      * case using the rules of the default locale. This method is equivalent to
3722      * {@code toUpperCase(Locale.getDefault())}.
3723      *
3724      * @apiNote This method is locale sensitive, and may produce unexpected
3725      * results if used for strings that are intended to be interpreted locale
3726      * independently.
3727      * Examples are programming language identifiers, protocol keys, and HTML
3728      * tags.
3729      * For instance, {@code "title".toUpperCase()} in a Turkish locale
3730      * returns {@code "T\u005Cu0130TLE"}, where '\u005Cu0130' is the
3731      * LATIN CAPITAL LETTER I WITH DOT ABOVE character.
3732      * To obtain correct results for locale insensitive strings, use
3733      * {@code toUpperCase(Locale.ROOT)}.
3734      *
3735      * @return  the {@code String}, converted to uppercase.
3736      * @see     java.lang.String#toUpperCase(Locale)
3737      */
3738     public String toUpperCase() {
3739         return toUpperCase(Locale.getDefault());
3740     }
3741 
3742     /**
3743      * Returns a string whose value is this string, with all leading
3744      * and trailing space removed, where space is defined
3745      * as any character whose codepoint is less than or equal to
3746      * {@code 'U+0020'} (the space character).
3747      * <p>
3748      * If this {@code String} object represents an empty character
3749      * sequence, or the first and last characters of character sequence
3750      * represented by this {@code String} object both have codes
3751      * that are not space (as defined above), then a
3752      * reference to this {@code String} object is returned.
3753      * <p>
3754      * Otherwise, if all characters in this string are space (as
3755      * defined above), then a  {@code String} object representing an
3756      * empty string is returned.
3757      * <p>
3758      * Otherwise, let <i>k</i> be the index of the first character in the
3759      * string whose code is not a space (as defined above) and let
3760      * <i>m</i> be the index of the last character in the string whose code
3761      * is not a space (as defined above). A {@code String}
3762      * object is returned, representing the substring of this string that
3763      * begins with the character at index <i>k</i> and ends with the
3764      * character at index <i>m</i>-that is, the result of
3765      * {@code this.substring(k, m + 1)}.
3766      * <p>
3767      * This method may be used to trim space (as defined above) from
3768      * the beginning and end of a string.
3769      *
3770      * @return  a string whose value is this string, with all leading
3771      *          and trailing space removed, or this string if it
3772      *          has no leading or trailing space.
3773      */
3774     public String trim() {
3775         String ret = isLatin1() ? StringLatin1.trim(value)
3776                                 : StringUTF16.trim(value);
3777         return ret == null ? this : ret;
3778     }
3779 
3780     /**
3781      * Returns a string whose value is this string, with all leading
3782      * and trailing {@linkplain Character#isWhitespace(int) white space}
3783      * removed.
3784      * <p>
3785      * If this {@code String} object represents an empty string,
3786      * or if all code points in this string are
3787      * {@linkplain Character#isWhitespace(int) white space}, then an empty string
3788      * is returned.
3789      * <p>
3790      * Otherwise, returns a substring of this string beginning with the first
3791      * code point that is not a {@linkplain Character#isWhitespace(int) white space}
3792      * up to and including the last code point that is not a
3793      * {@linkplain Character#isWhitespace(int) white space}.
3794      * <p>
3795      * This method may be used to strip
3796      * {@linkplain Character#isWhitespace(int) white space} from
3797      * the beginning and end of a string.
3798      *
3799      * @return  a string whose value is this string, with all leading
3800      *          and trailing white space removed
3801      *
3802      * @see Character#isWhitespace(int)
3803      *
3804      * @since 11
3805      */
3806     public String strip() {
3807         String ret = isLatin1() ? StringLatin1.strip(value)
3808                                 : StringUTF16.strip(value);
3809         return ret == null ? this : ret;
3810     }
3811 
3812     /**
3813      * Returns a string whose value is this string, with all leading
3814      * {@linkplain Character#isWhitespace(int) white space} removed.
3815      * <p>
3816      * If this {@code String} object represents an empty string,
3817      * or if all code points in this string are
3818      * {@linkplain Character#isWhitespace(int) white space}, then an empty string
3819      * is returned.
3820      * <p>
3821      * Otherwise, returns a substring of this string beginning with the first
3822      * code point that is not a {@linkplain Character#isWhitespace(int) white space}
3823      * up to and including the last code point of this string.
3824      * <p>
3825      * This method may be used to trim
3826      * {@linkplain Character#isWhitespace(int) white space} from
3827      * the beginning of a string.
3828      *
3829      * @return  a string whose value is this string, with all leading white
3830      *          space removed
3831      *
3832      * @see Character#isWhitespace(int)
3833      *
3834      * @since 11
3835      */
3836     public String stripLeading() {
3837         String ret = isLatin1() ? StringLatin1.stripLeading(value)
3838                                 : StringUTF16.stripLeading(value);
3839         return ret == null ? this : ret;
3840     }
3841 
3842     /**
3843      * Returns a string whose value is this string, with all trailing
3844      * {@linkplain Character#isWhitespace(int) white space} removed.
3845      * <p>
3846      * If this {@code String} object represents an empty string,
3847      * or if all characters in this string are
3848      * {@linkplain Character#isWhitespace(int) white space}, then an empty string
3849      * is returned.
3850      * <p>
3851      * Otherwise, returns a substring of this string beginning with the first
3852      * code point of this string up to and including the last code point
3853      * that is not a {@linkplain Character#isWhitespace(int) white space}.
3854      * <p>
3855      * This method may be used to trim
3856      * {@linkplain Character#isWhitespace(int) white space} from
3857      * the end of a string.
3858      *
3859      * @return  a string whose value is this string, with all trailing white
3860      *          space removed
3861      *
3862      * @see Character#isWhitespace(int)
3863      *
3864      * @since 11
3865      */
3866     public String stripTrailing() {
3867         String ret = isLatin1() ? StringLatin1.stripTrailing(value)
3868                                 : StringUTF16.stripTrailing(value);
3869         return ret == null ? this : ret;
3870     }
3871 
3872     /**
3873      * Returns {@code true} if the string is empty or contains only
3874      * {@linkplain Character#isWhitespace(int) white space} codepoints,
3875      * otherwise {@code false}.
3876      *
3877      * @return {@code true} if the string is empty or contains only
3878      *         {@linkplain Character#isWhitespace(int) white space} codepoints,
3879      *         otherwise {@code false}
3880      *
3881      * @see Character#isWhitespace(int)
3882      *
3883      * @since 11
3884      */
3885     public boolean isBlank() {
3886         return indexOfNonWhitespace() == length();
3887     }
3888 
3889     /**
3890      * Returns a stream of lines extracted from this string,
3891      * separated by line terminators.
3892      * <p>
3893      * A <i>line terminator</i> is one of the following:
3894      * a line feed character {@code "\n"} (U+000A),
3895      * a carriage return character {@code "\r"} (U+000D),
3896      * or a carriage return followed immediately by a line feed
3897      * {@code "\r\n"} (U+000D U+000A).
3898      * <p>
3899      * A <i>line</i> is either a sequence of zero or more characters
3900      * followed by a line terminator, or it is a sequence of one or
3901      * more characters followed by the end of the string. A
3902      * line does not include the line terminator.
3903      * <p>
3904      * The stream returned by this method contains the lines from
3905      * this string in the order in which they occur.
3906      *
3907      * @apiNote This definition of <i>line</i> implies that an empty
3908      *          string has zero lines and that there is no empty line
3909      *          following a line terminator at the end of a string.
3910      *
3911      * @implNote This method provides better performance than
3912      *           split("\R") by supplying elements lazily and
3913      *           by faster search of new line terminators.
3914      *
3915      * @return  the stream of lines extracted from this string
3916      *
3917      * @since 11
3918      */
3919     public Stream<String> lines() {
3920         return isLatin1() ? StringLatin1.lines(value) : StringUTF16.lines(value);
3921     }
3922 
3923     /**
3924      * Adjusts the indentation of each line of this string based on the value of
3925      * {@code n}, and normalizes line termination characters.
3926      * <p>
3927      * This string is conceptually separated into lines using
3928      * {@link String#lines()}. Each line is then adjusted as described below
3929      * and then suffixed with a line feed {@code "\n"} (U+000A). The resulting
3930      * lines are then concatenated and returned.
3931      * <p>
3932      * If {@code n > 0} then {@code n} spaces (U+0020) are inserted at the
3933      * beginning of each line.
3934      * <p>
3935      * If {@code n < 0} then up to {@code n}
3936      * {@linkplain Character#isWhitespace(int) white space characters} are removed
3937      * from the beginning of each line. If a given line does not contain
3938      * sufficient white space then all leading
3939      * {@linkplain Character#isWhitespace(int) white space characters} are removed.
3940      * Each white space character is treated as a single character. In
3941      * particular, the tab character {@code "\t"} (U+0009) is considered a
3942      * single character; it is not expanded.
3943      * <p>
3944      * If {@code n == 0} then the line remains unchanged. However, line
3945      * terminators are still normalized.
3946      *
3947      * @param n  number of leading
3948      *           {@linkplain Character#isWhitespace(int) white space characters}
3949      *           to add or remove
3950      *
3951      * @return string with indentation adjusted and line endings normalized
3952      *
3953      * @see String#lines()
3954      * @see String#isBlank()
3955      * @see Character#isWhitespace(int)
3956      *
3957      * @since 12
3958      */
3959     public String indent(int n) {
3960         if (isEmpty()) {
3961             return "";
3962         }
3963         Stream<String> stream = lines();
3964         if (n > 0) {
3965             final String spaces = " ".repeat(n);
3966             stream = stream.map(s -> spaces + s);
3967         } else if (n == Integer.MIN_VALUE) {
3968             stream = stream.map(s -> s.stripLeading());
3969         } else if (n < 0) {
3970             stream = stream.map(s -> s.substring(Math.min(-n, s.indexOfNonWhitespace())));
3971         }
3972         return stream.collect(Collectors.joining("\n", "", "\n"));
3973     }
3974 
3975     private int indexOfNonWhitespace() {
3976         return isLatin1() ? StringLatin1.indexOfNonWhitespace(value)
3977                           : StringUTF16.indexOfNonWhitespace(value);
3978     }
3979 
3980     private int lastIndexOfNonWhitespace() {
3981         return isLatin1() ? StringLatin1.lastIndexOfNonWhitespace(value)
3982                           : StringUTF16.lastIndexOfNonWhitespace(value);
3983     }
3984 
3985     /**
3986      * Returns a string whose value is this string, with incidental
3987      * {@linkplain Character#isWhitespace(int) white space} removed from
3988      * the beginning and end of every line.
3989      * <p>
3990      * Incidental {@linkplain Character#isWhitespace(int) white space}
3991      * is often present in a text block to align the content with the opening
3992      * delimiter. For example, in the following code, dots represent incidental
3993      * {@linkplain Character#isWhitespace(int) white space}:
3994      * <blockquote><pre>
3995      * String html = """
3996      * ..............&lt;html&gt;
3997      * ..............    &lt;body&gt;
3998      * ..............        &lt;p&gt;Hello, world&lt;/p&gt;
3999      * ..............    &lt;/body&gt;
4000      * ..............&lt;/html&gt;
4001      * ..............""";
4002      * </pre></blockquote>
4003      * This method treats the incidental
4004      * {@linkplain Character#isWhitespace(int) white space} as indentation to be
4005      * stripped, producing a string that preserves the relative indentation of
4006      * the content. Using | to visualize the start of each line of the string:
4007      * <blockquote><pre>
4008      * |&lt;html&gt;
4009      * |    &lt;body&gt;
4010      * |        &lt;p&gt;Hello, world&lt;/p&gt;
4011      * |    &lt;/body&gt;
4012      * |&lt;/html&gt;
4013      * </pre></blockquote>
4014      * First, the individual lines of this string are extracted. A <i>line</i>
4015      * is a sequence of zero or more characters followed by either a line
4016      * terminator or the end of the string.
4017      * If the string has at least one line terminator, the last line consists
4018      * of the characters between the last terminator and the end of the string.
4019      * Otherwise, if the string has no terminators, the last line is the start
4020      * of the string to the end of the string, in other words, the entire
4021      * string.
4022      * A line does not include the line terminator.
4023      * <p>
4024      * Then, the <i>minimum indentation</i> (min) is determined as follows:
4025      * <ul>
4026      *   <li><p>For each non-blank line (as defined by {@link String#isBlank()}),
4027      *   the leading {@linkplain Character#isWhitespace(int) white space}
4028      *   characters are counted.</p>
4029      *   </li>
4030      *   <li><p>The leading {@linkplain Character#isWhitespace(int) white space}
4031      *   characters on the last line are also counted even if
4032      *   {@linkplain String#isBlank() blank}.</p>
4033      *   </li>
4034      * </ul>
4035      * <p>The <i>min</i> value is the smallest of these counts.
4036      * <p>
4037      * For each {@linkplain String#isBlank() non-blank} line, <i>min</i> leading
4038      * {@linkplain Character#isWhitespace(int) white space} characters are
4039      * removed, and any trailing {@linkplain Character#isWhitespace(int) white
4040      * space} characters are removed. {@linkplain String#isBlank() Blank} lines
4041      * are replaced with the empty string.
4042      *
4043      * <p>
4044      * Finally, the lines are joined into a new string, using the LF character
4045      * {@code "\n"} (U+000A) to separate lines.
4046      *
4047      * @apiNote
4048      * This method's primary purpose is to shift a block of lines as far as
4049      * possible to the left, while preserving relative indentation. Lines
4050      * that were indented the least will thus have no leading
4051      * {@linkplain Character#isWhitespace(int) white space}.
4052      * The result will have the same number of line terminators as this string.
4053      * If this string ends with a line terminator then the result will end
4054      * with a line terminator.
4055      *
4056      * @implSpec
4057      * This method treats all {@linkplain Character#isWhitespace(int) white space}
4058      * characters as having equal width. As long as the indentation on every
4059      * line is consistently composed of the same character sequences, then the
4060      * result will be as described above.
4061      *
4062      * @return string with incidental indentation removed and line
4063      *         terminators normalized
4064      *
4065      * @see String#lines()
4066      * @see String#isBlank()
4067      * @see String#indent(int)
4068      * @see Character#isWhitespace(int)
4069      *
4070      * @since 15
4071      *
4072      */
4073     public String stripIndent() {
4074         int length = length();
4075         if (length == 0) {
4076             return "";
4077         }
4078         char lastChar = charAt(length - 1);
4079         boolean optOut = lastChar == '\n' || lastChar == '\r';
4080         List<String> lines = lines().toList();
4081         final int outdent = optOut ? 0 : outdent(lines);
4082         return lines.stream()
4083             .map(line -> {
4084                 int firstNonWhitespace = line.indexOfNonWhitespace();
4085                 int lastNonWhitespace = line.lastIndexOfNonWhitespace();
4086                 int incidentalWhitespace = Math.min(outdent, firstNonWhitespace);
4087                 return firstNonWhitespace > lastNonWhitespace
4088                     ? "" : line.substring(incidentalWhitespace, lastNonWhitespace);
4089             })
4090             .collect(Collectors.joining("\n", "", optOut ? "\n" : ""));
4091     }
4092 
4093     private static int outdent(List<String> lines) {
4094         // Note: outdent is guaranteed to be zero or positive number.
4095         // If there isn't a non-blank line then the last must be blank
4096         int outdent = Integer.MAX_VALUE;
4097         for (String line : lines) {
4098             int leadingWhitespace = line.indexOfNonWhitespace();
4099             if (leadingWhitespace != line.length()) {
4100                 outdent = Integer.min(outdent, leadingWhitespace);
4101             }
4102         }
4103         String lastLine = lines.get(lines.size() - 1);
4104         if (lastLine.isBlank()) {
4105             outdent = Integer.min(outdent, lastLine.length());
4106         }
4107         return outdent;
4108     }
4109 
4110     /**
4111      * Returns a string whose value is this string, with escape sequences
4112      * translated as if in a string literal.
4113      * <p>
4114      * Escape sequences are translated as follows;
4115      * <table class="striped">
4116      *   <caption style="display:none">Translation</caption>
4117      *   <thead>
4118      *   <tr>
4119      *     <th scope="col">Escape</th>
4120      *     <th scope="col">Name</th>
4121      *     <th scope="col">Translation</th>
4122      *   </tr>
4123      *   </thead>
4124      *   <tbody>
4125      *   <tr>
4126      *     <th scope="row">{@code \u005Cb}</th>
4127      *     <td>backspace</td>
4128      *     <td>{@code U+0008}</td>
4129      *   </tr>
4130      *   <tr>
4131      *     <th scope="row">{@code \u005Ct}</th>
4132      *     <td>horizontal tab</td>
4133      *     <td>{@code U+0009}</td>
4134      *   </tr>
4135      *   <tr>
4136      *     <th scope="row">{@code \u005Cn}</th>
4137      *     <td>line feed</td>
4138      *     <td>{@code U+000A}</td>
4139      *   </tr>
4140      *   <tr>
4141      *     <th scope="row">{@code \u005Cf}</th>
4142      *     <td>form feed</td>
4143      *     <td>{@code U+000C}</td>
4144      *   </tr>
4145      *   <tr>
4146      *     <th scope="row">{@code \u005Cr}</th>
4147      *     <td>carriage return</td>
4148      *     <td>{@code U+000D}</td>
4149      *   </tr>
4150      *   <tr>
4151      *     <th scope="row">{@code \u005Cs}</th>
4152      *     <td>space</td>
4153      *     <td>{@code U+0020}</td>
4154      *   </tr>
4155      *   <tr>
4156      *     <th scope="row">{@code \u005C"}</th>
4157      *     <td>double quote</td>
4158      *     <td>{@code U+0022}</td>
4159      *   </tr>
4160      *   <tr>
4161      *     <th scope="row">{@code \u005C'}</th>
4162      *     <td>single quote</td>
4163      *     <td>{@code U+0027}</td>
4164      *   </tr>
4165      *   <tr>
4166      *     <th scope="row">{@code \u005C\u005C}</th>
4167      *     <td>backslash</td>
4168      *     <td>{@code U+005C}</td>
4169      *   </tr>
4170      *   <tr>
4171      *     <th scope="row">{@code \u005C0 - \u005C377}</th>
4172      *     <td>octal escape</td>
4173      *     <td>code point equivalents</td>
4174      *   </tr>
4175      *   <tr>
4176      *     <th scope="row">{@code \u005C<line-terminator>}</th>
4177      *     <td>continuation</td>
4178      *     <td>discard</td>
4179      *   </tr>
4180      *   </tbody>
4181      * </table>
4182      *
4183      * @implNote
4184      * This method does <em>not</em> translate Unicode escapes such as "{@code \u005cu2022}".
4185      * Unicode escapes are translated by the Java compiler when reading input characters and
4186      * are not part of the string literal specification.
4187      *
4188      * @throws IllegalArgumentException when an escape sequence is malformed.
4189      *
4190      * @return String with escape sequences translated.
4191      *
4192      * @jls 3.10.7 Escape Sequences
4193      *
4194      * @since 15
4195      */
4196     public String translateEscapes() {
4197         if (isEmpty()) {
4198             return "";
4199         }
4200         char[] chars = toCharArray();
4201         int length = chars.length;
4202         int from = 0;
4203         int to = 0;
4204         while (from < length) {
4205             char ch = chars[from++];
4206             if (ch == '\\') {
4207                 ch = from < length ? chars[from++] : '\0';
4208                 switch (ch) {
4209                 case 'b':
4210                     ch = '\b';
4211                     break;
4212                 case 'f':
4213                     ch = '\f';
4214                     break;
4215                 case 'n':
4216                     ch = '\n';
4217                     break;
4218                 case 'r':
4219                     ch = '\r';
4220                     break;
4221                 case 's':
4222                     ch = ' ';
4223                     break;
4224                 case 't':
4225                     ch = '\t';
4226                     break;
4227                 case '\'':
4228                 case '\"':
4229                 case '\\':
4230                     // as is
4231                     break;
4232                 case '0': case '1': case '2': case '3':
4233                 case '4': case '5': case '6': case '7':
4234                     int limit = Integer.min(from + (ch <= '3' ? 2 : 1), length);
4235                     int code = ch - '0';
4236                     while (from < limit) {
4237                         ch = chars[from];
4238                         if (ch < '0' || '7' < ch) {
4239                             break;
4240                         }
4241                         from++;
4242                         code = (code << 3) | (ch - '0');
4243                     }
4244                     ch = (char)code;
4245                     break;
4246                 case '\n':
4247                     continue;
4248                 case '\r':
4249                     if (from < length && chars[from] == '\n') {
4250                         from++;
4251                     }
4252                     continue;
4253                 default: {
4254                     String msg = String.format(
4255                         "Invalid escape sequence: \\%c \\\\u%04X",
4256                         ch, (int)ch);
4257                     throw new IllegalArgumentException(msg);
4258                 }
4259                 }
4260             }
4261 
4262             chars[to++] = ch;
4263         }
4264 
4265         return new String(chars, 0, to);
4266     }
4267 
4268     /**
4269      * This method allows the application of a function to {@code this}
4270      * string. The function should expect a single String argument
4271      * and produce an {@code R} result.
4272      * <p>
4273      * Any exception thrown by {@code f.apply()} will be propagated to the
4274      * caller.
4275      *
4276      * @param f    a function to apply
4277      *
4278      * @param <R>  the type of the result
4279      *
4280      * @return     the result of applying the function to this string
4281      *
4282      * @see java.util.function.Function
4283      *
4284      * @since 12
4285      */
4286     public <R> R transform(Function<? super String, ? extends R> f) {
4287         return f.apply(this);
4288     }
4289 
4290     /**
4291      * This object (which is already a string!) is itself returned.
4292      *
4293      * @return  the string itself.
4294      */
4295     public String toString() {
4296         return this;
4297     }
4298 
4299     /**
4300      * Returns a stream of {@code int} zero-extending the {@code char} values
4301      * from this sequence.  Any char which maps to a {@linkplain
4302      * Character##unicode surrogate code point} is passed through
4303      * uninterpreted.
4304      *
4305      * @return an IntStream of char values from this sequence
4306      * @since 9
4307      */
4308     @Override
4309     public IntStream chars() {
4310         return StreamSupport.intStream(
4311             isLatin1() ? new StringLatin1.CharsSpliterator(value, Spliterator.IMMUTABLE)
4312                        : new StringUTF16.CharsSpliterator(value, Spliterator.IMMUTABLE),
4313             false);
4314     }
4315 
4316 
4317     /**
4318      * Returns a stream of code point values from this sequence.  Any surrogate
4319      * pairs encountered in the sequence are combined as if by {@linkplain
4320      * Character#toCodePoint Character.toCodePoint} and the result is passed
4321      * to the stream. Any other code units, including ordinary BMP characters,
4322      * unpaired surrogates, and undefined code units, are zero-extended to
4323      * {@code int} values which are then passed to the stream.
4324      *
4325      * @return an IntStream of Unicode code points from this sequence
4326      * @since 9
4327      */
4328     @Override
4329     public IntStream codePoints() {
4330         return StreamSupport.intStream(
4331             isLatin1() ? new StringLatin1.CharsSpliterator(value, Spliterator.IMMUTABLE)
4332                        : new StringUTF16.CodePointsSpliterator(value, Spliterator.IMMUTABLE),
4333             false);
4334     }
4335 
4336     /**
4337      * Converts this string to a new character array.
4338      *
4339      * @return  a newly allocated character array whose length is the length
4340      *          of this string and whose contents are initialized to contain
4341      *          the character sequence represented by this string.
4342      */
4343     public char[] toCharArray() {
4344         return isLatin1() ? StringLatin1.toChars(value)
4345                           : StringUTF16.toChars(value);
4346     }
4347 
4348     /**
4349      * Returns a formatted string using the specified format string and
4350      * arguments.
4351      *
4352      * <p> The locale always used is the one returned by {@link
4353      * java.util.Locale#getDefault(java.util.Locale.Category)
4354      * Locale.getDefault(Locale.Category)} with
4355      * {@link java.util.Locale.Category#FORMAT FORMAT} category specified.
4356      *
4357      * @param  format
4358      *         A <a href="../util/Formatter.html#syntax">format string</a>
4359      *
4360      * @param  args
4361      *         Arguments referenced by the format specifiers in the format
4362      *         string.  If there are more arguments than format specifiers, the
4363      *         extra arguments are ignored.  The number of arguments is
4364      *         variable and may be zero.  The maximum number of arguments is
4365      *         limited by the maximum dimension of a Java array as defined by
4366      *         <cite>The Java Virtual Machine Specification</cite>.
4367      *         The behaviour on a
4368      *         {@code null} argument depends on the <a
4369      *         href="../util/Formatter.html#syntax">conversion</a>.
4370      *
4371      * @throws  java.util.IllegalFormatException
4372      *          If a format string contains an illegal syntax, a format
4373      *          specifier that is incompatible with the given arguments,
4374      *          insufficient arguments given the format string, or other
4375      *          illegal conditions.  For specification of all possible
4376      *          formatting errors, see the <a
4377      *          href="../util/Formatter.html#detail">Details</a> section of the
4378      *          formatter class specification.
4379      *
4380      * @return  A formatted string
4381      *
4382      * @see  java.util.Formatter
4383      * @since  1.5
4384      */
4385     public static String format(String format, Object... args) {
4386         return new Formatter().format(format, args).toString();
4387     }
4388 
4389     /**
4390      * Returns a formatted string using the specified locale, format string,
4391      * and arguments.
4392      *
4393      * @param  l
4394      *         The {@linkplain java.util.Locale locale} to apply during
4395      *         formatting.  If {@code l} is {@code null} then no localization
4396      *         is applied.
4397      *
4398      * @param  format
4399      *         A <a href="../util/Formatter.html#syntax">format string</a>
4400      *
4401      * @param  args
4402      *         Arguments referenced by the format specifiers in the format
4403      *         string.  If there are more arguments than format specifiers, the
4404      *         extra arguments are ignored.  The number of arguments is
4405      *         variable and may be zero.  The maximum number of arguments is
4406      *         limited by the maximum dimension of a Java array as defined by
4407      *         <cite>The Java Virtual Machine Specification</cite>.
4408      *         The behaviour on a
4409      *         {@code null} argument depends on the
4410      *         <a href="../util/Formatter.html#syntax">conversion</a>.
4411      *
4412      * @throws  java.util.IllegalFormatException
4413      *          If a format string contains an illegal syntax, a format
4414      *          specifier that is incompatible with the given arguments,
4415      *          insufficient arguments given the format string, or other
4416      *          illegal conditions.  For specification of all possible
4417      *          formatting errors, see the <a
4418      *          href="../util/Formatter.html#detail">Details</a> section of the
     *          formatter class specification.
4420      *
4421      * @return  A formatted string
4422      *
4423      * @see  java.util.Formatter
4424      * @since  1.5
4425      */
4426     public static String format(Locale l, String format, Object... args) {
4427         return new Formatter(l).format(format, args).toString();
4428     }
4429 
4430     /**
4431      * Formats using this string as the format string, and the supplied
4432      * arguments.
4433      *
4434      * @implSpec This method is equivalent to {@code String.format(this, args)}.
4435      *
4436      * @param  args
4437      *         Arguments referenced by the format specifiers in this string.
4438      *
4439      * @return  A formatted string
4440      *
4441      * @see  java.lang.String#format(String,Object...)
4442      * @see  java.util.Formatter
4443      *
4444      * @since 15
4445      *
4446      */
4447     public String formatted(Object... args) {
4448         return new Formatter().format(this, args).toString();
4449     }
4450 
4451     /**
4452      * Returns the string representation of the {@code Object} argument.
4453      *
4454      * @param   obj   an {@code Object}.
4455      * @return  if the argument is {@code null}, then a string equal to
4456      *          {@code "null"}; otherwise, the value of
4457      *          {@code obj.toString()} is returned.
4458      * @see     java.lang.Object#toString()
4459      */
4460     public static String valueOf(Object obj) {
4461         return (obj == null) ? "null" : obj.toString();
4462     }
4463 
4464     /**
4465      * Returns the string representation of the {@code char} array
4466      * argument. The contents of the character array are copied; subsequent
4467      * modification of the character array does not affect the returned
4468      * string.
4469      *
4470      * @param   data     the character array.
4471      * @return  a {@code String} that contains the characters of the
4472      *          character array.
4473      */
4474     public static String valueOf(char[] data) {
4475         return new String(data);
4476     }
4477 
4478     /**
4479      * Returns the string representation of a specific subarray of the
4480      * {@code char} array argument.
4481      * <p>
4482      * The {@code offset} argument is the index of the first
4483      * character of the subarray. The {@code count} argument
4484      * specifies the length of the subarray. The contents of the subarray
4485      * are copied; subsequent modification of the character array does not
4486      * affect the returned string.
4487      *
4488      * @param   data     the character array.
4489      * @param   offset   initial offset of the subarray.
4490      * @param   count    length of the subarray.
4491      * @return  a {@code String} that contains the characters of the
4492      *          specified subarray of the character array.
4493      * @throws    IndexOutOfBoundsException if {@code offset} is
4494      *          negative, or {@code count} is negative, or
4495      *          {@code offset+count} is larger than
4496      *          {@code data.length}.
4497      */
4498     public static String valueOf(char[] data, int offset, int count) {
4499         return new String(data, offset, count);
4500     }
4501 
4502     /**
4503      * Equivalent to {@link #valueOf(char[], int, int)}.
4504      *
4505      * @param   data     the character array.
4506      * @param   offset   initial offset of the subarray.
4507      * @param   count    length of the subarray.
4508      * @return  a {@code String} that contains the characters of the
4509      *          specified subarray of the character array.
4510      * @throws    IndexOutOfBoundsException if {@code offset} is
4511      *          negative, or {@code count} is negative, or
4512      *          {@code offset+count} is larger than
4513      *          {@code data.length}.
4514      */
4515     public static String copyValueOf(char[] data, int offset, int count) {
4516         return new String(data, offset, count);
4517     }
4518 
4519     /**
4520      * Equivalent to {@link #valueOf(char[])}.
4521      *
4522      * @param   data   the character array.
4523      * @return  a {@code String} that contains the characters of the
4524      *          character array.
4525      */
4526     public static String copyValueOf(char[] data) {
4527         return new String(data);
4528     }
4529 
4530     /**
4531      * Returns the string representation of the {@code boolean} argument.
4532      *
4533      * @param   b   a {@code boolean}.
4534      * @return  if the argument is {@code true}, a string equal to
4535      *          {@code "true"} is returned; otherwise, a string equal to
4536      *          {@code "false"} is returned.
4537      */
4538     public static String valueOf(boolean b) {
4539         return b ? "true" : "false";
4540     }
4541 
4542     /**
4543      * Returns the string representation of the {@code char}
4544      * argument.
4545      *
4546      * @param   c   a {@code char}.
4547      * @return  a string of length {@code 1} containing
4548      *          as its single character the argument {@code c}.
4549      */
4550     public static String valueOf(char c) {
4551         if (COMPACT_STRINGS && StringLatin1.canEncode(c)) {
4552             return new String(StringLatin1.toBytes(c), LATIN1);
4553         }
4554         return new String(StringUTF16.toBytes(c), UTF16);
4555     }
4556 
4557     /**
4558      * Returns the string representation of the {@code int} argument.
4559      * <p>
4560      * The representation is exactly the one returned by the
4561      * {@code Integer.toString} method of one argument.
4562      *
4563      * @param   i   an {@code int}.
4564      * @return  a string representation of the {@code int} argument.
     * @see     java.lang.Integer#toString(int)
4566      */
4567     public static String valueOf(int i) {
4568         return Integer.toString(i);
4569     }
4570 
4571     /**
4572      * Returns the string representation of the {@code long} argument.
4573      * <p>
4574      * The representation is exactly the one returned by the
4575      * {@code Long.toString} method of one argument.
4576      *
4577      * @param   l   a {@code long}.
4578      * @return  a string representation of the {@code long} argument.
4579      * @see     java.lang.Long#toString(long)
4580      */
4581     public static String valueOf(long l) {
4582         return Long.toString(l);
4583     }
4584 
4585     /**
4586      * Returns the string representation of the {@code float} argument.
4587      * <p>
4588      * The representation is exactly the one returned by the
4589      * {@code Float.toString} method of one argument.
4590      *
4591      * @param   f   a {@code float}.
4592      * @return  a string representation of the {@code float} argument.
4593      * @see     java.lang.Float#toString(float)
4594      */
4595     public static String valueOf(float f) {
4596         return Float.toString(f);
4597     }
4598 
4599     /**
4600      * Returns the string representation of the {@code double} argument.
4601      * <p>
4602      * The representation is exactly the one returned by the
4603      * {@code Double.toString} method of one argument.
4604      *
4605      * @param   d   a {@code double}.
     * @return  a string representation of the {@code double} argument.
4607      * @see     java.lang.Double#toString(double)
4608      */
4609     public static String valueOf(double d) {
4610         return Double.toString(d);
4611     }
4612 
4613     /**
4614      * Returns a canonical representation for the string object.
4615      * <p>
4616      * A pool of strings, initially empty, is maintained privately by the
4617      * class {@code String}.
4618      * <p>
4619      * When the intern method is invoked, if the pool already contains a
4620      * string equal to this {@code String} object as determined by
4621      * the {@link #equals(Object)} method, then the string from the pool is
4622      * returned. Otherwise, this {@code String} object is added to the
4623      * pool and a reference to this {@code String} object is returned.
4624      * <p>
4625      * It follows that for any two strings {@code s} and {@code t},
4626      * {@code s.intern() == t.intern()} is {@code true}
4627      * if and only if {@code s.equals(t)} is {@code true}.
4628      * <p>
4629      * All literal strings and string-valued constant expressions are
     * interned. String literals are defined in section {@jls 3.10.5} of
     * <cite>The Java Language Specification</cite>.
4632      *
4633      * @return  a string that has the same contents as this string, but is
4634      *          guaranteed to be from a pool of unique strings.
4635      */
    public native String intern(); // declared native: there is no Java body in this source file
4637 
4638     /**
4639      * Returns a string whose value is the concatenation of this
4640      * string repeated {@code count} times.
4641      * <p>
4642      * If this string is empty or count is zero then the empty
4643      * string is returned.
4644      *
4645      * @param   count number of times to repeat
4646      *
4647      * @return  A string composed of this string repeated
4648      *          {@code count} times or the empty string if this
4649      *          string is empty or count is zero
4650      *
4651      * @throws  IllegalArgumentException if the {@code count} is
4652      *          negative.
4653      *
4654      * @since 11
4655      */
4656     public String repeat(int count) {
4657         if (count < 0) {
4658             throw new IllegalArgumentException("count is negative: " + count);
4659         }
4660         if (count == 1) {
4661             return this;
4662         }
4663         final int len = value.length;
4664         if (len == 0 || count == 0) {
4665             return "";
4666         }
4667         if (Integer.MAX_VALUE / count < len) {
4668             throw new OutOfMemoryError("Required length exceeds implementation limit");
4669         }
4670         if (len == 1) {
4671             final byte[] single = new byte[count];
4672             Arrays.fill(single, value[0]);
4673             return new String(single, coder);
4674         }
4675         final int limit = len * count;
4676         final byte[] multiple = new byte[limit];
4677         System.arraycopy(value, 0, multiple, 0, len);
4678         repeatCopyRest(multiple, 0, limit, len);
4679         return new String(multiple, coder);
4680     }
4681 
4682     /**
4683      * Used to perform copying after the initial insertion. Copying is optimized
4684      * by using power of two duplication. First pass duplicates original copy,
4685      * second pass then duplicates the original and the copy yielding four copies,
4686      * third pass duplicates four copies yielding eight copies, and so on.
4687      * Finally, the remainder is filled in with prior copies.
4688      *
4689      * @implNote The technique used here is significantly faster than hand-rolled
4690      * loops or special casing small numbers due to the intensive optimization
4691      * done by intrinsic {@code System.arraycopy}.
4692      *
4693      * @param buffer    destination buffer
4694      * @param offset    offset in the destination buffer
4695      * @param limit     total replicated including what is already in the buffer
     * @param copied    number of bytes that are already in the buffer
4697      */
4698     static void repeatCopyRest(byte[] buffer, int offset, int limit, int copied) {
4699         // Initial copy is in the buffer.
4700         for (; copied < limit - copied; copied <<= 1) {
4701             // Power of two duplicate.
4702             System.arraycopy(buffer, offset, buffer, offset + copied, copied);
4703         }
4704         // Duplicate remainder.
4705         System.arraycopy(buffer, offset, buffer, offset + copied, limit - copied);
4706     }
4707 
4708     ////////////////////////////////////////////////////////////////
4709 
4710     /**
4711      * Copy character bytes from this string into dst starting at dstBegin.
4712      * This method doesn't perform any range checking.
4713      *
4714      * Invoker guarantees: dst is in UTF16 (inflate itself for asb), if two
4715      * coders are different, and dst is big enough (range check)
4716      *
4717      * @param dstBegin  the char index, not offset of byte[]
4718      * @param coder     the coder of dst[]
4719      */
4720     void getBytes(byte[] dst, int dstBegin, byte coder) {
4721         if (coder() == coder) {
4722             System.arraycopy(value, 0, dst, dstBegin << coder, value.length);
4723         } else {    // this.coder == LATIN && coder == UTF16
4724             StringLatin1.inflate(value, 0, dst, dstBegin, value.length);
4725         }
4726     }
4727 
4728     /**
4729      * Copy character bytes from this string into dst starting at dstBegin.
4730      * This method doesn't perform any range checking.
4731      *
4732      * Invoker guarantees: dst is in UTF16 (inflate itself for asb), if two
4733      * coders are different, and dst is big enough (range check)
4734      *
4735      * @param srcPos    the char index, not offset of byte[]
4736      * @param dstBegin  the char index to start from
4737      * @param coder     the coder of dst[]
4738      * @param length    the amount of copied chars
4739      */
4740     void getBytes(byte[] dst, int srcPos, int dstBegin, byte coder, int length) {
4741         if (coder() == coder) {
4742             System.arraycopy(value, srcPos << coder, dst, dstBegin << coder, length << coder);
4743         } else {    // this.coder == LATIN && coder == UTF16
4744             StringLatin1.inflate(value, srcPos, dst, dstBegin, length);
4745         }
4746     }
4747 
4748     /*
4749      * Package private constructor. Trailing Void argument is there for
4750      * disambiguating it against other (public) constructors.
4751      *
     * Stores the char[] value into a byte[] in which each byte represents
     * the 8 low-order bits of the corresponding character, if the char[]
     * contains only latin1 characters. Otherwise stores it into a byte[]
     * that holds all characters in their byte sequences defined by
     * {@code StringUTF16}.
4756      */
4757     String(char[] value, int off, int len, Void sig) {
4758         if (len == 0) {
4759             this.value = "".value;
4760             this.coder = "".coder;
4761             return;
4762         }
4763         if (COMPACT_STRINGS) {
4764             byte[] val = StringUTF16.compress(value, off, len);
4765             if (val != null) {
4766                 this.value = val;
4767                 this.coder = LATIN1;
4768                 return;
4769             }
4770         }
4771         this.coder = UTF16;
4772         this.value = StringUTF16.toBytes(value, off, len);
4773     }
4774 
4775     /*
4776      * Package private constructor. Trailing Void argument is there for
4777      * disambiguating it against other (public) constructors.
4778      */
4779     String(AbstractStringBuilder asb, Void sig) {
4780         byte[] val = asb.getValue();
4781         int length = asb.length();
4782         if (asb.isLatin1()) {
4783             this.coder = LATIN1;
4784             this.value = Arrays.copyOfRange(val, 0, length);
4785         } else {
4786             // only try to compress val if some characters were deleted.
4787             if (COMPACT_STRINGS && asb.maybeLatin1) {
4788                 byte[] buf = StringUTF16.compress(val, 0, length);
4789                 if (buf != null) {
4790                     this.coder = LATIN1;
4791                     this.value = buf;
4792                     return;
4793                 }
4794             }
4795             this.coder = UTF16;
4796             this.value = Arrays.copyOfRange(val, 0, length << 1);
4797         }
4798     }
4799 
4800    /*
4801     * Package private constructor which shares value array for speed.
4802     */
4803     String(byte[] value, byte coder) {
4804         this.value = value;
4805         this.coder = coder;
4806     }
4807 
4808     byte coder() {
4809         return COMPACT_STRINGS ? coder : UTF16;
4810     }
4811 
4812     byte[] value() {
4813         return value;
4814     }
4815 
4816     boolean isLatin1() {
4817         return COMPACT_STRINGS && coder == LATIN1;
4818     }
4819 
    // Coder tags recording how a string's backing byte[] encodes its chars.
    // The numeric values double as shift amounts when a char index is scaled
    // to a byte offset (see the "<< coder" expressions in getBytes).
    @Native static final byte LATIN1 = 0;
    @Native static final byte UTF16  = 1;
4822 
4823     /*
4824      * StringIndexOutOfBoundsException  if {@code index} is
4825      * negative or greater than or equal to {@code length}.
4826      */
4827     static void checkIndex(int index, int length) {
4828         Preconditions.checkIndex(index, length, Preconditions.SIOOBE_FORMATTER);
4829     }
4830 
4831     /*
4832      * StringIndexOutOfBoundsException  if {@code offset}
4833      * is negative or greater than {@code length}.
4834      */
4835     static void checkOffset(int offset, int length) {
4836         Preconditions.checkFromToIndex(offset, length, length, Preconditions.SIOOBE_FORMATTER);
4837     }
4838 
4839     /*
4840      * Check {@code offset}, {@code count} against {@code 0} and {@code length}
4841      * bounds.
4842      *
4843      * @return  {@code offset} if the sub-range within bounds of the range
4844      * @throws  StringIndexOutOfBoundsException
4845      *          If {@code offset} is negative, {@code count} is negative,
4846      *          or {@code offset} is greater than {@code length - count}
4847      */
4848     static int checkBoundsOffCount(int offset, int count, int length) {
4849         return Preconditions.checkFromIndexSize(offset, count, length, Preconditions.SIOOBE_FORMATTER);
4850     }
4851 
4852     /*
4853      * Check {@code begin}, {@code end} against {@code 0} and {@code length}
4854      * bounds.
4855      *
4856      * @throws  StringIndexOutOfBoundsException
4857      *          If {@code begin} is negative, {@code begin} is greater than
4858      *          {@code end}, or {@code end} is greater than {@code length}.
4859      */
4860     static void checkBoundsBeginEnd(int begin, int end, int length) {
4861         Preconditions.checkFromToIndex(begin, end, length, Preconditions.SIOOBE_FORMATTER);
4862     }
4863 
4864     /**
4865      * Returns the string representation of the {@code codePoint}
4866      * argument.
4867      *
4868      * @param   codePoint a {@code codePoint}.
4869      * @return  a string of length {@code 1} or {@code 2} containing
4870      *          as its single character the argument {@code codePoint}.
4871      * @throws IllegalArgumentException if the specified
4872      *          {@code codePoint} is not a {@linkplain Character#isValidCodePoint
4873      *          valid Unicode code point}.
4874      */
4875     static String valueOfCodePoint(int codePoint) {
4876         if (COMPACT_STRINGS && StringLatin1.canEncode(codePoint)) {
4877             return new String(StringLatin1.toBytes((char)codePoint), LATIN1);
4878         } else if (Character.isBmpCodePoint(codePoint)) {
4879             return new String(StringUTF16.toBytes((char)codePoint), UTF16);
4880         } else if (Character.isSupplementaryCodePoint(codePoint)) {
4881             return new String(StringUTF16.toBytesSupplementary(codePoint), UTF16);
4882         }
4883 
4884         throw new IllegalArgumentException(
4885             format("Not a valid Unicode code point: 0x%X", codePoint));
4886     }
4887 
4888     /**
4889      * Returns an {@link Optional} containing the nominal descriptor for this
4890      * instance, which is the instance itself.
4891      *
4892      * @return an {@link Optional} describing the {@linkplain String} instance
4893      * @since 12
4894      */
4895     @Override
4896     public Optional<String> describeConstable() {
4897         return Optional.of(this);
4898     }
4899 
4900     /**
4901      * Resolves this instance as a {@link ConstantDesc}, the result of which is
4902      * the instance itself.
4903      *
4904      * @param lookup ignored
4905      * @return the {@linkplain String} instance
4906      * @since 12
4907      */
4908     @Override
4909     public String resolveConstantDesc(MethodHandles.Lookup lookup) {
4910         return this;
4911     }
4912 
4913 }