< prev index next >

src/java.base/share/classes/java/lang/String.java

Print this page




  58  * The {@code String} class represents character strings. All
  59  * string literals in Java programs, such as {@code "abc"}, are
  60  * implemented as instances of this class.
  61  * <p>
  62  * Strings are constant; their values cannot be changed after they
  63  * are created. String buffers support mutable strings.
  64  * Because String objects are immutable they can be shared. For example:
  65  * <blockquote><pre>
  66  *     String str = "abc";
  67  * </pre></blockquote><p>
  68  * is equivalent to:
  69  * <blockquote><pre>
  70  *     char data[] = {'a', 'b', 'c'};
  71  *     String str = new String(data);
  72  * </pre></blockquote><p>
  73  * Here are some more examples of how strings can be used:
  74  * <blockquote><pre>
  75  *     System.out.println("abc");
  76  *     String cde = "cde";
  77  *     System.out.println("abc" + cde);
  78  *     String c = "abc".substring(2, 3);
  79  *     String d = cde.substring(1, 2);
  80  * </pre></blockquote>
  81  * <p>
  82  * The class {@code String} includes methods for examining
  83  * individual characters of the sequence, for comparing strings, for
  84  * searching strings, for extracting substrings, and for creating a
  85  * copy of a string with all characters translated to uppercase or to
  86  * lowercase. Case mapping is based on the Unicode Standard version
  87  * specified by the {@link java.lang.Character Character} class.
  88  * <p>
  89  * The Java language provides special support for the string
  90  * concatenation operator (&nbsp;+&nbsp;), and for conversion of
  91  * other objects to strings. For additional information on string
  92  * concatenation and conversion, see <i>The Java&trade; Language Specification</i>.
  93  *
  94  * <p> Unless otherwise noted, passing a {@code null} argument to a constructor
  95  * or method in this class will cause a {@link NullPointerException} to be
  96  * thrown.
  97  *
  98  * <p>A {@code String} represents a string in the UTF-16 format


 147      */
 148     @Stable
 149     private final byte[] value;
 150 
 151     /**
 152      * The identifier of the encoding used to encode the bytes in
 153      * {@code value}. The supported values in this implementation are
 154      *
 155      * LATIN1
 156      * UTF16
 157      *
 158      * @implNote This field is trusted by the VM, and is subject to
 159      * constant folding if the String instance is constant. Overwriting this
 160      * field after construction will cause problems.
 161      */
 162     private final byte coder;
 163 
 164     /** Cached hash code for the string; see {@code hashCode()}. */
 165     private int hash; // Defaults to 0
 166 
 167     /**
 168      * Records that the hash has been calculated and is actually zero, so
 169      * that {@code hashCode()} can avoid recalculating it.
 170      */
 171     private boolean hashIsZero; // Defaults to false
 172 
 173     /** Use serialVersionUID from JDK 1.0.2 for interoperability. */
 174     private static final long serialVersionUID = -6849794470754667710L;
 175 
 176     /**
 177      * If String compaction is disabled, the bytes in {@code value} are
 178      * always encoded in UTF16.
 179      *
 180      * For methods with several possible implementation paths, when String
 181      * compaction is disabled, only one code path is taken.
 182      *
 183      * The instance field value is generally opaque to optimizing JIT
 184      * compilers. Therefore, in performance-sensitive places, an explicit
 185      * check of the static boolean {@code COMPACT_STRINGS} is done first
 186      * before checking the {@code coder} field since the static boolean
 187      * {@code COMPACT_STRINGS} would be constant folded away by an
 188      * optimizing JIT compiler. The idioms for these cases are as follows.
 189      *
 190      * For code such as:
 191      *
 192      *    if (coder == LATIN1) { ... }


1003      * object.
1004      *
1005      * <p>For finer-grained String comparison, refer to
1006      * {@link java.text.Collator}.
1007      *
1008      * @param  anObject
1009      *         The object to compare this {@code String} against
1010      *
1011      * @return  {@code true} if the given object represents a {@code String}
1012      *          equivalent to this string, {@code false} otherwise
1013      *
1014      * @see  #compareTo(String)
1015      * @see  #equalsIgnoreCase(String)
1016      */
1017     public boolean equals(Object anObject) {
1018         if (this == anObject) {
1019             return true;
1020         }
1021         if (anObject instanceof String) {
1022             String aString = (String)anObject;
1023             if (!COMPACT_STRINGS || this.coder == aString.coder) {
1024                 return StringLatin1.equals(value, aString.value);

1025             }
1026         }
1027         return false;
1028     }
1029 
1030     /**
1031      * Compares this string to the specified {@code StringBuffer}.  The result
1032      * is {@code true} if and only if this {@code String} represents the same
1033      * sequence of characters as the specified {@code StringBuffer}. This method
1034      * synchronizes on the {@code StringBuffer}.
1035      *
1036      * <p>For finer-grained String comparison, refer to
1037      * {@link java.text.Collator}.
1038      *
1039      * @param  sb
1040      *         The {@code StringBuffer} to compare this {@code String} against
1041      *
1042      * @return  {@code true} if this {@code String} represents the same
1043      *          sequence of characters as the specified {@code StringBuffer},
1044      *          {@code false} otherwise
1045      *
1046      * @since  1.4
1047      */
1048     public boolean contentEquals(StringBuffer sb) {
1049         return contentEquals((CharSequence)sb);
1050     }
1051 
1052     private boolean nonSyncContentEquals(AbstractStringBuilder sb) {
1053         int len = length();
1054         if (len != sb.length()) {
1055             return false;
1056         }
1057         byte v1[] = value;
1058         byte v2[] = sb.getValue();
1059         byte coder = coder();
1060         if (coder == sb.getCoder()) {
1061             int n = v1.length;
1062             for (int i = 0; i < n; i++) {
1063                 if (v1[i] != v2[i]) {
1064                     return false;
1065                 }
1066             }
1067         } else {
1068             if (coder != LATIN1) {  // utf16 str and latin1 abs can never be "equal"
1069                 return false;
1070             }
1071             return StringUTF16.contentEquals(v1, v2, len);
1072         }
1073         return true;
1074     }
1075 
1076     /**
1077      * Compares this string to the specified {@code CharSequence}.  The
1078      * result is {@code true} if and only if this {@code String} represents the
1079      * same sequence of char values as the specified sequence. Note that if the
1080      * {@code CharSequence} is a {@code StringBuffer} then the method
1081      * synchronizes on it.
1082      *
1083      * <p>For finer-grained String comparison, refer to
1084      * {@link java.text.Collator}.
1085      *
1086      * @param  cs
1087      *         The sequence to compare this {@code String} against
1088      *


1192      * string lexicographically precedes the longer string. In this case,
1193      * {@code compareTo} returns the difference of the lengths of the
1194      * strings -- that is, the value:
1195      * <blockquote><pre>
1196      * this.length()-anotherString.length()
1197      * </pre></blockquote>
1198      *
1199      * <p>For finer-grained String comparison, refer to
1200      * {@link java.text.Collator}.
1201      *
1202      * @param   anotherString   the {@code String} to be compared.
1203      * @return  the value {@code 0} if the argument string is equal to
1204      *          this string; a value less than {@code 0} if this string
1205      *          is lexicographically less than the string argument; and a
1206      *          value greater than {@code 0} if this string is
1207      *          lexicographically greater than the string argument.
1208      */
1209     public int compareTo(String anotherString) {
1210         byte v1[] = value;
1211         byte v2[] = anotherString.value;
1212         byte coder = coder();
1213         if (coder == anotherString.coder()) {
1214             return coder == LATIN1 ? StringLatin1.compareTo(v1, v2)
1215                                    : StringUTF16.compareTo(v1, v2);
1216         }
1217         return coder == LATIN1 ? StringLatin1.compareToUTF16(v1, v2)
1218                                : StringUTF16.compareToLatin1(v1, v2);
1219      }
1220 
1221     /**
1222      * A Comparator that orders {@code String} objects as by
1223      * {@code compareToIgnoreCase}. This comparator is serializable.
1224      * <p>
1225      * Note that this Comparator does <em>not</em> take locale into account,
1226      * and will result in an unsatisfactory ordering for certain locales.
1227      * The {@link java.text.Collator} class provides locale-sensitive comparison.
1228      *
1229      * @see     java.text.Collator
1230      * @since   1.2
1231      */
1232     public static final Comparator<String> CASE_INSENSITIVE_ORDER
1233                                          = new CaseInsensitiveComparator();
1234     private static class CaseInsensitiveComparator
1235             implements Comparator<String>, java.io.Serializable {
1236         // use serialVersionUID from JDK 1.2.2 for interoperability
1237         private static final long serialVersionUID = 8575799808933029326L;
1238 
1239         public int compare(String s1, String s2) {
1240             byte v1[] = s1.value;
1241             byte v2[] = s2.value;
1242             byte coder = s1.coder();
1243             if (coder == s2.coder()) {
1244                 return coder == LATIN1 ? StringLatin1.compareToCI(v1, v2)
1245                                        : StringUTF16.compareToCI(v1, v2);
1246             }
1247             return coder == LATIN1 ? StringLatin1.compareToCI_UTF16(v1, v2)
1248                                    : StringUTF16.compareToCI_Latin1(v1, v2);
1249         }
1250 
1251         /** Replaces the de-serialized object. */
1252         private Object readResolve() { return CASE_INSENSITIVE_ORDER; }
1253     }
1254 
1255     /**
1256      * Compares two strings lexicographically, ignoring case
1257      * differences. This method returns an integer whose sign is that of
1258      * calling {@code compareTo} with normalized versions of the strings
1259      * where case differences have been eliminated by calling
1260      * {@code Character.toLowerCase(Character.toUpperCase(character))} on
1261      * each character.
1262      * <p>
1263      * Note that this method does <em>not</em> take locale into account,
1264      * and will result in an unsatisfactory ordering for certain locales.
1265      * The {@link java.text.Collator} class provides locale-sensitive comparison.
1266      *
1267      * @param   str   the {@code String} to be compared.
1268      * @return  a negative integer, zero, or a positive integer as the


1302      * {@link java.text.Collator} class provides locale-sensitive comparison.
1303      *
1304      * @param   toffset   the starting offset of the subregion in this string.
1305      * @param   other     the string argument.
1306      * @param   ooffset   the starting offset of the subregion in the string
1307      *                    argument.
1308      * @param   len       the number of characters to compare.
1309      * @return  {@code true} if the specified subregion of this string
1310      *          exactly matches the specified subregion of the string argument;
1311      *          {@code false} otherwise.
1312      */
1313     public boolean regionMatches(int toffset, String other, int ooffset, int len) {
1314         byte tv[] = value;
1315         byte ov[] = other.value;
1316         // Note: toffset, ooffset, or len might be near -1>>>1.
1317         if ((ooffset < 0) || (toffset < 0) ||
1318              (toffset > (long)length() - len) ||
1319              (ooffset > (long)other.length() - len)) {
1320             return false;
1321         }
1322         byte coder = coder();
1323         if (coder == other.coder()) {
1324             if (!isLatin1() && (len > 0)) {
1325                 toffset = toffset << 1;
1326                 ooffset = ooffset << 1;
1327                 len = len << 1;
1328             }
1329             while (len-- > 0) {
1330                 if (tv[toffset++] != ov[ooffset++]) {
1331                     return false;
1332                 }
1333             }
1334         } else {
1335             if (coder == LATIN1) {
1336                 while (len-- > 0) {
1337                     if (StringLatin1.getChar(tv, toffset++) !=
1338                         StringUTF16.getChar(ov, ooffset++)) {
1339                         return false;
1340                     }
1341                 }
1342             } else {
1343                 while (len-- > 0) {
1344                     if (StringUTF16.getChar(tv, toffset++) !=
1345                         StringLatin1.getChar(ov, ooffset++)) {
1346                         return false;
1347                     }
1348                 }
1349             }
1350         }
1351         return true;
1352     }
1353 
1354     /**
1355      * Tests if two string regions are equal.


1397      * @param   len          the number of characters to compare.
1398      * @return  {@code true} if the specified subregion of this string
1399      *          matches the specified subregion of the string argument;
1400      *          {@code false} otherwise. Whether the matching is exact
1401      *          or case insensitive depends on the {@code ignoreCase}
1402      *          argument.
1403      */
1404     public boolean regionMatches(boolean ignoreCase, int toffset,
1405             String other, int ooffset, int len) {
1406         if (!ignoreCase) {
1407             return regionMatches(toffset, other, ooffset, len);
1408         }
1409         // Note: toffset, ooffset, or len might be near -1>>>1.
1410         if ((ooffset < 0) || (toffset < 0)
1411                 || (toffset > (long)length() - len)
1412                 || (ooffset > (long)other.length() - len)) {
1413             return false;
1414         }
1415         byte tv[] = value;
1416         byte ov[] = other.value;
1417         byte coder = coder();
1418         if (coder == other.coder()) {
1419             return coder == LATIN1
1420               ? StringLatin1.regionMatchesCI(tv, toffset, ov, ooffset, len)
1421               : StringUTF16.regionMatchesCI(tv, toffset, ov, ooffset, len);
1422         }
1423         return coder == LATIN1
1424               ? StringLatin1.regionMatchesCI_UTF16(tv, toffset, ov, ooffset, len)
1425               : StringUTF16.regionMatchesCI_Latin1(tv, toffset, ov, ooffset, len);
1426     }
1427 
1428     /**
1429      * Tests if the substring of this string beginning at the
1430      * specified index starts with the specified prefix.
1431      *
1432      * @param   prefix    the prefix.
1433      * @param   toffset   where to begin looking in this string.
1434      * @return  {@code true} if the character sequence represented by the
1435      *          argument is a prefix of the substring of this object starting
1436      *          at index {@code toffset}; {@code false} otherwise.
1437      *          The result is {@code false} if {@code toffset} is
1438      *          negative or greater than the length of this
1439      *          {@code String} object; otherwise the result is the same
1440      *          as the result of the expression
1441      *          <pre>
1442      *          this.substring(toffset).startsWith(prefix)
1443      *          </pre>
1444      */
1445     public boolean startsWith(String prefix, int toffset) {
1446         // Note: toffset might be near -1>>>1.
1447         if (toffset < 0 || toffset > length() - prefix.length()) {
1448             return false;
1449         }
1450         byte ta[] = value;
1451         byte pa[] = prefix.value;
1452         int po = 0;
1453         int pc = pa.length;
1454         byte coder = coder();
1455         if (coder == prefix.coder()) {
1456             int to = (coder == LATIN1) ? toffset : toffset << 1;
1457             while (po < pc) {
1458                 if (ta[to++] != pa[po++]) {
1459                     return false;
1460                 }
1461             }
1462         } else {
1463             if (coder == LATIN1) {  // && pcoder == UTF16
1464                 return false;
1465             }
1466             // coder == UTF16 && pcoder == LATIN1)
1467             while (po < pc) {
1468                 if (StringUTF16.getChar(ta, toffset++) != (pa[po++] & 0xff)) {
1469                     return false;
1470                }
1471             }
1472         }
1473         return true;
1474     }
1475 
1476     /**
1477      * Tests if this string starts with the specified prefix.
1478      *
1479      * @param   prefix   the prefix.
1480      * @return  {@code true} if the character sequence represented by the
1481      *          argument is a prefix of the character sequence represented by
1482      *          this string; {@code false} otherwise.
1483      *          Note also that {@code true} will be returned if the


1502      *          as determined by the {@link #equals(Object)} method.
1503      */
1504     public boolean endsWith(String suffix) {
1505         return startsWith(suffix, length() - suffix.length());
1506     }
1507 
1508     /**
1509      * Returns a hash code for this string. The hash code for a
1510      * {@code String} object is computed as
1511      * <blockquote><pre>
1512      * s[0]*31^(n-1) + s[1]*31^(n-2) + ... + s[n-1]
1513      * </pre></blockquote>
1514      * using {@code int} arithmetic, where {@code s[i]} is the
1515      * <i>i</i>th character of the string, {@code n} is the length of
1516      * the string, and {@code ^} indicates exponentiation.
1517      * (The hash value of the empty string is zero.)
1518      *
1519      * @return  a hash code value for this object.
1520      */
1521     public int hashCode() {
1522         // The hash or hashIsZero fields are subject to a benign data race,
1523         // making it crucial to ensure that any observable result of the
1524         // calculation in this method stays correct under any possible read of
1525         // these fields. Necessary restrictions to allow this to be correct
1526         // without explicit memory fences or similar concurrency primitives is
1527         // that we can ever only write to one of these two fields for a given
1528         // String instance, and that the computation is idempotent and derived
1529         // from immutable state
1530         int h = hash;
1531         if (h == 0 && !hashIsZero) {
1532             h = isLatin1() ? StringLatin1.hashCode(value)
1533                            : StringUTF16.hashCode(value);
1534             if (h == 0) {
1535                 hashIsZero = true;
1536             } else {

1537                 hash = h;
1538             }
1539         }
1540         return h;
1541     }
1542 
1543     /**
1544      * Returns the index within this string of the first occurrence of
1545      * the specified character. If a character with value
1546      * {@code ch} occurs in the character sequence represented by
1547      * this {@code String} object, then the index (in Unicode
1548      * code units) of the first such occurrence is returned. For
1549      * values of {@code ch} in the range from 0 to 0xFFFF
1550      * (inclusive), this is the smallest value <i>k</i> such that:
1551      * <blockquote><pre>
1552      * this.charAt(<i>k</i>) == ch
1553      * </pre></blockquote>
1554      * is true. For other values of {@code ch}, it is the
1555      * smallest value <i>k</i> such that:
1556      * <blockquote><pre>


1676     public int lastIndexOf(int ch, int fromIndex) {
1677         return isLatin1() ? StringLatin1.lastIndexOf(value, ch, fromIndex)
1678                           : StringUTF16.lastIndexOf(value, ch, fromIndex);
1679     }
1680 
1681     /**
1682      * Returns the index within this string of the first occurrence of the
1683      * specified substring.
1684      *
1685      * <p>The returned index is the smallest value {@code k} for which:
1686      * <pre>{@code
1687      * this.startsWith(str, k)
1688      * }</pre>
1689      * If no such value of {@code k} exists, then {@code -1} is returned.
1690      *
1691      * @param   str   the substring to search for.
1692      * @return  the index of the first occurrence of the specified substring,
1693      *          or {@code -1} if there is no such occurrence.
1694      */
1695     public int indexOf(String str) {
1696         byte coder = coder();
1697         if (coder == str.coder()) {
1698             return isLatin1() ? StringLatin1.indexOf(value, str.value)
1699                               : StringUTF16.indexOf(value, str.value);
1700         }
1701         if (coder == LATIN1) {  // str.coder == UTF16
1702             return -1;
1703         }
1704         return StringUTF16.indexOfLatin1(value, str.value);
1705     }
1706 
1707     /**
1708      * Returns the index within this string of the first occurrence of the
1709      * specified substring, starting at the specified index.
1710      *
1711      * <p>The returned index is the smallest value {@code k} for which:
1712      * <pre>{@code
1713      *     k >= Math.min(fromIndex, this.length()) &&
1714      *                   this.startsWith(str, k)
1715      * }</pre>
1716      * If no such value of {@code k} exists, then {@code -1} is returned.
1717      *
1718      * @param   str         the substring to search for.
1719      * @param   fromIndex   the index from which to start the search.
1720      * @return  the index of the first occurrence of the specified substring,
1721      *          starting at the specified index,


1954      * {@code String} object is returned. Otherwise, a
1955      * {@code String} object is returned that represents a character
1956      * sequence that is the concatenation of the character sequence
1957      * represented by this {@code String} object and the character
1958      * sequence represented by the argument string.<p>
1959      * Examples:
1960      * <blockquote><pre>
1961      * "cares".concat("s") returns "caress"
1962      * "to".concat("get").concat("her") returns "together"
1963      * </pre></blockquote>
1964      *
1965      * @param   str   the {@code String} that is concatenated to the end
1966      *                of this {@code String}.
1967      * @return  a string that represents the concatenation of this object's
1968      *          characters followed by the string argument's characters.
1969      */
1970     public String concat(String str) {
1971         if (str.isEmpty()) {
1972             return this;
1973         }
1974         return StringConcatHelper.simpleConcat(this, str);













1975     }
1976 
1977     /**
1978      * Returns a string resulting from replacing all occurrences of
1979      * {@code oldChar} in this string with {@code newChar}.
1980      * <p>
1981      * If the character {@code oldChar} does not occur in the
1982      * character sequence represented by this {@code String} object,
1983      * then a reference to this {@code String} object is returned.
1984      * Otherwise, a {@code String} object is returned that
1985      * represents a character sequence identical to the character sequence
1986      * represented by this {@code String} object, except that every
1987      * occurrence of {@code oldChar} is replaced by an occurrence
1988      * of {@code newChar}.
1989      * <p>
1990      * Examples:
1991      * <blockquote><pre>
1992      * "mesquite in your cellar".replace('e', 'o')
1993      *         returns "mosquito in your collar"
1994      * "the war of baronets".replace('r', 'y')


2143      * @since 1.4
2144      * @spec JSR-51
2145      */
2146     public String replaceAll(String regex, String replacement) {
2147         return Pattern.compile(regex).matcher(this).replaceAll(replacement);
2148     }
2149 
2150     /**
2151      * Replaces each substring of this string that matches the literal target
2152      * sequence with the specified literal replacement sequence. The
2153      * replacement proceeds from the beginning of the string to the end, for
2154      * example, replacing "aa" with "b" in the string "aaa" will result in
2155      * "ba" rather than "ab".
2156      *
2157      * @param  target The sequence of char values to be replaced
2158      * @param  replacement The replacement sequence of char values
2159      * @return  The resulting string
2160      * @since 1.5
2161      */
2162     public String replace(CharSequence target, CharSequence replacement) {
2163         String trgtStr = target.toString();
2164         String replStr = replacement.toString();
2165         int thisLen = length();
2166         int trgtLen = trgtStr.length();
2167         int replLen = replStr.length();
2168 
2169         if (trgtLen > 0) {
2170             if (trgtLen == 1 && replLen == 1) {
2171                 return replace(trgtStr.charAt(0), replStr.charAt(0));
2172             }
2173 
2174             boolean thisIsLatin1 = this.isLatin1();
2175             boolean trgtIsLatin1 = trgtStr.isLatin1();
2176             boolean replIsLatin1 = replStr.isLatin1();
2177             String ret = (thisIsLatin1 && trgtIsLatin1 && replIsLatin1)
2178                     ? StringLatin1.replace(value, thisLen,
2179                                            trgtStr.value, trgtLen,
2180                                            replStr.value, replLen)
2181                     : StringUTF16.replace(value, thisLen, thisIsLatin1,
2182                                           trgtStr.value, trgtLen, trgtIsLatin1,
2183                                           replStr.value, replLen, replIsLatin1);
2184             if (ret != null) {
2185                 return ret;
2186             }
2187             return this;
2188 
2189         } else { // trgtLen == 0
2190             int resultLen;
2191             try {
2192                 resultLen = Math.addExact(thisLen, Math.multiplyExact(
2193                         Math.addExact(thisLen, 1), replLen));
2194             } catch (ArithmeticException ignored) {
2195                 throw new OutOfMemoryError();
2196             }
2197 
2198             StringBuilder sb = new StringBuilder(resultLen);
2199             sb.append(replStr);
2200             for (int i = 0; i < thisLen; ++i) {
2201                 sb.append(charAt(i)).append(replStr);
2202             }
2203             return sb.toString();
2204         }















2205     }
2206 
2207     /**
2208      * Splits this string around matches of the given
2209      * <a href="../util/regex/Pattern.html#sum">regular expression</a>.
2210      *
2211      * <p> The array returned by this method contains each substring of this
2212      * string that is terminated by another substring that matches the given
2213      * expression or is terminated by the end of the string.  The substrings in
2214      * the array are in the order in which they occur in this string.  If the
2215      * expression does not match any part of the input then the resulting array
2216      * has just one element, namely this string.
2217      *
2218      * <p> When there is a positive-width match at the beginning of this
2219      * string then an empty leading substring is included at the beginning
2220      * of the resulting array. A zero-width match at the beginning however
2221      * never produces such empty leading substring.
2222      *
2223      * <p> The {@code limit} parameter controls the number of times the
2224      * pattern is applied and therefore affects the length of the resulting




  58  * The {@code String} class represents character strings. All
  59  * string literals in Java programs, such as {@code "abc"}, are
  60  * implemented as instances of this class.
  61  * <p>
  62  * Strings are constant; their values cannot be changed after they
  63  * are created. String buffers support mutable strings.
  64  * Because String objects are immutable they can be shared. For example:
  65  * <blockquote><pre>
  66  *     String str = "abc";
  67  * </pre></blockquote><p>
  68  * is equivalent to:
  69  * <blockquote><pre>
  70  *     char data[] = {'a', 'b', 'c'};
  71  *     String str = new String(data);
  72  * </pre></blockquote><p>
  73  * Here are some more examples of how strings can be used:
  74  * <blockquote><pre>
  75  *     System.out.println("abc");
  76  *     String cde = "cde";
  77  *     System.out.println("abc" + cde);
  78  *     String c = "abc".substring(2,3);
  79  *     String d = cde.substring(1, 2);
  80  * </pre></blockquote>
  81  * <p>
  82  * The class {@code String} includes methods for examining
  83  * individual characters of the sequence, for comparing strings, for
  84  * searching strings, for extracting substrings, and for creating a
  85  * copy of a string with all characters translated to uppercase or to
  86  * lowercase. Case mapping is based on the Unicode Standard version
  87  * specified by the {@link java.lang.Character Character} class.
  88  * <p>
  89  * The Java language provides special support for the string
  90  * concatenation operator (&nbsp;+&nbsp;), and for conversion of
  91  * other objects to strings. For additional information on string
  92  * concatenation and conversion, see <i>The Java&trade; Language Specification</i>.
  93  *
  94  * <p> Unless otherwise noted, passing a {@code null} argument to a constructor
  95  * or method in this class will cause a {@link NullPointerException} to be
  96  * thrown.
  97  *
  98  * <p>A {@code String} represents a string in the UTF-16 format


 147      */
 148     @Stable
 149     private final byte[] value;
 150 
 151     /**
 152      * The identifier of the encoding used to encode the bytes in
 153      * {@code value}. The supported values in this implementation are
 154      *
 155      * LATIN1
 156      * UTF16
 157      *
 158      * @implNote This field is trusted by the VM, and is subject to
 159      * constant folding if the String instance is constant. Overwriting this
 160      * field after construction will cause problems.
 161      */
 162     private final byte coder;
 163 
 164     /** Cached hash code for the string. */
 165     private int hash; // Defaults to 0
 166 






 167     /** Use serialVersionUID from JDK 1.0.2 for interoperability. */
 168     private static final long serialVersionUID = -6849794470754667710L;
 169 
 170     /**
 171      * If String compaction is disabled, the bytes in {@code value} are
 172      * always encoded in UTF16.
 173      *
 174      * For methods with several possible implementation paths, when String
 175      * compaction is disabled, only one code path is taken.
 176      *
 177      * The instance field value is generally opaque to optimizing JIT
 178      * compilers. Therefore, in performance-sensitive places, an explicit
 179      * check of the static boolean {@code COMPACT_STRINGS} is done first
 180      * before checking the {@code coder} field since the static boolean
 181      * {@code COMPACT_STRINGS} would be constant folded away by an
 182      * optimizing JIT compiler. The idioms for these cases are as follows.
 183      *
 184      * For code such as:
 185      *
 186      *    if (coder == LATIN1) { ... }


 997      * object.
 998      *
 999      * <p>For finer-grained String comparison, refer to
1000      * {@link java.text.Collator}.
1001      *
1002      * @param  anObject
1003      *         The object to compare this {@code String} against
1004      *
1005      * @return  {@code true} if the given object represents a {@code String}
1006      *          equivalent to this string, {@code false} otherwise
1007      *
1008      * @see  #compareTo(String)
1009      * @see  #equalsIgnoreCase(String)
1010      */
1011     public boolean equals(Object anObject) {
             // Fast path: an object is always equal to itself.
1012         if (this == anObject) {
1013             return true;
1014         }
1015         if (anObject instanceof String) {
1016             String aString = (String)anObject;
                 // Strings whose coders differ are reported as unequal without
                 // looking at their bytes.
                 // NOTE(review): this presumably relies on a string never being
                 // stored as UTF16 when it is representable in LATIN1 -- confirm
                 // against the constructors.
1017             if (coder() == aString.coder()) {
                     // Same encoding: delegate to the coder-specific comparison
                     // of the two backing arrays.
1018                 return isLatin1() ? StringLatin1.equals(value, aString.value)
1019                                   : StringUTF16.equals(value, aString.value);
1020             }
1021         }
             // Reached when the argument is not a String (including null) or
             // when the two coders differ.
1022         return false;
1023     }
1024 
1025     /**
1026      * Compares this string to the specified {@code StringBuffer}.  The result
1027      * is {@code true} if and only if this {@code String} represents the same
1028      * sequence of characters as the specified {@code StringBuffer}. This method
1029      * synchronizes on the {@code StringBuffer}.
1030      *
1031      * <p>For finer-grained String comparison, refer to
1032      * {@link java.text.Collator}.
1033      *
1034      * @param  sb
1035      *         The {@code StringBuffer} to compare this {@code String} against
1036      *
1037      * @return  {@code true} if this {@code String} represents the same
1038      *          sequence of characters as the specified {@code StringBuffer},
1039      *          {@code false} otherwise
1040      *
1041      * @since  1.4
1042      */
1043     public boolean contentEquals(StringBuffer sb) {
             // Delegates to the CharSequence overload; the explicit cast selects
             // that overload instead of recursing into this method.
1044         return contentEquals((CharSequence)sb);
1045     }
1046 
1047     private boolean nonSyncContentEquals(AbstractStringBuilder sb) {
             // Returns true when this string and the builder currently hold the
             // same sequence of chars. This method takes no lock itself.
             // NOTE(review): callers presumably synchronize when sb is a
             // StringBuffer -- confirm in contentEquals(CharSequence).
1048         int len = length();
             // Different lengths can never match; this also bounds the loops below.
1049         if (len != sb.length()) {
1050             return false;
1051         }
1052         byte v1[] = value;
1053         byte v2[] = sb.getValue();
1054         if (coder() == sb.getCoder()) {

                 // Same encoding: compare the raw bytes. Only the first
                 // v1.length bytes of the builder's array are examined.
                 // NOTE(review): v2 is presumably at least that long because the
                 // char lengths matched above -- confirm the builder's capacity
                 // semantics.
1055             int n = v1.length;
1056             for (int i = 0; i < n; i++) {
1057                 if (v1[i] != v2[i]) {
1058                     return false;
1059                 }
1060             }
1061         } else {
1062             if (!isLatin1()) {  // utf16 str and latin1 abs can never be "equal"
1063                 return false;
1064             }
                 // This string is LATIN1 and the builder uses the other coder:
                 // delegate the comparison of the first len chars to
                 // StringUTF16.contentEquals.
1065             return StringUTF16.contentEquals(v1, v2, len);
1066         }
1067         return true;
1068     }
1069 
1070     /**
1071      * Compares this string to the specified {@code CharSequence}.  The
1072      * result is {@code true} if and only if this {@code String} represents the
1073      * same sequence of char values as the specified sequence. Note that if the
1074      * {@code CharSequence} is a {@code StringBuffer} then the method
1075      * synchronizes on it.
1076      *
1077      * <p>For finer-grained String comparison, refer to
1078      * {@link java.text.Collator}.
1079      *
1080      * @param  cs
1081      *         The sequence to compare this {@code String} against
1082      *


1186      * string lexicographically precedes the longer string. In this case,
1187      * {@code compareTo} returns the difference of the lengths of the
1188      * strings -- that is, the value:
1189      * <blockquote><pre>
1190      * this.length()-anotherString.length()
1191      * </pre></blockquote>
1192      *
1193      * <p>For finer-grained String comparison, refer to
1194      * {@link java.text.Collator}.
1195      *
1196      * @param   anotherString   the {@code String} to be compared.
1197      * @return  the value {@code 0} if the argument string is equal to
1198      *          this string; a value less than {@code 0} if this string
1199      *          is lexicographically less than the string argument; and a
1200      *          value greater than {@code 0} if this string is
1201      *          lexicographically greater than the string argument.
1202      */
1203     public int compareTo(String anotherString) {
             // Dispatches to one of four comparison routines depending on the
             // coders of the two strings.
1204         byte v1[] = value;
             // Dereferencing the argument here raises NullPointerException when
             // anotherString is null.
1205         byte v2[] = anotherString.value;
1206         if (coder() == anotherString.coder()) {
                 // Both strings use the same encoding.
1207             return isLatin1() ? StringLatin1.compareTo(v1, v2)
1208                               : StringUTF16.compareTo(v1, v2);

1209         }
             // Mixed encodings: the helper is chosen by this string's coder, and
             // its name states the coder of the argument.
1210         return isLatin1() ? StringLatin1.compareToUTF16(v1, v2)
1211                           : StringUTF16.compareToLatin1(v1, v2);
1212      }
1213 
1214     /**
1215      * A Comparator that orders {@code String} objects as by
1216      * {@code compareToIgnoreCase}. This comparator is serializable.
1217      * <p>
1218      * Note that this Comparator does <em>not</em> take locale into account,
1219      * and will result in an unsatisfactory ordering for certain locales.
1220      * The {@link java.text.Collator} class provides locale-sensitive comparison.
1221      *
1222      * @see     java.text.Collator
1223      * @since   1.2
1224      */
1225     public static final Comparator<String> CASE_INSENSITIVE_ORDER
1226                                          = new CaseInsensitiveComparator();
1227     private static class CaseInsensitiveComparator
1228             implements Comparator<String>, java.io.Serializable {
             // Implementation class of the CASE_INSENSITIVE_ORDER comparator.
             // It has no instance state; readResolve() below maps every
             // deserialized instance back to CASE_INSENSITIVE_ORDER.

1229         // use serialVersionUID from JDK 1.2.2 for interoperability
1230         private static final long serialVersionUID = 8575799808933029326L;
1231 
1232         public int compare(String s1, String s2) {
                 // Reading the value fields throws NullPointerException when
                 // either argument is null.
1233             byte v1[] = s1.value;
1234             byte v2[] = s2.value;
1235             if (s1.coder() == s2.coder()) {
                     // Same encoding on both sides.
1236                 return s1.isLatin1() ? StringLatin1.compareToCI(v1, v2)
1237                                      : StringUTF16.compareToCI(v1, v2);

1238             }
                 // Mixed encodings: dispatch on the coder of s1; the helper's
                 // suffix names the coder of s2.
1239             return s1.isLatin1() ? StringLatin1.compareToCI_UTF16(v1, v2)
1240                                  : StringUTF16.compareToCI_Latin1(v1, v2);
1241         }
1242 
1243         /** Replaces the de-serialized object. */
1244         private Object readResolve() { return CASE_INSENSITIVE_ORDER; }
1245     }
1246 
1247     /**
1248      * Compares two strings lexicographically, ignoring case
1249      * differences. This method returns an integer whose sign is that of
1250      * calling {@code compareTo} with normalized versions of the strings
1251      * where case differences have been eliminated by calling
1252      * {@code Character.toLowerCase(Character.toUpperCase(character))} on
1253      * each character.
1254      * <p>
1255      * Note that this method does <em>not</em> take locale into account,
1256      * and will result in an unsatisfactory ordering for certain locales.
1257      * The {@link java.text.Collator} class provides locale-sensitive comparison.
1258      *
1259      * @param   str   the {@code String} to be compared.
1260      * @return  a negative integer, zero, or a positive integer as the


1294      * {@link java.text.Collator} class provides locale-sensitive comparison.
1295      *
1296      * @param   toffset   the starting offset of the subregion in this string.
1297      * @param   other     the string argument.
1298      * @param   ooffset   the starting offset of the subregion in the string
1299      *                    argument.
1300      * @param   len       the number of characters to compare.
1301      * @return  {@code true} if the specified subregion of this string
1302      *          exactly matches the specified subregion of the string argument;
1303      *          {@code false} otherwise.
1304      */
1305     public boolean regionMatches(int toffset, String other, int ooffset, int len) {
             // Exact (case-sensitive) comparison of len chars of this string,
             // starting at toffset, with len chars of other starting at ooffset.
1306         byte tv[] = value;
1307         byte ov[] = other.value;
1308         // Note: toffset, ooffset, or len might be near -1>>>1.
             // -1>>>1 is Integer.MAX_VALUE. The subtraction is done in long
             // arithmetic so that neither it nor an offset + len sum can
             // overflow an int.
1309         if ((ooffset < 0) || (toffset < 0) ||
1310              (toffset > (long)length() - len) ||
1311              (ooffset > (long)other.length() - len)) {
1312             return false;
1313         }
             // From here on, a len <= 0 skips every loop below, so the method
             // returns true for an empty region.
1314         if (coder() == other.coder()) {

                 // Same encoding: compare the backing bytes directly. For the
                 // non-LATIN1 coder the char offsets and the length are doubled
                 // (shift left by one) to turn them into byte positions.
1315             if (!isLatin1() && (len > 0)) {
1316                 toffset = toffset << 1;
1317                 ooffset = ooffset << 1;
1318                 len = len << 1;
1319             }
1320             while (len-- > 0) {
1321                 if (tv[toffset++] != ov[ooffset++]) {
1322                     return false;
1323                 }
1324             }
1325         } else {
                 // Mixed encodings: compare char by char, reading each side
                 // with the getChar of its own coder; offsets stay char indices.
1326             if (coder() == LATIN1) {
1327                 while (len-- > 0) {
1328                     if (StringLatin1.getChar(tv, toffset++) !=
1329                         StringUTF16.getChar(ov, ooffset++)) {
1330                         return false;
1331                     }
1332                 }
1333             } else {
1334                 while (len-- > 0) {
1335                     if (StringUTF16.getChar(tv, toffset++) !=
1336                         StringLatin1.getChar(ov, ooffset++)) {
1337                         return false;
1338                     }
1339                 }
1340             }
1341         }
1342         return true;
1343     }
1344 
1345     /**
1346      * Tests if two string regions are equal.


1388      * @param   len          the number of characters to compare.
1389      * @return  {@code true} if the specified subregion of this string
1390      *          matches the specified subregion of the string argument;
1391      *          {@code false} otherwise. Whether the matching is exact
1392      *          or case insensitive depends on the {@code ignoreCase}
1393      *          argument.
1394      */
1395     public boolean regionMatches(boolean ignoreCase, int toffset,
1396             String other, int ooffset, int len) {
             // The case-sensitive request is handled entirely by the
             // four-argument overload.
1397         if (!ignoreCase) {
1398             return regionMatches(toffset, other, ooffset, len);
1399         }
1400         // Note: toffset, ooffset, or len might be near -1>>>1.
             // Same bounds check as the exact overload: the subtraction is done
             // in long arithmetic so it cannot overflow an int. The call to
             // other.length() throws NullPointerException when other is null.
1401         if ((ooffset < 0) || (toffset < 0)
1402                 || (toffset > (long)length() - len)
1403                 || (ooffset > (long)other.length() - len)) {
1404             return false;
1405         }
1406         byte tv[] = value;
1407         byte ov[] = other.value;
             // Case-insensitive matching is delegated to one of four helpers,
             // chosen by the coders of the two strings. All of them receive
             // the offsets and the length unchanged.
1408         if (coder() == other.coder()) {
1409             return isLatin1()

1410               ? StringLatin1.regionMatchesCI(tv, toffset, ov, ooffset, len)
1411               : StringUTF16.regionMatchesCI(tv, toffset, ov, ooffset, len);
1412         }
             // Mixed encodings: the helper's suffix names the coder of other.
1413         return isLatin1()
1414               ? StringLatin1.regionMatchesCI_UTF16(tv, toffset, ov, ooffset, len)
1415               : StringUTF16.regionMatchesCI_Latin1(tv, toffset, ov, ooffset, len);
1416     }
1417 
1418     /**
1419      * Tests if the substring of this string beginning at the
1420      * specified index starts with the specified prefix.
1421      *
1422      * @param   prefix    the prefix.
1423      * @param   toffset   where to begin looking in this string.
1424      * @return  {@code true} if the character sequence represented by the
1425      *          argument is a prefix of the substring of this object starting
1426      *          at index {@code toffset}; {@code false} otherwise.
1427      *          The result is {@code false} if {@code toffset} is
1428      *          negative or greater than the length of this
1429      *          {@code String} object; otherwise the result is the same
1430      *          as the result of the expression
1431      *          <pre>
1432      *          this.substring(toffset).startsWith(prefix)
1433      *          </pre>
1434      */
1435     public boolean startsWith(String prefix, int toffset) {
1436         // Note: toffset might be near -1>>>1.
             // -1>>>1 is Integer.MAX_VALUE. The test subtracts the two lengths
             // rather than adding toffset to prefix.length(), so it cannot
             // overflow. It also rejects a prefix longer than the remainder of
             // this string.
1437         if (toffset < 0 || toffset > length() - prefix.length()) {
1438             return false;
1439         }
1440         byte ta[] = value;
1441         byte pa[] = prefix.value;
             // po indexes the prefix's backing array; pc is that array's length
             // in bytes.
1442         int po = 0;
1443         int pc = pa.length;
1444         if (coder() == prefix.coder()) {
                 // Same encoding: compare bytes. For the non-LATIN1 coder the
                 // char offset is doubled to obtain the byte offset.
1445             int to = isLatin1() ? toffset : toffset << 1;

1446             while (po < pc) {
1447                 if (ta[to++] != pa[po++]) {
1448                     return false;
1449                 }
1450             }
1451         } else {
1452             if (isLatin1()) {  // && pcoder == UTF16
1453                 return false;
1454             }
1455             // coder == UTF16 && pcoder == LATIN1
                 // Each prefix byte is widened to an unsigned value (& 0xff) and
                 // compared with the char read from this string at char index
                 // toffset.
1456             while (po < pc) {
1457                 if (StringUTF16.getChar(ta, toffset++) != (pa[po++] & 0xff)) {
1458                     return false;
1459                }
1460             }
1461         }
1462         return true;
1463     }
1464 
1465     /**
1466      * Tests if this string starts with the specified prefix.
1467      *
1468      * @param   prefix   the prefix.
1469      * @return  {@code true} if the character sequence represented by the
1470      *          argument is a prefix of the character sequence represented by
1471      *          this string; {@code false} otherwise.
1472      *          Note also that {@code true} will be returned if the


1491      *          as determined by the {@link #equals(Object)} method.
1492      */
1493     public boolean endsWith(String suffix) {
             // A suffix match is a prefix match at offset length - suffix.length.
             // When the suffix is longer than this string the offset is negative
             // and startsWith returns false.
1494         return startsWith(suffix, length() - suffix.length());
1495     }
1496 
1497     /**
1498      * Returns a hash code for this string. The hash code for a
1499      * {@code String} object is computed as
1500      * <blockquote><pre>
1501      * s[0]*31^(n-1) + s[1]*31^(n-2) + ... + s[n-1]
1502      * </pre></blockquote>
1503      * using {@code int} arithmetic, where {@code s[i]} is the
1504      * <i>i</i>th character of the string, {@code n} is the length of
1505      * the string, and {@code ^} indicates exponentiation.
1506      * (The hash value of the empty string is zero.)
1507      *
1508      * @return  a hash code value for this object.
1509      */
1510     public int hashCode() {








             // The cached field is read exactly once into a local; all further
             // decisions and the return value use that local.
             // NOTE(review): presumably this keeps the method correct when
             // several threads compute the hash concurrently without
             // synchronization -- confirm.
1511         int h = hash;
             // Zero means "not cached" (or a genuine zero hash). The empty
             // string is skipped, so its hash is zero without any computation.
1512         if (h == 0 && value.length > 0) {
1513             h = isLatin1() ? StringLatin1.hashCode(value)
1514                            : StringUTF16.hashCode(value);
1515             // Avoid issuing a store if the calculated value is also zero:
1516             // in addition to a minor performance benefit, this allows storing
1517             // Strings with zero hash code in read-only memory.
                 // Consequence: a non-empty string whose hash is zero is
                 // re-hashed on every call.
1518             if (h != 0) {
1519                 hash = h;
1520             }
1521         }
1522         return h;
1523     }
1524 
1525     /**
1526      * Returns the index within this string of the first occurrence of
1527      * the specified character. If a character with value
1528      * {@code ch} occurs in the character sequence represented by
1529      * this {@code String} object, then the index (in Unicode
1530      * code units) of the first such occurrence is returned. For
1531      * values of {@code ch} in the range from 0 to 0xFFFF
1532      * (inclusive), this is the smallest value <i>k</i> such that:
1533      * <blockquote><pre>
1534      * this.charAt(<i>k</i>) == ch
1535      * </pre></blockquote>
1536      * is true. For other values of {@code ch}, it is the
1537      * smallest value <i>k</i> such that:
1538      * <blockquote><pre>


1658     public int lastIndexOf(int ch, int fromIndex) {
             // Pure dispatch on the coder: the chosen helper receives the
             // backing array, ch and fromIndex unchanged.
1659         return isLatin1() ? StringLatin1.lastIndexOf(value, ch, fromIndex)
1660                           : StringUTF16.lastIndexOf(value, ch, fromIndex);
1661     }
1662 
1663     /**
1664      * Returns the index within this string of the first occurrence of the
1665      * specified substring.
1666      *
1667      * <p>The returned index is the smallest value {@code k} for which:
1668      * <pre>{@code
1669      * this.startsWith(str, k)
1670      * }</pre>
1671      * If no such value of {@code k} exists, then {@code -1} is returned.
1672      *
1673      * @param   str   the substring to search for.
1674      * @return  the index of the first occurrence of the specified substring,
1675      *          or {@code -1} if there is no such occurrence.
1676      */
1677     public int indexOf(String str) {
             // str.coder() throws NullPointerException when str is null.
1678         if (coder() == str.coder()) {

                 // Same encoding: search with the helper for that coder.
1679             return isLatin1() ? StringLatin1.indexOf(value, str.value)
1680                               : StringUTF16.indexOf(value, str.value);
1681         }
             // Mixed encodings: a LATIN1 string is reported as not containing
             // a UTF16 one.
             // NOTE(review): presumably because a UTF16-coded string always
             // holds at least one char outside LATIN1 -- confirm.
1682         if (coder() == LATIN1) {  // str.coder == UTF16
1683             return -1;
1684         }
             // This string is UTF16 and str is LATIN1.
1685         return StringUTF16.indexOfLatin1(value, str.value);
1686     }
1687 
1688     /**
1689      * Returns the index within this string of the first occurrence of the
1690      * specified substring, starting at the specified index.
1691      *
1692      * <p>The returned index is the smallest value {@code k} for which:
1693      * <pre>{@code
1694      *     k >= Math.min(fromIndex, this.length()) &&
1695      *                   this.startsWith(str, k)
1696      * }</pre>
1697      * If no such value of {@code k} exists, then {@code -1} is returned.
1698      *
1699      * @param   str         the substring to search for.
1700      * @param   fromIndex   the index from which to start the search.
1701      * @return  the index of the first occurrence of the specified substring,
1702      *          starting at the specified index,


1935      * {@code String} object is returned. Otherwise, a
1936      * {@code String} object is returned that represents a character
1937      * sequence that is the concatenation of the character sequence
1938      * represented by this {@code String} object and the character
1939      * sequence represented by the argument string.<p>
1940      * Examples:
1941      * <blockquote><pre>
1942      * "cares".concat("s") returns "caress"
1943      * "to".concat("get").concat("her") returns "together"
1944      * </pre></blockquote>
1945      *
1946      * @param   str   the {@code String} that is concatenated to the end
1947      *                of this {@code String}.
1948      * @return  a string that represents the concatenation of this object's
1949      *          characters followed by the string argument's characters.
1950      */
1951     public String concat(String str) {
             // Nothing to append: return this very instance instead of a copy.
             // str.isEmpty() throws NullPointerException when str is null.
1952         if (str.isEmpty()) {
1953             return this;
1954         }
1955         if (coder() == str.coder()) {
                 // Same encoding: the result bytes are this string's bytes
                 // followed by str's bytes. Arrays.copyOf allocates the combined
                 // array and copies the first part in one step.
1956             byte[] val = this.value;
1957             byte[] oval = str.value;
1958             int len = val.length + oval.length;
1959             byte[] buf = Arrays.copyOf(val, len);
1960             System.arraycopy(oval, 0, buf, val.length, oval.length);
1961             return new String(buf, coder);
1962         }
             // Mixed encodings: the result is built as UTF16. Both strings are
             // written into a buffer sized for len + olen chars, this string at
             // index 0 and str at index len.
             // NOTE(review): the index passed to getBytes is presumably a char
             // index, not a byte index -- confirm against getBytes(byte[], int, byte).
1963         int len = length();
1964         int olen = str.length();
1965         byte[] buf = StringUTF16.newBytesFor(len + olen);
1966         getBytes(buf, 0, UTF16);
1967         str.getBytes(buf, len, UTF16);
1968         return new String(buf, UTF16);
1969     }
1970 
1971     /**
1972      * Returns a string resulting from replacing all occurrences of
1973      * {@code oldChar} in this string with {@code newChar}.
1974      * <p>
1975      * If the character {@code oldChar} does not occur in the
1976      * character sequence represented by this {@code String} object,
1977      * then a reference to this {@code String} object is returned.
1978      * Otherwise, a {@code String} object is returned that
1979      * represents a character sequence identical to the character sequence
1980      * represented by this {@code String} object, except that every
1981      * occurrence of {@code oldChar} is replaced by an occurrence
1982      * of {@code newChar}.
1983      * <p>
1984      * Examples:
1985      * <blockquote><pre>
1986      * "mesquite in your cellar".replace('e', 'o')
1987      *         returns "mosquito in your collar"
1988      * "the war of baronets".replace('r', 'y')


2137      * @since 1.4
2138      * @spec JSR-51
2139      */
2140     public String replaceAll(String regex, String replacement) {
             // The regular expression is compiled anew on every call. Callers
             // that reuse one expression many times can hold on to a compiled
             // Pattern themselves and use its Matcher directly.
2141         return Pattern.compile(regex).matcher(this).replaceAll(replacement);
2142     }
2143 
2144     /**
2145      * Replaces each substring of this string that matches the literal target
2146      * sequence with the specified literal replacement sequence. The
2147      * replacement proceeds from the beginning of the string to the end, for
2148      * example, replacing "aa" with "b" in the string "aaa" will result in
2149      * "ba" rather than "ab".
2150      *
2151      * @param  target The sequence of char values to be replaced
2152      * @param  replacement The replacement sequence of char values
2153      * @return  The resulting string
2154      * @since 1.5
2155      */
2156     public String replace(CharSequence target, CharSequence replacement) {
             // Both arguments are converted to String up front; toString() on a
             // null argument throws NullPointerException.
2157         String tgtStr = target.toString();
2158         String replStr = replacement.toString();
             // j is the index of the current match. With no match at all, this
             // same instance is returned unchanged.
2159         int j = indexOf(tgtStr);
2160         if (j < 0) {




















2161             return this;
















2162         }
2163         int tgtLen = tgtStr.length();
             // Step used to advance the search. It is at least 1 so that an
             // empty target still moves forward by one char per iteration.
2164         int tgtLen1 = Math.max(tgtLen, 1);
2165         int thisLen = length();
2166 
             // Initial builder capacity: the length of the result if exactly one
             // replacement happens. A negative value means the int sum
             // overflowed, which is reported as OutOfMemoryError.
2167         int newLenHint = thisLen - tgtLen + replStr.length();
2168         if (newLenHint < 0) {
2169             throw new OutOfMemoryError();
2170         }
2171         StringBuilder sb = new StringBuilder(newLenHint);
             // i is the index of the first char not yet copied to sb.
2172         int i = 0;
2173         do {
                 // Copy the unmatched stretch [i, j), then the replacement, and
                 // skip over the matched target.
2174             sb.append(this, i, j).append(replStr);
2175             i = j + tgtLen;
                 // The next search starts at index >= 1, so any hit is > 0 and a
                 // miss (-1) ends the loop. The j < thisLen test stops the loop
                 // once a match at the very end has been handled, which happens
                 // with an empty target.
2176         } while (j < thisLen && (j = indexOf(tgtStr, j + tgtLen1)) > 0);
             // Append whatever follows the last match.
2177         return sb.append(this, i, thisLen).toString();
2178     }
2179 
2180     /**
2181      * Splits this string around matches of the given
2182      * <a href="../util/regex/Pattern.html#sum">regular expression</a>.
2183      *
2184      * <p> The array returned by this method contains each substring of this
2185      * string that is terminated by another substring that matches the given
2186      * expression or is terminated by the end of the string.  The substrings in
2187      * the array are in the order in which they occur in this string.  If the
2188      * expression does not match any part of the input then the resulting array
2189      * has just one element, namely this string.
2190      *
2191      * <p> When there is a positive-width match at the beginning of this
2192      * string then an empty leading substring is included at the beginning
2193      * of the resulting array. A zero-width match at the beginning however
2194      * never produces such empty leading substring.
2195      *
2196      * <p> The {@code limit} parameter controls the number of times the
2197      * pattern is applied and therefore affects the length of the resulting


< prev index next >