< prev index next >

src/java.base/share/classes/java/lang/String.java

Print this page

 557                         int b2 = bytes[offset];
 558                         if (b2 < -64) { // continuation bytes are always negative values in the range -128 to -65
 559                             dst[dp++] = (byte)decode2(b1, b2);
 560                             offset++;
 561                             continue;
 562                         }
 563                     }
 564                     // anything not a latin1, including the REPL
 565                     // we have to go with the utf16
 566                     offset--;
 567                     break;
 568                 }
 569                 if (offset == sl) {
 570                     if (dp != dst.length) {
 571                         dst = Arrays.copyOf(dst, dp);
 572                     }
 573                     this.value = dst;
 574                     this.coder = LATIN1;
 575                     return;
 576                 }
 577                 byte[] buf = new byte[length << 1];
 578                 StringLatin1.inflate(dst, 0, buf, 0, dp);
 579                 dst = buf;
 580                 dp = decodeUTF8_UTF16(bytes, offset, sl, dst, dp, true);
 581                 if (dp != length) {
 582                     dst = Arrays.copyOf(dst, dp << 1);
 583                 }
 584                 this.value = dst;
 585                 this.coder = UTF16;
 586             } else { // !COMPACT_STRINGS
 587                 byte[] dst = new byte[length << 1];
 588                 int dp = decodeUTF8_UTF16(bytes, offset, offset + length, dst, 0, true);
 589                 if (dp != length) {
 590                     dst = Arrays.copyOf(dst, dp << 1);
 591                 }
 592                 this.value = dst;
 593                 this.coder = UTF16;
 594             }
 595         } else if (charset == ISO_8859_1.INSTANCE) {
 596             if (COMPACT_STRINGS) {
 597                 this.value = Arrays.copyOfRange(bytes, offset, offset + length);
 598                 this.coder = LATIN1;
 599             } else {
 600                 this.value = StringLatin1.inflate(bytes, offset, length);
 601                 this.coder = UTF16;
 602             }
 603         } else if (charset == US_ASCII.INSTANCE) {
 604             if (COMPACT_STRINGS && !StringCoding.hasNegatives(bytes, offset, length)) {
 605                 this.value = Arrays.copyOfRange(bytes, offset, offset + length);
 606                 this.coder = LATIN1;
 607             } else {
 608                 byte[] dst = new byte[length << 1];
 609                 int dp = 0;
 610                 while (dp < length) {
 611                     int b = bytes[offset++];
 612                     StringUTF16.putChar(dst, dp++, (b >= 0) ? (char) b : REPL);
 613                 }
 614                 this.value = dst;
 615                 this.coder = UTF16;
 616             }
 617         } else {
 618             // (1)We never cache the "external" cs, the only benefit of creating
 619             // an additional StringDe/Encoder object to wrap it is to share the
 620             // de/encode() method. These SD/E objects are short-lived, the young-gen
 621             // gc should be able to take care of them well. But the best approach
 622             // is still not to generate them if not really necessary.
 623             // (2)The defensive copy of the input byte/char[] has a big performance
 624             // impact, as well as the outgoing result byte/char[]. Need to do the
 625             // optimization check of (sm==null && classLoader0==null) for both.
 626             CharsetDecoder cd = charset.newDecoder();
 627             // ArrayDecoder fastpaths
 628             if (cd instanceof ArrayDecoder ad) {

 733                 if ((b1 & 0xfe) == 0xc2 && offset < sl) { // b1 either 0xc2 or 0xc3
 734                     int b2 = bytes[offset];
 735                     if (b2 < -64) { // continuation bytes are always negative values in the range -128 to -65
 736                         dst[dp++] = (byte)decode2(b1, b2);
 737                         offset++;
 738                         continue;
 739                     }
 740                 }
 741                 // anything not a latin1, including the REPL
 742                 // we have to go with the utf16
 743                 offset--;
 744                 break;
 745             }
 746             if (offset == sl) {
 747                 if (dp != dst.length) {
 748                     dst = Arrays.copyOf(dst, dp);
 749                 }
 750                 return new String(dst, LATIN1);
 751             }
 752             if (dp == 0) {
 753                 dst = new byte[length << 1];
 754             } else {
 755                 byte[] buf = new byte[length << 1];
 756                 StringLatin1.inflate(dst, 0, buf, 0, dp);
 757                 dst = buf;
 758             }
 759             dp = decodeUTF8_UTF16(bytes, offset, sl, dst, dp, false);
 760         } else { // !COMPACT_STRINGS
 761             dst = new byte[length << 1];
 762             dp = decodeUTF8_UTF16(bytes, offset, offset + length, dst, 0, false);
 763         }
 764         if (dp != length) {
 765             dst = Arrays.copyOf(dst, dp << 1);
 766         }
 767         return new String(dst, UTF16);
 768     }
 769 
 770     static String newStringNoRepl(byte[] src, Charset cs) throws CharacterCodingException {
             // Checked-exception front end for newStringNoRepl1(src, cs): returns its
             // result unchanged, and turns the IllegalArgumentException it may throw
             // back into the checked exception carried as that exception's cause.
 771         try {
 772             return newStringNoRepl1(src, cs);
 773         } catch (IllegalArgumentException e) {
 774             //newStringNoRepl1 throws IAE with MalformedInputException or CCE as the cause
 775             Throwable cause = e.getCause();
 776             if (cause instanceof MalformedInputException mie) {
                     // Rethrown through the pattern variable, so no cast is needed on this path.
 777                 throw mie;
 778             }
                 // NOTE(review): relies on every IAE reaching this point having a
                 // CharacterCodingException cause. Any other cause type would surface as a
                 // ClassCastException, and a null cause as a NullPointerException.
 779             throw (CharacterCodingException)cause;
 780         }
 781     }

1287     private static void throwUnmappable(int off) {
             // Never returns normally: always throws an IllegalArgumentException whose
             // cause is an UnmappableCharacterException constructed with length 1.
             // off is only used to build the message text.
             // NOTE(review): the message reads "malformed input" although the cause is an
             // UnmappableCharacterException - confirm whether callers or tests depend on
             // this exact wording before changing it.
1288         String msg = "malformed input off : " + off + ", length : 1";
1289         throw new IllegalArgumentException(msg, new UnmappableCharacterException(1));
1290     }
1291 
1292     private static void throwUnmappable(byte[] val) {
             // Convenience overload: derives the offset to report from the whole array
             // and delegates to throwUnmappable(int), which always throws.
             // Presumably countPositives returns the number of leading non-negative
             // bytes, i.e. the index of the first negative byte - confirm in StringCoding.
1293         int dp = StringCoding.countPositives(val, 0, val.length);
1294         throwUnmappable(dp);
1295     }
1296 
1297     private static byte[] encodeUTF8(byte coder, byte[] val, boolean doReplace) {
             // Encodes the string bytes in val as UTF-8.
             // coder selects the layout of val: UTF16 is handed to encodeUTF8_UTF16,
             // anything else is handled below as one byte per character.
             // doReplace is only forwarded to the UTF16 path; the code below never reads it.
             // On the one-byte path the result is always a fresh array (a clone, dst
             // itself, or a trimmed copy of dst), never val.
1298         if (coder == UTF16) {
1299             return encodeUTF8_UTF16(val, doReplace);
1300         }
1301 
             // Fast path: presumably hasNegatives reports whether any byte in the range
             // is negative (confirm in StringCoding). If none is, every byte is in
             // 0x00-0x7f and is already its own UTF-8 encoding, so a copy is enough.
1302         if (!StringCoding.hasNegatives(val, 0, val.length)) {
1303             return val.clone();
1304         }
1305 
1306         int dp = 0;
             // Worst case is two output bytes per input byte.
             // NOTE(review): val.length << 1 is int arithmetic and wraps to a negative
             // value once val.length exceeds Integer.MAX_VALUE / 2, which would make this
             // allocation fail with NegativeArraySizeException.
1307         byte[] dst = new byte[val.length << 1];
1308         for (byte c : val) {
1309             if (c < 0) {
                     // c & 0xff is 0x80-0xff here, so the lead byte is 0xc2 or 0xc3
                     // (0xc0 | top two bits) and the trail byte is 0x80 | low six bits.
1310                 dst[dp++] = (byte) (0xc0 | ((c & 0xff) >> 6));
1311                 dst[dp++] = (byte) (0x80 | (c & 0x3f));
1312             } else {
1313                 dst[dp++] = c;
1314             }
1315         }
             // dst is completely filled only when every input byte needed two output bytes.
1316         if (dp == dst.length) {
1317             return dst;
1318         }
             // Otherwise trim to the number of bytes actually written.
1319         return Arrays.copyOf(dst, dp);
1320     }
1321 
1322     private static byte[] encodeUTF8_UTF16(byte[] val, boolean doReplace) {
1323         int dp = 0;
1324         int sp = 0;
1325         int sl = val.length >> 1;
1326         byte[] dst = new byte[sl * 3];
1327         while (sp < sl) {

 557                         int b2 = bytes[offset];
 558                         if (b2 < -64) { // continuation bytes are always negative values in the range -128 to -65
 559                             dst[dp++] = (byte)decode2(b1, b2);
 560                             offset++;
 561                             continue;
 562                         }
 563                     }
 564                     // anything not a latin1, including the REPL
 565                     // we have to go with the utf16
 566                     offset--;
 567                     break;
 568                 }
 569                 if (offset == sl) {
 570                     if (dp != dst.length) {
 571                         dst = Arrays.copyOf(dst, dp);
 572                     }
 573                     this.value = dst;
 574                     this.coder = LATIN1;
 575                     return;
 576                 }
 577                 byte[] buf = StringUTF16.newBytesFor(length);
 578                 StringLatin1.inflate(dst, 0, buf, 0, dp);
 579                 dst = buf;
 580                 dp = decodeUTF8_UTF16(bytes, offset, sl, dst, dp, true);
 581                 if (dp != length) {
 582                     dst = Arrays.copyOf(dst, dp << 1);
 583                 }
 584                 this.value = dst;
 585                 this.coder = UTF16;
 586             } else { // !COMPACT_STRINGS
 587                 byte[] dst = StringUTF16.newBytesFor(length);
 588                 int dp = decodeUTF8_UTF16(bytes, offset, offset + length, dst, 0, true);
 589                 if (dp != length) {
 590                     dst = Arrays.copyOf(dst, dp << 1);
 591                 }
 592                 this.value = dst;
 593                 this.coder = UTF16;
 594             }
 595         } else if (charset == ISO_8859_1.INSTANCE) {
 596             if (COMPACT_STRINGS) {
 597                 this.value = Arrays.copyOfRange(bytes, offset, offset + length);
 598                 this.coder = LATIN1;
 599             } else {
 600                 this.value = StringLatin1.inflate(bytes, offset, length);
 601                 this.coder = UTF16;
 602             }
 603         } else if (charset == US_ASCII.INSTANCE) {
 604             if (COMPACT_STRINGS && !StringCoding.hasNegatives(bytes, offset, length)) {
 605                 this.value = Arrays.copyOfRange(bytes, offset, offset + length);
 606                 this.coder = LATIN1;
 607             } else {
 608                 byte[] dst = StringUTF16.newBytesFor(length);
 609                 int dp = 0;
 610                 while (dp < length) {
 611                     int b = bytes[offset++];
 612                     StringUTF16.putChar(dst, dp++, (b >= 0) ? (char) b : REPL);
 613                 }
 614                 this.value = dst;
 615                 this.coder = UTF16;
 616             }
 617         } else {
 618             // (1)We never cache the "external" cs, the only benefit of creating
 619             // an additional StringDe/Encoder object to wrap it is to share the
 620             // de/encode() method. These SD/E objects are short-lived, the young-gen
 621             // gc should be able to take care of them well. But the best approach
 622             // is still not to generate them if not really necessary.
 623             // (2)The defensive copy of the input byte/char[] has a big performance
 624             // impact, as well as the outgoing result byte/char[]. Need to do the
 625             // optimization check of (sm==null && classLoader0==null) for both.
 626             CharsetDecoder cd = charset.newDecoder();
 627             // ArrayDecoder fastpaths
 628             if (cd instanceof ArrayDecoder ad) {

 733                 if ((b1 & 0xfe) == 0xc2 && offset < sl) { // b1 either 0xc2 or 0xc3
 734                     int b2 = bytes[offset];
 735                     if (b2 < -64) { // continuation bytes are always negative values in the range -128 to -65
 736                         dst[dp++] = (byte)decode2(b1, b2);
 737                         offset++;
 738                         continue;
 739                     }
 740                 }
 741                 // anything not a latin1, including the REPL
 742                 // we have to go with the utf16
 743                 offset--;
 744                 break;
 745             }
 746             if (offset == sl) {
 747                 if (dp != dst.length) {
 748                     dst = Arrays.copyOf(dst, dp);
 749                 }
 750                 return new String(dst, LATIN1);
 751             }
 752             if (dp == 0) {
 753                 dst = StringUTF16.newBytesFor(length);
 754             } else {
 755                 byte[] buf = StringUTF16.newBytesFor(length);
 756                 StringLatin1.inflate(dst, 0, buf, 0, dp);
 757                 dst = buf;
 758             }
 759             dp = decodeUTF8_UTF16(bytes, offset, sl, dst, dp, false);
 760         } else { // !COMPACT_STRINGS
 761             dst = StringUTF16.newBytesFor(length);
 762             dp = decodeUTF8_UTF16(bytes, offset, offset + length, dst, 0, false);
 763         }
 764         if (dp != length) {
 765             dst = Arrays.copyOf(dst, dp << 1);
 766         }
 767         return new String(dst, UTF16);
 768     }
 769 
 770     static String newStringNoRepl(byte[] src, Charset cs) throws CharacterCodingException {
             // Checked-exception front end for newStringNoRepl1(src, cs): returns its
             // result unchanged, and turns the IllegalArgumentException it may throw
             // back into the checked exception carried as that exception's cause.
 771         try {
 772             return newStringNoRepl1(src, cs);
 773         } catch (IllegalArgumentException e) {
 774             //newStringNoRepl1 throws IAE with MalformedInputException or CCE as the cause
 775             Throwable cause = e.getCause();
 776             if (cause instanceof MalformedInputException mie) {
                     // Rethrown through the pattern variable, so no cast is needed on this path.
 777                 throw mie;
 778             }
                 // NOTE(review): relies on every IAE reaching this point having a
                 // CharacterCodingException cause. Any other cause type would surface as a
                 // ClassCastException, and a null cause as a NullPointerException.
 779             throw (CharacterCodingException)cause;
 780         }
 781     }

1287     private static void throwUnmappable(int off) {
             // Never returns normally: always throws an IllegalArgumentException whose
             // cause is an UnmappableCharacterException constructed with length 1.
             // off is only used to build the message text.
             // NOTE(review): the message reads "malformed input" although the cause is an
             // UnmappableCharacterException - confirm whether callers or tests depend on
             // this exact wording before changing it.
1288         String msg = "malformed input off : " + off + ", length : 1";
1289         throw new IllegalArgumentException(msg, new UnmappableCharacterException(1));
1290     }
1291 
1292     private static void throwUnmappable(byte[] val) {
             // Convenience overload: derives the offset to report from the whole array
             // and delegates to throwUnmappable(int), which always throws.
             // Presumably countPositives returns the number of leading non-negative
             // bytes, i.e. the index of the first negative byte - confirm in StringCoding.
1293         int dp = StringCoding.countPositives(val, 0, val.length);
1294         throwUnmappable(dp);
1295     }
1296 
1297     private static byte[] encodeUTF8(byte coder, byte[] val, boolean doReplace) {
             // Encodes the string bytes in val as UTF-8.
             // coder selects the layout of val: UTF16 is handed to encodeUTF8_UTF16,
             // anything else is handled below as one byte per character.
             // doReplace is only forwarded to the UTF16 path; the code below never reads it.
             // On the one-byte path the result is never val itself: it is a clone, dst,
             // or a trimmed copy of dst.
1298         if (coder == UTF16) {
1299             return encodeUTF8_UTF16(val, doReplace);
1300         }
1301 
             // Fast path: presumably hasNegatives reports whether any byte in the range
             // is negative (confirm in StringCoding). If none is, every byte is in
             // 0x00-0x7f and is already its own UTF-8 encoding, so a copy is enough.
1302         if (!StringCoding.hasNegatives(val, 0, val.length)) {
1303             return val.clone();
1304         }
1305 
1306         int dp = 0;
             // The loop below can write up to two output bytes per input byte.
             // Presumably newBytesFor(n) returns a 2 * n byte array and guards against
             // size overflow - confirm in StringUTF16, as its body is not visible here.
1307         byte[] dst = StringUTF16.newBytesFor(val.length);
1308         for (byte c : val) {
1309             if (c < 0) {
                     // c & 0xff is 0x80-0xff here, so the lead byte is 0xc2 or 0xc3
                     // (0xc0 | top two bits) and the trail byte is 0x80 | low six bits.
1310                 dst[dp++] = (byte) (0xc0 | ((c & 0xff) >> 6));
1311                 dst[dp++] = (byte) (0x80 | (c & 0x3f));
1312             } else {
1313                 dst[dp++] = c;
1314             }
1315         }
             // No trimming is needed when the writes filled dst exactly.
1316         if (dp == dst.length) {
1317             return dst;
1318         }
             // Otherwise trim to the number of bytes actually written.
1319         return Arrays.copyOf(dst, dp);
1320     }
1321 
1322     private static byte[] encodeUTF8_UTF16(byte[] val, boolean doReplace) {
1323         int dp = 0;
1324         int sp = 0;
1325         int sl = val.length >> 1;
1326         byte[] dst = new byte[sl * 3];
1327         while (sp < sl) {
< prev index next >