1 /* 2 * Copyright (c) 2021, 2023, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 * 23 */ 24 25 import java.io.IOException; 26 import java.io.RandomAccessFile; 27 import java.lang.foreign.Arena; 28 import java.lang.foreign.FunctionDescriptor; 29 import java.lang.foreign.Linker; 30 import java.lang.foreign.MemoryLayout; 31 import java.lang.foreign.MemorySegment; 32 import java.lang.foreign.SegmentAllocator; 33 import java.lang.foreign.ValueLayout; 34 import java.lang.invoke.MethodHandle; 35 import java.lang.reflect.Field; 36 import java.nio.channels.FileChannel; 37 import java.nio.charset.Charset; 38 import java.nio.charset.StandardCharsets; 39 import java.nio.file.Files; 40 import java.nio.file.Path; 41 import java.nio.file.Paths; 42 import java.util.Arrays; 43 import java.util.List; 44 import java.util.Random; 45 import java.util.function.Consumer; 46 import java.util.function.UnaryOperator; 47 48 import jdk.internal.foreign.StringSupport; 49 import org.testng.annotations.*; 50 51 import static java.lang.foreign.ValueLayout.*; 52 import static org.testng.Assert.*; 53 54 /* 55 * @test 56 * @modules java.base/jdk.internal.foreign 57 * @run testng TestStringEncoding 58 */ 59 60 public class TestStringEncoding { 61 62 @Test(dataProvider = "strings") 63 public void testStrings(String testString) { 64 for (Charset charset : Charset.availableCharsets().values()) { 65 if (isStandard(charset)) { 66 for (Arena arena : arenas()) { 67 try (arena) { 68 MemorySegment text = arena.allocateFrom(testString, charset); 69 70 int terminatorSize = "\0".getBytes(charset).length; 71 if (charset == StandardCharsets.UTF_16) { 72 terminatorSize -= 2; // drop BOM 73 } 74 // Note that the JDK's UTF_32 encoder doesn't add a BOM. 75 // This is legal under the Unicode standard, and means the byte order is BE. 76 // See: https://unicode.org/faq/utf_bom.html#gen7 77 78 int expectedByteLength = 79 testString.getBytes(charset).length + 80 terminatorSize; 81 82 assertEquals(text.byteSize(), expectedByteLength); 83 84 String roundTrip = text.getString(0, charset); 85 if (charset.newEncoder().canEncode(testString)) { 86 assertEquals(roundTrip, testString); 87 } 88 } 89 } 90 } else { 91 assertThrows(UnsupportedOperationException.class, () -> Arena.global().allocateFrom(testString, charset)); 92 } 93 } 94 } 95 96 97 @Test(dataProvider = "strings") 98 public void testStringsHeap(String testString) { 99 for (Charset charset : singleByteCharsets()) { 100 for (var arena : arenas()) { 101 try (arena) { 102 MemorySegment text = arena.allocateFrom(testString, charset); 103 text = toHeapSegment(text); 104 105 int expectedByteLength = 106 testString.getBytes(charset).length + 1; 107 108 assertEquals(text.byteSize(), expectedByteLength); 109 110 String roundTrip = text.getString(0, charset); 111 if (charset.newEncoder().canEncode(testString)) { 112 assertEquals(roundTrip, testString); 113 } 114 } 115 } 116 } 117 } 118 119 MemorySegment toHeapSegment(MemorySegment segment) { 120 var heapArray = segment.toArray(JAVA_BYTE); 121 return MemorySegment.ofArray(heapArray); 122 } 123 124 @Test(dataProvider = "strings") 125 public void unboundedSegment(String testString) { 126 testModifyingSegment(testString, 127 standardCharsets(), 128 s -> s.reinterpret(Long.MAX_VALUE), 129 UnaryOperator.identity()); 130 } 131 132 @Test(dataProvider = "strings") 133 public void unalignedSegmentSingleByte(String testString) { 134 testModifyingSegment(testString, 135 singleByteCharsets(), 136 s -> s.byteSize() > 1 ? s.asSlice(1) : s, 137 s -> s.length() > 0 ? s.substring(1) : s); 138 } 139 140 @Test(dataProvider = "strings") 141 public void expandedSegment(String testString) { 142 try (var arena = Arena.ofConfined()) { 143 for (int i = 0; i < Long.BYTES; i++) { 144 int extra = i; 145 testModifyingSegment(testString, 146 // Single byte charsets 147 standardCharsets(), 148 s -> { 149 var s2 = arena.allocate(s.byteSize() + extra); 150 MemorySegment.copy(s, 0, s2, 0, s.byteSize()); 151 return s2; 152 }, 153 UnaryOperator.identity()); 154 } 155 } 156 } 157 158 public void testModifyingSegment(String testString, 159 List<Charset> charsets, 160 UnaryOperator<MemorySegment> segmentMapper, 161 UnaryOperator<String> stringMapper) { 162 for (var charset : charsets) { 163 try (Arena arena = Arena.ofConfined()) { 164 MemorySegment text = arena.allocateFrom(testString, charset); 165 text = segmentMapper.apply(text); 166 String roundTrip = text.getString(0, charset); 167 String expected = stringMapper.apply(testString); 168 if (charset.newEncoder().canEncode(testString)) { 169 assertEquals(roundTrip, expected); 170 } 171 } 172 } 173 } 174 175 @Test() 176 public void testPeculiarContentSingleByte() { 177 Random random = new Random(42); 178 for (int len = 7; len < 71; len++) { 179 for (var arena : arenas()) { 180 try (arena) { 181 var segment = arena.allocate(len, 1); 182 var arr = new byte[len]; 183 random.nextBytes(arr); 184 segment.copyFrom(MemorySegment.ofArray(arr)); 185 int terminatorIndex = random.nextInt(len); 186 segment.set(ValueLayout.JAVA_BYTE, terminatorIndex, (byte) 0); 187 for (Charset charset : singleByteCharsets()) { 188 var s = segment.getString(0, charset); 189 var ref = referenceImpl(segment, 0, charset); 190 assertEquals(s, ref); 191 } 192 } 193 } 194 } 195 } 196 197 @Test(dataProvider = "strings") 198 public void testOffset(String testString) { 199 if (testString.length() < 3 || !containsOnlyRegularCharacters(testString)) { 200 return; 201 } 202 for (var charset : singleByteCharsets()) { 203 for (var arena: arenas()) { 204 try (arena) { 205 MemorySegment inSegment = arena.allocateFrom(testString, charset); 206 for (int i = 0; i < 3; i++) { 207 String actual = inSegment.getString(i, charset); 208 assertEquals(actual, testString.substring(i)); 209 } 210 } 211 } 212 } 213 } 214 215 @Test() 216 public void testJumboSegment() { 217 testWithJumboSegment("testJumboSegment", segment -> { 218 segment.fill((byte) 1); 219 segment.set(JAVA_BYTE, Integer.MAX_VALUE + 10L, (byte) 0); 220 String big = segment.getString(100); 221 }); 222 } 223 224 @Test() 225 public void testStringLargerThanMaxInt() { 226 testWithJumboSegment("testStringLargerThanMaxInt", segment -> { 227 segment.fill((byte) 1); 228 segment.set(JAVA_BYTE, Integer.MAX_VALUE + 10L, (byte) 0); 229 assertThrows(IllegalArgumentException.class, () -> { 230 segment.getString(0); 231 }); 232 }); 233 } 234 235 private static void testWithJumboSegment(String testName, Consumer<MemorySegment> tester) { 236 Path path = Paths.get("mapped_file"); 237 try { 238 // Relly try to make sure the file is deleted after use 239 path.toFile().deleteOnExit(); 240 deleteIfExistsOrThrow(path); 241 try (RandomAccessFile raf = new RandomAccessFile(path.toFile(), "rw")) { 242 FileChannel fc = raf.getChannel(); 243 try (Arena arena = Arena.ofConfined()) { 244 var segment = fc.map(FileChannel.MapMode.READ_WRITE, 0L, (long) Integer.MAX_VALUE + 100, arena); 245 tester.accept(segment); 246 } 247 } 248 } catch (Exception e) { 249 throw new AssertionError(e); 250 } catch (OutOfMemoryError oome) { 251 // Unfortunately, we run out of memory and cannot run this test in this configuration 252 System.out.println("Skipping test because of insufficient memory: " + testName); 253 } finally { 254 deleteIfExistsOrThrow(path); 255 } 256 } 257 258 private static void deleteIfExistsOrThrow(Path file) { 259 try { 260 Files.deleteIfExists(file); 261 } catch (IOException ioe) { 262 throw new AssertionError("Unable to delete mapped file: " + file); 263 } 264 } 265 266 private static final MemoryLayout CHAR_POINTER = ADDRESS 267 .withTargetLayout(MemoryLayout.sequenceLayout(Long.MAX_VALUE, JAVA_BYTE)); 268 private static final Linker LINKER = Linker.nativeLinker(); 269 private static final MethodHandle STRCAT = LINKER.downcallHandle( 270 LINKER.defaultLookup().find("strcat").orElseThrow(), 271 FunctionDescriptor.of(CHAR_POINTER, CHAR_POINTER, CHAR_POINTER)); 272 273 @Test(dataProvider = "strings") 274 public void nativeSegFromNativeCall(String testString) { 275 String addition = "123"; 276 try (var arena = Arena.ofConfined()) { 277 try { 278 var testStringSegment = arena.allocateFrom(testString); 279 var additionSegment = arena.allocateFrom(addition); 280 var destination = arena.allocate(testStringSegment.byteSize() + additionSegment.byteSize() - 1); 281 destination.copyFrom(testStringSegment); 282 283 MemorySegment concatenation = (MemorySegment) STRCAT.invokeExact(destination, arena.allocateFrom(addition)); 284 var actual = concatenation.getString(0); 285 assertEquals(actual, testString + addition); 286 } catch (Throwable t) { 287 throw new AssertionError(t); 288 } 289 } 290 } 291 292 @Test 293 public void segmentationFault() { 294 for (int i = 1; i < 18; i++) { 295 var size = 1 << i; 296 try (var arena = Arena.ofConfined()) { 297 var seg = arena.allocate(size, size); 298 seg.fill((byte) 1); 299 try { 300 var s = seg.getString(0); 301 System.out.println("s.length() = " + s.length()); 302 } catch (IndexOutOfBoundsException e) { 303 // we will end up here if strlen finds a zero outside the MS 304 } 305 } 306 } 307 } 308 309 private static final int TEST_LENGTH_MAX = 277; 310 311 private Random deterministicRandom() { 312 return new Random(42); 313 } 314 315 @Test 316 public void chunked_strlen_byte() { 317 Random random = deterministicRandom(); 318 for (int skew = 0; skew < Long.BYTES; skew++) { 319 for (int len = 0; len < TEST_LENGTH_MAX; len++) { 320 try (var arena = Arena.ofConfined()) { 321 var segment = arena.allocate(len + 1 + skew) 322 .asSlice(skew); 323 for (int i = 0; i < len; i++) { 324 byte value; 325 while ((value = (byte) random.nextInt()) == 0) { 326 } 327 segment.setAtIndex(JAVA_BYTE, i, value); 328 } 329 segment.setAtIndex(JAVA_BYTE, len, (byte) 0); 330 for (int j = 0; j < len; j++) { 331 int actual = StringSupport.chunkedStrlenByte(segment, j); 332 assertEquals(actual, len - j); 333 } 334 } 335 } 336 } 337 } 338 339 @Test 340 public void chunked_strlen_short() { 341 Random random = deterministicRandom(); 342 for (int skew = 0; skew < Long.BYTES; skew += Short.BYTES) { 343 for (int len = 0; len < TEST_LENGTH_MAX; len++) { 344 try (var arena = Arena.ofConfined()) { 345 var segment = arena.allocate((len + 1) * Short.BYTES + skew, JAVA_SHORT.byteAlignment()) 346 .asSlice(skew); 347 for (int i = 0; i < len; i++) { 348 short value; 349 while ((value = (short) random.nextInt()) == 0) { 350 } 351 segment.setAtIndex(JAVA_SHORT, i, value); 352 } 353 segment.setAtIndex(JAVA_SHORT, len, (short) 0); 354 for (int j = 0; j < len; j++) { 355 int actual = StringSupport.chunkedStrlenShort(segment, j * Short.BYTES); 356 assertEquals(actual, (len - j) * Short.BYTES); 357 } 358 } 359 } 360 } 361 } 362 363 @Test 364 public void strlen_int() { 365 Random random = deterministicRandom(); 366 for (int skew = 0; skew < Long.BYTES; skew += Integer.BYTES) { 367 for (int len = 0; len < TEST_LENGTH_MAX; len++) { 368 try (var arena = Arena.ofConfined()) { 369 var segment = arena.allocate((len + 1) * Integer.BYTES + skew, JAVA_INT.byteAlignment()) 370 .asSlice(skew); 371 for (int i = 0; i < len; i++) { 372 int value; 373 while ((value = random.nextInt()) == 0) { 374 } 375 segment.setAtIndex(JAVA_INT, i, value); 376 } 377 segment.setAtIndex(JAVA_INT, len, 0); 378 for (int j = 0; j < len; j++) { 379 int actual = StringSupport.strlenInt(segment, j * Integer.BYTES); 380 assertEquals(actual, (len - j) * Integer.BYTES); 381 } 382 } 383 } 384 } 385 } 386 387 @DataProvider 388 public static Object[][] strings() { 389 return new Object[][]{ 390 {"testing"}, 391 {""}, 392 {"X"}, 393 {"12345"}, 394 {"yen \u00A5"}, 395 {"snowman \u26C4"}, 396 {"rainbow \uD83C\uDF08"}, 397 {"0"}, 398 {"01"}, 399 {"012"}, 400 {"0123"}, 401 {"01234"}, 402 {"012345"}, 403 {"0123456"}, 404 {"01234567"}, 405 {"012345678"}, 406 {"0123456789"} 407 }; 408 } 409 410 public static boolean containsOnlyRegularCharacters(String s) { 411 return s.chars() 412 .allMatch(c -> Character.isLetterOrDigit((char) c)); 413 } 414 415 boolean isStandard(Charset charset) { 416 for (Field standardCharset : StandardCharsets.class.getDeclaredFields()) { 417 try { 418 if (standardCharset.get(null) == charset) { 419 return true; 420 } 421 } catch (ReflectiveOperationException e) { 422 throw new AssertionError(e); 423 } 424 } 425 return false; 426 } 427 428 List<Charset> standardCharsets() { 429 return Charset.availableCharsets().values().stream() 430 .filter(this::isStandard) 431 .toList(); 432 } 433 434 List<Charset> singleByteCharsets() { 435 return Arrays.asList(StandardCharsets.UTF_8, StandardCharsets.ISO_8859_1, StandardCharsets.US_ASCII); 436 } 437 438 static String referenceImpl(MemorySegment segment, long offset, Charset charset) { 439 long len = strlen_byte(segment, offset); 440 byte[] bytes = new byte[(int) len]; 441 MemorySegment.copy(segment, JAVA_BYTE, offset, bytes, 0, (int) len); 442 return new String(bytes, charset); 443 } 444 445 // Reference implementation 446 private static int strlen_byte(MemorySegment segment, long start) { 447 // iterate until overflow (String can only hold a byte[], whose length can be expressed as an int) 448 for (int offset = 0; offset >= 0; offset++) { 449 byte curr = segment.get(JAVA_BYTE, start + offset); 450 if (curr == 0) { 451 return offset; 452 } 453 } 454 throw new IllegalArgumentException("String too large"); 455 } 456 457 private static List<Arena> arenas() { 458 return Arrays.asList( 459 Arena.ofConfined(), // Native memory 460 new HeapArena(byte.class), // Heap memory backed by a byte array 461 new HeapArena(short.class), // Heap memory backed by a short array 462 new HeapArena(int.class), // Heap memory backed by an int array 463 new HeapArena(long.class)); // Heap memory backed by a long array 464 } 465 466 private static final class HeapArena implements Arena { 467 468 private static final int ELEMENT_SIZE = 1_000; 469 470 private final MemorySegment backingSegment; 471 private final SegmentAllocator allocator; 472 473 public HeapArena(Class<?> type) { 474 backingSegment = switch (type) { 475 case Class<?> c when byte.class.equals(c) -> MemorySegment.ofArray(new byte[ELEMENT_SIZE]); 476 case Class<?> c when short.class.equals(c) -> 477 MemorySegment.ofArray(new short[ELEMENT_SIZE]); 478 case Class<?> c when int.class.equals(c) -> 479 MemorySegment.ofArray(new int[ELEMENT_SIZE]); 480 case Class<?> c when long.class.equals(c) -> 481 MemorySegment.ofArray(new long[ELEMENT_SIZE]); 482 default -> throw new IllegalArgumentException(type.toString()); 483 }; 484 allocator = SegmentAllocator.slicingAllocator(backingSegment); 485 } 486 487 @Override 488 public MemorySegment allocate(long byteSize, long byteAlignment) { 489 return allocator.allocate(byteSize, byteAlignment); 490 } 491 492 @Override 493 public MemorySegment.Scope scope() { 494 return backingSegment.scope(); 495 } 496 497 @Override 498 public void close() { 499 // Do nothing 500 } 501 502 @Override 503 public String toString() { 504 return "HeapArena{" + 505 "type=" + backingSegment.heapBase().orElseThrow().getClass().getName() + 506 '}'; 507 } 508 } 509 510 }