< prev index next > test/jdk/java/foreign/TestStringEncoding.java
Print this page
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
import java.lang.foreign.Arena;
import java.lang.foreign.MemorySegment;
import org.testng.annotations.*;
import static org.testng.Assert.*;
/*
* @test
! * @enablePreview
- * @requires jdk.foreign.linker != "UNSUPPORTED"
* @run testng TestStringEncoding
*/
public class TestStringEncoding {
@Test(dataProvider = "strings")
! public void testStrings(String testString, int expectedByteLength) {
! try (Arena arena = Arena.ofConfined()) {
! MemorySegment text = arena.allocateUtf8String(testString);
! assertEquals(text.byteSize(), expectedByteLength);
! String roundTrip = text.getUtf8String(0);
! assertEquals(roundTrip, testString);
}
}
@DataProvider
public static Object[][] strings() {
! return new Object[][] {
! { "testing", 8 },
! { "", 1 },
! { "X", 2 },
! { "12345", 6 },
! { "yen \u00A5", 7 }, // in UTF-8 2 bytes: 0xC2 0xA5
! { "snowman \u26C4", 12 }, // in UTF-8 three bytes: 0xE2 0x9B 0x84
! { "rainbow \uD83C\uDF08", 13 } // in UTF-8 four bytes: 0xF0 0x9F 0x8C 0x88
};
}
}
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
+ import java.io.IOException;
+ import java.io.RandomAccessFile;
import java.lang.foreign.Arena;
+ import java.lang.foreign.FunctionDescriptor;
+ import java.lang.foreign.Linker;
+ import java.lang.foreign.MemoryLayout;
import java.lang.foreign.MemorySegment;
+ import java.lang.foreign.SegmentAllocator;
+ import java.lang.foreign.ValueLayout;
+ import java.lang.invoke.MethodHandle;
+ import java.lang.reflect.Field;
+ import java.nio.channels.FileChannel;
+ import java.nio.charset.Charset;
+ import java.nio.charset.StandardCharsets;
+ import java.nio.file.Files;
+ import java.nio.file.Path;
+ import java.nio.file.Paths;
+ import java.util.Arrays;
+ import java.util.List;
+ import java.util.Random;
+ import java.util.function.Consumer;
+ import java.util.function.UnaryOperator;
+ import jdk.internal.foreign.StringSupport;
import org.testng.annotations.*;
+
+ import static java.lang.foreign.ValueLayout.*;
import static org.testng.Assert.*;
/*
* @test
! * @modules java.base/jdk.internal.foreign
* @run testng TestStringEncoding
*/
public class TestStringEncoding {
@Test(dataProvider = "strings")
! public void testStrings(String testString) {
! for (Charset charset : Charset.availableCharsets().values()) {
! if (isStandard(charset)) {
+ for (Arena arena : arenas()) {
+ try (arena) {
+ MemorySegment text = arena.allocateFrom(testString, charset);
+
+ int terminatorSize = "\0".getBytes(charset).length;
+ if (charset == StandardCharsets.UTF_16) {
+ terminatorSize -= 2; // drop BOM
+ }
+ // Note that the JDK's UTF_32 encoder doesn't add a BOM.
+ // This is legal under the Unicode standard, and means the byte order is BE.
+ // See: https://unicode.org/faq/utf_bom.html#gen7
+
+ int expectedByteLength =
+ testString.getBytes(charset).length +
+ terminatorSize;
+
+ assertEquals(text.byteSize(), expectedByteLength);
+
+ String roundTrip = text.getString(0, charset);
+ if (charset.newEncoder().canEncode(testString)) {
+ assertEquals(roundTrip, testString);
+ }
+ }
+ }
+ } else {
+ assertThrows(UnsupportedOperationException.class, () -> Arena.global().allocateFrom(testString, charset));
+ }
+ }
+ }
+
+
+ @Test(dataProvider = "strings")
+ public void testStringsHeap(String testString) {
+ for (Charset charset : singleByteCharsets()) {
+ for (var arena : arenas()) {
+ try (arena) {
+ MemorySegment text = arena.allocateFrom(testString, charset);
+ text = toHeapSegment(text);
+
+ int expectedByteLength =
+ testString.getBytes(charset).length + 1;
+
+ assertEquals(text.byteSize(), expectedByteLength);
+
+ String roundTrip = text.getString(0, charset);
+ if (charset.newEncoder().canEncode(testString)) {
+ assertEquals(roundTrip, testString);
+ }
+ }
+ }
+ }
+ }
+
+ MemorySegment toHeapSegment(MemorySegment segment) {
+ var heapArray = segment.toArray(JAVA_BYTE);
+ return MemorySegment.ofArray(heapArray);
+ }
+
+ @Test(dataProvider = "strings")
+ public void unboundedSegment(String testString) {
+ testModifyingSegment(testString,
+ standardCharsets(),
+ s -> s.reinterpret(Long.MAX_VALUE),
+ UnaryOperator.identity());
+ }
+
+ @Test(dataProvider = "strings")
+ public void unalignedSegmentSingleByte(String testString) {
+ testModifyingSegment(testString,
+ singleByteCharsets(),
+ s -> s.byteSize() > 1 ? s.asSlice(1) : s,
+ s -> s.length() > 0 ? s.substring(1) : s);
+ }
+
+ @Test(dataProvider = "strings")
+ public void expandedSegment(String testString) {
+ try (var arena = Arena.ofConfined()) {
+ for (int i = 0; i < Long.BYTES; i++) {
+ int extra = i;
+ testModifyingSegment(testString,
+ // Single byte charsets
+ standardCharsets(),
+ s -> {
+ var s2 = arena.allocate(s.byteSize() + extra);
+ MemorySegment.copy(s, 0, s2, 0, s.byteSize());
+ return s2;
+ },
+ UnaryOperator.identity());
+ }
+ }
+ }
+
+ public void testModifyingSegment(String testString,
+ List<Charset> charsets,
+ UnaryOperator<MemorySegment> segmentMapper,
+ UnaryOperator<String> stringMapper) {
+ for (var charset : charsets) {
+ try (Arena arena = Arena.ofConfined()) {
+ MemorySegment text = arena.allocateFrom(testString, charset);
+ text = segmentMapper.apply(text);
+ String roundTrip = text.getString(0, charset);
+ String expected = stringMapper.apply(testString);
+ if (charset.newEncoder().canEncode(testString)) {
+ assertEquals(roundTrip, expected);
+ }
+ }
+ }
+ }
+
+ @Test()
+ public void testPeculiarContentSingleByte() {
+ Random random = new Random(42);
+ for (int len = 7; len < 71; len++) {
+ for (var arena : arenas()) {
+ try (arena) {
+ var segment = arena.allocate(len, 1);
+ var arr = new byte[len];
+ random.nextBytes(arr);
+ segment.copyFrom(MemorySegment.ofArray(arr));
+ int terminatorIndex = random.nextInt(len);
+ segment.set(ValueLayout.JAVA_BYTE, terminatorIndex, (byte) 0);
+ for (Charset charset : singleByteCharsets()) {
+ var s = segment.getString(0, charset);
+ var ref = referenceImpl(segment, 0, charset);
+ assertEquals(s, ref);
+ }
+ }
+ }
+ }
+ }
+
+ @Test(dataProvider = "strings")
+ public void testOffset(String testString) {
+ if (testString.length() < 3 || !containsOnlyRegularCharacters(testString)) {
+ return;
+ }
+ for (var charset : singleByteCharsets()) {
+ for (var arena: arenas()) {
+ try (arena) {
+ MemorySegment inSegment = arena.allocateFrom(testString, charset);
+ for (int i = 0; i < 3; i++) {
+ String actual = inSegment.getString(i, charset);
+ assertEquals(actual, testString.substring(i));
+ }
+ }
+ }
+ }
+ }
+
+ @Test()
+ public void testJumboSegment() {
+ testWithJumboSegment("testJumboSegment", segment -> {
+ segment.fill((byte) 1);
+ segment.set(JAVA_BYTE, Integer.MAX_VALUE + 10L, (byte) 0);
+ String big = segment.getString(100);
+ });
+ }
+
+ @Test()
+ public void testStringLargerThanMaxInt() {
+ testWithJumboSegment("testStringLargerThanMaxInt", segment -> {
+ segment.fill((byte) 1);
+ segment.set(JAVA_BYTE, Integer.MAX_VALUE + 10L, (byte) 0);
+ assertThrows(IllegalArgumentException.class, () -> {
+ segment.getString(0);
+ });
+ });
+ }
+
+ private static void testWithJumboSegment(String testName, Consumer<MemorySegment> tester) {
+ Path path = Paths.get("mapped_file");
+ try {
+ // Relly try to make sure the file is deleted after use
+ path.toFile().deleteOnExit();
+ deleteIfExistsOrThrow(path);
+ try (RandomAccessFile raf = new RandomAccessFile(path.toFile(), "rw")) {
+ FileChannel fc = raf.getChannel();
+ try (Arena arena = Arena.ofConfined()) {
+ var segment = fc.map(FileChannel.MapMode.READ_WRITE, 0L, (long) Integer.MAX_VALUE + 100, arena);
+ tester.accept(segment);
+ }
+ }
+ } catch (Exception e) {
+ throw new AssertionError(e);
+ } catch (OutOfMemoryError oome) {
+ // Unfortunately, we run out of memory and cannot run this test in this configuration
+ System.out.println("Skipping test because of insufficient memory: " + testName);
+ } finally {
+ deleteIfExistsOrThrow(path);
+ }
+ }
+
+ private static void deleteIfExistsOrThrow(Path file) {
+ try {
+ Files.deleteIfExists(file);
+ } catch (IOException ioe) {
+ throw new AssertionError("Unable to delete mapped file: " + file);
+ }
+ }
+
// Linker used to look up and bind the native function below
private static final Linker LINKER = Linker.nativeLinker();
// Address layout whose target is a sequence of Long.MAX_VALUE bytes
private static final MemoryLayout CHAR_POINTER;
// Downcall handle for the native "strcat" symbol: takes two CHAR_POINTERs, returns one
private static final MethodHandle STRCAT;

static {
    var unboundedBytes = MemoryLayout.sequenceLayout(Long.MAX_VALUE, JAVA_BYTE);
    CHAR_POINTER = ADDRESS.withTargetLayout(unboundedBytes);
    var strcatSymbol = LINKER.defaultLookup().find("strcat").orElseThrow();
    STRCAT = LINKER.downcallHandle(
            strcatSymbol,
            FunctionDescriptor.of(CHAR_POINTER, CHAR_POINTER, CHAR_POINTER));
}
+
+ @Test(dataProvider = "strings")
+ public void nativeSegFromNativeCall(String testString) {
+ String addition = "123";
+ try (var arena = Arena.ofConfined()) {
+ try {
+ var testStringSegment = arena.allocateFrom(testString);
+ var additionSegment = arena.allocateFrom(addition);
+ var destination = arena.allocate(testStringSegment.byteSize() + additionSegment.byteSize() - 1);
+ destination.copyFrom(testStringSegment);
+
+ MemorySegment concatenation = (MemorySegment) STRCAT.invokeExact(destination, arena.allocateFrom(addition));
+ var actual = concatenation.getString(0);
+ assertEquals(actual, testString + addition);
+ } catch (Throwable t) {
+ throw new AssertionError(t);
+ }
+ }
+ }
+
+ @Test
+ public void segmentationFault() {
+ for (int i = 1; i < 18; i++) {
+ var size = 1 << i;
+ try (var arena = Arena.ofConfined()) {
+ var seg = arena.allocate(size, size);
+ seg.fill((byte) 1);
+ try {
+ var s = seg.getString(0);
+ System.out.println("s.length() = " + s.length());
+ } catch (IndexOutOfBoundsException e) {
+ // we will end up here if strlen finds a zero outside the MS
+ }
+ }
+ }
+ }
+
+ private static final int TEST_LENGTH_MAX = 277;
+
+ private Random deterministicRandom() {
+ return new Random(42);
+ }
! @Test
+ public void chunked_strlen_byte() {
+ Random random = deterministicRandom();
+ for (int skew = 0; skew < Long.BYTES; skew++) {
+ for (int len = 0; len < TEST_LENGTH_MAX; len++) {
+ try (var arena = Arena.ofConfined()) {
+ var segment = arena.allocate(len + 1 + skew)
+ .asSlice(skew);
+ for (int i = 0; i < len; i++) {
+ byte value;
+ while ((value = (byte) random.nextInt()) == 0) {
+ }
+ segment.setAtIndex(JAVA_BYTE, i, value);
+ }
+ segment.setAtIndex(JAVA_BYTE, len, (byte) 0);
+ for (int j = 0; j < len; j++) {
+ int actual = StringSupport.chunkedStrlenByte(segment, j);
+ assertEquals(actual, len - j);
+ }
+ }
+ }
+ }
+ }
+
+ @Test
+ public void chunked_strlen_short() {
+ Random random = deterministicRandom();
+ for (int skew = 0; skew < Long.BYTES; skew += Short.BYTES) {
+ for (int len = 0; len < TEST_LENGTH_MAX; len++) {
+ try (var arena = Arena.ofConfined()) {
+ var segment = arena.allocate((len + 1) * Short.BYTES + skew, JAVA_SHORT.byteAlignment())
+ .asSlice(skew);
+ for (int i = 0; i < len; i++) {
+ short value;
+ while ((value = (short) random.nextInt()) == 0) {
+ }
+ segment.setAtIndex(JAVA_SHORT, i, value);
+ }
+ segment.setAtIndex(JAVA_SHORT, len, (short) 0);
+ for (int j = 0; j < len; j++) {
+ int actual = StringSupport.chunkedStrlenShort(segment, j * Short.BYTES);
+ assertEquals(actual, (len - j) * Short.BYTES);
+ }
+ }
+ }
+ }
+ }
! @Test
! public void strlen_int() {
+ Random random = deterministicRandom();
+ for (int skew = 0; skew < Long.BYTES; skew += Integer.BYTES) {
+ for (int len = 0; len < TEST_LENGTH_MAX; len++) {
+ try (var arena = Arena.ofConfined()) {
+ var segment = arena.allocate((len + 1) * Integer.BYTES + skew, JAVA_INT.byteAlignment())
+ .asSlice(skew);
+ for (int i = 0; i < len; i++) {
+ int value;
+ while ((value = random.nextInt()) == 0) {
+ }
+ segment.setAtIndex(JAVA_INT, i, value);
+ }
+ segment.setAtIndex(JAVA_INT, len, 0);
+ for (int j = 0; j < len; j++) {
+ int actual = StringSupport.strlenInt(segment, j * Integer.BYTES);
+ assertEquals(actual, (len - j) * Integer.BYTES);
+ }
+ }
+ }
}
}
@DataProvider
public static Object[][] strings() {
! return new Object[][]{
! {"testing"},
! {""},
! {"X"},
! {"12345"},
! {"yen \u00A5"},
! {"snowman \u26C4"},
! {"rainbow \uD83C\uDF08"},
+ {"0"},
+ {"01"},
+ {"012"},
+ {"0123"},
+ {"01234"},
+ {"012345"},
+ {"0123456"},
+ {"01234567"},
+ {"012345678"},
+ {"0123456789"}
};
}
+
+ public static boolean containsOnlyRegularCharacters(String s) {
+ return s.chars()
+ .allMatch(c -> Character.isLetterOrDigit((char) c));
+ }
+
+ boolean isStandard(Charset charset) {
+ for (Field standardCharset : StandardCharsets.class.getDeclaredFields()) {
+ try {
+ if (standardCharset.get(null) == charset) {
+ return true;
+ }
+ } catch (ReflectiveOperationException e) {
+ throw new AssertionError(e);
+ }
+ }
+ return false;
+ }
+
+ List<Charset> standardCharsets() {
+ return Charset.availableCharsets().values().stream()
+ .filter(this::isStandard)
+ .toList();
+ }
+
+ List<Charset> singleByteCharsets() {
+ return Arrays.asList(StandardCharsets.UTF_8, StandardCharsets.ISO_8859_1, StandardCharsets.US_ASCII);
+ }
+
+ static String referenceImpl(MemorySegment segment, long offset, Charset charset) {
+ long len = strlen_byte(segment, offset);
+ byte[] bytes = new byte[(int) len];
+ MemorySegment.copy(segment, JAVA_BYTE, offset, bytes, 0, (int) len);
+ return new String(bytes, charset);
+ }
+
+ // Reference implementation
+ private static int strlen_byte(MemorySegment segment, long start) {
+ // iterate until overflow (String can only hold a byte[], whose length can be expressed as an int)
+ for (int offset = 0; offset >= 0; offset++) {
+ byte curr = segment.get(JAVA_BYTE, start + offset);
+ if (curr == 0) {
+ return offset;
+ }
+ }
+ throw new IllegalArgumentException("String too large");
+ }
+
+ private static List<Arena> arenas() {
+ return Arrays.asList(
+ Arena.ofConfined(), // Native memory
+ new HeapArena(byte.class), // Heap memory backed by a byte array
+ new HeapArena(short.class), // Heap memory backed by a short array
+ new HeapArena(int.class), // Heap memory backed by an int array
+ new HeapArena(long.class)); // Heap memory backed by a long array
+ }
+
+ private static final class HeapArena implements Arena {
+
+ private static final int ELEMENT_SIZE = 1_000;
+
+ private final MemorySegment backingSegment;
+ private final SegmentAllocator allocator;
+
+ public HeapArena(Class<?> type) {
+ backingSegment = switch (type) {
+ case Class<?> c when byte.class.equals(c) -> MemorySegment.ofArray(new byte[ELEMENT_SIZE]);
+ case Class<?> c when short.class.equals(c) ->
+ MemorySegment.ofArray(new short[ELEMENT_SIZE]);
+ case Class<?> c when int.class.equals(c) ->
+ MemorySegment.ofArray(new int[ELEMENT_SIZE]);
+ case Class<?> c when long.class.equals(c) ->
+ MemorySegment.ofArray(new long[ELEMENT_SIZE]);
+ default -> throw new IllegalArgumentException(type.toString());
+ };
+ allocator = SegmentAllocator.slicingAllocator(backingSegment);
+ }
+
+ @Override
+ public MemorySegment allocate(long byteSize, long byteAlignment) {
+ return allocator.allocate(byteSize, byteAlignment);
+ }
+
+ @Override
+ public MemorySegment.Scope scope() {
+ return backingSegment.scope();
+ }
+
+ @Override
+ public void close() {
+ // Do nothing
+ }
+
+ @Override
+ public String toString() {
+ return "HeapArena{" +
+ "type=" + backingSegment.heapBase().orElseThrow().getClass().getName() +
+ '}';
+ }
+ }
+
}
< prev index next >