5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 import java.lang.foreign.Arena;
26 import java.lang.foreign.MemorySegment;
27
28 import org.testng.annotations.*;
29 import static org.testng.Assert.*;
30
31 /*
32 * @test
33 * @enablePreview
34 * @requires jdk.foreign.linker != "UNSUPPORTED"
35 * @run testng TestStringEncoding
36 */
37
38 public class TestStringEncoding {
39
40 @Test(dataProvider = "strings")
41 public void testStrings(String testString, int expectedByteLength) {
42 try (Arena arena = Arena.ofConfined()) {
43 MemorySegment text = arena.allocateUtf8String(testString);
44
45 assertEquals(text.byteSize(), expectedByteLength);
46
47 String roundTrip = text.getUtf8String(0);
48 assertEquals(roundTrip, testString);
49 }
50 }
51
52 @DataProvider
53 public static Object[][] strings() {
54 return new Object[][] {
55 { "testing", 8 },
56 { "", 1 },
57 { "X", 2 },
58 { "12345", 6 },
59 { "yen \u00A5", 7 }, // in UTF-8 2 bytes: 0xC2 0xA5
60 { "snowman \u26C4", 12 }, // in UTF-8 three bytes: 0xE2 0x9B 0x84
61 { "rainbow \uD83C\uDF08", 13 } // in UTF-8 four bytes: 0xF0 0x9F 0x8C 0x88
62 };
63 }
64 }
|
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 import java.io.IOException;
26 import java.io.RandomAccessFile;
27 import java.lang.foreign.Arena;
28 import java.lang.foreign.FunctionDescriptor;
29 import java.lang.foreign.Linker;
30 import java.lang.foreign.MemoryLayout;
31 import java.lang.foreign.MemorySegment;
32 import java.lang.foreign.SegmentAllocator;
33 import java.lang.foreign.ValueLayout;
34 import java.lang.invoke.MethodHandle;
35 import java.lang.reflect.Field;
36 import java.nio.channels.FileChannel;
37 import java.nio.charset.Charset;
38 import java.nio.charset.StandardCharsets;
39 import java.nio.file.Files;
40 import java.nio.file.Path;
41 import java.nio.file.Paths;
42 import java.util.Arrays;
43 import java.util.List;
44 import java.util.Random;
45 import java.util.function.Consumer;
46 import java.util.function.UnaryOperator;
47
48 import jdk.internal.foreign.StringSupport;
49 import org.testng.annotations.*;
50
51 import static java.lang.foreign.ValueLayout.*;
52 import static org.testng.Assert.*;
53
54 /*
55 * @test
56 * @modules java.base/jdk.internal.foreign
57 * @run testng TestStringEncoding
58 */
59
60 public class TestStringEncoding {
61
62 @Test(dataProvider = "strings")
63 public void testStrings(String testString) {
64 for (Charset charset : Charset.availableCharsets().values()) {
65 if (isStandard(charset)) {
66 for (Arena arena : arenas()) {
67 try (arena) {
68 MemorySegment text = arena.allocateFrom(testString, charset);
69
70 int terminatorSize = "\0".getBytes(charset).length;
71 if (charset == StandardCharsets.UTF_16) {
72 terminatorSize -= 2; // drop BOM
73 }
74 // Note that the JDK's UTF_32 encoder doesn't add a BOM.
75 // This is legal under the Unicode standard, and means the byte order is BE.
76 // See: https://unicode.org/faq/utf_bom.html#gen7
77
78 int expectedByteLength =
79 testString.getBytes(charset).length +
80 terminatorSize;
81
82 assertEquals(text.byteSize(), expectedByteLength);
83
84 String roundTrip = text.getString(0, charset);
85 if (charset.newEncoder().canEncode(testString)) {
86 assertEquals(roundTrip, testString);
87 }
88 }
89 }
90 } else {
91 assertThrows(UnsupportedOperationException.class, () -> Arena.global().allocateFrom(testString, charset));
92 }
93 }
94 }
95
96
97 @Test(dataProvider = "strings")
98 public void testStringsHeap(String testString) {
99 for (Charset charset : singleByteCharsets()) {
100 for (var arena : arenas()) {
101 try (arena) {
102 MemorySegment text = arena.allocateFrom(testString, charset);
103 text = toHeapSegment(text);
104
105 int expectedByteLength =
106 testString.getBytes(charset).length + 1;
107
108 assertEquals(text.byteSize(), expectedByteLength);
109
110 String roundTrip = text.getString(0, charset);
111 if (charset.newEncoder().canEncode(testString)) {
112 assertEquals(roundTrip, testString);
113 }
114 }
115 }
116 }
117 }
118
119 MemorySegment toHeapSegment(MemorySegment segment) {
120 var heapArray = segment.toArray(JAVA_BYTE);
121 return MemorySegment.ofArray(heapArray);
122 }
123
124 @Test(dataProvider = "strings")
125 public void unboundedSegment(String testString) {
126 testModifyingSegment(testString,
127 standardCharsets(),
128 s -> s.reinterpret(Long.MAX_VALUE),
129 UnaryOperator.identity());
130 }
131
132 @Test(dataProvider = "strings")
133 public void unalignedSegmentSingleByte(String testString) {
134 testModifyingSegment(testString,
135 singleByteCharsets(),
136 s -> s.byteSize() > 1 ? s.asSlice(1) : s,
137 s -> s.length() > 0 ? s.substring(1) : s);
138 }
139
140 @Test(dataProvider = "strings")
141 public void expandedSegment(String testString) {
142 try (var arena = Arena.ofConfined()) {
143 for (int i = 0; i < Long.BYTES; i++) {
144 int extra = i;
145 testModifyingSegment(testString,
146 // Single byte charsets
147 standardCharsets(),
148 s -> {
149 var s2 = arena.allocate(s.byteSize() + extra);
150 MemorySegment.copy(s, 0, s2, 0, s.byteSize());
151 return s2;
152 },
153 UnaryOperator.identity());
154 }
155 }
156 }
157
158 public void testModifyingSegment(String testString,
159 List<Charset> charsets,
160 UnaryOperator<MemorySegment> segmentMapper,
161 UnaryOperator<String> stringMapper) {
162 for (var charset : charsets) {
163 try (Arena arena = Arena.ofConfined()) {
164 MemorySegment text = arena.allocateFrom(testString, charset);
165 text = segmentMapper.apply(text);
166 String roundTrip = text.getString(0, charset);
167 String expected = stringMapper.apply(testString);
168 if (charset.newEncoder().canEncode(testString)) {
169 assertEquals(roundTrip, expected);
170 }
171 }
172 }
173 }
174
175 @Test()
176 public void testPeculiarContentSingleByte() {
177 Random random = new Random(42);
178 for (int len = 7; len < 71; len++) {
179 for (var arena : arenas()) {
180 try (arena) {
181 var segment = arena.allocate(len, 1);
182 var arr = new byte[len];
183 random.nextBytes(arr);
184 segment.copyFrom(MemorySegment.ofArray(arr));
185 int terminatorIndex = random.nextInt(len);
186 segment.set(ValueLayout.JAVA_BYTE, terminatorIndex, (byte) 0);
187 for (Charset charset : singleByteCharsets()) {
188 var s = segment.getString(0, charset);
189 var ref = referenceImpl(segment, 0, charset);
190 assertEquals(s, ref);
191 }
192 }
193 }
194 }
195 }
196
197 @Test(dataProvider = "strings")
198 public void testOffset(String testString) {
199 if (testString.length() < 3 || !containsOnlyRegularCharacters(testString)) {
200 return;
201 }
202 for (var charset : singleByteCharsets()) {
203 for (var arena: arenas()) {
204 try (arena) {
205 MemorySegment inSegment = arena.allocateFrom(testString, charset);
206 for (int i = 0; i < 3; i++) {
207 String actual = inSegment.getString(i, charset);
208 assertEquals(actual, testString.substring(i));
209 }
210 }
211 }
212 }
213 }
214
215 @Test()
216 public void testJumboSegment() {
217 testWithJumboSegment("testJumboSegment", segment -> {
218 segment.fill((byte) 1);
219 segment.set(JAVA_BYTE, Integer.MAX_VALUE + 10L, (byte) 0);
220 String big = segment.getString(100);
221 });
222 }
223
224 @Test()
225 public void testStringLargerThanMaxInt() {
226 testWithJumboSegment("testStringLargerThanMaxInt", segment -> {
227 segment.fill((byte) 1);
228 segment.set(JAVA_BYTE, Integer.MAX_VALUE + 10L, (byte) 0);
229 assertThrows(IllegalArgumentException.class, () -> {
230 segment.getString(0);
231 });
232 });
233 }
234
235 private static void testWithJumboSegment(String testName, Consumer<MemorySegment> tester) {
236 Path path = Paths.get("mapped_file");
237 try {
238 // Relly try to make sure the file is deleted after use
239 path.toFile().deleteOnExit();
240 deleteIfExistsOrThrow(path);
241 try (RandomAccessFile raf = new RandomAccessFile(path.toFile(), "rw")) {
242 FileChannel fc = raf.getChannel();
243 try (Arena arena = Arena.ofConfined()) {
244 var segment = fc.map(FileChannel.MapMode.READ_WRITE, 0L, (long) Integer.MAX_VALUE + 100, arena);
245 tester.accept(segment);
246 }
247 }
248 } catch (Exception e) {
249 throw new AssertionError(e);
250 } catch (OutOfMemoryError oome) {
251 // Unfortunately, we run out of memory and cannot run this test in this configuration
252 System.out.println("Skipping test because of insufficient memory: " + testName);
253 } finally {
254 deleteIfExistsOrThrow(path);
255 }
256 }
257
258 private static void deleteIfExistsOrThrow(Path file) {
259 try {
260 Files.deleteIfExists(file);
261 } catch (IOException ioe) {
262 throw new AssertionError("Unable to delete mapped file: " + file);
263 }
264 }
265
// Address layout whose target layout is a sequence of Long.MAX_VALUE bytes;
// used below for both the parameters and the return value of strcat
private static final MemoryLayout CHAR_POINTER = ADDRESS
        .withTargetLayout(MemoryLayout.sequenceLayout(Long.MAX_VALUE, JAVA_BYTE));
// The native linker of the current platform
private static final Linker LINKER = Linker.nativeLinker();
// Downcall handle for the "strcat" symbol found through the linker's default lookup:
// takes two CHAR_POINTER arguments and returns a CHAR_POINTER
private static final MethodHandle STRCAT = LINKER.downcallHandle(
        LINKER.defaultLookup().find("strcat").orElseThrow(),
        FunctionDescriptor.of(CHAR_POINTER, CHAR_POINTER, CHAR_POINTER));
272
273 @Test(dataProvider = "strings")
274 public void nativeSegFromNativeCall(String testString) {
275 String addition = "123";
276 try (var arena = Arena.ofConfined()) {
277 try {
278 var testStringSegment = arena.allocateFrom(testString);
279 var additionSegment = arena.allocateFrom(addition);
280 var destination = arena.allocate(testStringSegment.byteSize() + additionSegment.byteSize() - 1);
281 destination.copyFrom(testStringSegment);
282
283 MemorySegment concatenation = (MemorySegment) STRCAT.invokeExact(destination, arena.allocateFrom(addition));
284 var actual = concatenation.getString(0);
285 assertEquals(actual, testString + addition);
286 } catch (Throwable t) {
287 throw new AssertionError(t);
288 }
289 }
290 }
291
292 @Test
293 public void segmentationFault() {
294 for (int i = 1; i < 18; i++) {
295 var size = 1 << i;
296 try (var arena = Arena.ofConfined()) {
297 var seg = arena.allocate(size, size);
298 seg.fill((byte) 1);
299 try {
300 var s = seg.getString(0);
301 System.out.println("s.length() = " + s.length());
302 } catch (IndexOutOfBoundsException e) {
303 // we will end up here if strlen finds a zero outside the MS
304 }
305 }
306 }
307 }
308
309 private static final int TEST_LENGTH_MAX = 277;
310
311 private Random deterministicRandom() {
312 return new Random(42);
313 }
314
315 @Test
316 public void chunked_strlen_byte() {
317 Random random = deterministicRandom();
318 for (int skew = 0; skew < Long.BYTES; skew++) {
319 for (int len = 0; len < TEST_LENGTH_MAX; len++) {
320 try (var arena = Arena.ofConfined()) {
321 var segment = arena.allocate(len + 1 + skew)
322 .asSlice(skew);
323 for (int i = 0; i < len; i++) {
324 byte value;
325 while ((value = (byte) random.nextInt()) == 0) {
326 }
327 segment.setAtIndex(JAVA_BYTE, i, value);
328 }
329 segment.setAtIndex(JAVA_BYTE, len, (byte) 0);
330 for (int j = 0; j < len; j++) {
331 int actual = StringSupport.chunkedStrlenByte(segment, j);
332 assertEquals(actual, len - j);
333 }
334 }
335 }
336 }
337 }
338
339 @Test
340 public void chunked_strlen_short() {
341 Random random = deterministicRandom();
342 for (int skew = 0; skew < Long.BYTES; skew += Short.BYTES) {
343 for (int len = 0; len < TEST_LENGTH_MAX; len++) {
344 try (var arena = Arena.ofConfined()) {
345 var segment = arena.allocate((len + 1) * Short.BYTES + skew, JAVA_SHORT.byteAlignment())
346 .asSlice(skew);
347 for (int i = 0; i < len; i++) {
348 short value;
349 while ((value = (short) random.nextInt()) == 0) {
350 }
351 segment.setAtIndex(JAVA_SHORT, i, value);
352 }
353 segment.setAtIndex(JAVA_SHORT, len, (short) 0);
354 for (int j = 0; j < len; j++) {
355 int actual = StringSupport.chunkedStrlenShort(segment, j * Short.BYTES);
356 assertEquals(actual, (len - j) * Short.BYTES);
357 }
358 }
359 }
360 }
361 }
362
363 @Test
364 public void strlen_int() {
365 Random random = deterministicRandom();
366 for (int skew = 0; skew < Long.BYTES; skew += Integer.BYTES) {
367 for (int len = 0; len < TEST_LENGTH_MAX; len++) {
368 try (var arena = Arena.ofConfined()) {
369 var segment = arena.allocate((len + 1) * Integer.BYTES + skew, JAVA_INT.byteAlignment())
370 .asSlice(skew);
371 for (int i = 0; i < len; i++) {
372 int value;
373 while ((value = random.nextInt()) == 0) {
374 }
375 segment.setAtIndex(JAVA_INT, i, value);
376 }
377 segment.setAtIndex(JAVA_INT, len, 0);
378 for (int j = 0; j < len; j++) {
379 int actual = StringSupport.strlenInt(segment, j * Integer.BYTES);
380 assertEquals(actual, (len - j) * Integer.BYTES);
381 }
382 }
383 }
384 }
385 }
386
387 @DataProvider
388 public static Object[][] strings() {
389 return new Object[][]{
390 {"testing"},
391 {""},
392 {"X"},
393 {"12345"},
394 {"yen \u00A5"},
395 {"snowman \u26C4"},
396 {"rainbow \uD83C\uDF08"},
397 {"0"},
398 {"01"},
399 {"012"},
400 {"0123"},
401 {"01234"},
402 {"012345"},
403 {"0123456"},
404 {"01234567"},
405 {"012345678"},
406 {"0123456789"}
407 };
408 }
409
410 public static boolean containsOnlyRegularCharacters(String s) {
411 return s.chars()
412 .allMatch(c -> Character.isLetterOrDigit((char) c));
413 }
414
415 boolean isStandard(Charset charset) {
416 for (Field standardCharset : StandardCharsets.class.getDeclaredFields()) {
417 try {
418 if (standardCharset.get(null) == charset) {
419 return true;
420 }
421 } catch (ReflectiveOperationException e) {
422 throw new AssertionError(e);
423 }
424 }
425 return false;
426 }
427
428 List<Charset> standardCharsets() {
429 return Charset.availableCharsets().values().stream()
430 .filter(this::isStandard)
431 .toList();
432 }
433
434 List<Charset> singleByteCharsets() {
435 return Arrays.asList(StandardCharsets.UTF_8, StandardCharsets.ISO_8859_1, StandardCharsets.US_ASCII);
436 }
437
438 static String referenceImpl(MemorySegment segment, long offset, Charset charset) {
439 long len = strlen_byte(segment, offset);
440 byte[] bytes = new byte[(int) len];
441 MemorySegment.copy(segment, JAVA_BYTE, offset, bytes, 0, (int) len);
442 return new String(bytes, charset);
443 }
444
445 // Reference implementation
446 private static int strlen_byte(MemorySegment segment, long start) {
447 // iterate until overflow (String can only hold a byte[], whose length can be expressed as an int)
448 for (int offset = 0; offset >= 0; offset++) {
449 byte curr = segment.get(JAVA_BYTE, start + offset);
450 if (curr == 0) {
451 return offset;
452 }
453 }
454 throw new IllegalArgumentException("String too large");
455 }
456
457 private static List<Arena> arenas() {
458 return Arrays.asList(
459 Arena.ofConfined(), // Native memory
460 new HeapArena(byte.class), // Heap memory backed by a byte array
461 new HeapArena(short.class), // Heap memory backed by a short array
462 new HeapArena(int.class), // Heap memory backed by an int array
463 new HeapArena(long.class)); // Heap memory backed by a long array
464 }
465
466 private static final class HeapArena implements Arena {
467
468 private static final int ELEMENT_SIZE = 1_000;
469
470 private final MemorySegment backingSegment;
471 private final SegmentAllocator allocator;
472
473 public HeapArena(Class<?> type) {
474 backingSegment = switch (type) {
475 case Class<?> c when byte.class.equals(c) -> MemorySegment.ofArray(new byte[ELEMENT_SIZE]);
476 case Class<?> c when short.class.equals(c) ->
477 MemorySegment.ofArray(new short[ELEMENT_SIZE]);
478 case Class<?> c when int.class.equals(c) ->
479 MemorySegment.ofArray(new int[ELEMENT_SIZE]);
480 case Class<?> c when long.class.equals(c) ->
481 MemorySegment.ofArray(new long[ELEMENT_SIZE]);
482 default -> throw new IllegalArgumentException(type.toString());
483 };
484 allocator = SegmentAllocator.slicingAllocator(backingSegment);
485 }
486
487 @Override
488 public MemorySegment allocate(long byteSize, long byteAlignment) {
489 return allocator.allocate(byteSize, byteAlignment);
490 }
491
492 @Override
493 public MemorySegment.Scope scope() {
494 return backingSegment.scope();
495 }
496
497 @Override
498 public void close() {
499 // Do nothing
500 }
501
502 @Override
503 public String toString() {
504 return "HeapArena{" +
505 "type=" + backingSegment.heapBase().orElseThrow().getClass().getName() +
506 '}';
507 }
508 }
509
510 }
|