< prev index next >

test/jdk/java/foreign/TestStringEncoding.java

Print this page

  5  *  This code is free software; you can redistribute it and/or modify it
  6  *  under the terms of the GNU General Public License version 2 only, as
  7  *  published by the Free Software Foundation.
  8  *
  9  *  This code is distributed in the hope that it will be useful, but WITHOUT
 10  *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 11  *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 12  *  version 2 for more details (a copy is included in the LICENSE file that
 13  *  accompanied this code).
 14  *
 15  *  You should have received a copy of the GNU General Public License version
 16  *  2 along with this work; if not, write to the Free Software Foundation,
 17  *  Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 18  *
 19  *   Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 20  *  or visit www.oracle.com if you need additional information or have any
 21  *  questions.
 22  *
 23  */
 24 


 25 import java.lang.foreign.Arena;



 26 import java.lang.foreign.MemorySegment;















 27 

 28 import org.testng.annotations.*;


 29 import static org.testng.Assert.*;
 30 
 31 /*
 32  * @test
 33  * @enablePreview
 34  * @requires jdk.foreign.linker != "UNSUPPORTED"
 35  * @run testng TestStringEncoding
 36  */
 37 
 38 public class TestStringEncoding {
 39 
 40     @Test(dataProvider = "strings")
 41     public void testStrings(String testString, int expectedByteLength) {
 42         try (Arena arena = Arena.ofConfined()) {
 43             MemorySegment text = arena.allocateUtf8String(testString);
























































































































































































































































 44 
 45             assertEquals(text.byteSize(), expectedByteLength);














































 46 
 47             String roundTrip = text.getUtf8String(0);
 48             assertEquals(roundTrip, testString);



















 49         }
 50     }
 51 
 52     @DataProvider
 53     public static Object[][] strings() {
 54         return new Object[][] {
 55             { "testing",               8 },
 56             { "",                      1 },
 57             { "X",                     2 },
 58             { "12345",                 6 },
 59             { "yen \u00A5",            7 }, // in UTF-8 2 bytes: 0xC2 0xA5
 60             { "snowman \u26C4",       12 }, // in UTF-8 three bytes: 0xE2 0x9B 0x84
 61             { "rainbow \uD83C\uDF08", 13 }  // in UTF-8 four bytes: 0xF0 0x9F 0x8C 0x88










 62         };
 63     }





































































































 64 }

  5  *  This code is free software; you can redistribute it and/or modify it
  6  *  under the terms of the GNU General Public License version 2 only, as
  7  *  published by the Free Software Foundation.
  8  *
  9  *  This code is distributed in the hope that it will be useful, but WITHOUT
 10  *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 11  *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 12  *  version 2 for more details (a copy is included in the LICENSE file that
 13  *  accompanied this code).
 14  *
 15  *  You should have received a copy of the GNU General Public License version
 16  *  2 along with this work; if not, write to the Free Software Foundation,
 17  *  Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 18  *
 19  *   Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 20  *  or visit www.oracle.com if you need additional information or have any
 21  *  questions.
 22  *
 23  */
 24 
 25 import java.io.IOException;
 26 import java.io.RandomAccessFile;
 27 import java.lang.foreign.Arena;
 28 import java.lang.foreign.FunctionDescriptor;
 29 import java.lang.foreign.Linker;
 30 import java.lang.foreign.MemoryLayout;
 31 import java.lang.foreign.MemorySegment;
 32 import java.lang.foreign.SegmentAllocator;
 33 import java.lang.foreign.ValueLayout;
 34 import java.lang.invoke.MethodHandle;
 35 import java.lang.reflect.Field;
 36 import java.nio.channels.FileChannel;
 37 import java.nio.charset.Charset;
 38 import java.nio.charset.StandardCharsets;
 39 import java.nio.file.Files;
 40 import java.nio.file.Path;
 41 import java.nio.file.Paths;
 42 import java.util.Arrays;
 43 import java.util.List;
 44 import java.util.Random;
 45 import java.util.function.Consumer;
 46 import java.util.function.UnaryOperator;
 47 
 48 import jdk.internal.foreign.StringSupport;
 49 import org.testng.annotations.*;
 50 
 51 import static java.lang.foreign.ValueLayout.*;
 52 import static org.testng.Assert.*;
 53 
 54 /*
 55  * @test
 56  * @modules java.base/jdk.internal.foreign

 57  * @run testng TestStringEncoding
 58  */
 59 
 60 public class TestStringEncoding {
 61 
 62     @Test(dataProvider = "strings")
 63     public void testStrings(String testString) {
 64         for (Charset charset : Charset.availableCharsets().values()) {
 65             if (isStandard(charset)) {
 66                 for (Arena arena : arenas()) {
 67                     try (arena) {
 68                         MemorySegment text = arena.allocateFrom(testString, charset);
 69 
 70                         int terminatorSize = "\0".getBytes(charset).length;
 71                         if (charset == StandardCharsets.UTF_16) {
 72                             terminatorSize -= 2; // drop BOM
 73                         }
 74                         // Note that the JDK's UTF_32 encoder doesn't add a BOM.
 75                         // This is legal under the Unicode standard, and means the byte order is BE.
 76                         // See: https://unicode.org/faq/utf_bom.html#gen7
 77 
 78                         int expectedByteLength =
 79                                 testString.getBytes(charset).length +
 80                                         terminatorSize;
 81 
 82                         assertEquals(text.byteSize(), expectedByteLength);
 83 
 84                         String roundTrip = text.getString(0, charset);
 85                         if (charset.newEncoder().canEncode(testString)) {
 86                             assertEquals(roundTrip, testString);
 87                         }
 88                     }
 89                 }
 90             } else {
 91                 assertThrows(UnsupportedOperationException.class, () -> Arena.global().allocateFrom(testString, charset));
 92             }
 93         }
 94     }
 95 
 96 
 97     @Test(dataProvider = "strings")
 98     public void testStringsHeap(String testString) {
 99         for (Charset charset : singleByteCharsets()) {
100             for (var arena : arenas()) {
101                 try (arena) {
102                     MemorySegment text = arena.allocateFrom(testString, charset);
103                     text = toHeapSegment(text);
104 
105                     int expectedByteLength =
106                             testString.getBytes(charset).length + 1;
107 
108                     assertEquals(text.byteSize(), expectedByteLength);
109 
110                     String roundTrip = text.getString(0, charset);
111                     if (charset.newEncoder().canEncode(testString)) {
112                         assertEquals(roundTrip, testString);
113                     }
114                 }
115             }
116         }
117     }
118 
119     MemorySegment toHeapSegment(MemorySegment segment) {
120         var heapArray = segment.toArray(JAVA_BYTE);
121         return MemorySegment.ofArray(heapArray);
122     }
123 
124     @Test(dataProvider = "strings")
125     public void unboundedSegment(String testString) {
126         testModifyingSegment(testString,
127                 standardCharsets(),
128                 s -> s.reinterpret(Long.MAX_VALUE),
129                 UnaryOperator.identity());
130     }
131 
132     @Test(dataProvider = "strings")
133     public void unalignedSegmentSingleByte(String testString) {
134         testModifyingSegment(testString,
135                 singleByteCharsets(),
136                 s -> s.byteSize() > 1 ? s.asSlice(1) : s,
137                 s -> s.length() > 0 ? s.substring(1) : s);
138     }
139 
140     @Test(dataProvider = "strings")
141     public void expandedSegment(String testString) {
142         try (var arena = Arena.ofConfined()) {
143             for (int i = 0; i < Long.BYTES; i++) {
144                 int extra = i;
145                 testModifyingSegment(testString,
146                         // Single byte charsets
147                         standardCharsets(),
148                         s -> {
149                             var s2 = arena.allocate(s.byteSize() + extra);
150                             MemorySegment.copy(s, 0, s2, 0, s.byteSize());
151                             return s2;
152                         },
153                         UnaryOperator.identity());
154             }
155         }
156     }
157 
158     public void testModifyingSegment(String testString,
159                                      List<Charset> charsets,
160                                      UnaryOperator<MemorySegment> segmentMapper,
161                                      UnaryOperator<String> stringMapper) {
162         for (var charset : charsets) {
163             try (Arena arena = Arena.ofConfined()) {
164                 MemorySegment text = arena.allocateFrom(testString, charset);
165                 text = segmentMapper.apply(text);
166                 String roundTrip = text.getString(0, charset);
167                 String expected = stringMapper.apply(testString);
168                 if (charset.newEncoder().canEncode(testString)) {
169                     assertEquals(roundTrip, expected);
170                 }
171             }
172         }
173     }
174 
175     @Test()
176     public void testPeculiarContentSingleByte() {
177         Random random = new Random(42);
178         for (int len = 7; len < 71; len++) {
179             for (var arena : arenas()) {
180                 try (arena) {
181                     var segment = arena.allocate(len, 1);
182                     var arr = new byte[len];
183                     random.nextBytes(arr);
184                     segment.copyFrom(MemorySegment.ofArray(arr));
185                     int terminatorIndex = random.nextInt(len);
186                     segment.set(ValueLayout.JAVA_BYTE, terminatorIndex, (byte) 0);
187                     for (Charset charset : singleByteCharsets()) {
188                         var s = segment.getString(0, charset);
189                         var ref = referenceImpl(segment, 0, charset);
190                         assertEquals(s, ref);
191                     }
192                 }
193             }
194         }
195     }
196 
197     @Test(dataProvider = "strings")
198     public void testOffset(String testString) {
199         if (testString.length() < 3 || !containsOnlyRegularCharacters(testString)) {
200             return;
201         }
202         for (var charset : singleByteCharsets()) {
203             for (var arena: arenas()) {
204                 try (arena) {
205                     MemorySegment inSegment = arena.allocateFrom(testString, charset);
206                     for (int i = 0; i < 3; i++) {
207                         String actual = inSegment.getString(i, charset);
208                         assertEquals(actual, testString.substring(i));
209                     }
210                 }
211             }
212         }
213     }
214 
215     @Test()
216     public void testJumboSegment() {
217         testWithJumboSegment("testJumboSegment", segment -> {
218             segment.fill((byte) 1);
219             segment.set(JAVA_BYTE, Integer.MAX_VALUE + 10L, (byte) 0);
220             String big = segment.getString(100);
221         });
222     }
223 
224     @Test()
225     public void testStringLargerThanMaxInt() {
226         testWithJumboSegment("testStringLargerThanMaxInt", segment -> {
227             segment.fill((byte) 1);
228             segment.set(JAVA_BYTE, Integer.MAX_VALUE + 10L, (byte) 0);
229             assertThrows(IllegalArgumentException.class, () -> {
230                 segment.getString(0);
231             });
232         });
233     }
234 
235     private static void testWithJumboSegment(String testName, Consumer<MemorySegment> tester) {
236         Path path = Paths.get("mapped_file");
237         try {
238             // Relly try to make sure the file is deleted after use
239             path.toFile().deleteOnExit();
240             deleteIfExistsOrThrow(path);
241             try (RandomAccessFile raf = new RandomAccessFile(path.toFile(), "rw")) {
242                 FileChannel fc = raf.getChannel();
243                 try (Arena arena = Arena.ofConfined()) {
244                     var segment = fc.map(FileChannel.MapMode.READ_WRITE, 0L, (long) Integer.MAX_VALUE + 100, arena);
245                     tester.accept(segment);
246                 }
247             }
248         } catch (Exception e) {
249             throw new AssertionError(e);
250         } catch (OutOfMemoryError oome) {
251             // Unfortunately, we run out of memory and cannot run this test in this configuration
252             System.out.println("Skipping test because of insufficient memory: " + testName);
253         } finally {
254             deleteIfExistsOrThrow(path);
255         }
256     }
257 
258     private static void deleteIfExistsOrThrow(Path file) {
259         try {
260             Files.deleteIfExists(file);
261         } catch (IOException ioe) {
262             throw new AssertionError("Unable to delete mapped file: " + file);
263         }
264     }
265 
266     private static final MemoryLayout CHAR_POINTER = ADDRESS
267             .withTargetLayout(MemoryLayout.sequenceLayout(Long.MAX_VALUE, JAVA_BYTE));
268     private static final Linker LINKER = Linker.nativeLinker();
269     private static final MethodHandle STRCAT = LINKER.downcallHandle(
270             LINKER.defaultLookup().find("strcat").orElseThrow(),
271             FunctionDescriptor.of(CHAR_POINTER, CHAR_POINTER, CHAR_POINTER));
272 
273     @Test(dataProvider = "strings")
274     public void nativeSegFromNativeCall(String testString) {
275         String addition = "123";
276         try (var arena = Arena.ofConfined()) {
277             try {
278                 var testStringSegment = arena.allocateFrom(testString);
279                 var additionSegment = arena.allocateFrom(addition);
280                 var destination = arena.allocate(testStringSegment.byteSize() + additionSegment.byteSize() - 1);
281                 destination.copyFrom(testStringSegment);
282 
283                 MemorySegment concatenation = (MemorySegment) STRCAT.invokeExact(destination, arena.allocateFrom(addition));
284                 var actual = concatenation.getString(0);
285                 assertEquals(actual, testString + addition);
286             } catch (Throwable t) {
287                 throw new AssertionError(t);
288             }
289         }
290     }
291 
292     @Test
293     public void segmentationFault() {
294         for (int i = 1; i < 18; i++) {
295             var size = 1 << i;
296             try (var arena = Arena.ofConfined()) {
297                 var seg = arena.allocate(size, size);
298                 seg.fill((byte) 1);
299                 try {
300                     var s = seg.getString(0);
301                     System.out.println("s.length() = " + s.length());
302                 } catch (IndexOutOfBoundsException e) {
303                     // we will end up here if strlen finds a zero outside the MS
304                 }
305             }
306         }
307     }
308 
309     private static final int TEST_LENGTH_MAX = 277;
310 
311     private Random deterministicRandom() {
312         return new Random(42);
313     }
314 
315     @Test
316     public void chunked_strlen_byte() {
317         Random random = deterministicRandom();
318         for (int skew = 0; skew < Long.BYTES; skew++) {
319             for (int len = 0; len < TEST_LENGTH_MAX; len++) {
320                 try (var arena = Arena.ofConfined()) {
321                     var segment = arena.allocate(len + 1 + skew)
322                             .asSlice(skew);
323                     for (int i = 0; i < len; i++) {
324                         byte value;
325                         while ((value = (byte) random.nextInt()) == 0) {
326                         }
327                         segment.setAtIndex(JAVA_BYTE, i, value);
328                     }
329                     segment.setAtIndex(JAVA_BYTE, len, (byte) 0);
330                     for (int j = 0; j < len; j++) {
331                         int actual = StringSupport.chunkedStrlenByte(segment, j);
332                         assertEquals(actual, len - j);
333                     }
334                 }
335             }
336         }
337     }
338 
339     @Test
340     public void chunked_strlen_short() {
341         Random random = deterministicRandom();
342         for (int skew = 0; skew < Long.BYTES; skew += Short.BYTES) {
343             for (int len = 0; len < TEST_LENGTH_MAX; len++) {
344                 try (var arena = Arena.ofConfined()) {
345                     var segment = arena.allocate((len + 1) * Short.BYTES + skew, JAVA_SHORT.byteAlignment())
346                             .asSlice(skew);
347                     for (int i = 0; i < len; i++) {
348                         short value;
349                         while ((value = (short) random.nextInt()) == 0) {
350                         }
351                         segment.setAtIndex(JAVA_SHORT, i, value);
352                     }
353                     segment.setAtIndex(JAVA_SHORT, len, (short) 0);
354                     for (int j = 0; j < len; j++) {
355                         int actual = StringSupport.chunkedStrlenShort(segment, j * Short.BYTES);
356                         assertEquals(actual, (len - j) * Short.BYTES);
357                     }
358                 }
359             }
360         }
361     }
362 
363     @Test
364     public void strlen_int() {
365         Random random = deterministicRandom();
366         for (int skew = 0; skew < Long.BYTES; skew += Integer.BYTES) {
367             for (int len = 0; len < TEST_LENGTH_MAX; len++) {
368                 try (var arena = Arena.ofConfined()) {
369                     var segment = arena.allocate((len + 1) * Integer.BYTES + skew, JAVA_INT.byteAlignment())
370                             .asSlice(skew);
371                     for (int i = 0; i < len; i++) {
372                         int value;
373                         while ((value = random.nextInt()) == 0) {
374                         }
375                         segment.setAtIndex(JAVA_INT, i, value);
376                     }
377                     segment.setAtIndex(JAVA_INT, len, 0);
378                     for (int j = 0; j < len; j++) {
379                         int actual = StringSupport.strlenInt(segment, j * Integer.BYTES);
380                         assertEquals(actual, (len - j) * Integer.BYTES);
381                     }
382                 }
383             }
384         }
385     }
386 
387     @DataProvider
388     public static Object[][] strings() {
389         return new Object[][]{
390                 {"testing"},
391                 {""},
392                 {"X"},
393                 {"12345"},
394                 {"yen \u00A5"},
395                 {"snowman \u26C4"},
396                 {"rainbow \uD83C\uDF08"},
397                 {"0"},
398                 {"01"},
399                 {"012"},
400                 {"0123"},
401                 {"01234"},
402                 {"012345"},
403                 {"0123456"},
404                 {"01234567"},
405                 {"012345678"},
406                 {"0123456789"}
407         };
408     }
409 
410     public static boolean containsOnlyRegularCharacters(String s) {
411         return s.chars()
412                 .allMatch(c -> Character.isLetterOrDigit((char) c));
413     }
414 
415     boolean isStandard(Charset charset) {
416         for (Field standardCharset : StandardCharsets.class.getDeclaredFields()) {
417             try {
418                 if (standardCharset.get(null) == charset) {
419                     return true;
420                 }
421             } catch (ReflectiveOperationException e) {
422                 throw new AssertionError(e);
423             }
424         }
425         return false;
426     }
427 
428     List<Charset> standardCharsets() {
429         return Charset.availableCharsets().values().stream()
430                 .filter(this::isStandard)
431                 .toList();
432     }
433 
434     List<Charset> singleByteCharsets() {
435         return Arrays.asList(StandardCharsets.UTF_8, StandardCharsets.ISO_8859_1, StandardCharsets.US_ASCII);
436     }
437 
438     static String referenceImpl(MemorySegment segment, long offset, Charset charset) {
439         long len = strlen_byte(segment, offset);
440         byte[] bytes = new byte[(int) len];
441         MemorySegment.copy(segment, JAVA_BYTE, offset, bytes, 0, (int) len);
442         return new String(bytes, charset);
443     }
444 
445     // Reference implementation
446     private static int strlen_byte(MemorySegment segment, long start) {
447         // iterate until overflow (String can only hold a byte[], whose length can be expressed as an int)
448         for (int offset = 0; offset >= 0; offset++) {
449             byte curr = segment.get(JAVA_BYTE, start + offset);
450             if (curr == 0) {
451                 return offset;
452             }
453         }
454         throw new IllegalArgumentException("String too large");
455     }
456 
457     private static List<Arena> arenas() {
458         return Arrays.asList(
459                 Arena.ofConfined(),          // Native memory
460                 new HeapArena(byte.class),   // Heap memory backed by a byte array
461                 new HeapArena(short.class),  // Heap memory backed by a short array
462                 new HeapArena(int.class),    // Heap memory backed by an int array
463                 new HeapArena(long.class));  // Heap memory backed by a long array
464     }
465 
466     private static final class HeapArena implements Arena {
467 
468         private static final int ELEMENT_SIZE = 1_000;
469 
470         private final MemorySegment backingSegment;
471         private final SegmentAllocator allocator;
472 
473         public HeapArena(Class<?> type) {
474             backingSegment = switch (type) {
475                 case Class<?> c when byte.class.equals(c) -> MemorySegment.ofArray(new byte[ELEMENT_SIZE]);
476                 case Class<?> c when short.class.equals(c) ->
477                         MemorySegment.ofArray(new short[ELEMENT_SIZE]);
478                 case Class<?> c when int.class.equals(c) ->
479                         MemorySegment.ofArray(new int[ELEMENT_SIZE]);
480                 case Class<?> c when long.class.equals(c) ->
481                         MemorySegment.ofArray(new long[ELEMENT_SIZE]);
482                 default -> throw new IllegalArgumentException(type.toString());
483             };
484             allocator = SegmentAllocator.slicingAllocator(backingSegment);
485         }
486 
487         @Override
488         public MemorySegment allocate(long byteSize, long byteAlignment) {
489             return allocator.allocate(byteSize, byteAlignment);
490         }
491 
492         @Override
493         public MemorySegment.Scope scope() {
494             return backingSegment.scope();
495         }
496 
497         @Override
498         public void close() {
499             // Do nothing
500         }
501 
502         @Override
503         public String toString() {
504             return "HeapArena{" +
505                     "type=" + backingSegment.heapBase().orElseThrow().getClass().getName() +
506                     '}';
507         }
508     }
509 
510 }
< prev index next >