1 /* 2 * Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package jdk.internal.foreign; 27 28 import jdk.internal.access.JavaLangAccess; 29 import jdk.internal.access.SharedSecrets; 30 import jdk.internal.foreign.abi.SharedUtils; 31 import jdk.internal.util.ArraysSupport; 32 33 import java.lang.foreign.MemorySegment; 34 import java.nio.charset.Charset; 35 36 import static java.lang.foreign.ValueLayout.*; 37 38 /** 39 * Miscellaneous functions to read and write strings, in various charsets. 40 */ 41 public final class StringSupport { 42 43 static final JavaLangAccess JAVA_LANG_ACCESS = SharedSecrets.getJavaLangAccess(); 44 45 private StringSupport() {} 46 47 public static String read(MemorySegment segment, long offset, Charset charset) { 48 return switch (CharsetKind.of(charset)) { 49 case SINGLE_BYTE -> readByte(segment, offset, charset); 50 case DOUBLE_BYTE -> readShort(segment, offset, charset); 51 case QUAD_BYTE -> readInt(segment, offset, charset); 52 }; 53 } 54 55 public static void write(MemorySegment segment, long offset, Charset charset, String string) { 56 switch (CharsetKind.of(charset)) { 57 case SINGLE_BYTE -> writeByte(segment, offset, charset, string); 58 case DOUBLE_BYTE -> writeShort(segment, offset, charset, string); 59 case QUAD_BYTE -> writeInt(segment, offset, charset, string); 60 } 61 } 62 63 private static String readByte(MemorySegment segment, long offset, Charset charset) { 64 long len = chunkedStrlenByte(segment, offset); 65 byte[] bytes = new byte[(int)len]; 66 MemorySegment.copy(segment, JAVA_BYTE, offset, bytes, 0, (int)len); 67 return new String(bytes, charset); 68 } 69 70 private static void writeByte(MemorySegment segment, long offset, Charset charset, String string) { 71 int bytes = copyBytes(string, segment, charset, offset); 72 segment.set(JAVA_BYTE, offset + bytes, (byte)0); 73 } 74 75 private static String readShort(MemorySegment segment, long offset, Charset charset) { 76 long len = chunkedStrlenShort(segment, offset); 77 byte[] bytes = new byte[(int)len]; 78 MemorySegment.copy(segment, JAVA_BYTE, offset, bytes, 0, (int)len); 79 return new String(bytes, charset); 80 } 81 82 private static void writeShort(MemorySegment segment, long offset, Charset charset, String string) { 83 int bytes = copyBytes(string, segment, charset, offset); 84 segment.set(JAVA_SHORT, offset + bytes, (short)0); 85 } 86 87 private static String readInt(MemorySegment segment, long offset, Charset charset) { 88 long len = strlenInt(segment, offset); 89 byte[] bytes = new byte[(int)len]; 90 MemorySegment.copy(segment, JAVA_BYTE, offset, bytes, 0, (int)len); 91 return new String(bytes, charset); 92 } 93 94 private static void writeInt(MemorySegment segment, long offset, Charset charset, String string) { 95 int bytes = copyBytes(string, segment, charset, offset); 96 segment.set(JAVA_INT, offset + bytes, 0); 97 } 98 99 /** 100 * {@return the shortest distance beginning at the provided {@code start} 101 * to the encountering of a zero byte in the provided {@code segment}} 102 * <p> 103 * The method divides the region of interest into three distinct regions: 104 * <ul> 105 * <li>head (access made on a byte-by-byte basis) (if any)</li> 106 * <li>body (access made with eight bytes at a time at physically 64-bit-aligned memory) (if any)</li> 107 * <li>tail (access made on a byte-by-byte basis) (if any)</li> 108 * </ul> 109 * <p> 110 * The body is using a heuristic method to determine if a long word 111 * contains a zero byte. The method might have false positives but 112 * never false negatives. 113 * <p> 114 * This method is inspired by the `glibc/string/strlen.c` implementation 115 * 116 * @param segment to examine 117 * @param start from where examination shall begin 118 * @throws IllegalArgumentException if the examined region contains no zero bytes 119 * within a length that can be accepted by a String 120 */ 121 public static int chunkedStrlenByte(MemorySegment segment, long start) { 122 123 // Handle the first unaligned "head" bytes separately 124 int headCount = (int)SharedUtils.remainsToAlignment(segment.address() + start, Long.BYTES); 125 126 int offset = 0; 127 for (; offset < headCount; offset++) { 128 byte curr = segment.get(JAVA_BYTE, start + offset); 129 if (curr == 0) { 130 return offset; 131 } 132 } 133 134 // We are now on a long-aligned boundary so this is the "body" 135 int bodyCount = bodyCount(segment.byteSize() - start - headCount); 136 137 for (; offset < bodyCount; offset += Long.BYTES) { 138 // We know we are `long` aligned so, we can save on alignment checking here 139 long curr = segment.get(JAVA_LONG_UNALIGNED, start + offset); 140 // Is this a candidate? 141 if (mightContainZeroByte(curr)) { 142 for (int j = 0; j < 8; j++) { 143 if (segment.get(JAVA_BYTE, start + offset + j) == 0) { 144 return offset + j; 145 } 146 } 147 } 148 } 149 150 // Handle the "tail" 151 return requireWithinArraySize((long) offset + strlenByte(segment, start + offset)); 152 } 153 154 /* Bits 63 and N * 8 (N = 1..7) of this number are zero. Call these bits 155 the "holes". Note that there is a hole just to the left of 156 each byte, with an extra at the end: 157 158 bits: 01111110 11111110 11111110 11111110 11111110 11111110 11111110 11111111 159 bytes: AAAAAAAA BBBBBBBB CCCCCCCC DDDDDDDD EEEEEEEE FFFFFFFF GGGGGGGG HHHHHHHH 160 161 The 1-bits make sure that carries propagate to the next 0-bit. 162 The 0-bits provide holes for carries to fall into. 163 */ 164 private static final long HIMAGIC_FOR_BYTES = 0x8080_8080_8080_8080L; 165 private static final long LOMAGIC_FOR_BYTES = 0x0101_0101_0101_0101L; 166 167 static boolean mightContainZeroByte(long l) { 168 return ((l - LOMAGIC_FOR_BYTES) & (~l) & HIMAGIC_FOR_BYTES) != 0; 169 } 170 171 private static final long HIMAGIC_FOR_SHORTS = 0x8000_8000_8000_8000L; 172 private static final long LOMAGIC_FOR_SHORTS = 0x0001_0001_0001_0001L; 173 174 static boolean mightContainZeroShort(long l) { 175 return ((l - LOMAGIC_FOR_SHORTS) & (~l) & HIMAGIC_FOR_SHORTS) != 0; 176 } 177 178 static int requireWithinArraySize(long size) { 179 if (size > ArraysSupport.SOFT_MAX_ARRAY_LENGTH) { 180 throw newIaeStringTooLarge(); 181 } 182 return (int) size; 183 } 184 185 static int bodyCount(long remaining) { 186 return (int) Math.min( 187 // Make sure we do not wrap around 188 Integer.MAX_VALUE - Long.BYTES, 189 // Remaining bytes to consider 190 remaining) 191 & -Long.BYTES; // Mask 0xFFFFFFF8 192 } 193 194 private static int strlenByte(MemorySegment segment, long start) { 195 for (int offset = 0; offset < ArraysSupport.SOFT_MAX_ARRAY_LENGTH; offset += 1) { 196 byte curr = segment.get(JAVA_BYTE, start + offset); 197 if (curr == 0) { 198 return offset; 199 } 200 } 201 throw newIaeStringTooLarge(); 202 } 203 204 /** 205 * {@return the shortest distance beginning at the provided {@code start} 206 * to the encountering of a zero short in the provided {@code segment}} 207 * <p> 208 * Note: The inspected region must be short aligned. 209 * 210 * @see #chunkedStrlenByte(MemorySegment, long) for more information 211 * 212 * @param segment to examine 213 * @param start from where examination shall begin 214 * @throws IllegalArgumentException if the examined region contains no zero shorts 215 * within a length that can be accepted by a String 216 */ 217 public static int chunkedStrlenShort(MemorySegment segment, long start) { 218 219 // Handle the first unaligned "head" bytes separately 220 int headCount = (int)SharedUtils.remainsToAlignment(segment.address() + start, Long.BYTES); 221 222 int offset = 0; 223 for (; offset < headCount; offset += Short.BYTES) { 224 short curr = segment.get(JAVA_SHORT, start + offset); 225 if (curr == 0) { 226 return offset; 227 } 228 } 229 230 // We are now on a long-aligned boundary so this is the "body" 231 int bodyCount = bodyCount(segment.byteSize() - start - headCount); 232 233 for (; offset < bodyCount; offset += Long.BYTES) { 234 // We know we are `long` aligned so, we can save on alignment checking here 235 long curr = segment.get(JAVA_LONG_UNALIGNED, start + offset); 236 // Is this a candidate? 237 if (mightContainZeroShort(curr)) { 238 for (int j = 0; j < Long.BYTES; j += Short.BYTES) { 239 if (segment.get(JAVA_SHORT_UNALIGNED, start + offset + j) == 0) { 240 return offset + j; 241 } 242 } 243 } 244 } 245 246 // Handle the "tail" 247 return requireWithinArraySize((long) offset + strlenShort(segment, start + offset)); 248 } 249 250 private static int strlenShort(MemorySegment segment, long start) { 251 for (int offset = 0; offset < ArraysSupport.SOFT_MAX_ARRAY_LENGTH; offset += Short.BYTES) { 252 short curr = segment.get(JAVA_SHORT_UNALIGNED, start + offset); 253 if (curr == (short)0) { 254 return offset; 255 } 256 } 257 throw newIaeStringTooLarge(); 258 } 259 260 // The gain of using `long` wide operations for `int` is lower than for the two other `byte` and `short` variants 261 // so, there is only one method for ints. 262 public static int strlenInt(MemorySegment segment, long start) { 263 for (int offset = 0; offset < ArraysSupport.SOFT_MAX_ARRAY_LENGTH; offset += Integer.BYTES) { 264 // We are guaranteed to be aligned here so, we can use unaligned access. 265 int curr = segment.get(JAVA_INT_UNALIGNED, start + offset); 266 if (curr == 0) { 267 return offset; 268 } 269 } 270 throw newIaeStringTooLarge(); 271 } 272 273 public enum CharsetKind { 274 SINGLE_BYTE(1), 275 DOUBLE_BYTE(2), 276 QUAD_BYTE(4); 277 278 final int terminatorCharSize; 279 280 CharsetKind(int terminatorCharSize) { 281 this.terminatorCharSize = terminatorCharSize; 282 } 283 284 public int terminatorCharSize() { 285 return terminatorCharSize; 286 } 287 288 public static CharsetKind of(Charset charset) { 289 // Comparing the charset to specific internal implementations avoids loading the class `StandardCharsets` 290 if (charset == sun.nio.cs.UTF_8.INSTANCE || 291 charset == sun.nio.cs.ISO_8859_1.INSTANCE || 292 charset == sun.nio.cs.US_ASCII.INSTANCE) { 293 return SINGLE_BYTE; 294 } else if (charset instanceof sun.nio.cs.UTF_16LE || 295 charset instanceof sun.nio.cs.UTF_16BE || 296 charset instanceof sun.nio.cs.UTF_16) { 297 return DOUBLE_BYTE; 298 } else if (charset instanceof sun.nio.cs.UTF_32LE || 299 charset instanceof sun.nio.cs.UTF_32BE || 300 charset instanceof sun.nio.cs.UTF_32) { 301 return QUAD_BYTE; 302 } else { 303 throw new UnsupportedOperationException("Unsupported charset: " + charset); 304 } 305 } 306 } 307 308 public static boolean bytesCompatible(String string, Charset charset) { 309 return JAVA_LANG_ACCESS.bytesCompatible(string, charset); 310 } 311 312 public static int copyBytes(String string, MemorySegment segment, Charset charset, long offset) { 313 if (bytesCompatible(string, charset)) { 314 copyToSegmentRaw(string, segment, offset); 315 return string.length(); 316 } else { 317 byte[] bytes = string.getBytes(charset); 318 MemorySegment.copy(bytes, 0, segment, JAVA_BYTE, offset, bytes.length); 319 return bytes.length; 320 } 321 } 322 323 public static void copyToSegmentRaw(String string, MemorySegment segment, long offset) { 324 JAVA_LANG_ACCESS.copyToSegmentRaw(string, segment, offset); 325 } 326 327 private static IllegalArgumentException newIaeStringTooLarge() { 328 return new IllegalArgumentException("String too large"); 329 } 330 331 }