/*
 * Copyright (c) 2023, Red Hat, Inc. All rights reserved.
 * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

package compiler.c2.irTests;

import compiler.lib.ir_framework.*;
import jdk.test.lib.Utils;
import jdk.test.whitebox.WhiteBox;
import jdk.internal.misc.Unsafe;
import java.util.Random;
import java.util.Arrays;
import java.nio.ByteOrder;

/*
 * @test
 * @bug 8300258
 * @key randomness
 * @summary C2: vectorization fails on simple ByteBuffer loop
 * @modules java.base/jdk.internal.misc
 * @library /test/lib /
 * @build jdk.test.whitebox.WhiteBox
 * @run driver jdk.test.lib.helpers.ClassFileInstaller jdk.test.whitebox.WhiteBox
 * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI compiler.c2.irTests.TestVectorizationMismatchedAccess
 */

public class TestVectorizationMismatchedAccess {
    private static final Unsafe UNSAFE = Unsafe.getUnsafe();
    private static final Random RANDOM = Utils.getRandomInstance();
    private final static WhiteBox wb = WhiteBox.getWhiteBox();

    public static void main(String[] args) {
        // Cross-product: +-AlignVector and +-UseCompactObjectHeaders
        TestFramework.runWithFlags("--add-modules", "java.base", "--add-exports", "java.base/jdk.internal.misc=ALL-UNNAMED",
                                   "-XX:+UnlockExperimentalVMOptions", "-XX:-UseCompactObjectHeaders",
                                   "-XX:-AlignVector");
        TestFramework.runWithFlags("--add-modules", "java.base", "--add-exports", "java.base/jdk.internal.misc=ALL-UNNAMED",
                                   "-XX:+UnlockExperimentalVMOptions", "-XX:-UseCompactObjectHeaders",
                                   "-XX:+AlignVector");
        TestFramework.runWithFlags("--add-modules", "java.base", "--add-exports", "java.base/jdk.internal.misc=ALL-UNNAMED",
                                   "-XX:+UnlockExperimentalVMOptions", "-XX:+UseCompactObjectHeaders",
                                   "-XX:-AlignVector");
        TestFramework.runWithFlags("--add-modules", "java.base", "--add-exports", "java.base/jdk.internal.misc=ALL-UNNAMED",
                                   "-XX:+UnlockExperimentalVMOptions", "-XX:+UseCompactObjectHeaders",
                                   "-XX:+AlignVector");
    }

    static int size = 1024;
    static byte[] byteArray = new byte[size * 8];
    static long[] longArray = new long[size];
    static byte[] verifyByteArray = new byte[size * 8];
    static long[] verifyLongArray = new long[size];
    static long baseOffset = 0;
    static long baseOffHeap = UNSAFE.allocateMemory(size * 8);

    static {
        for (int i = 0; i < verifyByteArray.length; i++) {
            verifyByteArray[i] = (byte)RANDOM.nextInt(Byte.MAX_VALUE);
        }
        for (int i = 0; i < verifyLongArray.length; i++) {
            verifyLongArray[i] = 0;
            for (int j = 0; j < 8; j++) {
                verifyLongArray[i] = verifyLongArray[i] | (((long)verifyByteArray[8 * i + j]) << 8 * j);
            }
        }
    }

    // Method to adjust the value for the native byte order
    static private long handleByteOrder(long value) {
        if (ByteOrder.nativeOrder() != ByteOrder.LITTLE_ENDIAN) {
            value = Long.reverseBytes(value);
        }
        return value;
    }

    static private void runAndVerify(Runnable test, int offset) {
        System.arraycopy(verifyLongArray, 0, longArray, 0, longArray.length);
        Arrays.fill(byteArray, (byte)0);
        test.run();
        int i;
        for (i = 0; i < Math.max(offset, 0); i++) {
            if (byteArray[i] != 0) {
                throw new RuntimeException("Incorrect result at " + i + " " + byteArray[i] + " != 0");
            }
        }
        for (; i < Math.min(byteArray.length + offset, byteArray.length); i++) {
            if (byteArray[i] != verifyByteArray[i - offset]) {
                throw new RuntimeException("Incorrect result at " + i + " " + byteArray[i] + " != " + verifyByteArray[i - offset]);
            }
        }
        for (; i < byteArray.length; i++) {
            if (byteArray[i] != 0) {
                throw new RuntimeException("Incorrect result at " + i + " " + byteArray[i] + " != 0");
            }
        }
    }

    static private void runAndVerify2(Runnable test, int offset) {
        System.arraycopy(verifyByteArray, 0, byteArray, 0, byteArray.length);
        test.run();
        int i;
        for (i = 0; i < Math.max(offset, 0); i++) {
            if (byteArray[i] != verifyByteArray[i]) {
                throw new RuntimeException("Incorrect result at " + i + " " + byteArray[i] + " != " + verifyByteArray[i]);
            }
        }
        for (; i < Math.min(byteArray.length + offset, byteArray.length); i++) {
            int val = offset > 0 ? verifyByteArray[(i - offset) % 8] : verifyByteArray[i - offset];
            if (byteArray[i] != val) {
                throw new RuntimeException("Incorrect result at " + i + " " + byteArray[i] + " != " + val);
            }
        }
        for (; i < byteArray.length; i++) {
            if (byteArray[i] != verifyByteArray[i]) {
                throw new RuntimeException("Incorrect result at " + i + " " + byteArray[i] + " != " + verifyByteArray[i]);
            }
        }
    }

    static private void runAndVerify3(Runnable test, int offset) {
        System.arraycopy(verifyLongArray, 0, longArray, 0, longArray.length);
        for (int i = 0; i < size * 8; i++) {
            UNSAFE.putByte(null, baseOffHeap + i, (byte)0);
        }
        test.run();
        int i;
        for (i = 0; i < Math.max(offset, 0); i++) {
            if (UNSAFE.getByte(null, baseOffHeap + i) != 0) {
                throw new RuntimeException("Incorrect result at " + i + " " + UNSAFE.getByte(null, baseOffHeap + i) + " != 0");
            }
        }
        for (; i < Math.min(size * 8 + offset, size * 8); i++) {
            if (UNSAFE.getByte(null, baseOffHeap + i) != verifyByteArray[i - offset]) {
                throw new RuntimeException("Incorrect result at " + i + " " + UNSAFE.getByte(null, baseOffHeap + i) + " != " + verifyByteArray[i - offset]);
            }
        }
        for (; i < byteArray.length; i++) {
            if (UNSAFE.getByte(null, baseOffHeap + i) != 0) {
                throw new RuntimeException("Incorrect result at " + i + " " + UNSAFE.getByte(null, baseOffHeap + i) + " != 0");
            }
        }
    }

    @Test
    @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" },
        applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
        applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"})
    // 32-bit: offsets are badly aligned (UNSAFE.ARRAY_BYTE_BASE_OFFSET is 4 byte aligned, but not 8 byte aligned).
    //         might get fixed with JDK-8325155.
    public static void testByteLong1a(byte[] dest, long[] src) {
        for (int i = 0; i < src.length; i++) {
            UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * i, handleByteOrder(src[i]));
            // With AlignVector, we need 8-byte alignment of vector loads/stores.
            // UseCompactObjectHeaders=false                  UseCompactObjectHeaders=true
            // B_adr = base + 16 + 8*i   -> always            B_adr = base + 12 + 8*i   -> never
            // L_adr = base + 16 + 8*i   -> always            L_adr = base + 16 + 8*i   -> always
            // -> vectorize                                    -> no vectorization
        }
    }

    @Test
    @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" },
        applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
        applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"})
    // 32-bit: address has ConvL2I for cast of long to address, not supported.
    public static void testByteLong1b(byte[] dest, long[] src) {
        for (int i = 0; i < src.length; i++) {
            UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8L * i, handleByteOrder(src[i]));
            // With AlignVector, we need 8-byte alignment of vector loads/stores.
            // UseCompactObjectHeaders=false                  UseCompactObjectHeaders=true
            // B_adr = base + 16 + 8*i   -> always            B_adr = base + 12 + 8*i   -> never
            // L_adr = base + 16 + 8*i   -> always            L_adr = base + 16 + 8*i   -> always
            // -> vectorize                                    -> no vectorization
        }
    }

    @Test
    @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" },
        applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"})
    public static void testByteLong1c(byte[] dest, long[] src) {
        long base = 64; // make sure it is big enough and 8 byte aligned (required for 32-bit)
        for (int i = 0; i < src.length - 8; i++) {
            UNSAFE.putLongUnaligned(dest, base + 8 * i, handleByteOrder(src[i]));
            // With AlignVector, we need 8-byte alignment of vector loads/stores.
            // UseCompactObjectHeaders=false                  UseCompactObjectHeaders=true
            // B_adr = base + 64 + 8*i   -> always            B_adr = base + 64 + 8*i   -> always
            // L_adr = base + 16 + 8*i   -> always            L_adr = base + 16 + 8*i   -> always
            // -> vectorize                                    -> vectorize
        }
    }

    @Test
    @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" },
        applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"})
    // 32-bit: address has ConvL2I for cast of long to address, not supported.
    public static void testByteLong1d(byte[] dest, long[] src) {
        long base = 64; // make sure it is big enough and 8 byte aligned (required for 32-bit)
        for (int i = 0; i < src.length - 8; i++) {
            UNSAFE.putLongUnaligned(dest, base + 8L * i, handleByteOrder(src[i]));
            // With AlignVector, we need 8-byte alignment of vector loads/stores.
            // UseCompactObjectHeaders=false                  UseCompactObjectHeaders=true
            // B_adr = base + 64 + 8*i   -> always            B_adr = base + 64 + 8*i   -> always
            // L_adr = base + 16 + 8*i   -> always            L_adr = base + 16 + 8*i   -> always
            // -> vectorize                                    -> vectorize
        }
    }

    @Run(test = {"testByteLong1a", "testByteLong1b", "testByteLong1c", "testByteLong1d"})
    public static void testByteLong1_runner() {
        runAndVerify(() -> testByteLong1a(byteArray, longArray), 0);
        runAndVerify(() -> testByteLong1b(byteArray, longArray), 0);
        testByteLong1c(byteArray, longArray);
        testByteLong1d(byteArray, longArray);
    }

    @Test
    @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" },
        applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
        applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"})
    // 32-bit: offsets are badly aligned (UNSAFE.ARRAY_BYTE_BASE_OFFSET is 4 byte aligned, but not 8 byte aligned).
    //         might get fixed with JDK-8325155.
    public static void testByteLong2a(byte[] dest, long[] src) {
        for (int i = 1; i < src.length; i++) {
            UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * (i - 1), handleByteOrder(src[i]));
            // With AlignVector, we need 8-byte alignment of vector loads/stores.
            // UseCompactObjectHeaders=false                      UseCompactObjectHeaders=true
            // B_adr = base + 16 + 8*(i-1)   -> always            B_adr = base + 12 + 8*(i-1)   -> never
            // L_adr = base + 16 + 8*i       -> always            L_adr = base + 16 + 8*i       -> always
            // -> vectorize                                        -> no vectorization
        }
    }

    @Test
    @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" },
        applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
        applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"})
    // 32-bit: address has ConvL2I for cast of long to address, not supported.
    public static void testByteLong2b(byte[] dest, long[] src) {
        for (int i = 1; i < src.length; i++) {
            UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8L * (i - 1), handleByteOrder(src[i]));
            // With AlignVector, we need 8-byte alignment of vector loads/stores.
            // UseCompactObjectHeaders=false                      UseCompactObjectHeaders=true
            // B_adr = base + 16 + 8*(i-1)   -> always            B_adr = base + 12 + 8*(i-1)   -> never
            // L_adr = base + 16 + 8*i       -> always            L_adr = base + 16 + 8*i       -> always
            // -> vectorize                                        -> no vectorization
        }
    }

    @Run(test = {"testByteLong2a", "testByteLong2b"})
    public static void testByteLong2_runner() {
        runAndVerify(() -> testByteLong2a(byteArray, longArray), -8);
        runAndVerify(() -> testByteLong2b(byteArray, longArray), -8);
    }

    @Test
    @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" },
        applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
        applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"})
    // 32-bit: offsets are badly aligned (UNSAFE.ARRAY_BYTE_BASE_OFFSET is 4 byte aligned, but not 8 byte aligned).
    //         might get fixed with JDK-8325155.
    public static void testByteLong3a(byte[] dest, long[] src) {
        for (int i = 0; i < src.length - 1; i++) {
            UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * (i + 1), handleByteOrder(src[i]));
            // With AlignVector, we need 8-byte alignment of vector loads/stores.
            // UseCompactObjectHeaders=false                      UseCompactObjectHeaders=true
            // B_adr = base + 16 + 8*(i+1)   -> always            B_adr = base + 12 + 8*(i+1)   -> never
            // L_adr = base + 16 + 8*i       -> always            L_adr = base + 16 + 8*i       -> always
            // -> vectorize                                        -> no vectorization
        }
    }

    @Test
    @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" },
        applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
        applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"})
    // 32-bit: address has ConvL2I for cast of long to address, not supported.
    public static void testByteLong3b(byte[] dest, long[] src) {
        for (int i = 0; i < src.length - 1; i++) {
            UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8L * (i + 1), handleByteOrder(src[i]));
            // With AlignVector, we need 8-byte alignment of vector loads/stores.
            // UseCompactObjectHeaders=false                      UseCompactObjectHeaders=true
            // B_adr = base + 16 + 8*(i+1)   -> always            B_adr = base + 12 + 8*(i+1)   -> never
            // L_adr = base + 16 + 8*i       -> always            L_adr = base + 16 + 8*i       -> always
            // -> vectorize                                        -> no vectorization
        }
    }

    @Run(test = {"testByteLong3a", "testByteLong3b"})
    public static void testByteLong3_runner() {
        runAndVerify(() -> testByteLong3a(byteArray, longArray), 8);
        runAndVerify(() -> testByteLong3b(byteArray, longArray), 8);
    }

    @Test
    @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" },
        applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "false"})
    // 32-bit: offsets are badly aligned (UNSAFE.ARRAY_BYTE_BASE_OFFSET is 4 byte aligned, but not 8 byte aligned).
    //         might get fixed with JDK-8325155.
    // AlignVector cannot guarantee that invar is aligned.
    public static void testByteLong4a(byte[] dest, long[] src, int start, int stop) {
        for (int i = start; i < stop; i++) {
            UNSAFE.putLongUnaligned(dest, 8 * i + baseOffset, handleByteOrder(src[i]));
        }
    }

    @Test
    @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" },
        applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "false"})
    // 32-bit: address has ConvL2I for cast of long to address, not supported.
    // AlignVector cannot guarantee that invar is aligned.
    public static void testByteLong4b(byte[] dest, long[] src, int start, int stop) {
        for (int i = start; i < stop; i++) {
            UNSAFE.putLongUnaligned(dest, 8L * i + baseOffset, handleByteOrder(src[i]));
        }
    }

    @Run(test = {"testByteLong4a", "testByteLong4b"})
    public static void testByteLong4_runner() {
        baseOffset = UNSAFE.ARRAY_BYTE_BASE_OFFSET;
        runAndVerify(() -> testByteLong4a(byteArray, longArray, 0, size), 0);
        runAndVerify(() -> testByteLong4b(byteArray, longArray, 0, size), 0);
    }

    @Test
    @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" },
        applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
        applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"})
    // 32-bit: offsets are badly aligned (UNSAFE.ARRAY_BYTE_BASE_OFFSET is 4 byte aligned, but not 8 byte aligned).
    //         might get fixed with JDK-8325155.
    public static void testByteLong5a(byte[] dest, long[] src, int start, int stop) {
        for (int i = start; i < stop; i++) {
            UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * (i + baseOffset), handleByteOrder(src[i]));
            // With AlignVector, we need 8-byte alignment of vector loads/stores.
            // UseCompactObjectHeaders=false                      UseCompactObjectHeaders=true
            // B_adr = base + 16 + 8*(i+x)   -> always            B_adr = base + 12 + 8*(i+x)   -> never
            // L_adr = base + 16 + 8*i       -> always            L_adr = base + 16 + 8*i       -> always
            // -> vectorize                                        -> no vectorization
        }
    }

    @Test
    @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" },
        applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
        applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"})
    // 32-bit: address has ConvL2I for cast of long to address, not supported.
    public static void testByteLong5b(byte[] dest, long[] src, int start, int stop) {
        for (int i = start; i < stop; i++) {
            UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8L * (i + baseOffset), handleByteOrder(src[i]));
            // With AlignVector, we need 8-byte alignment of vector loads/stores.
            // UseCompactObjectHeaders=false                      UseCompactObjectHeaders=true
            // B_adr = base + 16 + 8*(i+x)   -> always            B_adr = base + 12 + 8*(i+x)   -> never
            // L_adr = base + 16 + 8*i       -> always            L_adr = base + 16 + 8*i       -> always
            // -> vectorize                                        -> no vectorization
        }
    }

    @Run(test = {"testByteLong5a", "testByteLong5b"})
    public static void testByteLong5_runner() {
        baseOffset = 1;
        runAndVerify(() -> testByteLong5a(byteArray, longArray, 0, size-1), 8);
        runAndVerify(() -> testByteLong5b(byteArray, longArray, 0, size-1), 8);
    }

    @Test
    @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" },
        applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
        applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"})
    // 32-bit: offsets are badly aligned (UNSAFE.ARRAY_BYTE_BASE_OFFSET is 4 byte aligned, but not 8 byte aligned).
    //         might get fixed with JDK-8325155.
    public static void testByteByte1a(byte[] dest, byte[] src) {
        for (int i = 0; i < src.length / 8; i++) {
            UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * i, UNSAFE.getLongUnaligned(src, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * i));
            // With AlignVector, we need 8-byte alignment of vector loads/stores.
            // UseCompactObjectHeaders=false                    UseCompactObjectHeaders=true
            // src_adr = base + 16 + 8*i   -> always            src_adr = base + 12 + 8*i   -> never
            // dst_adr = base + 16 + 8*i   -> always            dst_adr = base + 12 + 8*i   -> never
            // -> vectorize                                      -> no vectorization
        }
    }

    @Test
    @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" },
        applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
        applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"})
    // 32-bit: address has ConvL2I for cast of long to address, not supported.
    public static void testByteByte1b(byte[] dest, byte[] src) {
        for (int i = 0; i < src.length / 8; i++) {
            UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8L * i, UNSAFE.getLongUnaligned(src, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8L * i));
            // With AlignVector, we need 8-byte alignment of vector loads/stores.
            // UseCompactObjectHeaders=false                    UseCompactObjectHeaders=true
            // src_adr = base + 16 + 8*i   -> always            src_adr = base + 12 + 8*i   -> never
            // dst_adr = base + 16 + 8*i   -> always            dst_adr = base + 12 + 8*i   -> never
            // -> vectorize                                      -> no vectorization
        }
    }

    @Run(test = {"testByteByte1a", "testByteByte1b"})
    public static void testByteByte1_runner() {
        runAndVerify2(() -> testByteByte1a(byteArray, byteArray), 0);
        runAndVerify2(() -> testByteByte1b(byteArray, byteArray), 0);
    }

    @Test
    @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" },
        applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
        applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"})
    // 32-bit: offsets are badly aligned (UNSAFE.ARRAY_BYTE_BASE_OFFSET is 4 byte aligned, but not 8 byte aligned).
    //         might get fixed with JDK-8325155.
    public static void testByteByte2a(byte[] dest, byte[] src) {
        for (int i = 1; i < src.length / 8; i++) {
            UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * (i - 1), UNSAFE.getLongUnaligned(src, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * i));
            // With AlignVector, we need 8-byte alignment of vector loads/stores.
            // UseCompactObjectHeaders=false                        UseCompactObjectHeaders=true
            // src_adr = base + 16 + 8*i       -> always            src_adr = base + 12 + 8*i       -> never
            // dst_adr = base + 16 + 8*(i-1)   -> always            dst_adr = base + 12 + 8*(i-1)   -> never
            // -> vectorize                                          -> no vectorization
        }
    }

    @Test
    @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" },
        applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
        applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"})
    // 32-bit: address has ConvL2I for cast of long to address, not supported.
    public static void testByteByte2b(byte[] dest, byte[] src) {
        for (int i = 1; i < src.length / 8; i++) {
            UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8L * (i - 1), UNSAFE.getLongUnaligned(src, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8L * i));
            // With AlignVector, we need 8-byte alignment of vector loads/stores.
            // UseCompactObjectHeaders=false                        UseCompactObjectHeaders=true
            // src_adr = base + 16 + 8*i       -> always            src_adr = base + 12 + 8*i       -> never
            // dst_adr = base + 16 + 8*(i-1)   -> always            dst_adr = base + 12 + 8*(i-1)   -> never
            // -> vectorize                                          -> no vectorization
        }
    }

    @Run(test = {"testByteByte2a", "testByteByte2b"})
    public static void testByteByte2_runner() {
        runAndVerify2(() -> testByteByte2a(byteArray, byteArray), -8);
        runAndVerify2(() -> testByteByte2b(byteArray, byteArray), -8);
    }

    @Test
    @IR(failOn = { IRNode.LOAD_VECTOR_L, IRNode.STORE_VECTOR })
    public static void testByteByte3a(byte[] dest, byte[] src) {
        for (int i = 0; i < src.length / 8 - 1; i++) {
            UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * (i + 1), UNSAFE.getLongUnaligned(src, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * i));
        }
    }

    @Test
    @IR(failOn = { IRNode.LOAD_VECTOR_L, IRNode.STORE_VECTOR })
    public static void testByteByte3b(byte[] dest, byte[] src) {
        for (int i = 0; i < src.length / 8 - 1; i++) {
            UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8L * (i + 1), UNSAFE.getLongUnaligned(src, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8L * i));
        }
    }

    @Run(test = {"testByteByte3a", "testByteByte3b"})
    public static void testByteByte3_runner() {
        runAndVerify2(() -> testByteByte3a(byteArray, byteArray), 8);
        runAndVerify2(() -> testByteByte3b(byteArray, byteArray), 8);
    }

    @Test
    @IR(failOn = { IRNode.LOAD_VECTOR_L, IRNode.STORE_VECTOR })
    public static void testByteByte4a(byte[] dest, byte[] src, int start, int stop) {
        for (int i = start; i < stop; i++) {
            UNSAFE.putLongUnaligned(dest, 8 * i + baseOffset, UNSAFE.getLongUnaligned(src, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * i));
        }
    }

    @Test
    @IR(failOn = { IRNode.LOAD_VECTOR_L, IRNode.STORE_VECTOR })
    public static void testByteByte4b(byte[] dest, byte[] src, int start, int stop) {
        for (int i = start; i < stop; i++) {
            UNSAFE.putLongUnaligned(dest, 8L * i + baseOffset, UNSAFE.getLongUnaligned(src, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8L * i));
        }
    }

    @Run(test = {"testByteByte4a", "testByteByte4b"})
    public static void testByteByte4_runner() {
        baseOffset = UNSAFE.ARRAY_BYTE_BASE_OFFSET;
        runAndVerify2(() -> testByteByte4a(byteArray, byteArray, 0, size), 0);
        runAndVerify2(() -> testByteByte4b(byteArray, byteArray, 0, size), 0);
    }

    @Test
    @IR(failOn = { IRNode.LOAD_VECTOR_L, IRNode.STORE_VECTOR })
    public static void testByteByte5a(byte[] dest, byte[] src, int start, int stop) {
        for (int i = start; i < stop; i++) {
            UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * (i + baseOffset), UNSAFE.getLongUnaligned(src, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * i));
        }
    }

    @Test
    @IR(failOn = { IRNode.LOAD_VECTOR_L, IRNode.STORE_VECTOR })
    public static void testByteByte5b(byte[] dest, byte[] src, int start, int stop) {
        for (int i = start; i < stop; i++) {
            UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8L * (i + baseOffset), UNSAFE.getLongUnaligned(src, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8L * i));
        }
    }

    @Run(test = {"testByteByte5a", "testByteByte5b"})
    public static void testByteByte5_runner() {
        baseOffset = 1;
        runAndVerify2(() -> testByteByte5a(byteArray, byteArray, 0, size-1), 8);
        runAndVerify2(() -> testByteByte5b(byteArray, byteArray, 0, size-1), 8);
    }

    @Test
    @IR(counts = { IRNode.LOAD_VECTOR_L, "=0", IRNode.STORE_VECTOR, "=0" }) // temporary
    // @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" })
    // FAILS: adr is CastX2P(dest + 8 * (i + int_con))
    // See: JDK-8331576
    public static void testOffHeapLong1a(long dest, long[] src) {
        for (int i = 0; i < src.length; i++) {
            UNSAFE.putLongUnaligned(null, dest + 8 * i, handleByteOrder(src[i]));
        }
    }

    @Test
    @IR(counts = { IRNode.LOAD_VECTOR_L, "=0", IRNode.STORE_VECTOR, "=0" }) // temporary
    // @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" })
    // FAILS: adr is CastX2P(dest + 8L * (i + int_con))
    // See: JDK-8331576
    public static void testOffHeapLong1b(long dest, long[] src) {
        for (int i = 0; i < src.length; i++) {
            UNSAFE.putLongUnaligned(null, dest + 8L * i, handleByteOrder(src[i]));
        }
    }

    @Run(test = {"testOffHeapLong1a", "testOffHeapLong1b"})
    public static void testOffHeapLong1_runner() {
        runAndVerify3(() -> testOffHeapLong1a(baseOffHeap, longArray), 0);
        runAndVerify3(() -> testOffHeapLong1b(baseOffHeap, longArray), 0);
    }

    @Test
    @IR(counts = { IRNode.LOAD_VECTOR_L, "=0", IRNode.STORE_VECTOR, "=0" }) // temporary
    // @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" })
    // FAILS: adr is CastX2P
    // See: JDK-8331576
    public static void testOffHeapLong2a(long dest, long[] src) {
        for (int i = 1; i < src.length; i++) {
            UNSAFE.putLongUnaligned(null, dest + 8 * (i - 1), handleByteOrder(src[i]));
        }
    }

    @Test
    @IR(counts = { IRNode.LOAD_VECTOR_L, "=0", IRNode.STORE_VECTOR, "=0" }) // temporary
    // @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" })
    // FAILS: adr is CastX2P
    // See: JDK-8331576
    public static void testOffHeapLong2b(long dest, long[] src) {
        for (int i = 1; i < src.length; i++) {
            UNSAFE.putLongUnaligned(null, dest + 8L * (i - 1), handleByteOrder(src[i]));
        }
    }

    @Run(test = {"testOffHeapLong2a", "testOffHeapLong2b"})
    public static void testOffHeapLong2_runner() {
        runAndVerify3(() -> testOffHeapLong2a(baseOffHeap, longArray), -8);
        runAndVerify3(() -> testOffHeapLong2b(baseOffHeap, longArray), -8);
    }

    @Test
    @IR(counts = { IRNode.LOAD_VECTOR_L, "=0", IRNode.STORE_VECTOR, "=0" }) // temporary
    // @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" })
    // FAILS: adr is CastX2P
    // See: JDK-8331576
    public static void testOffHeapLong3a(long dest, long[] src) {
        for (int i = 0; i < src.length - 1; i++) {
            UNSAFE.putLongUnaligned(null, dest + 8 * (i + 1), handleByteOrder(src[i]));
        }
    }

    @Test
    @IR(counts = { IRNode.LOAD_VECTOR_L, "=0", IRNode.STORE_VECTOR, "=0" }) // temporary
    // @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" })
    // FAILS: adr is CastX2P
    // See: JDK-8331576
    public static void testOffHeapLong3b(long dest, long[] src) {
        for (int i = 0; i < src.length - 1; i++) {
            UNSAFE.putLongUnaligned(null, dest + 8L * (i + 1), handleByteOrder(src[i]));
        }
    }

    @Run(test = {"testOffHeapLong3a", "testOffHeapLong3b"})
    public static void testOffHeapLong3_runner() {
        runAndVerify3(() -> testOffHeapLong3a(baseOffHeap, longArray), 8);
        runAndVerify3(() -> testOffHeapLong3b(baseOffHeap, longArray), 8);
    }

    @Test
    @IR(counts = { IRNode.LOAD_VECTOR_L, "=0", IRNode.STORE_VECTOR, "=0" }) // temporary
    // @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" },
    //     applyIf = {"AlignVector", "false"})
    // FAILS: adr is CastX2P
    // See: JDK-8331576
    // AlignVector cannot guarantee that invar is aligned.
    public static void testOffHeapLong4a(long dest, long[] src, int start, int stop) {
        for (int i = start; i < stop; i++) {
            UNSAFE.putLongUnaligned(null, dest + 8 * i + baseOffset, handleByteOrder(src[i]));
        }
    }

    @Test
    @IR(counts = { IRNode.LOAD_VECTOR_L, "=0", IRNode.STORE_VECTOR, "=0" }) // temporary
    // @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" },
    //     applyIf = {"AlignVector", "false"})
    // FAILS: adr is CastX2P
    // See: JDK-8331576
    // AlignVector cannot guarantee that invar is aligned.
    public static void testOffHeapLong4b(long dest, long[] src, int start, int stop) {
        for (int i = start; i < stop; i++) {
            UNSAFE.putLongUnaligned(null, dest + 8L * i + baseOffset, handleByteOrder(src[i]));
        }
    }

    @Run(test = {"testOffHeapLong4a", "testOffHeapLong4b"})
    public static void testOffHeapLong4_runner() {
        baseOffset = 8;
        runAndVerify3(() -> testOffHeapLong4a(baseOffHeap, longArray, 0, size-1), 8);
        runAndVerify3(() -> testOffHeapLong4b(baseOffHeap, longArray, 0, size-1), 8);
    }
}