1 /*
  2  * Copyright (c) 2023, Red Hat, Inc. All rights reserved.
  3  * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
  4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  5  *
  6  * This code is free software; you can redistribute it and/or modify it
  7  * under the terms of the GNU General Public License version 2 only, as
  8  * published by the Free Software Foundation.
  9  *
 10  * This code is distributed in the hope that it will be useful, but WITHOUT
 11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 13  * version 2 for more details (a copy is included in the LICENSE file that
 14  * accompanied this code).
 15  *
 16  * You should have received a copy of the GNU General Public License version
 17  * 2 along with this work; if not, write to the Free Software Foundation,
 18  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 19  *
 20  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 21  * or visit www.oracle.com if you need additional information or have any
 22  * questions.
 23  */
 24 
 25 package compiler.c2.irTests;
 26 
 27 import compiler.lib.ir_framework.*;
 28 import jdk.test.lib.Utils;
 29 import jdk.test.whitebox.WhiteBox;
 30 import jdk.internal.misc.Unsafe;
 31 import java.util.Random;
 32 import java.util.Arrays;
 33 import java.nio.ByteOrder;
 34 import java.util.List;
 35 
 36 /*
 37  * @test
 38  * @bug 8300258
 39  * @key randomness
 40  * @summary C2: vectorization fails on simple ByteBuffer loop
 41  * @modules java.base/jdk.internal.misc
 42  * @library /test/lib /
 43  * @build jdk.test.whitebox.WhiteBox
 44  * @run driver jdk.test.lib.helpers.ClassFileInstaller jdk.test.whitebox.WhiteBox
 45  * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI compiler.c2.irTests.TestVectorizationMismatchedAccess
 46  */
 47 
 48 public class TestVectorizationMismatchedAccess {
    // Unsafe instance through which the test kernels perform their raw 8-byte loads and stores.
    private static final Unsafe UNSAFE = Unsafe.getUnsafe();
    // Random source supplied by the test library; used below to fill the reference data.
    private static final Random RANDOM = Utils.getRandomInstance();
    // WhiteBox handle. NOTE(review): not referenced in the part of the file visible here.
    private final static WhiteBox wb = WhiteBox.getWhiteBox();
 52 
 53     public static void main(String[] args) {
 54         TestFramework framework = new TestFramework();
 55         framework.addFlags("--add-modules", "java.base", "--add-exports", "java.base/jdk.internal.misc=ALL-UNNAMED",
 56                            "-XX:+UnlockExperimentalVMOptions");
 57 
 58         // Cross-product:
 59         //   +-AlignVector
 60         //   +-UseCompactObjectHeaders
 61         //   +-UseAutoVectorizationSpeculativeAliasingChecks
 62         int idx = 0;
 63         for (String av : List.of("-XX:-AlignVector", "-XX:+AlignVector")) {
 64             for (String coh : List.of("-XX:-UseCompactObjectHeaders", "-XX:+UseCompactObjectHeaders")) {
 65                 for (String sac : List.of("-XX:-UseAutoVectorizationSpeculativeAliasingChecks", "-XX:+UseAutoVectorizationSpeculativeAliasingChecks")) {
 66                     framework.addScenarios(new Scenario(idx++, av, coh, sac));
 67                 }
 68             }
 69         }
 70 
 71         framework.start();
 72     }
 73 
    // Number of long elements; the byte buffers below are sized to size * 8 bytes.
    static int size = 1024;
    // Working byte buffer that the runners hand to the test kernels.
    static byte[] byteArray = new byte[size * 8];
    // Working long buffer; the long-based verifiers reload it from verifyLongArray before each run.
    static long[] longArray = new long[size];
    // Reference bytes, filled once in the static initializer.
    static byte[] verifyByteArray = new byte[size * 8];
    // Reference longs, each packed from 8 consecutive reference bytes in the static initializer.
    static long[] verifyLongArray = new long[size];
    // Offset term read by several kernels inside their address computation; set by the runners.
    static long baseOffset = 0;
    // Address of an off-heap buffer of size * 8 bytes. It is never freed in the visible part of the file.
    static long baseOffHeap = UNSAFE.allocateMemory(size * 8);
 81 
 82 
 83     static {
 84         for (int i = 0; i < verifyByteArray.length; i++) {
 85             verifyByteArray[i] = (byte)RANDOM.nextInt(Byte.MAX_VALUE);
 86         }
 87         for (int i = 0; i < verifyLongArray.length; i++) {
 88             verifyLongArray[i] = 0;
 89             for (int j = 0; j < 8; j++) {
 90                 verifyLongArray[i] = verifyLongArray[i] | (((long)verifyByteArray[8 * i + j]) << 8 * j);
 91             }
 92         }
 93     }
 94 
 95     // Method to adjust the value for the native byte order
 96     static private long handleByteOrder(long value) {
 97         if (ByteOrder.nativeOrder() != ByteOrder.LITTLE_ENDIAN) {
 98             value = Long.reverseBytes(value);
 99         }
100         return value;
101     }
102 
103     static private void runAndVerify(Runnable test, int offset) {
104         System.arraycopy(verifyLongArray, 0, longArray, 0, longArray.length);
105         Arrays.fill(byteArray, (byte)0);
106         test.run();
107         int i;
108         for (i = 0; i < Math.max(offset, 0); i++) {
109             if (byteArray[i] != 0) {
110                 throw new RuntimeException("Incorrect result at " + i + " " + byteArray[i] + " != 0");
111             }
112         }
113         for (; i < Math.min(byteArray.length + offset, byteArray.length); i++) {
114             if (byteArray[i] != verifyByteArray[i - offset]) {
115                 throw new RuntimeException("Incorrect result at " + i + " " + byteArray[i] + " != " + verifyByteArray[i-offset]);
116             }
117         }
118         for (; i < byteArray.length; i++) {
119             if (byteArray[i] != 0) {
120                 throw new RuntimeException("Incorrect result at " + i + " " + byteArray[i] + " != 0");
121             }
122         }
123     }
124 
125     static private void runAndVerify2(Runnable test, int offset) {
126         System.arraycopy(verifyByteArray, 0, byteArray, 0, byteArray.length);
127         test.run();
128         int i;
129         for (i = 0; i < Math.max(offset, 0); i++) {
130             if (byteArray[i] != verifyByteArray[i]) {
131                 throw new RuntimeException("Incorrect result at " + i + " " + byteArray[i] + " != " + verifyByteArray[i]);
132             }
133         }
134         for (; i < Math.min(byteArray.length + offset, byteArray.length); i++) {
135             int val = offset >=1 ? verifyByteArray[(i-offset) % 8] : verifyByteArray[i-offset];
136             if (byteArray[i] != val) {
137                 throw new RuntimeException("Incorrect result at " + i + " " + byteArray[i] + " != " + verifyByteArray[i-offset]);
138             }
139         }
140         for (; i < byteArray.length; i++) {
141             if (byteArray[i] != verifyByteArray[i]) {
142                 throw new RuntimeException("Incorrect result at " + i + " " + byteArray[i] + " != " + verifyByteArray[i]);
143             }
144         }
145     }
146 
147 
148     static private void runAndVerify3(Runnable test, int offset) {
149         System.arraycopy(verifyLongArray, 0, longArray, 0, longArray.length);
150         for (int i = 0; i < size * 8; i++) {
151             UNSAFE.putByte(null, baseOffHeap + i, (byte)0);
152         }
153         test.run();
154         int i;
155         for (i = 0; i < Math.max(offset, 0); i++) {
156             if (UNSAFE.getByte(null, baseOffHeap + i) != 0) {
157                 throw new RuntimeException("Incorrect result at " + i + " " + byteArray[i] + " != 0");
158             }
159         }
160         for (; i < Math.min(size * 8 + offset, size * 8); i++) {
161             if (UNSAFE.getByte(null, baseOffHeap + i) != verifyByteArray[i - offset]) {
162                 throw new RuntimeException("Incorrect result at " + i + " " + byteArray[i] + " != " + verifyByteArray[i-offset]);
163             }
164         }
165         for (; i < byteArray.length; i++) {
166             if (UNSAFE.getByte(null, baseOffHeap + i) != 0) {
167                 throw new RuntimeException("Incorrect result at " + i + " " + byteArray[i] + " != 0");
168             }
169         }
170     }
171 
    /**
     * Stores every {@code src[i]} (after {@code handleByteOrder}) into {@code dest} with
     * {@code UNSAFE.putLongUnaligned} at byte offset
     * {@code UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * i}; the index is scaled with an int
     * multiplication. The IR rule asks for at least one LOAD_VECTOR_L and one STORE_VECTOR
     * node, subject to the applyIf conditions listed on the annotation.
     */
    @Test
    @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" },
        applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
        applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true", "rvv", "true"},
        applyIfPlatform = {"64-bit", "true"})
    // 32-bit: offsets are badly aligned (UNSAFE.ARRAY_BYTE_BASE_OFFSET is 4 byte aligned, but not 8 byte aligned).
    //         might get fixed with JDK-8325155.
    public static void testByteLong1a(byte[] dest, long[] src) {
        for (int i = 0; i < src.length; i++) {
            UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * i, handleByteOrder(src[i]));
            // With AlignVector, we need 8-byte alignment of vector loads/stores.
            // UseCompactObjectHeaders=false                  UseCompactObjectHeaders=true
            // B_adr = base + 16 + 8*i  ->  always            B_adr = base + 12 + 8*i  ->  never
            // L_adr = base + 16 + 8*i  ->  always            L_adr = base + 16 + 8*i  ->  always
            // -> vectorize                                   -> no vectorization
        }
    }
189 
    /**
     * Same copy as testByteLong1a, except that the index is scaled with a long
     * multiplication ({@code 8L * i}). The IR rule asks for at least one LOAD_VECTOR_L and
     * one STORE_VECTOR node, subject to the applyIf conditions listed on the annotation.
     */
    @Test
    @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" },
        applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
        applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true", "rvv", "true"},
        applyIfPlatform = {"64-bit", "true"})
    // 32-bit: address has ConvL2I for cast of long to address, not supported.
    public static void testByteLong1b(byte[] dest, long[] src) {
        for (int i = 0; i < src.length; i++) {
            UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8L * i, handleByteOrder(src[i]));
            // With AlignVector, we need 8-byte alignment of vector loads/stores.
            // UseCompactObjectHeaders=false                  UseCompactObjectHeaders=true
            // B_adr = base + 16 + 8*i  ->  always            B_adr = base + 12 + 8*i  ->  never
            // L_adr = base + 16 + 8*i  ->  always            L_adr = base + 16 + 8*i  ->  always
            // -> vectorize                                   -> no vectorization
        }
    }
206 
    /**
     * Stores {@code src[i]} (after {@code handleByteOrder}) into {@code dest} at byte offset
     * {@code base + 8 * i}, where {@code base} is the fixed value 64 rather than the array
     * base offset. The loop stops 8 elements before the end of {@code src}. The index is
     * scaled with an int multiplication. The IR rule carries only a CPU-feature condition.
     */
    @Test
    @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" },
        applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true", "rvv", "true"})
    public static void testByteLong1c(byte[] dest, long[] src) {
        long base = 64; // make sure it is big enough and 8 byte aligned (required for 32-bit)
        for (int i = 0; i < src.length - 8; i++) {
            UNSAFE.putLongUnaligned(dest, base + 8 * i, handleByteOrder(src[i]));
            // With AlignVector, we need 8-byte alignment of vector loads/stores.
            // UseCompactObjectHeaders=false                  UseCompactObjectHeaders=true
            // B_adr = base + 64 + 8*i  ->  always            B_adr = base + 64 + 8*i  ->  always
            // L_adr = base + 16 + 8*i  ->  always            L_adr = base + 16 + 8*i  ->  always
            // -> vectorize                                   -> vectorize
        }
    }
221 
    /**
     * Same copy as testByteLong1c (fixed {@code base} of 64, loop stops 8 elements early),
     * except that the index is scaled with a long multiplication ({@code 8L * i}).
     * The IR rule is additionally restricted to 64-bit platforms.
     */
    @Test
    @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" },
        applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true", "rvv", "true"},
        applyIfPlatform = {"64-bit", "true"})
    // 32-bit: address has ConvL2I for cast of long to address, not supported.
    public static void testByteLong1d(byte[] dest, long[] src) {
        long base = 64; // make sure it is big enough and 8 byte aligned (required for 32-bit)
        for (int i = 0; i < src.length - 8; i++) {
            UNSAFE.putLongUnaligned(dest, base + 8L * i, handleByteOrder(src[i]));
            // With AlignVector, we need 8-byte alignment of vector loads/stores.
            // UseCompactObjectHeaders=false                  UseCompactObjectHeaders=true
            // B_adr = base + 64 + 8*i  ->  always            B_adr = base + 64 + 8*i  ->  always
            // L_adr = base + 16 + 8*i  ->  always            L_adr = base + 16 + 8*i  ->  always
            // -> vectorize                                   -> vectorize
        }
    }
238 
239     @Run(test = {"testByteLong1a", "testByteLong1b", "testByteLong1c", "testByteLong1d"})
240     public static void testByteLong1_runner() {
241         runAndVerify(() -> testByteLong1a(byteArray, longArray), 0);
242         runAndVerify(() -> testByteLong1b(byteArray, longArray), 0);
243         testByteLong1c(byteArray, longArray);
244         testByteLong1d(byteArray, longArray);
245     }
246 
    /**
     * Stores {@code src[i]} (after {@code handleByteOrder}) one 8-byte slot lower in
     * {@code dest}: the loop starts at {@code i = 1} and writes to byte offset
     * {@code UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * (i - 1)}, scaled with an int multiplication.
     * The IR rule asks for at least one LOAD_VECTOR_L and one STORE_VECTOR node, subject to
     * the applyIf conditions listed on the annotation.
     */
    @Test
    @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" },
        applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
        applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true", "rvv", "true"},
        applyIfPlatform = {"64-bit", "true"})
    // 32-bit: offsets are badly aligned (UNSAFE.ARRAY_BYTE_BASE_OFFSET is 4 byte aligned, but not 8 byte aligned).
    //         might get fixed with JDK-8325155.
    public static void testByteLong2a(byte[] dest, long[] src) {
        for (int i = 1; i < src.length; i++) {
            UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * (i - 1), handleByteOrder(src[i]));
            // With AlignVector, we need 8-byte alignment of vector loads/stores.
            // UseCompactObjectHeaders=false                      UseCompactObjectHeaders=true
            // B_adr = base + 16 + 8*(i-1)  ->  always            B_adr = base + 12 + 8*(i-1)  ->  never
            // L_adr = base + 16 + 8*i      ->  always            L_adr = base + 16 + 8*i      ->  always
            // -> vectorize                                       -> no vectorization
        }
    }
264 
    /**
     * Same copy as testByteLong2a (element {@code i} stored one 8-byte slot lower, loop
     * starting at 1), except that the index is scaled with a long multiplication
     * ({@code 8L * (i - 1)}).
     */
    @Test
    @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" },
        applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
        applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true", "rvv", "true"},
        applyIfPlatform = {"64-bit", "true"})
    // 32-bit: address has ConvL2I for cast of long to address, not supported.
    public static void testByteLong2b(byte[] dest, long[] src) {
        for (int i = 1; i < src.length; i++) {
            UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8L * (i - 1), handleByteOrder(src[i]));
            // With AlignVector, we need 8-byte alignment of vector loads/stores.
            // UseCompactObjectHeaders=false                      UseCompactObjectHeaders=true
            // B_adr = base + 16 + 8*(i-1)  ->  always            B_adr = base + 12 + 8*(i-1)  ->  never
            // L_adr = base + 16 + 8*i      ->  always            L_adr = base + 16 + 8*i      ->  always
            // -> vectorize                                       -> no vectorization
        }
    }
281 
282     @Run(test = {"testByteLong2a", "testByteLong2b"})
283     public static void testByteLong2_runner() {
284         runAndVerify(() -> testByteLong2a(byteArray, longArray), -8);
285         runAndVerify(() -> testByteLong2b(byteArray, longArray), -8);
286     }
287 
    /**
     * Stores {@code src[i]} (after {@code handleByteOrder}) one 8-byte slot higher in
     * {@code dest}: the loop stops one element before the end of {@code src} and writes to
     * byte offset {@code UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * (i + 1)}, scaled with an int
     * multiplication. The IR rule asks for at least one LOAD_VECTOR_L and one STORE_VECTOR
     * node, subject to the applyIf conditions listed on the annotation.
     */
    @Test
    @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" },
        applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
        applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true", "rvv", "true"},
        applyIfPlatform = {"64-bit", "true"})
    // 32-bit: offsets are badly aligned (UNSAFE.ARRAY_BYTE_BASE_OFFSET is 4 byte aligned, but not 8 byte aligned).
    //         might get fixed with JDK-8325155.
    public static void testByteLong3a(byte[] dest, long[] src) {
        for (int i = 0; i < src.length - 1; i++) {
            UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * (i + 1), handleByteOrder(src[i]));
            // With AlignVector, we need 8-byte alignment of vector loads/stores.
            // UseCompactObjectHeaders=false                      UseCompactObjectHeaders=true
            // B_adr = base + 16 + 8*(i+1)  ->  always            B_adr = base + 12 + 8*(i+1)  ->  never
            // L_adr = base + 16 + 8*i      ->  always            L_adr = base + 16 + 8*i      ->  always
            // -> vectorize                                       -> no vectorization
        }
    }
305 
    /**
     * Same copy as testByteLong3a (element {@code i} stored one 8-byte slot higher, loop
     * stopping one element early), except that the index is scaled with a long
     * multiplication ({@code 8L * (i + 1)}).
     */
    @Test
    @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" },
        applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
        applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true", "rvv", "true"},
        applyIfPlatform = {"64-bit", "true"})
    // 32-bit: address has ConvL2I for cast of long to address, not supported.
    public static void testByteLong3b(byte[] dest, long[] src) {
        for (int i = 0; i < src.length - 1; i++) {
            UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8L * (i + 1), handleByteOrder(src[i]));
            // With AlignVector, we need 8-byte alignment of vector loads/stores.
            // UseCompactObjectHeaders=false                      UseCompactObjectHeaders=true
            // B_adr = base + 16 + 8*(i+1)  ->  always            B_adr = base + 12 + 8*(i+1)  ->  never
            // L_adr = base + 16 + 8*i      ->  always            L_adr = base + 16 + 8*i      ->  always
            // -> vectorize                                       -> no vectorization
        }
    }
322 
323     @Run(test = {"testByteLong3a", "testByteLong3b"})
324     public static void testByteLong3_runner() {
325         runAndVerify(() -> testByteLong3a(byteArray, longArray), 8);
326         runAndVerify(() -> testByteLong3b(byteArray, longArray), 8);
327     }
328 
    /**
     * Stores {@code src[i]} (after {@code handleByteOrder}) for {@code i} in
     * {@code [start, stop)} into {@code dest} at byte offset {@code 8 * i + baseOffset}.
     * {@code 8 * i} is an int multiplication and {@code baseOffset} is the static long
     * field, which is read inside the loop. The IR rule asks for vector nodes only when
     * AlignVector is false.
     */
    @Test
    @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" },
        applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true", "rvv", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "false"})
    // 32-bit: offsets are badly aligned (UNSAFE.ARRAY_BYTE_BASE_OFFSET is 4 byte aligned, but not 8 byte aligned).
    //         might get fixed with JDK-8325155.
    // AlignVector cannot guarantee that invar is aligned.
    public static void testByteLong4a(byte[] dest, long[] src, int start, int stop) {
        for (int i = start; i < stop; i++) {
            UNSAFE.putLongUnaligned(dest, 8 * i + baseOffset, handleByteOrder(src[i]));
        }
    }
342 
    /**
     * Same copy as testByteLong4a (range {@code [start, stop)}, destination offset built
     * from the static {@code baseOffset} field), except that the index is scaled with a
     * long multiplication ({@code 8L * i}).
     */
    @Test
    @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" },
        applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true", "rvv", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "false"})
    // 32-bit: address has ConvL2I for cast of long to address, not supported.
    // AlignVector cannot guarantee that invar is aligned.
    public static void testByteLong4b(byte[] dest, long[] src, int start, int stop) {
        for (int i = start; i < stop; i++) {
            UNSAFE.putLongUnaligned(dest, 8L * i + baseOffset, handleByteOrder(src[i]));
        }
    }
355 
356     @Run(test = {"testByteLong4a", "testByteLong4b"})
357     public static void testByteLong4_runner() {
358         baseOffset = UNSAFE.ARRAY_BYTE_BASE_OFFSET;
359         runAndVerify(() -> testByteLong4a(byteArray, longArray, 0, size), 0);
360         runAndVerify(() -> testByteLong4b(byteArray, longArray, 0, size), 0);
361     }
362 
    /**
     * Stores {@code src[i]} (after {@code handleByteOrder}) for {@code i} in
     * {@code [start, stop)} into {@code dest} at byte offset
     * {@code UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * (i + baseOffset)}.
     * Note: {@code baseOffset} is declared as a static long, so {@code i + baseOffset} and
     * the multiplication by 8 are evaluated in long arithmetic here, even though the
     * literal is an int.
     */
    @Test
    @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" },
        applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
        applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true", "rvv", "true"},
        applyIfPlatform = {"64-bit", "true"})
    // 32-bit: offsets are badly aligned (UNSAFE.ARRAY_BYTE_BASE_OFFSET is 4 byte aligned, but not 8 byte aligned).
    //         might get fixed with JDK-8325155.
    public static void testByteLong5a(byte[] dest, long[] src, int start, int stop) {
        for (int i = start; i < stop; i++) {
            UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * (i + baseOffset), handleByteOrder(src[i]));
            // With AlignVector, we need 8-byte alignment of vector loads/stores.
            // UseCompactObjectHeaders=false                      UseCompactObjectHeaders=true
            // B_adr = base + 16 + 8*(i+x)  ->  always            B_adr = base + 12 + 8*(i+x)  ->  never
            // L_adr = base + 16 + 8*i      ->  always            L_adr = base + 16 + 8*i      ->  always
            // -> vectorize                                       -> no vectorization
        }
    }
380 
    /**
     * Same copy as testByteLong5a, written with a long literal:
     * {@code UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8L * (i + baseOffset)} for {@code i} in
     * {@code [start, stop)}, where {@code baseOffset} is the static long field.
     */
    @Test
    @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" },
        applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
        applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true", "rvv", "true"},
        applyIfPlatform = {"64-bit", "true"})
    // 32-bit: address has ConvL2I for cast of long to address, not supported.
    public static void testByteLong5b(byte[] dest, long[] src, int start, int stop) {
        for (int i = start; i < stop; i++) {
            UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8L * (i + baseOffset), handleByteOrder(src[i]));
            // With AlignVector, we need 8-byte alignment of vector loads/stores.
            // UseCompactObjectHeaders=false                      UseCompactObjectHeaders=true
            // B_adr = base + 16 + 8*(i+x)  ->  always            B_adr = base + 12 + 8*(i+x)  ->  never
            // L_adr = base + 16 + 8*i      ->  always            L_adr = base + 16 + 8*i      ->  always
            // -> vectorize                                       -> no vectorization
        }
    }
397 
398     @Run(test = {"testByteLong5a", "testByteLong5b"})
399     public static void testByteLong5_runner() {
400         baseOffset = 1;
401         runAndVerify(() -> testByteLong5a(byteArray, longArray, 0, size-1), 8);
402         runAndVerify(() -> testByteLong5b(byteArray, longArray, 0, size-1), 8);
403     }
404 
    /**
     * Copies {@code src} to {@code dest} in 8-byte steps: for each of the
     * {@code src.length / 8} groups, a long is read with {@code UNSAFE.getLongUnaligned} and
     * written with {@code UNSAFE.putLongUnaligned} at the same byte offset
     * {@code UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * i} (int multiplication).
     * The IR rule asks for at least one LOAD_VECTOR_L and one STORE_VECTOR node, subject to
     * the applyIf conditions listed on the annotation.
     */
    @Test
    @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" },
        applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
        applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true", "rvv", "true"},
        applyIfPlatform = {"64-bit", "true"})
    // 32-bit: offsets are badly aligned (UNSAFE.ARRAY_BYTE_BASE_OFFSET is 4 byte aligned, but not 8 byte aligned).
    //         might get fixed with JDK-8325155.
    public static void testByteByte1a(byte[] dest, byte[] src) {
        for (int i = 0; i < src.length / 8; i++) {
            UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * i, UNSAFE.getLongUnaligned(src, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * i));
            // With AlignVector, we need 8-byte alignment of vector loads/stores.
            // UseCompactObjectHeaders=false                    UseCompactObjectHeaders=true
            // src_adr = base + 16 + 8*i  ->  always            src_adr = base + 12 + 8*i  ->  never
            // dst_adr = base + 16 + 8*i  ->  always            dst_adr = base + 12 + 8*i  ->  never
            // -> vectorize                                     -> no vectorization
        }
    }
422 
    /**
     * Same 8-byte-step copy as testByteByte1a, except that both the load and the store
     * offset scale the index with a long multiplication ({@code 8L * i}).
     */
    @Test
    @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" },
        applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
        applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true", "rvv", "true"},
        applyIfPlatform = {"64-bit", "true"})
    // 32-bit: address has ConvL2I for cast of long to address, not supported.
    public static void testByteByte1b(byte[] dest, byte[] src) {
        for (int i = 0; i < src.length / 8; i++) {
            UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8L * i, UNSAFE.getLongUnaligned(src, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8L * i));
            // With AlignVector, we need 8-byte alignment of vector loads/stores.
            // UseCompactObjectHeaders=false                    UseCompactObjectHeaders=true
            // src_adr = base + 16 + 8*i  ->  always            src_adr = base + 12 + 8*i  ->  never
            // dst_adr = base + 16 + 8*i  ->  always            dst_adr = base + 12 + 8*i  ->  never
            // -> vectorize                                     -> no vectorization
        }
    }
439 
440     @Run(test = {"testByteByte1a", "testByteByte1b"})
441     public static void testByteByte1_runner() {
442         runAndVerify2(() -> testByteByte1a(byteArray, byteArray), 0);
443         runAndVerify2(() -> testByteByte1b(byteArray, byteArray), 0);
444     }
445 
    /**
     * Copies 8-byte groups from {@code src} to {@code dest} one group lower: starting at
     * {@code i = 1}, the long read at {@code UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * i} is
     * written to {@code UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * (i - 1)} (int multiplications).
     * The IR rule asks for at least one LOAD_VECTOR_L and one STORE_VECTOR node, subject to
     * the applyIf conditions listed on the annotation.
     */
    @Test
    @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" },
        applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
        applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true", "rvv", "true"},
        applyIfPlatform = {"64-bit", "true"})
    // 32-bit: offsets are badly aligned (UNSAFE.ARRAY_BYTE_BASE_OFFSET is 4 byte aligned, but not 8 byte aligned).
    //         might get fixed with JDK-8325155.
    public static void testByteByte2a(byte[] dest, byte[] src) {
        for (int i = 1; i < src.length / 8; i++) {
            UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * (i - 1), UNSAFE.getLongUnaligned(src, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * i));
            // With AlignVector, we need 8-byte alignment of vector loads/stores.
            // UseCompactObjectHeaders=false                        UseCompactObjectHeaders=true
            // src_adr = base + 16 + 8*i      ->  always            src_adr = base + 12 + 8*i      ->  never
            // dst_adr = base + 16 + 8*(i-1)  ->  always            dst_adr = base + 12 + 8*(i-1)  ->  never
            // -> vectorize                                         -> no vectorization
        }
    }
463 
    /**
     * Same copy as testByteByte2a (group {@code i} written one group lower, loop starting
     * at 1), except that both offsets scale the index with a long multiplication.
     */
    @Test
    @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" },
        applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
        applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true", "rvv", "true"},
        applyIfPlatform = {"64-bit", "true"})
    // 32-bit: address has ConvL2I for cast of long to address, not supported.
    public static void testByteByte2b(byte[] dest, byte[] src) {
        for (int i = 1; i < src.length / 8; i++) {
            UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8L * (i - 1), UNSAFE.getLongUnaligned(src, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8L * i));
            // With AlignVector, we need 8-byte alignment of vector loads/stores.
            // UseCompactObjectHeaders=false                        UseCompactObjectHeaders=true
            // src_adr = base + 16 + 8*i      ->  always            src_adr = base + 12 + 8*i      ->  never
            // dst_adr = base + 16 + 8*(i-1)  ->  always            dst_adr = base + 12 + 8*(i-1)  ->  never
            // -> vectorize                                         -> no vectorization
        }
    }
480 
481     @Run(test = {"testByteByte2a", "testByteByte2b"})
482     public static void testByteByte2_runner() {
483         runAndVerify2(() -> testByteByte2a(byteArray, byteArray), -8);
484         runAndVerify2(() -> testByteByte2b(byteArray, byteArray), -8);
485     }
486 
    /**
     * Copies 8-byte groups from {@code src} to {@code dest} one group higher: the long read
     * at {@code UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * i} is written to
     * {@code UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * (i + 1)} (int multiplications), for
     * {@code i < src.length / 8 - 1}.
     * Two IR rules are attached. The first forbids LOAD_VECTOR_L and STORE_VECTOR when
     * UseAutoVectorizationSpeculativeAliasingChecks is false. The second, evaluated at
     * PRINT_IDEAL when that flag is true and AlignVector is false, asks for both vector
     * nodes plus at least one match of ".*multiversion.*".
     */
    @Test
    @IR(failOn = { IRNode.LOAD_VECTOR_L, IRNode.STORE_VECTOR },
        applyIf = {"UseAutoVectorizationSpeculativeAliasingChecks", "false"})
    @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1", ".*multiversion.*", ">=1"},
        phase = CompilePhase.PRINT_IDEAL,
        applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true", "rvv", "true"},
        applyIfAnd = {"UseAutoVectorizationSpeculativeAliasingChecks", "true", "AlignVector", "false"},
        applyIfPlatform = {"64-bit", "true"})
    // We have unknown aliasing. At runtime "dest == src", so the AutoVectorization Predicate fails, and recompiles with Multiversioning.
    public static void testByteByte3a(byte[] dest, byte[] src) {
        for (int i = 0; i < src.length / 8 - 1; i++) {
            UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * (i + 1), UNSAFE.getLongUnaligned(src, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * i));
        }
    }
501 
    /**
     * Same copy as testByteByte3a (group {@code i} written one group higher), except that
     * both offsets scale the index with a long multiplication. It carries the same two IR
     * rules: no vector nodes when UseAutoVectorizationSpeculativeAliasingChecks is false,
     * and vector nodes plus a ".*multiversion.*" match when that flag is true and
     * AlignVector is false.
     */
    @Test
    @IR(failOn = { IRNode.LOAD_VECTOR_L, IRNode.STORE_VECTOR },
        applyIf = {"UseAutoVectorizationSpeculativeAliasingChecks", "false"})
    @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1", ".*multiversion.*", ">=1"},
        phase = CompilePhase.PRINT_IDEAL,
        applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true", "rvv", "true"},
        applyIfAnd = {"UseAutoVectorizationSpeculativeAliasingChecks", "true", "AlignVector", "false"},
        applyIfPlatform = {"64-bit", "true"})
    // We have unknown aliasing. At runtime "dest == src", so the AutoVectorization Predicate fails, and recompiles with Multiversioning.
    public static void testByteByte3b(byte[] dest, byte[] src) {
        for (int i = 0; i < src.length / 8 - 1; i++) {
            UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8L * (i + 1), UNSAFE.getLongUnaligned(src, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8L * i));
        }
    }
516 
517     @Run(test = {"testByteByte3a", "testByteByte3b"})
518     public static void testByteByte3_runner() {
519         runAndVerify2(() -> testByteByte3a(byteArray, byteArray), 8);
520         runAndVerify2(() -> testByteByte3b(byteArray, byteArray), 8);
521     }
522 
    /**
     * For {@code i} in {@code [start, stop)}, reads a long from {@code src} at
     * {@code UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * i} and writes it to {@code dest} at
     * {@code 8 * i + baseOffset}, where {@code baseOffset} is the static long field read
     * inside the loop. The index is scaled with int multiplications.
     * Two IR rules are attached: no vector nodes when
     * UseAutoVectorizationSpeculativeAliasingChecks is false, and vector nodes plus a
     * ".*multiversion.*" match (at PRINT_IDEAL) when that flag is true and AlignVector is
     * false.
     */
    @Test
    @IR(failOn = { IRNode.LOAD_VECTOR_L, IRNode.STORE_VECTOR },
        applyIf = {"UseAutoVectorizationSpeculativeAliasingChecks", "false"})
    @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1", ".*multiversion.*", ">=1"},
        phase = CompilePhase.PRINT_IDEAL,
        applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true", "rvv", "true"},
        applyIfAnd = {"UseAutoVectorizationSpeculativeAliasingChecks", "true", "AlignVector", "false"},
        applyIfPlatform = {"64-bit", "true"})
    // We have unknown aliasing. At runtime "dest == src", so the AutoVectorization Predicate fails, and recompiles with Multiversioning.
    public static void testByteByte4a(byte[] dest, byte[] src, int start, int stop) {
        for (int i = start; i < stop; i++) {
            UNSAFE.putLongUnaligned(dest, 8 * i + baseOffset, UNSAFE.getLongUnaligned(src, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * i));
        }
    }
537 
    /**
     * Same copy as testByteByte4a (destination offset built from the static
     * {@code baseOffset} field, source offset from the array base offset), except that both
     * offsets scale the index with a long multiplication ({@code 8L * i}). It carries the
     * same two IR rules as testByteByte4a.
     */
    @Test
    @IR(failOn = { IRNode.LOAD_VECTOR_L, IRNode.STORE_VECTOR },
        applyIf = {"UseAutoVectorizationSpeculativeAliasingChecks", "false"})
    @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1", ".*multiversion.*", ">=1"},
        phase = CompilePhase.PRINT_IDEAL,
        applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true", "rvv", "true"},
        applyIfAnd = {"UseAutoVectorizationSpeculativeAliasingChecks", "true", "AlignVector", "false"},
        applyIfPlatform = {"64-bit", "true"})
    // We have unknown aliasing. At runtime "dest == src", so the AutoVectorization Predicate fails, and recompiles with Multiversioning.
    public static void testByteByte4b(byte[] dest, byte[] src, int start, int stop) {
        for (int i = start; i < stop; i++) {
            UNSAFE.putLongUnaligned(dest, 8L * i + baseOffset, UNSAFE.getLongUnaligned(src, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8L * i));
        }
    }
552 
553     @Run(test = {"testByteByte4a", "testByteByte4b"})
        // Custom run method for testByteByte4a and testByteByte4b.
554     public static void testByteByte4_runner() {
            // The tests form the store offset as 8 * i + baseOffset. With baseOffset set to the byte-array
            // base offset, store and load offsets are equal for every i.
555         baseOffset = UNSAFE.ARRAY_BYTE_BASE_OFFSET;
            // Same array for dest and src, index range [0, size).
            // NOTE(review): the trailing 0 is presumably the byte shift runAndVerify2 expects -- confirm.
556         runAndVerify2(() -> testByteByte4a(byteArray, byteArray, 0, size), 0);
557         runAndVerify2(() -> testByteByte4b(byteArray, byteArray, 0, size), 0);
558     }
559 
560     @Test
561     @IR(failOn = { IRNode.LOAD_VECTOR_L, IRNode.STORE_VECTOR },
562         applyIf = {"UseAutoVectorizationSpeculativeAliasingChecks", "false"})
563     @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1", ".*multiversion.*", ">=1"},
564         phase = CompilePhase.PRINT_IDEAL,
565         applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true", "rvv", "true"},
566         applyIfAnd = {"UseAutoVectorizationSpeculativeAliasingChecks", "true", "AlignVector", "false"},
567         applyIfPlatform = {"64-bit", "true"})
568     // We have unknown aliasing. At runtime "dest == src", so the AutoVectorization Predicate fails, and recompiles with Multiversioning.
        //
        // Copies one long per iteration for i in [start, stop): the value read from src at byte offset
        // ARRAY_BYTE_BASE_OFFSET + 8 * i is written to dest at ARRAY_BYTE_BASE_OFFSET + 8 * (i + baseOffset).
        // Unlike testByteByte4a, the static field baseOffset is added to the index before scaling,
        // so it shifts the store by whole longs rather than by bytes.
        // Variant "a" uses the int literal 8; testByteByte5b is the same loop with 8L.
        //
        // IR rules as written above:
        //  - speculative aliasing checks off: no LOAD_VECTOR_L and no STORE_VECTOR node may appear;
        //  - speculative aliasing checks on, AlignVector off, 64-bit, and one of sse2/asimd/rvv present:
        //    at least one LOAD_VECTOR_L, one STORE_VECTOR and one "multiversion" match in PRINT_IDEAL.
569     public static void testByteByte5a(byte[] dest, byte[] src, int start, int stop) {
570         for (int i = start; i < stop; i++) {
571             UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * (i + baseOffset), UNSAFE.getLongUnaligned(src, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * i));
572         }
573     }
574 
575     @Test
576     @IR(failOn = { IRNode.LOAD_VECTOR_L, IRNode.STORE_VECTOR },
577         applyIf = {"UseAutoVectorizationSpeculativeAliasingChecks", "false"})
578     @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1", ".*multiversion.*", ">=1"},
579         phase = CompilePhase.PRINT_IDEAL,
580         applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true", "rvv", "true"},
581         applyIfAnd = {"UseAutoVectorizationSpeculativeAliasingChecks", "true", "AlignVector", "false"},
582         applyIfPlatform = {"64-bit", "true"})
583     // We have unknown aliasing. At runtime "dest == src", so the AutoVectorization Predicate fails, and recompiles with Multiversioning.
        //
        // Long-arithmetic twin of testByteByte5a: for i in [start, stop) the long read from src at byte
        // offset ARRAY_BYTE_BASE_OFFSET + 8L * i is written to dest at
        // ARRAY_BYTE_BASE_OFFSET + 8L * (i + baseOffset). baseOffset is added to the index before scaling.
        // The only source difference to variant "a" is the long literal 8L.
        //
        // IR rules as written above:
        //  - speculative aliasing checks off: no LOAD_VECTOR_L and no STORE_VECTOR node may appear;
        //  - speculative aliasing checks on, AlignVector off, 64-bit, and one of sse2/asimd/rvv present:
        //    at least one LOAD_VECTOR_L, one STORE_VECTOR and one "multiversion" match in PRINT_IDEAL.
584     public static void testByteByte5b(byte[] dest, byte[] src, int start, int stop) {
585         for (int i = start; i < stop; i++) {
586             UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8L * (i + baseOffset), UNSAFE.getLongUnaligned(src, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8L * i));
587         }
588     }
589 
590     @Run(test = {"testByteByte5a", "testByteByte5b"})
        // Custom run method for testByteByte5a and testByteByte5b.
591     public static void testByteByte5_runner() {
            // The tests store at index (i + baseOffset), so with 1 every long is written one slot
            // (8 bytes) after the position it was read from.
592         baseOffset = 1;
            // Same array for dest and src, index range [0, size-1).
            // NOTE(review): the upper bound size-1 presumably keeps the store at i + 1 inside the array,
            // and the trailing 8 is presumably the byte shift runAndVerify2 expects -- confirm both.
593         runAndVerify2(() -> testByteByte5a(byteArray, byteArray, 0, size-1), 8);
594         runAndVerify2(() -> testByteByte5b(byteArray, byteArray, 0, size-1), 8);
595     }
596 
597     @Test
598     @IR(counts = { IRNode.LOAD_VECTOR_L, "=0", IRNode.STORE_VECTOR, "=0" }) // temporary
599     // @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" })
600     // FAILS: adr is CastX2P(dest + 8 * (i + int_con))
601     // See: JDK-8331576
        //
        // For every index i of src, writes handleByteOrder(src[i]) to address dest + 8 * i.
        // The Unsafe base object is null, so the long dest is used as an absolute (off-heap) address.
        // Variant "a": 8 * i is an int multiplication whose result is then added to the long dest.
        // The active rule pins the current behaviour (no vector load/store); the commented-out rule is
        // the intended expectation once the issue referenced above is resolved.
        // NOTE(review): handleByteOrder is defined elsewhere; presumably it adjusts byte order -- confirm.
602     public static void testOffHeapLong1a(long dest, long[] src) {
603         for (int i = 0; i < src.length; i++) {
604             UNSAFE.putLongUnaligned(null, dest + 8 * i, handleByteOrder(src[i]));
605         }
606     }
607 
608     @Test
609     @IR(counts = { IRNode.LOAD_VECTOR_L, "=0", IRNode.STORE_VECTOR, "=0" }) // temporary
610     // @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" })
611     // FAILS: adr is CastX2P(dest + 8L * (i + int_con))
612     // See: JDK-8331576
        //
        // Long-arithmetic twin of testOffHeapLong1a: for every index i of src, writes
        // handleByteOrder(src[i]) to address dest + 8L * i (null base object, absolute address).
        // The only source difference to variant "a" is the long literal 8L, i.e. a long multiplication.
        // The active rule pins the current behaviour (no vector load/store); the commented-out rule is
        // the intended expectation once the issue referenced above is resolved.
        // NOTE(review): handleByteOrder is defined elsewhere; presumably it adjusts byte order -- confirm.
613     public static void testOffHeapLong1b(long dest, long[] src) {
614         for (int i = 0; i < src.length; i++) {
615             UNSAFE.putLongUnaligned(null, dest + 8L * i, handleByteOrder(src[i]));
616         }
617     }
618 
619     @Run(test = {"testOffHeapLong1a", "testOffHeapLong1b"})
        // Custom run method for testOffHeapLong1a and testOffHeapLong1b: both copy longArray to the
        // address held in baseOffHeap, wrapped in runAndVerify3.
        // NOTE(review): the trailing 0 is presumably the expected byte displacement of the written data
        // relative to baseOffHeap -- confirm against runAndVerify3.
620     public static void testOffHeapLong1_runner() {
621         runAndVerify3(() -> testOffHeapLong1a(baseOffHeap, longArray), 0);
622         runAndVerify3(() -> testOffHeapLong1b(baseOffHeap, longArray), 0);
623     }
624 
625     @Test
626     @IR(counts = { IRNode.LOAD_VECTOR_L, "=0", IRNode.STORE_VECTOR, "=0" }) // temporary
627     // @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" })
628     // FAILS: adr is CastX2P
629     // See: JDK-8331576
        //
        // For i in [1, src.length), writes handleByteOrder(src[i]) to address dest + 8 * (i - 1),
        // i.e. element i lands one long slot (8 bytes) before dest + 8 * i. src[0] is never read.
        // The Unsafe base object is null, so the long dest is used as an absolute (off-heap) address.
        // Variant "a": 8 * (i - 1) is an int multiplication; testOffHeapLong2b uses 8L.
        // The active rule pins the current behaviour; the commented-out rule is the intended one.
628a    
630     public static void testOffHeapLong2a(long dest, long[] src) {
631         for (int i = 1; i < src.length; i++) {
632             UNSAFE.putLongUnaligned(null, dest + 8 * (i - 1), handleByteOrder(src[i]));
633         }
634     }
635 
636     @Test
637     @IR(counts = { IRNode.LOAD_VECTOR_L, "=0", IRNode.STORE_VECTOR, "=0" }) // temporary
638     // @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" })
639     // FAILS: adr is CastX2P
640     // See: JDK-8331576
        //
        // Long-arithmetic twin of testOffHeapLong2a: for i in [1, src.length), writes
        // handleByteOrder(src[i]) to address dest + 8L * (i - 1) (null base object, absolute address).
        // The only source difference to variant "a" is the long literal 8L, i.e. a long multiplication.
        // The active rule pins the current behaviour; the commented-out rule is the intended one.
641     public static void testOffHeapLong2b(long dest, long[] src) {
642         for (int i = 1; i < src.length; i++) {
643             UNSAFE.putLongUnaligned(null, dest + 8L * (i - 1), handleByteOrder(src[i]));
644         }
645     }
646 
647     @Run(test = {"testOffHeapLong2a", "testOffHeapLong2b"})
        // Custom run method for testOffHeapLong2a and testOffHeapLong2b: both copy longArray to the
        // address held in baseOffHeap, wrapped in runAndVerify3. The tests write element i at slot i - 1.
        // NOTE(review): the trailing -8 is presumably the expected byte displacement (one long backwards)
        // that runAndVerify3 checks -- confirm against that helper.
648     public static void testOffHeapLong2_runner() {
649         runAndVerify3(() -> testOffHeapLong2a(baseOffHeap, longArray), -8);
650         runAndVerify3(() -> testOffHeapLong2b(baseOffHeap, longArray), -8);
651     }
652 
653     @Test
654     @IR(counts = { IRNode.LOAD_VECTOR_L, "=0", IRNode.STORE_VECTOR, "=0" }) // temporary
655     // @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" })
656     // FAILS: adr is CastX2P
657     // See: JDK-8331576
        //
        // For i in [0, src.length - 1), writes handleByteOrder(src[i]) to address dest + 8 * (i + 1),
        // i.e. element i lands one long slot (8 bytes) after dest + 8 * i. The last element of src is
        // never read.
        // The Unsafe base object is null, so the long dest is used as an absolute (off-heap) address.
        // Variant "a": 8 * (i + 1) is an int multiplication; testOffHeapLong3b uses 8L.
        // The active rule pins the current behaviour; the commented-out rule is the intended one.
658     public static void testOffHeapLong3a(long dest, long[] src) {
659         for (int i = 0; i < src.length - 1; i++) {
660             UNSAFE.putLongUnaligned(null, dest + 8 * (i + 1), handleByteOrder(src[i]));
661         }
662     }
663 
664     @Test
665     @IR(counts = { IRNode.LOAD_VECTOR_L, "=0", IRNode.STORE_VECTOR, "=0" }) // temporary
666     // @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" })
667     // FAILS: adr is CastX2P
668     // See: JDK-8331576
        //
        // Long-arithmetic twin of testOffHeapLong3a: for i in [0, src.length - 1), writes
        // handleByteOrder(src[i]) to address dest + 8L * (i + 1) (null base object, absolute address).
        // The only source difference to variant "a" is the long literal 8L, i.e. a long multiplication.
        // The active rule pins the current behaviour; the commented-out rule is the intended one.
669     public static void testOffHeapLong3b(long dest, long[] src) {
670         for (int i = 0; i < src.length - 1; i++) {
671             UNSAFE.putLongUnaligned(null, dest + 8L * (i + 1), handleByteOrder(src[i]));
672         }
673     }
674 
675     @Run(test = {"testOffHeapLong3a", "testOffHeapLong3b"})
        // Custom run method for testOffHeapLong3a and testOffHeapLong3b: both copy longArray to the
        // address held in baseOffHeap, wrapped in runAndVerify3. The tests write element i at slot i + 1.
        // NOTE(review): the trailing 8 is presumably the expected byte displacement (one long forwards)
        // that runAndVerify3 checks -- confirm against that helper.
676     public static void testOffHeapLong3_runner() {
677         runAndVerify3(() -> testOffHeapLong3a(baseOffHeap, longArray), 8);
678         runAndVerify3(() -> testOffHeapLong3b(baseOffHeap, longArray), 8);
679     }
680 
681     @Test
682     @IR(counts = { IRNode.LOAD_VECTOR_L, "=0", IRNode.STORE_VECTOR, "=0" }) // temporary
683     // @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" },
684     //     applyIf = {"AlignVector", "false"})
685     // FAILS: adr is CastX2P
686     // See: JDK-8331576
687     // AlignVector cannot guarantee that invar is aligned.
        //
        // For i in [start, stop), writes handleByteOrder(src[i]) to address dest + 8 * i + baseOffset.
        // The Unsafe base object is null, so the long dest is used as an absolute (off-heap) address.
        // baseOffset is a static field read inside the loop; it is the extra byte displacement of the store.
        // Variant "a": 8 * i is an int multiplication; testOffHeapLong4b uses 8L.
        // The active rule pins the current behaviour; the commented-out rule is the intended one.
688     public static void testOffHeapLong4a(long dest, long[] src, int start, int stop) {
689         for (int i = start; i < stop; i++) {
690             UNSAFE.putLongUnaligned(null, dest + 8 * i + baseOffset, handleByteOrder(src[i]));
691         }
692     }
693 
694     @Test
695     @IR(counts = { IRNode.LOAD_VECTOR_L, "=0", IRNode.STORE_VECTOR, "=0" }) // temporary
696     // @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" },
697     //     applyIf = {"AlignVector", "false"})
698     // FAILS: adr is CastX2P
699     // See: JDK-8331576
700     // AlignVector cannot guarantee that invar is aligned.
        //
        // Long-arithmetic twin of testOffHeapLong4a: for i in [start, stop), writes
        // handleByteOrder(src[i]) to address dest + 8L * i + baseOffset (null base object, absolute address).
        // The only source difference to variant "a" is the long literal 8L, i.e. a long multiplication.
        // The active rule pins the current behaviour; the commented-out rule is the intended one.
701     public static void testOffHeapLong4b(long dest, long[] src, int start, int stop) {
702         for (int i = start; i < stop; i++) {
703             UNSAFE.putLongUnaligned(null, dest + 8L * i + baseOffset, handleByteOrder(src[i]));
704         }
705     }
706 
707     @Run(test = {"testOffHeapLong4a", "testOffHeapLong4b"})
        // Custom run method for testOffHeapLong4a and testOffHeapLong4b.
708     public static void testOffHeapLong4_runner() {
            // The tests add baseOffset (in bytes) to the store address, so with 8 every element is written
            // one long slot after dest + 8 * i.
709         baseOffset = 8;
            // Copies longArray indices [0, size-1) to the address held in baseOffHeap.
            // NOTE(review): the upper bound size-1 presumably keeps the shifted store inside the off-heap
            // allocation, and the trailing 8 is presumably the byte displacement runAndVerify3 expects --
            // confirm both against the allocation size and that helper.
710         runAndVerify3(() -> testOffHeapLong4a(baseOffHeap, longArray, 0, size-1), 8);
711         runAndVerify3(() -> testOffHeapLong4b(baseOffHeap, longArray, 0, size-1), 8);
712     }
713 }