1 /*
  2  * Copyright (c) 2023, Red Hat, Inc. All rights reserved.
  3  * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
  4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  5  *
  6  * This code is free software; you can redistribute it and/or modify it
  7  * under the terms of the GNU General Public License version 2 only, as
  8  * published by the Free Software Foundation.
  9  *
 10  * This code is distributed in the hope that it will be useful, but WITHOUT
 11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 13  * version 2 for more details (a copy is included in the LICENSE file that
 14  * accompanied this code).
 15  *
 16  * You should have received a copy of the GNU General Public License version
 17  * 2 along with this work; if not, write to the Free Software Foundation,
 18  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 19  *
 20  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 21  * or visit www.oracle.com if you need additional information or have any
 22  * questions.
 23  */
 24 
 25 package compiler.c2.irTests;
 26 
 27 import compiler.lib.ir_framework.*;
 28 import jdk.test.lib.Utils;
 29 import jdk.test.whitebox.WhiteBox;
 30 import jdk.internal.misc.Unsafe;
 31 import java.util.Random;
 32 import java.util.Arrays;
 33 import java.nio.ByteOrder;
 34 
 35 /*
 36  * @test
 37  * @bug 8300258
 38  * @key randomness
 39  * @summary C2: vectorization fails on simple ByteBuffer loop
 40  * @modules java.base/jdk.internal.misc
 41  * @library /test/lib /
 42  * @build jdk.test.whitebox.WhiteBox
 43  * @run driver jdk.test.lib.helpers.ClassFileInstaller jdk.test.whitebox.WhiteBox
 44  * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI compiler.c2.irTests.TestVectorizationMismatchedAccess
 45  */
 46 
 47 public class TestVectorizationMismatchedAccess {
 48     private static final Unsafe UNSAFE = Unsafe.getUnsafe();
 49     private static final Random RANDOM = Utils.getRandomInstance();
 50     private final static WhiteBox wb = WhiteBox.getWhiteBox();
 51 
 52     public static void main(String[] args) {
 53         // Cross-product: +-AlignVector and +-UseCompactObjectHeaders
 54         TestFramework.runWithFlags("--add-modules", "java.base", "--add-exports", "java.base/jdk.internal.misc=ALL-UNNAMED",
 55                                    "-XX:+UnlockExperimentalVMOptions", "-XX:-UseCompactObjectHeaders",
 56                                    "-XX:-AlignVector");
 57         TestFramework.runWithFlags("--add-modules", "java.base", "--add-exports", "java.base/jdk.internal.misc=ALL-UNNAMED",
 58                                    "-XX:+UnlockExperimentalVMOptions", "-XX:-UseCompactObjectHeaders",
 59                                    "-XX:+AlignVector");
 60         TestFramework.runWithFlags("--add-modules", "java.base", "--add-exports", "java.base/jdk.internal.misc=ALL-UNNAMED",
 61                                    "-XX:+UnlockExperimentalVMOptions", "-XX:+UseCompactObjectHeaders",
 62                                    "-XX:-AlignVector");
 63         TestFramework.runWithFlags("--add-modules", "java.base", "--add-exports", "java.base/jdk.internal.misc=ALL-UNNAMED",
 64                                    "-XX:+UnlockExperimentalVMOptions", "-XX:+UseCompactObjectHeaders",
 65                                    "-XX:+AlignVector");
 66     }
 67 
 68     static int size = 1024;
 69     static byte[] byteArray = new byte[size * 8];
 70     static long[] longArray = new long[size];
 71     static byte[] verifyByteArray = new byte[size * 8];
 72     static long[] verifyLongArray = new long[size];
 73     static long baseOffset = 0;
 74     static long baseOffHeap = UNSAFE.allocateMemory(size * 8);
 75 
 76 
 77     static {
 78         for (int i = 0; i < verifyByteArray.length; i++) {
 79             verifyByteArray[i] = (byte)RANDOM.nextInt(Byte.MAX_VALUE);
 80         }
 81         for (int i = 0; i < verifyLongArray.length; i++) {
 82             verifyLongArray[i] = 0;
 83             for (int j = 0; j < 8; j++) {
 84                 verifyLongArray[i] = verifyLongArray[i] | (((long)verifyByteArray[8 * i + j]) << 8 * j);
 85             }
 86         }
 87     }
 88 
 89     // Method to adjust the value for the native byte order
 90     static private long handleByteOrder(long value) {
 91         if (ByteOrder.nativeOrder() != ByteOrder.LITTLE_ENDIAN) {
 92             value = Long.reverseBytes(value);
 93         }
 94         return value;
 95     }
 96 
 97     static private void runAndVerify(Runnable test, int offset) {
 98         System.arraycopy(verifyLongArray, 0, longArray, 0, longArray.length);
 99         Arrays.fill(byteArray, (byte)0);
100         test.run();
101         int i;
102         for (i = 0; i < Math.max(offset, 0); i++) {
103             if (byteArray[i] != 0) {
104                 throw new RuntimeException("Incorrect result at " + i + " " + byteArray[i] + " != 0");
105             }
106         }
107         for (; i < Math.min(byteArray.length + offset, byteArray.length); i++) {
108             if (byteArray[i] != verifyByteArray[i - offset]) {
109                 throw new RuntimeException("Incorrect result at " + i + " " + byteArray[i] + " != " + verifyByteArray[i-offset]);
110             }
111         }
112         for (; i < byteArray.length; i++) {
113             if (byteArray[i] != 0) {
114                 throw new RuntimeException("Incorrect result at " + i + " " + byteArray[i] + " != 0");
115             }
116         }
117     }
118 
119     static private void runAndVerify2(Runnable test, int offset) {
120         System.arraycopy(verifyByteArray, 0, byteArray, 0, byteArray.length);
121         test.run();
122         int i;
123         for (i = 0; i < Math.max(offset, 0); i++) {
124             if (byteArray[i] != verifyByteArray[i]) {
125                 throw new RuntimeException("Incorrect result at " + i + " " + byteArray[i] + " != " + verifyByteArray[i]);
126             }
127         }
128         for (; i < Math.min(byteArray.length + offset, byteArray.length); i++) {
129             int val = offset > 0 ? verifyByteArray[(i-offset) % 8] : verifyByteArray[i-offset];
130             if (byteArray[i] != val) {
131                 throw new RuntimeException("Incorrect result at " + i + " " + byteArray[i] + " != " + verifyByteArray[i-offset]);
132             }
133         }
134         for (; i < byteArray.length; i++) {
135             if (byteArray[i] != verifyByteArray[i]) {
136                 throw new RuntimeException("Incorrect result at " + i + " " + byteArray[i] + " != " + verifyByteArray[i]);
137             }
138         }
139     }
140 
141 
142     static private void runAndVerify3(Runnable test, int offset) {
143         System.arraycopy(verifyLongArray, 0, longArray, 0, longArray.length);
144         for (int i = 0; i < size * 8; i++) {
145             UNSAFE.putByte(null, baseOffHeap + i, (byte)0);
146         }
147         test.run();
148         int i;
149         for (i = 0; i < Math.max(offset, 0); i++) {
150             if (UNSAFE.getByte(null, baseOffHeap + i) != 0) {
151                 throw new RuntimeException("Incorrect result at " + i + " " + byteArray[i] + " != 0");
152             }
153         }
154         for (; i < Math.min(size * 8 + offset, size * 8); i++) {
155             if (UNSAFE.getByte(null, baseOffHeap + i) != verifyByteArray[i - offset]) {
156                 throw new RuntimeException("Incorrect result at " + i + " " + byteArray[i] + " != " + verifyByteArray[i-offset]);
157             }
158         }
159         for (; i < byteArray.length; i++) {
160             if (UNSAFE.getByte(null, baseOffHeap + i) != 0) {
161                 throw new RuntimeException("Incorrect result at " + i + " " + byteArray[i] + " != 0");
162             }
163         }
164     }
165 
166     @Test
167     @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" },
168         applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
169         applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"},
170         applyIfPlatform = {"64-bit", "true"})
171     // 32-bit: offsets are badly aligned (UNSAFE.ARRAY_BYTE_BASE_OFFSET is 4 byte aligned, but not 8 byte aligned).
172     //         might get fixed with JDK-8325155.
173     public static void testByteLong1a(byte[] dest, long[] src) {
174         for (int i = 0; i < src.length; i++) {
175             UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * i, handleByteOrder(src[i]));
176             // With AlignVector, we need 8-byte alignment of vector loads/stores.
177             // UseCompactObjectHeaders=false                  UseCompactObjectHeaders=true
178             // B_adr = base + 16 + 8*i  ->  always            B_adr = base + 12 + 8*i  ->  never
179             // L_adr = base + 16 + 8*i  ->  always            L_adr = base + 16 + 8*i  ->  always
180             // -> vectorize                                   -> no vectorization
181         }
182     }
183 
184     @Test
185     @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" },
186         applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
187         applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"},
188         applyIfPlatform = {"64-bit", "true"})
189     // 32-bit: address has ConvL2I for cast of long to address, not supported.
190     public static void testByteLong1b(byte[] dest, long[] src) {
191         for (int i = 0; i < src.length; i++) {
192             UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8L * i, handleByteOrder(src[i]));
193             // With AlignVector, we need 8-byte alignment of vector loads/stores.
194             // UseCompactObjectHeaders=false                  UseCompactObjectHeaders=true
195             // B_adr = base + 16 + 8*i  ->  always            B_adr = base + 12 + 8*i  ->  never
196             // L_adr = base + 16 + 8*i  ->  always            L_adr = base + 16 + 8*i  ->  always
197             // -> vectorize                                   -> no vectorization
198         }
199     }
200 
201     @Test
202     @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" },
203         applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"})
204     public static void testByteLong1c(byte[] dest, long[] src) {
205         long base = 64; // make sure it is big enough and 8 byte aligned (required for 32-bit)
206         for (int i = 0; i < src.length - 8; i++) {
207             UNSAFE.putLongUnaligned(dest, base + 8 * i, handleByteOrder(src[i]));
208             // With AlignVector, we need 8-byte alignment of vector loads/stores.
209             // UseCompactObjectHeaders=false                  UseCompactObjectHeaders=true
210             // B_adr = base + 64 + 8*i  ->  always            B_adr = base + 64 + 8*i  ->  always
211             // L_adr = base + 16 + 8*i  ->  always            L_adr = base + 16 + 8*i  ->  always
212             // -> vectorize                                   -> vectorize
213         }
214     }
215 
216     @Test
217     @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" },
218         applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"},
219         applyIfPlatform = {"64-bit", "true"})
220     // 32-bit: address has ConvL2I for cast of long to address, not supported.
221     public static void testByteLong1d(byte[] dest, long[] src) {
222         long base = 64; // make sure it is big enough and 8 byte aligned (required for 32-bit)
223         for (int i = 0; i < src.length - 8; i++) {
224             UNSAFE.putLongUnaligned(dest, base + 8L * i, handleByteOrder(src[i]));
225             // With AlignVector, we need 8-byte alignment of vector loads/stores.
226             // UseCompactObjectHeaders=false                  UseCompactObjectHeaders=true
227             // B_adr = base + 64 + 8*i  ->  always            B_adr = base + 64 + 8*i  ->  always
228             // L_adr = base + 16 + 8*i  ->  always            L_adr = base + 16 + 8*i  ->  always
229             // -> vectorize                                   -> vectorize
230         }
231     }
232 
233     @Run(test = {"testByteLong1a", "testByteLong1b", "testByteLong1c", "testByteLong1d"})
234     public static void testByteLong1_runner() {
235         runAndVerify(() -> testByteLong1a(byteArray, longArray), 0);
236         runAndVerify(() -> testByteLong1b(byteArray, longArray), 0);
237         testByteLong1c(byteArray, longArray);
238         testByteLong1d(byteArray, longArray);
239     }
240 
241     @Test
242     @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" },
243         applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
244         applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"},
245         applyIfPlatform = {"64-bit", "true"})
246     // 32-bit: offsets are badly aligned (UNSAFE.ARRAY_BYTE_BASE_OFFSET is 4 byte aligned, but not 8 byte aligned).
247     //         might get fixed with JDK-8325155.
248     public static void testByteLong2a(byte[] dest, long[] src) {
249         for (int i = 1; i < src.length; i++) {
250             UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * (i - 1), handleByteOrder(src[i]));
251             // With AlignVector, we need 8-byte alignment of vector loads/stores.
252             // UseCompactObjectHeaders=false                      UseCompactObjectHeaders=true
253             // B_adr = base + 16 + 8*(i-1)  ->  always            B_adr = base + 12 + 8*(i-1)  ->  never
254             // L_adr = base + 16 + 8*i      ->  always            L_adr = base + 16 + 8*i      ->  always
255             // -> vectorize                                       -> no vectorization
256         }
257     }
258 
259     @Test
260     @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" },
261         applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
262         applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"},
263         applyIfPlatform = {"64-bit", "true"})
264     // 32-bit: address has ConvL2I for cast of long to address, not supported.
265     public static void testByteLong2b(byte[] dest, long[] src) {
266         for (int i = 1; i < src.length; i++) {
267             UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8L * (i - 1), handleByteOrder(src[i]));
268             // With AlignVector, we need 8-byte alignment of vector loads/stores.
269             // UseCompactObjectHeaders=false                      UseCompactObjectHeaders=true
270             // B_adr = base + 16 + 8*(i-1)  ->  always            B_adr = base + 12 + 8*(i-1)  ->  never
271             // L_adr = base + 16 + 8*i      ->  always            L_adr = base + 16 + 8*i      ->  always
272             // -> vectorize                                       -> no vectorization
273         }
274     }
275 
276     @Run(test = {"testByteLong2a", "testByteLong2b"})
277     public static void testByteLong2_runner() {
278         runAndVerify(() -> testByteLong2a(byteArray, longArray), -8);
279         runAndVerify(() -> testByteLong2b(byteArray, longArray), -8);
280     }
281 
282     @Test
283     @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" },
284         applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
285         applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"},
286         applyIfPlatform = {"64-bit", "true"})
287     // 32-bit: offsets are badly aligned (UNSAFE.ARRAY_BYTE_BASE_OFFSET is 4 byte aligned, but not 8 byte aligned).
288     //         might get fixed with JDK-8325155.
289     public static void testByteLong3a(byte[] dest, long[] src) {
290         for (int i = 0; i < src.length - 1; i++) {
291             UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * (i + 1), handleByteOrder(src[i]));
292             // With AlignVector, we need 8-byte alignment of vector loads/stores.
293             // UseCompactObjectHeaders=false                      UseCompactObjectHeaders=true
294             // B_adr = base + 16 + 8*(i+1)  ->  always            B_adr = base + 12 + 8*(i+1)  ->  never
295             // L_adr = base + 16 + 8*i      ->  always            L_adr = base + 16 + 8*i      ->  always
296             // -> vectorize                                       -> no vectorization
297         }
298     }
299 
300     @Test
301     @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" },
302         applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
303         applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"},
304         applyIfPlatform = {"64-bit", "true"})
305     // 32-bit: address has ConvL2I for cast of long to address, not supported.
306     public static void testByteLong3b(byte[] dest, long[] src) {
307         for (int i = 0; i < src.length - 1; i++) {
308             UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8L * (i + 1), handleByteOrder(src[i]));
309             // With AlignVector, we need 8-byte alignment of vector loads/stores.
310             // UseCompactObjectHeaders=false                      UseCompactObjectHeaders=true
311             // B_adr = base + 16 + 8*(i+1)  ->  always            B_adr = base + 12 + 8*(i+1)  ->  never
312             // L_adr = base + 16 + 8*i      ->  always            L_adr = base + 16 + 8*i      ->  always
313             // -> vectorize                                       -> no vectorization
314         }
315     }
316 
317     @Run(test = {"testByteLong3a", "testByteLong3b"})
318     public static void testByteLong3_runner() {
319         runAndVerify(() -> testByteLong3a(byteArray, longArray), 8);
320         runAndVerify(() -> testByteLong3b(byteArray, longArray), 8);
321     }
322 
323     @Test
324     @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" },
325         applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"},
326         applyIfPlatform = {"64-bit", "true"},
327         applyIf = {"AlignVector", "false"})
328     // 32-bit: offsets are badly aligned (UNSAFE.ARRAY_BYTE_BASE_OFFSET is 4 byte aligned, but not 8 byte aligned).
329     //         might get fixed with JDK-8325155.
330     // AlignVector cannot guarantee that invar is aligned.
331     public static void testByteLong4a(byte[] dest, long[] src, int start, int stop) {
332         for (int i = start; i < stop; i++) {
333             UNSAFE.putLongUnaligned(dest, 8 * i + baseOffset, handleByteOrder(src[i]));
334         }
335     }
336 
337     @Test
338     @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" },
339         applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"},
340         applyIfPlatform = {"64-bit", "true"},
341         applyIf = {"AlignVector", "false"})
342     // 32-bit: address has ConvL2I for cast of long to address, not supported.
343     // AlignVector cannot guarantee that invar is aligned.
344     public static void testByteLong4b(byte[] dest, long[] src, int start, int stop) {
345         for (int i = start; i < stop; i++) {
346             UNSAFE.putLongUnaligned(dest, 8L * i + baseOffset, handleByteOrder(src[i]));
347         }
348     }
349 
350     @Run(test = {"testByteLong4a", "testByteLong4b"})
351     public static void testByteLong4_runner() {
352         baseOffset = UNSAFE.ARRAY_BYTE_BASE_OFFSET;
353         runAndVerify(() -> testByteLong4a(byteArray, longArray, 0, size), 0);
354         runAndVerify(() -> testByteLong4b(byteArray, longArray, 0, size), 0);
355     }
356 
357     @Test
358     @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" },
359         applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
360         applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"},
361         applyIfPlatform = {"64-bit", "true"})
362     // 32-bit: offsets are badly aligned (UNSAFE.ARRAY_BYTE_BASE_OFFSET is 4 byte aligned, but not 8 byte aligned).
363     //         might get fixed with JDK-8325155.
364     public static void testByteLong5a(byte[] dest, long[] src, int start, int stop) {
365         for (int i = start; i < stop; i++) {
366             UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * (i + baseOffset), handleByteOrder(src[i]));
367             // With AlignVector, we need 8-byte alignment of vector loads/stores.
368             // UseCompactObjectHeaders=false                      UseCompactObjectHeaders=true
369             // B_adr = base + 16 + 8*(i+x)  ->  always            B_adr = base + 12 + 8*(i+x)  ->  never
370             // L_adr = base + 16 + 8*i      ->  always            L_adr = base + 16 + 8*i      ->  always
371             // -> vectorize                                       -> no vectorization
372         }
373     }
374 
375     @Test
376     @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" },
377         applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
378         applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"},
379         applyIfPlatform = {"64-bit", "true"})
380     // 32-bit: address has ConvL2I for cast of long to address, not supported.
381     public static void testByteLong5b(byte[] dest, long[] src, int start, int stop) {
382         for (int i = start; i < stop; i++) {
383             UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8L * (i + baseOffset), handleByteOrder(src[i]));
384             // With AlignVector, we need 8-byte alignment of vector loads/stores.
385             // UseCompactObjectHeaders=false                      UseCompactObjectHeaders=true
386             // B_adr = base + 16 + 8*(i+x)  ->  always            B_adr = base + 12 + 8*(i+x)  ->  never
387             // L_adr = base + 16 + 8*i      ->  always            L_adr = base + 16 + 8*i      ->  always
388             // -> vectorize                                       -> no vectorization
389         }
390     }
391 
392     @Run(test = {"testByteLong5a", "testByteLong5b"})
393     public static void testByteLong5_runner() {
394         baseOffset = 1;
395         runAndVerify(() -> testByteLong5a(byteArray, longArray, 0, size-1), 8);
396         runAndVerify(() -> testByteLong5b(byteArray, longArray, 0, size-1), 8);
397     }
398 
399     @Test
400     @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" },
401         applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
402         applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"},
403         applyIfPlatform = {"64-bit", "true"})
404     // 32-bit: offsets are badly aligned (UNSAFE.ARRAY_BYTE_BASE_OFFSET is 4 byte aligned, but not 8 byte aligned).
405     //         might get fixed with JDK-8325155.
406     public static void testByteByte1a(byte[] dest, byte[] src) {
407         for (int i = 0; i < src.length / 8; i++) {
408             UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * i, UNSAFE.getLongUnaligned(src, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * i));
409             // With AlignVector, we need 8-byte alignment of vector loads/stores.
410             // UseCompactObjectHeaders=false                    UseCompactObjectHeaders=true
411             // src_adr = base + 16 + 8*i  ->  always            src_adr = base + 12 + 8*i  ->  never
412             // dst_adr = base + 16 + 8*i  ->  always            dst_adr = base + 12 + 8*i  ->  never
413             // -> vectorize                                     -> no vectorization
414         }
415     }
416 
417     @Test
418     @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" },
419         applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
420         applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"},
421         applyIfPlatform = {"64-bit", "true"})
422     // 32-bit: address has ConvL2I for cast of long to address, not supported.
423     public static void testByteByte1b(byte[] dest, byte[] src) {
424         for (int i = 0; i < src.length / 8; i++) {
425             UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8L * i, UNSAFE.getLongUnaligned(src, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8L * i));
426             // With AlignVector, we need 8-byte alignment of vector loads/stores.
427             // UseCompactObjectHeaders=false                    UseCompactObjectHeaders=true
428             // src_adr = base + 16 + 8*i  ->  always            src_adr = base + 12 + 8*i  ->  never
429             // dst_adr = base + 16 + 8*i  ->  always            dst_adr = base + 12 + 8*i  ->  never
430             // -> vectorize                                     -> no vectorization
431         }
432     }
433 
434     @Run(test = {"testByteByte1a", "testByteByte1b"})
435     public static void testByteByte1_runner() {
436         runAndVerify2(() -> testByteByte1a(byteArray, byteArray), 0);
437         runAndVerify2(() -> testByteByte1b(byteArray, byteArray), 0);
438     }
439 
440     @Test
441     @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" },
442         applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
443         applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"},
444         applyIfPlatform = {"64-bit", "true"})
445     // 32-bit: offsets are badly aligned (UNSAFE.ARRAY_BYTE_BASE_OFFSET is 4 byte aligned, but not 8 byte aligned).
446     //         might get fixed with JDK-8325155.
447     public static void testByteByte2a(byte[] dest, byte[] src) {
448         for (int i = 1; i < src.length / 8; i++) {
449             UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * (i - 1), UNSAFE.getLongUnaligned(src, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * i));
450             // With AlignVector, we need 8-byte alignment of vector loads/stores.
451             // UseCompactObjectHeaders=false                        UseCompactObjectHeaders=true
452             // src_adr = base + 16 + 8*i      ->  always            src_adr = base + 12 + 8*i      ->  never
453             // dst_adr = base + 16 + 8*(i-1)  ->  always            dst_adr = base + 12 + 8*(i-1)  ->  never
454             // -> vectorize                                         -> no vectorization
455         }
456     }
457 
458     @Test
459     @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" },
460         applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
461         applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"},
462         applyIfPlatform = {"64-bit", "true"})
463     // 32-bit: address has ConvL2I for cast of long to address, not supported.
464     public static void testByteByte2b(byte[] dest, byte[] src) {
465         for (int i = 1; i < src.length / 8; i++) {
466             UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8L * (i - 1), UNSAFE.getLongUnaligned(src, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8L * i));
467             // With AlignVector, we need 8-byte alignment of vector loads/stores.
468             // UseCompactObjectHeaders=false                        UseCompactObjectHeaders=true
469             // src_adr = base + 16 + 8*i      ->  always            src_adr = base + 12 + 8*i      ->  never
470             // dst_adr = base + 16 + 8*(i-1)  ->  always            dst_adr = base + 12 + 8*(i-1)  ->  never
471             // -> vectorize                                         -> no vectorization
472         }
473     }
474 
475     @Run(test = {"testByteByte2a", "testByteByte2b"})
476     public static void testByteByte2_runner() {
477         runAndVerify2(() -> testByteByte2a(byteArray, byteArray), -8);
478         runAndVerify2(() -> testByteByte2b(byteArray, byteArray), -8);
479     }
480 
481     @Test
482     @IR(failOn = { IRNode.LOAD_VECTOR_L, IRNode.STORE_VECTOR })
483     public static void testByteByte3a(byte[] dest, byte[] src) {
484         for (int i = 0; i < src.length / 8 - 1; i++) {
485             UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * (i + 1), UNSAFE.getLongUnaligned(src, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * i));
486         }
487     }
488 
489     @Test
490     @IR(failOn = { IRNode.LOAD_VECTOR_L, IRNode.STORE_VECTOR })
491     public static void testByteByte3b(byte[] dest, byte[] src) {
492         for (int i = 0; i < src.length / 8 - 1; i++) {
493             UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8L * (i + 1), UNSAFE.getLongUnaligned(src, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8L * i));
494         }
495     }
496 
497     @Run(test = {"testByteByte3a", "testByteByte3b"})
498     public static void testByteByte3_runner() {
499         runAndVerify2(() -> testByteByte3a(byteArray, byteArray), 8);
500         runAndVerify2(() -> testByteByte3b(byteArray, byteArray), 8);
501     }
502 
503     @Test
504     @IR(failOn = { IRNode.LOAD_VECTOR_L, IRNode.STORE_VECTOR })
505     public static void testByteByte4a(byte[] dest, byte[] src, int start, int stop) {
506         for (int i = start; i < stop; i++) {
507             UNSAFE.putLongUnaligned(dest, 8 * i + baseOffset, UNSAFE.getLongUnaligned(src, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * i));
508         }
509     }
510 
511     @Test
512     @IR(failOn = { IRNode.LOAD_VECTOR_L, IRNode.STORE_VECTOR })
513     public static void testByteByte4b(byte[] dest, byte[] src, int start, int stop) {
514         for (int i = start; i < stop; i++) {
515             UNSAFE.putLongUnaligned(dest, 8L * i + baseOffset, UNSAFE.getLongUnaligned(src, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8L * i));
516         }
517     }
518 
519     @Run(test = {"testByteByte4a", "testByteByte4b"})
520     public static void testByteByte4_runner() {
521         baseOffset = UNSAFE.ARRAY_BYTE_BASE_OFFSET;
522         runAndVerify2(() -> testByteByte4a(byteArray, byteArray, 0, size), 0);
523         runAndVerify2(() -> testByteByte4b(byteArray, byteArray, 0, size), 0);
524     }
525 
526     @Test
527     @IR(failOn = { IRNode.LOAD_VECTOR_L, IRNode.STORE_VECTOR })
528     public static void testByteByte5a(byte[] dest, byte[] src, int start, int stop) {
529         for (int i = start; i < stop; i++) {
530             UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * (i + baseOffset), UNSAFE.getLongUnaligned(src, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * i));
531         }
532     }
533 
534     @Test
535     @IR(failOn = { IRNode.LOAD_VECTOR_L, IRNode.STORE_VECTOR })
536     public static void testByteByte5b(byte[] dest, byte[] src, int start, int stop) {
537         for (int i = start; i < stop; i++) {
538             UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8L * (i + baseOffset), UNSAFE.getLongUnaligned(src, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8L * i));
539         }
540     }
541 
542     @Run(test = {"testByteByte5a", "testByteByte5b"})
543     public static void testByteByte5_runner() {
544         baseOffset = 1;
545         runAndVerify2(() -> testByteByte5a(byteArray, byteArray, 0, size-1), 8);
546         runAndVerify2(() -> testByteByte5b(byteArray, byteArray, 0, size-1), 8);
547     }
548 
549     @Test
550     @IR(counts = { IRNode.LOAD_VECTOR_L, "=0", IRNode.STORE_VECTOR, "=0" }) // temporary
551     // @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" })
552     // FAILS: adr is CastX2P(dest + 8 * (i + int_con))
553     // See: JDK-8331576
554     public static void testOffHeapLong1a(long dest, long[] src) {
555         for (int i = 0; i < src.length; i++) {
556             UNSAFE.putLongUnaligned(null, dest + 8 * i, handleByteOrder(src[i]));
557         }
558     }
559 
560     @Test
561     @IR(counts = { IRNode.LOAD_VECTOR_L, "=0", IRNode.STORE_VECTOR, "=0" }) // temporary
562     // @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" })
563     // FAILS: adr is CastX2P(dest + 8L * (i + int_con))
564     // See: JDK-8331576
565     public static void testOffHeapLong1b(long dest, long[] src) {
566         for (int i = 0; i < src.length; i++) {
567             UNSAFE.putLongUnaligned(null, dest + 8L * i, handleByteOrder(src[i]));
568         }
569     }
570 
571     @Run(test = {"testOffHeapLong1a", "testOffHeapLong1b"})
572     public static void testOffHeapLong1_runner() {
573         runAndVerify3(() -> testOffHeapLong1a(baseOffHeap, longArray), 0);
574         runAndVerify3(() -> testOffHeapLong1b(baseOffHeap, longArray), 0);
575     }
576 
577     @Test
578     @IR(counts = { IRNode.LOAD_VECTOR_L, "=0", IRNode.STORE_VECTOR, "=0" }) // temporary
579     // @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" })
580     // FAILS: adr is CastX2P
581     // See: JDK-8331576
582     public static void testOffHeapLong2a(long dest, long[] src) {
583         for (int i = 1; i < src.length; i++) {
584             UNSAFE.putLongUnaligned(null, dest + 8 * (i - 1), handleByteOrder(src[i]));
585         }
586     }
587 
588     @Test
589     @IR(counts = { IRNode.LOAD_VECTOR_L, "=0", IRNode.STORE_VECTOR, "=0" }) // temporary
590     // @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" })
591     // FAILS: adr is CastX2P
592     // See: JDK-8331576
593     public static void testOffHeapLong2b(long dest, long[] src) {
594         for (int i = 1; i < src.length; i++) {
595             UNSAFE.putLongUnaligned(null, dest + 8L * (i - 1), handleByteOrder(src[i]));
596         }
597     }
598 
599     @Run(test = {"testOffHeapLong2a", "testOffHeapLong2b"})
600     public static void testOffHeapLong2_runner() {
601         runAndVerify3(() -> testOffHeapLong2a(baseOffHeap, longArray), -8);
602         runAndVerify3(() -> testOffHeapLong2b(baseOffHeap, longArray), -8);
603     }
604 
605     @Test
606     @IR(counts = { IRNode.LOAD_VECTOR_L, "=0", IRNode.STORE_VECTOR, "=0" }) // temporary
607     // @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" })
608     // FAILS: adr is CastX2P
609     // See: JDK-8331576
610     public static void testOffHeapLong3a(long dest, long[] src) {
611         for (int i = 0; i < src.length - 1; i++) {
612             UNSAFE.putLongUnaligned(null, dest + 8 * (i + 1), handleByteOrder(src[i]));
613         }
614     }
615 
616     @Test
617     @IR(counts = { IRNode.LOAD_VECTOR_L, "=0", IRNode.STORE_VECTOR, "=0" }) // temporary
618     // @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" })
619     // FAILS: adr is CastX2P
620     // See: JDK-8331576
621     public static void testOffHeapLong3b(long dest, long[] src) {
622         for (int i = 0; i < src.length - 1; i++) {
623             UNSAFE.putLongUnaligned(null, dest + 8L * (i + 1), handleByteOrder(src[i]));
624         }
625     }
626 
627     @Run(test = {"testOffHeapLong3a", "testOffHeapLong3b"})
628     public static void testOffHeapLong3_runner() {
629         runAndVerify3(() -> testOffHeapLong3a(baseOffHeap, longArray), 8);
630         runAndVerify3(() -> testOffHeapLong3b(baseOffHeap, longArray), 8);
631     }
632 
633     @Test
634     @IR(counts = { IRNode.LOAD_VECTOR_L, "=0", IRNode.STORE_VECTOR, "=0" }) // temporary
635     // @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" },
636     //     applyIf = {"AlignVector", "false"})
637     // FAILS: adr is CastX2P
638     // See: JDK-8331576
639     // AlignVector cannot guarantee that invar is aligned.
640     public static void testOffHeapLong4a(long dest, long[] src, int start, int stop) {
641         for (int i = start; i < stop; i++) {
642             UNSAFE.putLongUnaligned(null, dest + 8 * i + baseOffset, handleByteOrder(src[i]));
643         }
644     }
645 
646     @Test
647     @IR(counts = { IRNode.LOAD_VECTOR_L, "=0", IRNode.STORE_VECTOR, "=0" }) // temporary
648     // @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" },
649     //     applyIf = {"AlignVector", "false"})
650     // FAILS: adr is CastX2P
651     // See: JDK-8331576
652     // AlignVector cannot guarantee that invar is aligned.
653     public static void testOffHeapLong4b(long dest, long[] src, int start, int stop) {
654         for (int i = start; i < stop; i++) {
655             UNSAFE.putLongUnaligned(null, dest + 8L * i + baseOffset, handleByteOrder(src[i]));
656         }
657     }
658 
659     @Run(test = {"testOffHeapLong4a", "testOffHeapLong4b"})
660     public static void testOffHeapLong4_runner() {
661         baseOffset = 8;
662         runAndVerify3(() -> testOffHeapLong4a(baseOffHeap, longArray, 0, size-1), 8);
663         runAndVerify3(() -> testOffHeapLong4b(baseOffHeap, longArray, 0, size-1), 8);
664     }
665 }