1 /*
2 * Copyright (c) 2024, 2025, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 */
23
24 package compiler.loopopts.superword;
25
26 import compiler.lib.ir_framework.*;
27 import jdk.test.lib.Utils;
28 import jdk.test.whitebox.WhiteBox;
29 import jdk.internal.misc.Unsafe;
30 import java.lang.reflect.Array;
31 import java.util.Map;
32 import java.util.HashMap;
33 import java.util.Random;
34 import java.nio.ByteOrder;
35
36 /*
37 * @test id=NoAlignVector
38 * @bug 8310190
39 * @key randomness
40 * @summary Test AlignVector with various loop init, stride, scale, invar, etc.
41 * @modules java.base/jdk.internal.misc
42 * @library /test/lib /
43 * @run driver compiler.loopopts.superword.TestAlignVector NoAlignVector
44 */
45
46 /*
47 * @test id=AlignVector
48 * @bug 8310190
49 * @summary Test AlignVector with various loop init, stride, scale, invar, etc.
50 * @modules java.base/jdk.internal.misc
51 * @library /test/lib /
52 * @run driver compiler.loopopts.superword.TestAlignVector AlignVector
53 */
54
55 /*
56 * @test id=VerifyAlignVector
57 * @bug 8310190
58 * @summary Test AlignVector with various loop init, stride, scale, invar, etc.
59 * @modules java.base/jdk.internal.misc
60 * @library /test/lib /
61 * @run driver compiler.loopopts.superword.TestAlignVector VerifyAlignVector
62 */
63
64 /*
65 * @test id=NoAlignVector-COH
66 * @bug 8310190
67 * @summary Test AlignVector with various loop init, stride, scale, invar, etc.
68 * @modules java.base/jdk.internal.misc
69 * @library /test/lib /
70 * @run driver compiler.loopopts.superword.TestAlignVector NoAlignVector-COH
71 */
72
73 /*
74 * @test id=VerifyAlignVector-COH
75 * @bug 8310190
76 * @summary Test AlignVector with various loop init, stride, scale, invar, etc.
77 * @modules java.base/jdk.internal.misc
78 * @library /test/lib /
79 * @run driver compiler.loopopts.superword.TestAlignVector VerifyAlignVector-COH
80 */
81
82 public class TestAlignVector {
// Element count of every generated input array; also used as the loop bound by the test methods.
83 static int RANGE = 1024*8;
// Same value as RANGE; not referenced in the visible part of this file.
84 static int RANGE_FINAL = 1024*8;
85 private static final Unsafe UNSAFE = Unsafe.getUnsafe();
// Random source used by the generateX() helpers to fill the input arrays.
86 private static final Random RANDOM = Utils.getRandomInstance();
87
88 // Inputs
// aX / bX are filled once in the constructor and handed to the tests as clones;
// mX is the value passed as the "mask" argument for the matching element type.
89 byte[] aB;
90 byte[] bB;
91 byte mB = (byte)31;
92 short[] aS;
93 short[] bS;
94 short mS = (short)0xF0F0;
95 int[] aI;
96 int[] bI;
97 int mI = 0xF0F0F0F0;
98 long[] aL;
99 long[] bL;
100 long mL = 0xF0F0F0F0F0F0F0F0L;
101
102 // List of tests
// Keyed by a display name; each value runs one test method on fresh clones of the inputs.
103 Map<String,TestFunction> tests = new HashMap<String,TestFunction>();
104
105 // List of gold, the results from the first run before compilation
// Uses the same keys as "tests".
106 Map<String,Object[]> golds = new HashMap<String,Object[]>();
107
/**
 * A single test invocation: runs one test method on fresh copies of the inputs
 * and returns the arrays that should be compared against the gold result.
 * Only ever implemented by lambdas, hence the functional-interface marker.
 */
@FunctionalInterface
interface TestFunction {
    /** @return the arrays produced (or touched) by the test method. */
    Object[] run();
}
111
112 public static void main(String[] args) {
113 TestFramework framework = new TestFramework(TestAlignVector.class);
114 framework.addFlags("--add-modules", "java.base", "--add-exports", "java.base/jdk.internal.misc=ALL-UNNAMED",
115 "-XX:+IgnoreUnrecognizedVMOptions", "-XX:LoopUnrollLimit=250");
116
117 switch (args[0]) {
118 case "NoAlignVector" -> { framework.addFlags("-XX:-UseCompactObjectHeaders", "-XX:-AlignVector"); }
119 case "AlignVector" -> { framework.addFlags("-XX:-UseCompactObjectHeaders", "-XX:+AlignVector"); }
120 case "VerifyAlignVector" -> { framework.addFlags("-XX:-UseCompactObjectHeaders", "-XX:+AlignVector", "-XX:+IgnoreUnrecognizedVMOptions", "-XX:+VerifyAlignVector"); }
121 case "NoAlignVector-COH" -> { framework.addFlags("-XX:+UseCompactObjectHeaders", "-XX:-AlignVector"); }
122 case "VerifyAlignVector-COH" -> { framework.addFlags("-XX:+UseCompactObjectHeaders", "-XX:+AlignVector", "-XX:+IgnoreUnrecognizedVMOptions", "-XX:+VerifyAlignVector"); }
123 default -> { throw new RuntimeException("Test argument not recognized: " + args[0]); }
124 }
125 framework.start();
126 }
127
/**
 * Generates the input arrays once, registers every test invocation under a
 * unique name, and records the "gold" result of each invocation.
 * Every invocation works on clones, so the shared inputs are never modified.
 */
public TestAlignVector() {
    // Generate input once
    aB = generateB();
    bB = generateB();
    aS = generateS();
    bS = generateS();
    aI = generateI();
    bI = generateI();
    aL = generateL();
    bL = generateL();

    // Add all tests to list
    tests.put("test0",       () -> test0(aB.clone(), bB.clone(), mB));
    tests.put("test1",       () -> test1(aB.clone(), bB.clone(), mB));
    tests.put("test2",       () -> test2(aB.clone(), bB.clone(), mB));
    tests.put("test3",       () -> test3(aB.clone(), bB.clone(), mB));
    tests.put("test4",       () -> test4(aB.clone(), bB.clone(), mB));
    tests.put("test5",       () -> test5(aB.clone(), bB.clone(), mB, 0));
    tests.put("test6",       () -> test6(aB.clone(), bB.clone(), mB));
    tests.put("test7",       () -> test7(aS.clone(), bS.clone(), mS));
    // test8 is run with two different init values. The map keys must differ,
    // otherwise the second put() replaces the first and init = 0 is never exercised.
    tests.put("test8a",      () -> test8(aB.clone(), bB.clone(), mB, 0));
    tests.put("test8b",      () -> test8(aB.clone(), bB.clone(), mB, 1));
    tests.put("test9",       () -> test9(aB.clone(), bB.clone(), mB));

    tests.put("test10a",     () -> test10a(aB.clone(), bB.clone(), mB));
    tests.put("test10b",     () -> test10b(aB.clone(), bB.clone(), mB));
    tests.put("test10c",     () -> test10c(aS.clone(), bS.clone(), mS));
    tests.put("test10d",     () -> test10d(aS.clone(), bS.clone(), mS));

    tests.put("test11aB",    () -> test11aB(aB.clone(), bB.clone(), mB));
    tests.put("test11aS",    () -> test11aS(aS.clone(), bS.clone(), mS));
    tests.put("test11aI",    () -> test11aI(aI.clone(), bI.clone(), mI));
    tests.put("test11aL",    () -> test11aL(aL.clone(), bL.clone(), mL));

    tests.put("test11bB",    () -> test11bB(aB.clone(), bB.clone(), mB));
    tests.put("test11bS",    () -> test11bS(aS.clone(), bS.clone(), mS));
    tests.put("test11bI",    () -> test11bI(aI.clone(), bI.clone(), mI));
    tests.put("test11bL",    () -> test11bL(aL.clone(), bL.clone(), mL));

    tests.put("test11cB",    () -> test11cB(aB.clone(), bB.clone(), mB));
    tests.put("test11cS",    () -> test11cS(aS.clone(), bS.clone(), mS));
    tests.put("test11cI",    () -> test11cI(aI.clone(), bI.clone(), mI));
    tests.put("test11cL",    () -> test11cL(aL.clone(), bL.clone(), mL));

    tests.put("test11dB",    () -> test11dB(aB.clone(), bB.clone(), mB, 0));
    tests.put("test11dS",    () -> test11dS(aS.clone(), bS.clone(), mS, 0));
    tests.put("test11dI",    () -> test11dI(aI.clone(), bI.clone(), mI, 0));
    tests.put("test11dL",    () -> test11dL(aL.clone(), bL.clone(), mL, 0));

    tests.put("test12",      () -> test12(aB.clone(), bB.clone(), mB));

    tests.put("test13aIL",   () -> test13aIL(aI.clone(), aL.clone()));
    tests.put("test13aIB",   () -> test13aIB(aI.clone(), aB.clone()));
    tests.put("test13aIS",   () -> test13aIS(aI.clone(), aS.clone()));
    tests.put("test13aBSIL", () -> test13aBSIL(aB.clone(), aS.clone(), aI.clone(), aL.clone()));

    tests.put("test13bIL",   () -> test13bIL(aI.clone(), aL.clone()));
    tests.put("test13bIB",   () -> test13bIB(aI.clone(), aB.clone()));
    tests.put("test13bIS",   () -> test13bIS(aI.clone(), aS.clone()));
    tests.put("test13bBSIL", () -> test13bBSIL(aB.clone(), aS.clone(), aI.clone(), aL.clone()));

    tests.put("test14aB",    () -> test14aB(aB.clone()));
    tests.put("test14bB",    () -> test14bB(aB.clone()));
    tests.put("test14cB",    () -> test14cB(aB.clone()));
    tests.put("test14dB",    () -> test14dB(aB.clone()));
    tests.put("test14eB",    () -> test14eB(aB.clone()));
    tests.put("test14fB",    () -> test14fB(aB.clone()));

    tests.put("test15aB",    () -> test15aB(aB.clone()));
    tests.put("test15bB",    () -> test15bB(aB.clone()));
    tests.put("test15cB",    () -> test15cB(aB.clone()));

    tests.put("test16a",     () -> test16a(aB.clone(), aS.clone()));
    tests.put("test16b",     () -> test16b(aB.clone()));

    tests.put("test17a",     () -> test17a(aL.clone()));
    tests.put("test17b",     () -> test17b(aL.clone()));
    tests.put("test17c",     () -> test17c(aL.clone()));
    tests.put("test17d",     () -> test17d(aL.clone()));

    tests.put("test18a",     () -> test18a(aB.clone(), aI.clone()));
    tests.put("test18b",     () -> test18b(aB.clone(), aI.clone()));

    tests.put("test19",      () -> test19(aI.clone(), bI.clone()));
    tests.put("test20",      () -> test20(aB.clone()));

    // Compute gold value for all test methods before compilation
    for (Map.Entry<String,TestFunction> entry : tests.entrySet()) {
        String name = entry.getKey();
        TestFunction test = entry.getValue();
        Object[] gold = test.run();
        golds.put(name, gold);
    }
}
222
223 @Warmup(100)
224 @Run(test = {"test0",
225 "test1",
226 "test2",
227 "test3",
228 "test4",
229 "test5",
230 "test6",
231 "test7",
232 "test8",
233 "test9",
234 "test10a",
235 "test10b",
236 "test10c",
237 "test10d",
238 "test11aB",
239 "test11aS",
240 "test11aI",
241 "test11aL",
242 "test11bB",
243 "test11bS",
244 "test11bI",
245 "test11bL",
246 "test11cB",
247 "test11cS",
248 "test11cI",
249 "test11cL",
250 "test11dB",
251 "test11dS",
252 "test11dI",
253 "test11dL",
254 "test12",
255 "test13aIL",
256 "test13aIB",
257 "test13aIS",
258 "test13aBSIL",
259 "test13bIL",
260 "test13bIB",
261 "test13bIS",
262 "test13bBSIL",
263 "test14aB",
264 "test14bB",
265 "test14cB",
266 "test14dB",
267 "test14eB",
268 "test14fB",
269 "test15aB",
270 "test15bB",
271 "test15cB",
272 "test16a",
273 "test16b",
274 "test17a",
275 "test17b",
276 "test17c",
277 "test17d",
278 "test18a",
279 "test18b",
280 "test19",
281 "test20"})
282 public void runTests() {
283 for (Map.Entry<String,TestFunction> entry : tests.entrySet()) {
284 String name = entry.getKey();
285 TestFunction test = entry.getValue();
286 // Recall gold value from before compilation
287 Object[] gold = golds.get(name);
288 // Compute new result
289 Object[] result = test.run();
290 // Compare gold and new result
291 verify(name, gold, result);
292 }
293 }
294
295 static byte[] generateB() {
296 byte[] a = new byte[RANGE];
297 for (int i = 0; i < a.length; i++) {
298 a[i] = (byte)RANDOM.nextInt();
299 }
300 return a;
301 }
302
303 static short[] generateS() {
304 short[] a = new short[RANGE];
305 for (int i = 0; i < a.length; i++) {
306 a[i] = (short)RANDOM.nextInt();
307 }
308 return a;
309 }
310
311 static int[] generateI() {
312 int[] a = new int[RANGE];
313 for (int i = 0; i < a.length; i++) {
314 a[i] = RANDOM.nextInt();
315 }
316 return a;
317 }
318
319 static long[] generateL() {
320 long[] a = new long[RANGE];
321 for (int i = 0; i < a.length; i++) {
322 a[i] = RANDOM.nextLong();
323 }
324 return a;
325 }
326
327 static void verify(String name, Object[] gold, Object[] result) {
328 if (gold.length != result.length) {
329 throw new RuntimeException("verify " + name + ": not the same number of outputs: gold.length = " +
330 gold.length + ", result.length = " + result.length);
331 }
332 for (int i = 0; i < gold.length; i++) {
333 Object g = gold[i];
334 Object r = result[i];
335 if (g.getClass() != r.getClass() || !g.getClass().isArray() || !r.getClass().isArray()) {
336 throw new RuntimeException("verify " + name + ": must both be array of same type:" +
337 " gold[" + i + "].getClass() = " + g.getClass().getSimpleName() +
338 " result[" + i + "].getClass() = " + r.getClass().getSimpleName());
339 }
340 if (g == r) {
341 throw new RuntimeException("verify " + name + ": should be two separate arrays (with identical content):" +
342 " gold[" + i + "] == result[" + i + "]");
343 }
344 if (Array.getLength(g) != Array.getLength(r)) {
345 throw new RuntimeException("verify " + name + ": arrays must have same length:" +
346 " gold[" + i + "].length = " + Array.getLength(g) +
347 " result[" + i + "].length = " + Array.getLength(r));
348 }
349 Class c = g.getClass().getComponentType();
350 if (c == byte.class) {
351 verifyB(name, i, (byte[])g, (byte[])r);
352 } else if (c == short.class) {
353 verifyS(name, i, (short[])g, (short[])r);
354 } else if (c == int.class) {
355 verifyI(name, i, (int[])g, (int[])r);
356 } else if (c == long.class) {
357 verifyL(name, i, (long[])g, (long[])r);
358 } else {
359 throw new RuntimeException("verify " + name + ": array type not supported for verify:" +
360 " gold[" + i + "].getClass() = " + g.getClass().getSimpleName() +
361 " result[" + i + "].getClass() = " + r.getClass().getSimpleName());
362 }
363 }
364 }
365
366 static void verifyB(String name, int i, byte[] g, byte[] r) {
367 for (int j = 0; j < g.length; j++) {
368 if (g[j] != r[j]) {
369 throw new RuntimeException("verify " + name + ": arrays must have same content:" +
370 " gold[" + i + "][" + j + "] = " + g[j] +
371 " result[" + i + "][" + j + "] = " + r[j]);
372 }
373 }
374 }
375
376 static void verifyS(String name, int i, short[] g, short[] r) {
377 for (int j = 0; j < g.length; j++) {
378 if (g[j] != r[j]) {
379 throw new RuntimeException("verify " + name + ": arrays must have same content:" +
380 " gold[" + i + "][" + j + "] = " + g[j] +
381 " result[" + i + "][" + j + "] = " + r[j]);
382 }
383 }
384 }
385
386 static void verifyI(String name, int i, int[] g, int[] r) {
387 for (int j = 0; j < g.length; j++) {
388 if (g[j] != r[j]) {
389 throw new RuntimeException("verify " + name + ": arrays must have same content:" +
390 " gold[" + i + "][" + j + "] = " + g[j] +
391 " result[" + i + "][" + j + "] = " + r[j]);
392 }
393 }
394 }
395
396 static void verifyL(String name, int i, long[] g, long[] r) {
397 for (int j = 0; j < g.length; j++) {
398 if (g[j] != r[j]) {
399 throw new RuntimeException("verify " + name + ": arrays must have same content:" +
400 " gold[" + i + "][" + j + "] = " + g[j] +
401 " result[" + i + "][" + j + "] = " + r[j]);
402 }
403 }
404 }
405
// test0: for i = 0, 8, 16, ... < RANGE, stores a[i+k] & mask into b[i+k] for k = 0..3.
// IR rule: 4-element byte load and AND vectors plus a vector store must be present.
// It is checked when MaxVectorSize >= 8, on 64-bit platforms with sse4.1, asimd or rvv,
// and does not depend on the AlignVector flag.
// Returns { a, b } for comparison against the gold run.
406 @Test
407 @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
408 IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
409 IRNode.STORE_VECTOR, "> 0"},
410 applyIf = {"MaxVectorSize", ">=8"},
411 applyIfPlatform = {"64-bit", "true"},
412 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
413 static Object[] test0(byte[] a, byte[] b, byte mask) {
414 for (int i = 0; i < RANGE; i+=8) {
415 // Safe to vectorize with AlignVector
416 b[i+0] = (byte)(a[i+0] & mask); // offset 0, align 0
417 b[i+1] = (byte)(a[i+1] & mask);
418 b[i+2] = (byte)(a[i+2] & mask);
419 b[i+3] = (byte)(a[i+3] & mask);
420 }
421 return new Object[]{ a, b };
422 }
423
// test1: for i = 0, 8, 16, ... < RANGE, stores a[i+k] & mask into b[i+k] for all k = 0..7.
// IR rule: byte load and AND vectors (any size) plus a vector store must be present.
// It is checked on 64-bit platforms with avx2, asimd or rvv, with no flag precondition.
// Returns { a, b } for comparison against the gold run.
424 @Test
425 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
426 IRNode.AND_VB, "> 0",
427 IRNode.STORE_VECTOR, "> 0"},
428 applyIfPlatform = {"64-bit", "true"},
429 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
430 static Object[] test1(byte[] a, byte[] b, byte mask) {
431 for (int i = 0; i < RANGE; i+=8) {
432 b[i+0] = (byte)(a[i+0] & mask); // adr = base + UNSAFE.ARRAY_BYTE_BASE_OFFSET + 0 + iter*8
433 b[i+1] = (byte)(a[i+1] & mask);
434 b[i+2] = (byte)(a[i+2] & mask);
435 b[i+3] = (byte)(a[i+3] & mask);
436 b[i+4] = (byte)(a[i+4] & mask);
437 b[i+5] = (byte)(a[i+5] & mask);
438 b[i+6] = (byte)(a[i+6] & mask);
439 b[i+7] = (byte)(a[i+7] & mask);
440 }
441 return new Object[]{ a, b };
442 }
443
// test2: for i = 0, 8, 16, ... < RANGE, stores a[i+k] & mask into b[i+k] for k = 3..6.
// First IR rule (AlignVector off, MaxVectorSize >= 8): 4-element byte load/AND vectors
// and a vector store must be present.
// Second IR rule (AlignVector on): no byte load vector, AND vector or vector store may be present.
// Both rules are checked on 64-bit platforms with sse4.1, asimd or rvv.
// Returns { a, b } for comparison against the gold run.
444 @Test
445 @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
446 IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
447 IRNode.STORE_VECTOR, "> 0"},
448 applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"},
449 applyIfPlatform = {"64-bit", "true"},
450 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
451 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
452 IRNode.AND_VB, "= 0",
453 IRNode.STORE_VECTOR, "= 0"},
454 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
455 applyIfPlatform = {"64-bit", "true"},
456 applyIf = {"AlignVector", "true"})
457 static Object[] test2(byte[] a, byte[] b, byte mask) {
458 for (int i = 0; i < RANGE; i+=8) {
459 // Cannot align with AlignVector: 3 + x * 8 % 8 = 3
460 b[i+3] = (byte)(a[i+3] & mask); // at alignment 3
461 b[i+4] = (byte)(a[i+4] & mask);
462 b[i+5] = (byte)(a[i+5] & mask);
463 b[i+6] = (byte)(a[i+6] & mask);
464 }
465 return new Object[]{ a, b };
466 }
467
// test3: like test2, but additionally processes the single element at i+0 in each stride
// (elements i+0 and i+3..i+6 are written; i+1 and i+2 are skipped).
// First IR rule (AlignVector off, MaxVectorSize >= 8): 4-element byte load/AND vectors
// and a vector store must be present.
// Second IR rule (AlignVector on): no byte load vector, AND vector or vector store may be present.
// Both rules are checked on 64-bit platforms with sse4.1, asimd or rvv.
// Returns { a, b } for comparison against the gold run.
468 @Test
469 @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
470 IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
471 IRNode.STORE_VECTOR, "> 0"},
472 applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"},
473 applyIfPlatform = {"64-bit", "true"},
474 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
475 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
476 IRNode.AND_VB, "= 0",
477 IRNode.STORE_VECTOR, "= 0"},
478 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
479 applyIfPlatform = {"64-bit", "true"},
480 applyIf = {"AlignVector", "true"})
481 static Object[] test3(byte[] a, byte[] b, byte mask) {
482 for (int i = 0; i < RANGE; i+=8) {
483 // Cannot align with AlignVector: 3 + x * 8 % 8 = 3
484
485 // Problematic for AlignVector
486 b[i+0] = (byte)(a[i+0] & mask); // best_memref, align 0
487
488 b[i+3] = (byte)(a[i+3] & mask); // pack at offset 3 bytes
489 b[i+4] = (byte)(a[i+4] & mask);
490 b[i+5] = (byte)(a[i+5] & mask);
491 b[i+6] = (byte)(a[i+6] & mask);
492 }
493 return new Object[]{ a, b };
494 }
495
// test4: for i = 0 .. RANGE/16 - 1, processes two groups inside each block of 16 bytes:
// elements i*16+0 .. i*16+3 (4 elements) and i*16+5 .. i*16+12 (8 elements).
// First IR rule (AlignVector off, MaxVectorSize >= 16): both 4-element and 8-element
// byte load/AND vectors and a vector store must be present.
// Second IR rule (AlignVector on, MaxVectorSize >= 16): the 4-element vectors and a vector
// store must be present, while the count of 8-element load/AND vectors must be zero.
// Both rules are checked on 64-bit platforms with sse4.1, asimd or rvv.
// Returns { a, b } for comparison against the gold run.
496 @Test
497 @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
498 IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_8, "> 0",
499 IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
500 IRNode.AND_VB, IRNode.VECTOR_SIZE_8, "> 0",
501 IRNode.STORE_VECTOR, "> 0"},
502 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
503 applyIfPlatform = {"64-bit", "true"},
504 applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=16"})
505 @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
506 IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_8, "= 0",// unaligned
507 IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
508 IRNode.AND_VB, IRNode.VECTOR_SIZE_8, "= 0",// unaligned
509 IRNode.STORE_VECTOR, "> 0"},
510 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
511 applyIfPlatform = {"64-bit", "true"},
512 applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">=16"})
513 static Object[] test4(byte[] a, byte[] b, byte mask) {
514 for (int i = 0; i < RANGE/16; i++) {
515 // Problematic for AlignVector
516 b[i*16 + 0 ] = (byte)(a[i*16 + 0 ] & mask); // 4 pack, 0 aligned
517 b[i*16 + 1 ] = (byte)(a[i*16 + 1 ] & mask);
518 b[i*16 + 2 ] = (byte)(a[i*16 + 2 ] & mask);
519 b[i*16 + 3 ] = (byte)(a[i*16 + 3 ] & mask);
520
521 b[i*16 + 5 ] = (byte)(a[i*16 + 5 ] & mask); // 8 pack, 5 aligned
522 b[i*16 + 6 ] = (byte)(a[i*16 + 6 ] & mask);
523 b[i*16 + 7 ] = (byte)(a[i*16 + 7 ] & mask);
524 b[i*16 + 8 ] = (byte)(a[i*16 + 8 ] & mask);
525 b[i*16 + 9 ] = (byte)(a[i*16 + 9 ] & mask);
526 b[i*16 + 10] = (byte)(a[i*16 + 10] & mask);
527 b[i*16 + 11] = (byte)(a[i*16 + 11] & mask);
528 b[i*16 + 12] = (byte)(a[i*16 + 12] & mask);
529 }
530 return new Object[]{ a, b };
531 }
532
// test5: same access pattern as test3 (elements +0 and +3..+6 of every 8-byte stride),
// but every index is additionally offset by the loop-invariant parameter inv.
// First IR rule (AlignVector off, MaxVectorSize >= 8): 4-element byte load/AND vectors
// and a vector store must be present.
// Second IR rule (AlignVector on): no byte load vector, AND vector or vector store may be present.
// Both rules are checked on 64-bit platforms with sse4.1, asimd or rvv.
// Returns { a, b } for comparison against the gold run.
533 @Test
534 @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
535 IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
536 IRNode.STORE_VECTOR, "> 0"},
537 applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"},
538 applyIfPlatform = {"64-bit", "true"},
539 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
540 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
541 IRNode.AND_VB, "= 0",
542 IRNode.STORE_VECTOR, "= 0"},
543 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
544 applyIfPlatform = {"64-bit", "true"},
545 applyIf = {"AlignVector", "true"})
546 static Object[] test5(byte[] a, byte[] b, byte mask, int inv) {
547 for (int i = 0; i < RANGE; i+=8) {
548 // Cannot align with AlignVector because of invariant
549 b[i+inv+0] = (byte)(a[i+inv+0] & mask);
550
551 b[i+inv+3] = (byte)(a[i+inv+3] & mask);
552 b[i+inv+4] = (byte)(a[i+inv+4] & mask);
553 b[i+inv+5] = (byte)(a[i+inv+5] & mask);
554 b[i+inv+6] = (byte)(a[i+inv+6] & mask);
555 }
556 return new Object[]{ a, b };
557 }
558
// test6: for i = 0, 2, 4, ... < RANGE/8, processes elements i*4+0 and i*4+3 .. i*4+6,
// i.e. the index is scaled by 4 while the loop variable advances by 2.
// First IR rule (AlignVector off, MaxVectorSize >= 8): 4-element byte load/AND vectors
// and a vector store must be present.
// Second IR rule (AlignVector on): no byte load vector, AND vector or vector store may be present.
// Both rules are checked on 64-bit platforms with sse4.1, asimd or rvv.
// Returns { a, b } for comparison against the gold run.
559 @Test
560 @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
561 IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
562 IRNode.STORE_VECTOR, "> 0"},
563 applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"},
564 applyIfPlatform = {"64-bit", "true"},
565 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
566 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
567 IRNode.AND_VB, "= 0",
568 IRNode.STORE_VECTOR, "= 0"},
569 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
570 applyIfPlatform = {"64-bit", "true"},
571 applyIf = {"AlignVector", "true"})
572 static Object[] test6(byte[] a, byte[] b, byte mask) {
573 for (int i = 0; i < RANGE/8; i+=2) {
574 // Cannot align with AlignVector because offset is odd
575 b[i*4+0] = (byte)(a[i*4+0] & mask);
576
577 b[i*4+3] = (byte)(a[i*4+3] & mask);
578 b[i*4+4] = (byte)(a[i*4+4] & mask);
579 b[i*4+5] = (byte)(a[i*4+5] & mask);
580 b[i*4+6] = (byte)(a[i*4+6] & mask);
581 }
582 return new Object[]{ a, b };
583 }
584
// test7: short-array variant of test6 (elements i*4+0 and i*4+3 .. i*4+6 for i = 0, 2, 4, ... < RANGE/8).
// First IR rule (AlignVector off, MaxVectorSize >= 16; avx2, asimd or rvv): 4-element short
// load/AND vectors and a vector store must be present.
// Second IR rule (AlignVector on; sse4.1, asimd or rvv): no short load vector, AND vector or
// vector store may be present.
// Both rules are restricted to 64-bit platforms.
// Returns { a, b } for comparison against the gold run.
585 @Test
586 @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
587 IRNode.AND_VS, IRNode.VECTOR_SIZE_4, "> 0",
588 IRNode.STORE_VECTOR, "> 0"},
589 applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=16"},
590 applyIfPlatform = {"64-bit", "true"},
591 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
592 @IR(counts = {IRNode.LOAD_VECTOR_S, "= 0",
593 IRNode.AND_VS, "= 0",
594 IRNode.STORE_VECTOR, "= 0"},
595 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
596 applyIfPlatform = {"64-bit", "true"},
597 applyIf = {"AlignVector", "true"})
598 static Object[] test7(short[] a, short[] b, short mask) {
599 for (int i = 0; i < RANGE/8; i+=2) {
600 // Cannot align with AlignVector because offset is odd
601 b[i*4+0] = (short)(a[i*4+0] & mask);
602
603 b[i*4+3] = (short)(a[i*4+3] & mask);
604 b[i*4+4] = (short)(a[i*4+4] & mask);
605 b[i*4+5] = (short)(a[i*4+5] & mask);
606 b[i*4+6] = (short)(a[i*4+6] & mask);
607 }
608 return new Object[]{ a, b };
609 }
610
// test8: same access pattern as test3 (elements i+0 and i+3..i+6, stride 8), but the loop
// starts at the parameter init instead of a constant.
// First IR rule (AlignVector off, MaxVectorSize >= 8): 4-element byte load/AND vectors
// and a vector store must be present.
// Second IR rule (AlignVector on): no byte load vector, AND vector or vector store may be present.
// Both rules are checked on 64-bit platforms with sse4.1, asimd or rvv.
// Returns { a, b } for comparison against the gold run.
611 @Test
612 @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
613 IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
614 IRNode.STORE_VECTOR, "> 0"},
615 applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"},
616 applyIfPlatform = {"64-bit", "true"},
617 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
618 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
619 IRNode.AND_VB, "= 0",
620 IRNode.STORE_VECTOR, "= 0"},
621 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
622 applyIfPlatform = {"64-bit", "true"},
623 applyIf = {"AlignVector", "true"})
624 static Object[] test8(byte[] a, byte[] b, byte mask, int init) {
625 for (int i = init; i < RANGE; i+=8) {
626 // Cannot align with AlignVector because of invariant (variable init becomes invar)
627 b[i+0] = (byte)(a[i+0] & mask);
628
629 b[i+3] = (byte)(a[i+3] & mask);
630 b[i+4] = (byte)(a[i+4] & mask);
631 b[i+5] = (byte)(a[i+5] & mask);
632 b[i+6] = (byte)(a[i+6] & mask);
633 }
634 return new Object[]{ a, b };
635 }
636
// test9: same access pattern as test3 (elements i+0 and i+3..i+6, stride 8), but the loop
// runs from the constant 13 up to RANGE-8.
// IR rule: 4-element byte load/AND vectors and a vector store must be present.
// It is checked when MaxVectorSize >= 8, on 64-bit platforms with sse4.1, asimd or rvv,
// and does not depend on the AlignVector flag.
// Returns { a, b } for comparison against the gold run.
637 @Test
638 @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
639 IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
640 IRNode.STORE_VECTOR, "> 0"},
641 applyIf = {"MaxVectorSize", ">=8"},
642 applyIfPlatform = {"64-bit", "true"},
643 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
644 static Object[] test9(byte[] a, byte[] b, byte mask) {
645 // known non-zero init value does not affect offset, but has implicit effect on iv
646 for (int i = 13; i < RANGE-8; i+=8) {
647 b[i+0] = (byte)(a[i+0] & mask);
648
649 b[i+3] = (byte)(a[i+3] & mask);
650 b[i+4] = (byte)(a[i+4] & mask);
651 b[i+5] = (byte)(a[i+5] & mask);
652 b[i+6] = (byte)(a[i+6] & mask);
653 }
654 return new Object[]{ a, b };
655 }
656
// test10a: for i = 3, 11, 19, ... < RANGE-8, stores a[i+k] & mask into b[i+k] for k = 0..3.
// First IR rule (AlignVector off, MaxVectorSize >= 8): 4-element byte load/AND vectors
// and a vector store must be present.
// Second IR rule (AlignVector on): no byte load vector, AND vector or vector store may be present.
// Both rules are checked on 64-bit platforms with sse4.1, asimd or rvv.
// Returns { a, b } for comparison against the gold run.
657 @Test
658 @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
659 IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
660 IRNode.STORE_VECTOR, "> 0"},
661 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
662 applyIfPlatform = {"64-bit", "true"},
663 applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"})
664 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
665 IRNode.AND_VB, "= 0",
666 IRNode.STORE_VECTOR, "= 0"},
667 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
668 applyIfPlatform = {"64-bit", "true"},
669 applyIf = {"AlignVector", "true"})
670 static Object[] test10a(byte[] a, byte[] b, byte mask) {
671 // This is not alignable with pre-loop, because of odd init.
672 for (int i = 3; i < RANGE-8; i+=8) {
673 b[i+0] = (byte)(a[i+0] & mask);
674 b[i+1] = (byte)(a[i+1] & mask);
675 b[i+2] = (byte)(a[i+2] & mask);
676 b[i+3] = (byte)(a[i+3] & mask);
677 }
678 return new Object[]{ a, b };
679 }
680
// test10b: same as test10a, but the loop starts at 13 instead of 3
// (i = 13, 21, 29, ... < RANGE-8; elements i+0..i+3).
// First IR rule (AlignVector off, MaxVectorSize >= 8): 4-element byte load/AND vectors
// and a vector store must be present.
// Second IR rule (AlignVector on): no byte load vector, AND vector or vector store may be present.
// Both rules are checked on 64-bit platforms with sse4.1, asimd or rvv.
// Returns { a, b } for comparison against the gold run.
681 @Test
682 @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
683 IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
684 IRNode.STORE_VECTOR, "> 0"},
685 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
686 applyIfPlatform = {"64-bit", "true"},
687 applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"})
688 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
689 IRNode.AND_VB, "= 0",
690 IRNode.STORE_VECTOR, "= 0"},
691 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
692 applyIfPlatform = {"64-bit", "true"},
693 applyIf = {"AlignVector", "true"})
694 static Object[] test10b(byte[] a, byte[] b, byte mask) {
695 // This is not alignable with pre-loop, because of odd init.
696 // Seems not correctly handled.
697 for (int i = 13; i < RANGE-8; i+=8) {
698 b[i+0] = (byte)(a[i+0] & mask);
699 b[i+1] = (byte)(a[i+1] & mask);
700 b[i+2] = (byte)(a[i+2] & mask);
701 b[i+3] = (byte)(a[i+3] & mask);
702 }
703 return new Object[]{ a, b };
704 }
705
// test10c: short-array variant of test10b (i = 13, 21, 29, ... < RANGE-8; elements i+0..i+3).
// First IR rule (AlignVector off, MaxVectorSize >= 16): 4-element short load/AND vectors
// and a vector store must be present.
// Second IR rule (AlignVector on): no short load vector, AND vector or vector store may be present.
// Both rules are checked on 64-bit platforms with sse4.1, asimd or rvv.
// Returns { a, b } for comparison against the gold run.
706 @Test
707 @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
708 IRNode.AND_VS, IRNode.VECTOR_SIZE_4, "> 0",
709 IRNode.STORE_VECTOR, "> 0"},
710 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
711 applyIfPlatform = {"64-bit", "true"},
712 applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=16"})
713 @IR(counts = {IRNode.LOAD_VECTOR_S, "= 0",
714 IRNode.AND_VS, "= 0",
715 IRNode.STORE_VECTOR, "= 0"},
716 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
717 applyIfPlatform = {"64-bit", "true"},
718 applyIf = {"AlignVector", "true"})
719 static Object[] test10c(short[] a, short[] b, short mask) {
720 // This is not alignable with pre-loop, because of odd init.
721 // Seems not correctly handled with MaxVectorSize >= 32.
722 for (int i = 13; i < RANGE-8; i+=8) {
723 b[i+0] = (short)(a[i+0] & mask);
724 b[i+1] = (short)(a[i+1] & mask);
725 b[i+2] = (short)(a[i+2] & mask);
726 b[i+3] = (short)(a[i+3] & mask);
727 }
728 return new Object[]{ a, b };
729 }
730
// test10d: for i = 13, 21, 29, ... < RANGE-16, stores a[i+k+3] & mask into b[i+k+3]
// for k = 0..3, i.e. the short elements i+3 .. i+6.
// IR rule: 4-element short load/AND vectors and a vector store must be present.
// It is checked when MaxVectorSize >= 16, on 64-bit platforms with avx2, asimd or rvv,
// and does not depend on the AlignVector flag.
// Returns { a, b } for comparison against the gold run.
731 @Test
732 @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
733 IRNode.AND_VS, IRNode.VECTOR_SIZE_4, "> 0",
734 IRNode.STORE_VECTOR, "> 0"},
735 applyIf = {"MaxVectorSize", ">=16"},
736 applyIfPlatform = {"64-bit", "true"},
737 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
738 static Object[] test10d(short[] a, short[] b, short mask) {
739 for (int i = 13; i < RANGE-16; i+=8) {
740 // adr = base + UNSAFE.ARRAY_SHORT_BASE_OFFSET + 2*(3 + 13) + iter*16
741 b[i+0+3] = (short)(a[i+0+3] & mask);
742 b[i+1+3] = (short)(a[i+1+3] & mask);
743 b[i+2+3] = (short)(a[i+2+3] & mask);
744 b[i+3+3] = (short)(a[i+3+3] & mask);
745 }
746 return new Object[]{ a, b };
747 }
748
// test11aB: unit-stride byte loop, b[i] = a[i] & mask for i = 0 .. RANGE-1.
// IR rule: byte load and AND vectors plus a vector store must be present.
// It is checked on 64-bit platforms with sse4.1, asimd or rvv, with no flag precondition.
// Returns { a, b } for comparison against the gold run.
749 @Test
750 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
751 IRNode.AND_VB, "> 0",
752 IRNode.STORE_VECTOR, "> 0"},
753 applyIfPlatform = {"64-bit", "true"},
754 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
755 static Object[] test11aB(byte[] a, byte[] b, byte mask) {
756 for (int i = 0; i < RANGE; i++) {
757 // always alignable
758 b[i+0] = (byte)(a[i+0] & mask);
759 }
760 return new Object[]{ a, b };
761 }
762
// test11aS: unit-stride short loop, b[i] = a[i] & mask for i = 0 .. RANGE-1.
// IR rule: short load and AND vectors plus a vector store must be present.
// It is checked on 64-bit platforms with sse4.1, asimd or rvv, with no flag precondition.
// Returns { a, b } for comparison against the gold run.
763 @Test
764 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
765 IRNode.AND_VS, "> 0",
766 IRNode.STORE_VECTOR, "> 0"},
767 applyIfPlatform = {"64-bit", "true"},
768 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
769 static Object[] test11aS(short[] a, short[] b, short mask) {
770 for (int i = 0; i < RANGE; i++) {
771 // always alignable
772 b[i+0] = (short)(a[i+0] & mask);
773 }
774 return new Object[]{ a, b };
775 }
776
// test11aI: unit-stride int loop, b[i] = a[i] & mask for i = 0 .. RANGE-1.
// IR rule: int load and AND vectors plus a vector store must be present.
// It is checked on 64-bit platforms with sse4.1, asimd or rvv, with no flag precondition.
// Returns { a, b } for comparison against the gold run.
777 @Test
778 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
779 IRNode.AND_VI, "> 0",
780 IRNode.STORE_VECTOR, "> 0"},
781 applyIfPlatform = {"64-bit", "true"},
782 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
783 static Object[] test11aI(int[] a, int[] b, int mask) {
784 for (int i = 0; i < RANGE; i++) {
785 // always alignable
786 b[i+0] = (int)(a[i+0] & mask);
787 }
788 return new Object[]{ a, b };
789 }
790
// test11aL: unit-stride long loop, b[i] = a[i] & mask for i = 0 .. RANGE-1.
// IR rule: long load and AND vectors plus a vector store must be present.
// It is checked on 64-bit platforms with sse4.1, asimd or rvv, with no flag precondition.
// Returns { a, b } for comparison against the gold run.
791 @Test
792 @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0",
793 IRNode.AND_VL, "> 0",
794 IRNode.STORE_VECTOR, "> 0"},
795 applyIfPlatform = {"64-bit", "true"},
796 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
797 static Object[] test11aL(long[] a, long[] b, long mask) {
798 for (int i = 0; i < RANGE; i++) {
799 // always alignable
800 b[i+0] = (long)(a[i+0] & mask);
801 }
802 return new Object[]{ a, b };
803 }
804
// test11bB: unit-stride byte loop starting at 1, b[i] = a[i] & mask for i = 1 .. RANGE-1.
// IR rule: byte load and AND vectors plus a vector store must be present.
// It is checked on 64-bit platforms with sse4.1, asimd or rvv, with no flag precondition.
// Returns { a, b } for comparison against the gold run.
805 @Test
806 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
807 IRNode.AND_VB, "> 0",
808 IRNode.STORE_VECTOR, "> 0"},
809 applyIfPlatform = {"64-bit", "true"},
810 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
811 static Object[] test11bB(byte[] a, byte[] b, byte mask) {
812 for (int i = 1; i < RANGE; i++) {
813 // always alignable
814 b[i+0] = (byte)(a[i+0] & mask);
815 }
816 return new Object[]{ a, b };
817 }
818
// test11bS: unit-stride short loop starting at 1, b[i] = a[i] & mask for i = 1 .. RANGE-1.
// IR rule: short load and AND vectors plus a vector store must be present.
// It is checked on 64-bit platforms with sse4.1, asimd or rvv, with no flag precondition.
// Returns { a, b } for comparison against the gold run.
819 @Test
820 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
821 IRNode.AND_VS, "> 0",
822 IRNode.STORE_VECTOR, "> 0"},
823 applyIfPlatform = {"64-bit", "true"},
824 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
825 static Object[] test11bS(short[] a, short[] b, short mask) {
826 for (int i = 1; i < RANGE; i++) {
827 // always alignable
828 b[i+0] = (short)(a[i+0] & mask);
829 }
830 return new Object[]{ a, b };
831 }
832
// test11bI: unit-stride int loop starting at 1, b[i] = a[i] & mask for i = 1 .. RANGE-1.
// IR rule: int load and AND vectors plus a vector store must be present.
// It is checked on 64-bit platforms with sse4.1, asimd or rvv, with no flag precondition.
// Returns { a, b } for comparison against the gold run.
833 @Test
834 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
835 IRNode.AND_VI, "> 0",
836 IRNode.STORE_VECTOR, "> 0"},
837 applyIfPlatform = {"64-bit", "true"},
838 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
839 static Object[] test11bI(int[] a, int[] b, int mask) {
840 for (int i = 1; i < RANGE; i++) {
841 // always alignable
842 b[i+0] = (int)(a[i+0] & mask);
843 }
844 return new Object[]{ a, b };
845 }
846
// test11bL: long-array variant of the "loop starts at 1" masking kernel:
// b[i] = a[i] & mask for i in [1, RANGE).
// The single IR rule (64-bit platform; sse4.1, asimd or rvv) requires at least one
// vector long load, one AND_VL and one vector store, with no AlignVector condition.
// Returns a and b wrapped in an Object[].
847 @Test
848 @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0",
849 IRNode.AND_VL, "> 0",
850 IRNode.STORE_VECTOR, "> 0"},
851 applyIfPlatform = {"64-bit", "true"},
852 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
853 static Object[] test11bL(long[] a, long[] b, long mask) {
854 for (int i = 1; i < RANGE; i++) {
855 // always alignable
856 b[i+0] = (long)(a[i+0] & mask);
857 }
858 return new Object[]{ a, b };
859 }
860
// test11cB: byte kernel where the load index is one element ahead of the store
// index: b[i] = a[i+1] & mask for i in [1, RANGE-1).
// Two IR rules, both for 64-bit platforms with sse4.1, asimd or rvv:
//   - AlignVector=false: at least one vector byte load, AND_VB and vector store.
//   - AlignVector=true:  none of those vector nodes may be present.
// Returns a and b wrapped in an Object[].
861 @Test
862 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
863 IRNode.AND_VB, "> 0",
864 IRNode.STORE_VECTOR, "> 0"},
865 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
866 applyIfPlatform = {"64-bit", "true"},
867 applyIf = {"AlignVector", "false"})
868 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
869 IRNode.AND_VB, "= 0",
870 IRNode.STORE_VECTOR, "= 0"},
871 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
872 applyIfPlatform = {"64-bit", "true"},
873 applyIf = {"AlignVector", "true"})
874 static Object[] test11cB(byte[] a, byte[] b, byte mask) {
875 for (int i = 1; i < RANGE-1; i++) {
876 // 1 byte offset -> not alignable with AlignVector
877 b[i+0] = (byte)(a[i+1] & mask);
878 }
879 return new Object[]{ a, b };
880 }
881
// test11cS: short kernel where the load index is one element ahead of the store
// index: b[i] = a[i+1] & mask for i in [1, RANGE-1).
// Two IR rules, both for 64-bit platforms with sse4.1, asimd or rvv:
//   - AlignVector=false: at least one vector short load, AND_VS and vector store.
//   - AlignVector=true:  none of those vector nodes may be present.
// Returns a and b wrapped in an Object[].
882 @Test
883 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
884 IRNode.AND_VS, "> 0",
885 IRNode.STORE_VECTOR, "> 0"},
886 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
887 applyIfPlatform = {"64-bit", "true"},
888 applyIf = {"AlignVector", "false"})
889 @IR(counts = {IRNode.LOAD_VECTOR_S, "= 0",
890 IRNode.AND_VS, "= 0",
891 IRNode.STORE_VECTOR, "= 0"},
892 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
893 applyIfPlatform = {"64-bit", "true"},
894 applyIf = {"AlignVector", "true"})
895 static Object[] test11cS(short[] a, short[] b, short mask) {
896 for (int i = 1; i < RANGE-1; i++) {
897 // 2 byte offset -> not alignable with AlignVector
898 b[i+0] = (short)(a[i+1] & mask);
899 }
900 return new Object[]{ a, b };
901 }
902
// test11cI: int kernel where the load index is one element ahead of the store
// index: b[i] = a[i+1] & mask for i in [1, RANGE-1).
// Two IR rules, both for 64-bit platforms with sse4.1, asimd or rvv:
//   - AlignVector=false: at least one vector int load, AND_VI and vector store.
//   - AlignVector=true:  none of those vector nodes may be present.
// Returns a and b wrapped in an Object[].
903 @Test
904 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
905 IRNode.AND_VI, "> 0",
906 IRNode.STORE_VECTOR, "> 0"},
907 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
908 applyIfPlatform = {"64-bit", "true"},
909 applyIf = {"AlignVector", "false"})
910 @IR(counts = {IRNode.LOAD_VECTOR_I, "= 0",
911 IRNode.AND_VI, "= 0",
912 IRNode.STORE_VECTOR, "= 0"},
913 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
914 applyIfPlatform = {"64-bit", "true"},
915 applyIf = {"AlignVector", "true"})
916 static Object[] test11cI(int[] a, int[] b, int mask) {
917 for (int i = 1; i < RANGE-1; i++) {
918 // 4 byte offset -> not alignable with AlignVector
919 b[i+0] = (int)(a[i+1] & mask);
920 }
921 return new Object[]{ a, b };
922 }
923
// test11cL: long kernel where the load index is one element ahead of the store
// index: b[i] = a[i+1] & mask for i in [1, RANGE-1).
// Unlike the byte/short/int siblings there is only one IR rule and it has no
// AlignVector condition: on 64-bit platforms with sse4.1, asimd or rvv it requires
// at least one vector long load, AND_VL and vector store.
// Returns a and b wrapped in an Object[].
924 @Test
925 @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0",
926 IRNode.AND_VL, "> 0",
927 IRNode.STORE_VECTOR, "> 0"},
928 applyIfPlatform = {"64-bit", "true"},
929 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
930 static Object[] test11cL(long[] a, long[] b, long mask) {
931 for (int i = 1; i < RANGE-1; i++) {
932 // always alignable (8 byte offset)
933 b[i+0] = (long)(a[i+1] & mask);
934 }
935 return new Object[]{ a, b };
936 }
937
// test11dB: byte masking kernel whose load and store indices both carry the same
// extra loop-invariant term: b[i+invar] = a[i+invar] & mask for i in [0, RANGE).
// The single IR rule (64-bit platform; sse4.1, asimd or rvv) requires at least one
// vector byte load, AND_VB and vector store, with no AlignVector condition.
// NOTE(review): the caller must pick invar so that i+invar stays in bounds for
// both arrays; the value range of invar is not visible in this block.
// Returns a and b wrapped in an Object[].
938 @Test
939 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
940 IRNode.AND_VB, "> 0",
941 IRNode.STORE_VECTOR, "> 0"},
942 applyIfPlatform = {"64-bit", "true"},
943 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
944 static Object[] test11dB(byte[] a, byte[] b, byte mask, int invar) {
945 for (int i = 0; i < RANGE; i++) {
946 b[i+0+invar] = (byte)(a[i+0+invar] & mask);
947 }
948 return new Object[]{ a, b };
949 }
950
// test11dS: short masking kernel whose load and store indices both carry the same
// extra loop-invariant term: b[i+invar] = a[i+invar] & mask for i in [0, RANGE).
// The single IR rule (64-bit platform; sse4.1, asimd or rvv) requires at least one
// vector short load, AND_VS and vector store, with no AlignVector condition.
// NOTE(review): the caller must pick invar so that i+invar stays in bounds for
// both arrays; the value range of invar is not visible in this block.
// Returns a and b wrapped in an Object[].
951 @Test
952 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
953 IRNode.AND_VS, "> 0",
954 IRNode.STORE_VECTOR, "> 0"},
955 applyIfPlatform = {"64-bit", "true"},
956 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
957 static Object[] test11dS(short[] a, short[] b, short mask, int invar) {
958 for (int i = 0; i < RANGE; i++) {
959 b[i+0+invar] = (short)(a[i+0+invar] & mask);
960 }
961 return new Object[]{ a, b };
962 }
963
// test11dI: int masking kernel whose load and store indices both carry the same
// extra loop-invariant term: b[i+invar] = a[i+invar] & mask for i in [0, RANGE).
// The single IR rule (64-bit platform; sse4.1, asimd or rvv) requires at least one
// vector int load, AND_VI and vector store, with no AlignVector condition.
// NOTE(review): the caller must pick invar so that i+invar stays in bounds for
// both arrays; the value range of invar is not visible in this block.
// Returns a and b wrapped in an Object[].
964 @Test
965 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
966 IRNode.AND_VI, "> 0",
967 IRNode.STORE_VECTOR, "> 0"},
968 applyIfPlatform = {"64-bit", "true"},
969 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
970 static Object[] test11dI(int[] a, int[] b, int mask, int invar) {
971 for (int i = 0; i < RANGE; i++) {
972 b[i+0+invar] = (int)(a[i+0+invar] & mask);
973 }
974 return new Object[]{ a, b };
975 }
976
// test11dL: long masking kernel whose load and store indices both carry the same
// extra loop-invariant term: b[i+invar] = a[i+invar] & mask for i in [0, RANGE).
// The single IR rule (64-bit platform; sse4.1, asimd or rvv) requires at least one
// vector long load, AND_VL and vector store, with no AlignVector condition.
// NOTE(review): the caller must pick invar so that i+invar stays in bounds for
// both arrays; the value range of invar is not visible in this block.
// Returns a and b wrapped in an Object[].
977 @Test
978 @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0",
979 IRNode.AND_VL, "> 0",
980 IRNode.STORE_VECTOR, "> 0"},
981 applyIfPlatform = {"64-bit", "true"},
982 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
983 static Object[] test11dL(long[] a, long[] b, long mask, int invar) {
984 for (int i = 0; i < RANGE; i++) {
985 b[i+0+invar] = (long)(a[i+0+invar] & mask);
986 }
987 return new Object[]{ a, b };
988 }
989
// test12: byte masking kernel with index scale 6. Each iteration handles the four
// consecutive bytes at i*6+0 .. i*6+3 and skips i*6+4 and i*6+5; i runs over
// [0, RANGE/16).
// The only IR rule applies when AlignVector=false (64-bit platform; sse4.1, asimd
// or rvv). It requires at least one vector byte load and one AND_VB, each with
// vector size min(max_byte, 4), plus at least one vector store. No expectation is
// stated for AlignVector=true.
// Returns a and b wrapped in an Object[].
990 @Test
991 @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE + "min(max_byte, 4)", "> 0",
992 IRNode.AND_VB, IRNode.VECTOR_SIZE + "min(max_byte, 4)", "> 0",
993 IRNode.STORE_VECTOR, "> 0"},
994 applyIfPlatform = {"64-bit", "true"},
995 applyIf = {"AlignVector", "false"},
996 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
997 static Object[] test12(byte[] a, byte[] b, byte mask) {
998 for (int i = 0; i < RANGE/16; i++) {
999 // Non-power-of-2 stride. Vectorization of 4 bytes, then 2-bytes gap.
1000 b[i*6 + 0 ] = (byte)(a[i*6 + 0 ] & mask);
1001 b[i*6 + 1 ] = (byte)(a[i*6 + 1 ] & mask);
1002 b[i*6 + 2 ] = (byte)(a[i*6 + 2 ] & mask);
1003 b[i*6 + 3 ] = (byte)(a[i*6 + 3 ] & mask);
1004 }
1005 return new Object[]{ a, b };
1006 }
1007
// test13aIL: one loop over [0, RANGE) that increments an int array and a long
// array element-by-element, i.e. two element sizes in the same loop body.
// Two IR rules with identical expectations (at least one vector int load, vector
// long load, ADD_VI and ADD_VL, each with vector size min(max_int, max_long), plus
// at least one vector store):
//   - 64-bit platform with avx2.
//   - riscv64 with rvv and MaxVectorSize >= 32.
// Returns a and b wrapped in an Object[].
1008 @Test
1009 @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
1010 IRNode.LOAD_VECTOR_L, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
1011 IRNode.ADD_VI, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
1012 IRNode.ADD_VL, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
1013 IRNode.STORE_VECTOR, "> 0"},
1014 applyIfPlatform = {"64-bit", "true"},
1015 applyIfCPUFeature = {"avx2", "true"})
1016 // require avx to ensure vectors are larger than what unrolling produces
1017 @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
1018 IRNode.LOAD_VECTOR_L, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
1019 IRNode.ADD_VI, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
1020 IRNode.ADD_VL, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
1021 IRNode.STORE_VECTOR, "> 0"},
1022 applyIfPlatform = {"riscv64", "true"},
1023 applyIfCPUFeature = {"rvv", "true"},
1024 applyIf = {"MaxVectorSize", ">=32"})
1025 static Object[] test13aIL(int[] a, long[] b) {
1026 for (int i = 0; i < RANGE; i++) {
1027 a[i]++;
1028 b[i]++;
1029 }
1030 return new Object[]{ a, b };
1031 }
1032
// test13aIB: one loop over [0, RANGE) that increments an int array and a byte
// array element-by-element (mixed 4-byte and 1-byte elements in one loop body).
// The single IR rule (64-bit platform; avx2, asimd or rvv) requires at least one
// vector byte load, vector int load, ADD_VB, ADD_VI and vector store. It has no
// AlignVector condition and no vector-size constraint.
// Returns a and b wrapped in an Object[].
1033 @Test
1034 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
1035 IRNode.LOAD_VECTOR_I, "> 0",
1036 IRNode.ADD_VB, "> 0",
1037 IRNode.ADD_VI, "> 0",
1038 IRNode.STORE_VECTOR, "> 0"},
1039 applyIfPlatform = {"64-bit", "true"},
1040 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
1041 static Object[] test13aIB(int[] a, byte[] b) {
1042 for (int i = 0; i < RANGE; i++) {
1043 a[i]++;
1044 b[i]++;
1045 }
1046 return new Object[]{ a, b };
1047 }
1048
// test13aIS: one loop over [0, RANGE) that increments an int array and a short
// array element-by-element (mixed 4-byte and 2-byte elements in one loop body).
// The single IR rule (64-bit platform; avx2, asimd or rvv) requires at least one
// vector int load, vector short load, ADD_VI, ADD_VS and vector store. It has no
// AlignVector condition and no vector-size constraint.
// Returns a and b wrapped in an Object[].
1049 @Test
1050 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
1051 IRNode.LOAD_VECTOR_S, "> 0",
1052 IRNode.ADD_VI, "> 0",
1053 IRNode.ADD_VS, "> 0",
1054 IRNode.STORE_VECTOR, "> 0"},
1055 applyIfPlatform = {"64-bit", "true"},
1056 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
1057 static Object[] test13aIS(int[] a, short[] b) {
1058 for (int i = 0; i < RANGE; i++) {
1059 a[i]++;
1060 b[i]++;
1061 }
1062 return new Object[]{ a, b };
1063 }
1064
// test13aBSIL: one loop over [0, RANGE) that increments a byte, a short, an int
// and a long array element-by-element, so all four integral element sizes appear
// in the same loop body.
// The single IR rule (64-bit platform; avx2, asimd or rvv) requires at least one
// vector load and one vector add for each of the four types, plus at least one
// vector store. It has no AlignVector condition and no vector-size constraint.
// Returns a, b, c and d wrapped in an Object[].
1065 @Test
1066 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
1067 IRNode.LOAD_VECTOR_S, "> 0",
1068 IRNode.LOAD_VECTOR_I, "> 0",
1069 IRNode.LOAD_VECTOR_L, "> 0",
1070 IRNode.ADD_VB, "> 0",
1071 IRNode.ADD_VS, "> 0",
1072 IRNode.ADD_VI, "> 0",
1073 IRNode.ADD_VL, "> 0",
1074 IRNode.STORE_VECTOR, "> 0"},
1075 applyIfPlatform = {"64-bit", "true"},
1076 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
1077 static Object[] test13aBSIL(byte[] a, short[] b, int[] c, long[] d) {
1078 for (int i = 0; i < RANGE; i++) {
1079 a[i]++;
1080 b[i]++;
1081 c[i]++;
1082 d[i]++;
1083 }
1084 return new Object[]{ a, b, c, d };
1085 }
1086
// test13bIL: same int + long increment kernel as the "13a" shape, but the loop
// starts at i = 1, so element 0 of both arrays is left untouched.
// Two IR rules with identical expectations (at least one vector int load, vector
// long load, ADD_VI and ADD_VL, each with vector size min(max_int, max_long), plus
// at least one vector store):
//   - 64-bit platform with avx2.
//   - riscv64 with rvv and MaxVectorSize >= 32.
// Returns a and b wrapped in an Object[].
1087 @Test
1088 @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
1089 IRNode.LOAD_VECTOR_L, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
1090 IRNode.ADD_VI, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
1091 IRNode.ADD_VL, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
1092 IRNode.STORE_VECTOR, "> 0"},
1093 applyIfPlatform = {"64-bit", "true"},
1094 applyIfCPUFeature = {"avx2", "true"})
1095 // require avx to ensure vectors are larger than what unrolling produces
1096 @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
1097 IRNode.LOAD_VECTOR_L, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
1098 IRNode.ADD_VI, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
1099 IRNode.ADD_VL, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
1100 IRNode.STORE_VECTOR, "> 0"},
1101 applyIfPlatform = {"riscv64", "true"},
1102 applyIfCPUFeature = {"rvv", "true"},
1103 applyIf = {"MaxVectorSize", ">=32"})
1104 static Object[] test13bIL(int[] a, long[] b) {
1105 for (int i = 1; i < RANGE; i++) {
1106 a[i]++;
1107 b[i]++;
1108 }
1109 return new Object[]{ a, b };
1110 }
1111
// test13bIB: int + byte increment kernel with the loop starting at i = 1, so
// element 0 of both arrays is left untouched.
// The single IR rule (64-bit platform; avx2, asimd or rvv) requires at least one
// vector byte load, vector int load, ADD_VB, ADD_VI and vector store. It has no
// AlignVector condition and no vector-size constraint.
// Returns a and b wrapped in an Object[].
1112 @Test
1113 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
1114 IRNode.LOAD_VECTOR_I, "> 0",
1115 IRNode.ADD_VB, "> 0",
1116 IRNode.ADD_VI, "> 0",
1117 IRNode.STORE_VECTOR, "> 0"},
1118 applyIfPlatform = {"64-bit", "true"},
1119 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
1120 static Object[] test13bIB(int[] a, byte[] b) {
1121 for (int i = 1; i < RANGE; i++) {
1122 a[i]++;
1123 b[i]++;
1124 }
1125 return new Object[]{ a, b };
1126 }
1127
// test13bIS: int + short increment kernel with the loop starting at i = 1, so
// element 0 of both arrays is left untouched.
// The single IR rule (64-bit platform; avx2, asimd or rvv) requires at least one
// vector int load, vector short load, ADD_VI, ADD_VS and vector store. It has no
// AlignVector condition and no vector-size constraint.
// Returns a and b wrapped in an Object[].
1128 @Test
1129 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
1130 IRNode.LOAD_VECTOR_S, "> 0",
1131 IRNode.ADD_VI, "> 0",
1132 IRNode.ADD_VS, "> 0",
1133 IRNode.STORE_VECTOR, "> 0"},
1134 applyIfPlatform = {"64-bit", "true"},
1135 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
1136 static Object[] test13bIS(int[] a, short[] b) {
1137 for (int i = 1; i < RANGE; i++) {
1138 a[i]++;
1139 b[i]++;
1140 }
1141 return new Object[]{ a, b };
1142 }
1143
// test13bBSIL: byte + short + int + long increment kernel with the loop starting
// at i = 1, so element 0 of all four arrays is left untouched.
// The single IR rule (64-bit platform; avx2, asimd or rvv) requires at least one
// vector load and one vector add for each of the four types, plus at least one
// vector store. It has no AlignVector condition and no vector-size constraint.
// Returns a, b, c and d wrapped in an Object[].
1144 @Test
1145 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
1146 IRNode.LOAD_VECTOR_S, "> 0",
1147 IRNode.LOAD_VECTOR_I, "> 0",
1148 IRNode.LOAD_VECTOR_L, "> 0",
1149 IRNode.ADD_VB, "> 0",
1150 IRNode.ADD_VS, "> 0",
1151 IRNode.ADD_VI, "> 0",
1152 IRNode.ADD_VL, "> 0",
1153 IRNode.STORE_VECTOR, "> 0"},
1154 applyIfPlatform = {"64-bit", "true"},
1155 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
1156 static Object[] test13bBSIL(byte[] a, short[] b, int[] c, long[] d) {
1157 for (int i = 1; i < RANGE; i++) {
1158 a[i]++;
1159 b[i]++;
1160 c[i]++;
1161 d[i]++;
1162 }
1163 return new Object[]{ a, b, c, d };
1164 }
1165
// test14aB: in-place byte increment of 16 consecutive elements a[i+0] .. a[i+15]
// per iteration, with i advancing by 9. Consecutive iterations therefore touch
// overlapping ranges (elements i+9 .. i+15 are incremented again next iteration).
// Both IR rules (AlignVector=false and AlignVector=true; 64-bit platform; sse4.1,
// asimd or rvv) state the same expectation: no vector byte load, no ADD_VB and no
// vector store may be present.
// Returns a wrapped in an Object[].
1166 @Test
1167 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
1168 IRNode.ADD_VB, "= 0",
1169 IRNode.STORE_VECTOR, "= 0"},
1170 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
1171 applyIfPlatform = {"64-bit", "true"},
1172 applyIf = {"AlignVector", "false"})
1173 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
1174 IRNode.ADD_VB, "= 0",
1175 IRNode.STORE_VECTOR, "= 0"},
1176 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
1177 applyIfPlatform = {"64-bit", "true"},
1178 applyIf = {"AlignVector", "true"})
1179 static Object[] test14aB(byte[] a) {
1180 // non-power-of-2 stride
1181 for (int i = 0; i < RANGE-20; i+=9) {
1182 // Since the stride is shorter than the vector length, there will be always
1183 // partial overlap of loads with previous stores, this leads to failure in
1184 // store-to-load-forwarding -> vectorization not profitable.
1185 a[i+0]++;
1186 a[i+1]++;
1187 a[i+2]++;
1188 a[i+3]++;
1189 a[i+4]++;
1190 a[i+5]++;
1191 a[i+6]++;
1192 a[i+7]++;
1193 a[i+8]++;
1194 a[i+9]++;
1195 a[i+10]++;
1196 a[i+11]++;
1197 a[i+12]++;
1198 a[i+13]++;
1199 a[i+14]++;
1200 a[i+15]++;
1201 }
1202 return new Object[]{ a };
1203 }
1204
// test14bB: in-place byte increment of 16 consecutive elements a[i+0] .. a[i+15]
// per iteration, with i advancing by 3. Consecutive iterations therefore touch
// heavily overlapping ranges (elements i+3 .. i+15 are incremented again).
// Both IR rules (AlignVector=false and AlignVector=true; 64-bit platform; sse4.1,
// asimd or rvv) state the same expectation: no vector byte load, no ADD_VB and no
// vector store may be present.
// Returns a wrapped in an Object[].
1205 @Test
1206 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
1207 IRNode.ADD_VB, "= 0",
1208 IRNode.STORE_VECTOR, "= 0"},
1209 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
1210 applyIfPlatform = {"64-bit", "true"},
1211 applyIf = {"AlignVector", "false"})
1212 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
1213 IRNode.ADD_VB, "= 0",
1214 IRNode.STORE_VECTOR, "= 0"},
1215 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
1216 applyIfPlatform = {"64-bit", "true"},
1217 applyIf = {"AlignVector", "true"})
1218 static Object[] test14bB(byte[] a) {
1219 // non-power-of-2 stride
1220 for (int i = 0; i < RANGE-20; i+=3) {
1221 // Since the stride is shorter than the vector length, there will be always
1222 // partial overlap of loads with previous stores, this leads to failure in
1223 // store-to-load-forwarding -> vectorization not profitable.
1224 a[i+0]++;
1225 a[i+1]++;
1226 a[i+2]++;
1227 a[i+3]++;
1228 a[i+4]++;
1229 a[i+5]++;
1230 a[i+6]++;
1231 a[i+7]++;
1232 a[i+8]++;
1233 a[i+9]++;
1234 a[i+10]++;
1235 a[i+11]++;
1236 a[i+12]++;
1237 a[i+13]++;
1238 a[i+14]++;
1239 a[i+15]++;
1240 }
1241 return new Object[]{ a };
1242 }
1243
// test14cB: in-place byte increment of 16 consecutive elements a[i+0] .. a[i+15]
// per iteration, with i advancing by 5. Consecutive iterations therefore touch
// overlapping ranges (elements i+5 .. i+15 are incremented again next iteration).
// Both IR rules (AlignVector=false and AlignVector=true; 64-bit platform; sse4.1,
// asimd or rvv) state the same expectation: no vector byte load, no ADD_VB and no
// vector store may be present.
// Returns a wrapped in an Object[].
1244 @Test
1245 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
1246 IRNode.ADD_VB, "= 0",
1247 IRNode.STORE_VECTOR, "= 0"},
1248 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
1249 applyIfPlatform = {"64-bit", "true"},
1250 applyIf = {"AlignVector", "false"})
1251 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
1252 IRNode.ADD_VB, "= 0",
1253 IRNode.STORE_VECTOR, "= 0"},
1254 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
1255 applyIfPlatform = {"64-bit", "true"},
1256 applyIf = {"AlignVector", "true"})
1257 static Object[] test14cB(byte[] a) {
1258 // non-power-of-2 stride
1259 for (int i = 0; i < RANGE-20; i+=5) {
1260 // Since the stride is shorter than the vector length, there will be always
1261 // partial overlap of loads with previous stores, this leads to failure in
1262 // store-to-load-forwarding -> vectorization not profitable.
1263 a[i+0]++;
1264 a[i+1]++;
1265 a[i+2]++;
1266 a[i+3]++;
1267 a[i+4]++;
1268 a[i+5]++;
1269 a[i+6]++;
1270 a[i+7]++;
1271 a[i+8]++;
1272 a[i+9]++;
1273 a[i+10]++;
1274 a[i+11]++;
1275 a[i+12]++;
1276 a[i+13]++;
1277 a[i+14]++;
1278 a[i+15]++;
1279 }
1280 return new Object[]{ a };
1281 }
1282
// test14dB: in-place byte increment of 8 consecutive elements a[i+0] .. a[i+7]
// per iteration, with i advancing by 9. The 8-element groups of successive
// iterations do not overlap (one element, i+8, is skipped between groups).
// Two IR rules, both for 64-bit platforms with sse4.1, asimd or rvv:
//   - AlignVector=false: at least one vector byte load and ADD_VB with vector size
//     min(max_byte, 8), plus at least one vector store.
//   - AlignVector=true:  none of those vector nodes may be present.
// Returns a wrapped in an Object[].
1283 @Test
1284 @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE + "min(max_byte, 8)", "> 0",
1285 IRNode.ADD_VB, IRNode.VECTOR_SIZE + "min(max_byte, 8)", "> 0",
1286 IRNode.STORE_VECTOR, "> 0"},
1287 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
1288 applyIfPlatform = {"64-bit", "true"},
1289 applyIf = {"AlignVector", "false"})
1290 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
1291 IRNode.ADD_VB, "= 0",
1292 IRNode.STORE_VECTOR, "= 0"},
1293 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
1294 applyIfPlatform = {"64-bit", "true"},
1295 applyIf = {"AlignVector", "true"})
1296 static Object[] test14dB(byte[] a) {
1297 // non-power-of-2 stride
1298 for (int i = 0; i < RANGE-20; i+=9) {
1299 a[i+0]++;
1300 a[i+1]++;
1301 a[i+2]++;
1302 a[i+3]++;
1303 a[i+4]++;
1304 a[i+5]++;
1305 a[i+6]++;
1306 a[i+7]++;
1307 }
1308 return new Object[]{ a };
1309 }
1310
// test14eB: in-place byte increment of 8 consecutive elements a[i+0] .. a[i+7]
// per iteration, with i advancing by 11 and the loop bounded by RANGE-32. The
// 8-element groups of successive iterations do not overlap (three elements are
// skipped between groups).
// Two IR rules, both for 64-bit platforms with sse4.1, asimd or rvv:
//   - AlignVector=false: at least one vector byte load and ADD_VB with vector size
//     min(max_byte, 8), plus at least one vector store.
//   - AlignVector=true:  none of those vector nodes may be present.
// Returns a wrapped in an Object[].
1311 @Test
1312 @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE + "min(max_byte, 8)", "> 0",
1313 IRNode.ADD_VB, IRNode.VECTOR_SIZE + "min(max_byte, 8)", "> 0",
1314 IRNode.STORE_VECTOR, "> 0"},
1315 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
1316 applyIfPlatform = {"64-bit", "true"},
1317 applyIf = {"AlignVector", "false"})
1318 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
1319 IRNode.ADD_VB, "= 0",
1320 IRNode.STORE_VECTOR, "= 0"},
1321 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
1322 applyIfPlatform = {"64-bit", "true"},
1323 applyIf = {"AlignVector", "true"})
1324 static Object[] test14eB(byte[] a) {
1325 // non-power-of-2 stride
1326 for (int i = 0; i < RANGE-32; i+=11) {
1327 a[i+0]++;
1328 a[i+1]++;
1329 a[i+2]++;
1330 a[i+3]++;
1331 a[i+4]++;
1332 a[i+5]++;
1333 a[i+6]++;
1334 a[i+7]++;
1335 }
1336 return new Object[]{ a };
1337 }
1338
// test14fB: in-place byte increment of 8 consecutive elements a[i+0] .. a[i+7]
// per iteration, with i advancing by 12 and the loop bounded by RANGE-40. The
// 8-element groups of successive iterations do not overlap (four elements are
// skipped between groups).
// Two IR rules, both for 64-bit platforms with sse4.1, asimd or rvv:
//   - AlignVector=false: at least one vector byte load and ADD_VB with vector size
//     min(max_byte, 8), plus at least one vector store.
//   - AlignVector=true:  none of those vector nodes may be present.
// Returns a wrapped in an Object[].
1339 @Test
1340 @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE + "min(max_byte, 8)", "> 0",
1341 IRNode.ADD_VB, IRNode.VECTOR_SIZE + "min(max_byte, 8)", "> 0",
1342 IRNode.STORE_VECTOR, "> 0"},
1343 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
1344 applyIfPlatform = {"64-bit", "true"},
1345 applyIf = {"AlignVector", "false"})
1346 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
1347 IRNode.ADD_VB, "= 0",
1348 IRNode.STORE_VECTOR, "= 0"},
1349 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
1350 applyIfPlatform = {"64-bit", "true"},
1351 applyIf = {"AlignVector", "true"})
1352 static Object[] test14fB(byte[] a) {
1353 // non-power-of-2 stride
1354 for (int i = 0; i < RANGE-40; i+=12) {
1355 a[i+0]++;
1356 a[i+1]++;
1357 a[i+2]++;
1358 a[i+3]++;
1359 a[i+4]++;
1360 a[i+5]++;
1361 a[i+6]++;
1362 a[i+7]++;
1363 }
1364 return new Object[]{ a };
1365 }
1366
// test15aB: in-place byte increment of 16 consecutive elements starting at index
// 53*i, for i in [0, RANGE/64-20). The induction variable advances by 1 but is
// multiplied by 53 in every index.
// There is deliberately no @IR rule (see the comment below), so no vectorization
// expectation is stated here.
// NOTE(review): without an IR rule this presumably relies on the harness
// comparing the returned array against a reference run - confirm.
// Returns a wrapped in an Object[].
1367 @Test
1368 // IR rules difficult because of modulo wrapping with offset after peeling.
1369 static Object[] test15aB(byte[] a) {
1370 // non-power-of-2 scale
1371 for (int i = 0; i < RANGE/64-20; i++) {
1372 a[53*i+0]++;
1373 a[53*i+1]++;
1374 a[53*i+2]++;
1375 a[53*i+3]++;
1376 a[53*i+4]++;
1377 a[53*i+5]++;
1378 a[53*i+6]++;
1379 a[53*i+7]++;
1380 a[53*i+8]++;
1381 a[53*i+9]++;
1382 a[53*i+10]++;
1383 a[53*i+11]++;
1384 a[53*i+12]++;
1385 a[53*i+13]++;
1386 a[53*i+14]++;
1387 a[53*i+15]++;
1388 }
1389 return new Object[]{ a };
1390 }
1391
// test15bB: in-place byte increment of 16 consecutive elements starting at index
// 25*i, for i in [0, RANGE/64-20). The induction variable advances by 1 but is
// multiplied by 25 in every index.
// There is deliberately no @IR rule (see the comment below), so no vectorization
// expectation is stated here.
// NOTE(review): without an IR rule this presumably relies on the harness
// comparing the returned array against a reference run - confirm.
// Returns a wrapped in an Object[].
1392 @Test
1393 // IR rules difficult because of modulo wrapping with offset after peeling.
1394 static Object[] test15bB(byte[] a) {
1395 // non-power-of-2 scale
1396 for (int i = 0; i < RANGE/64-20; i++) {
1397 a[25*i+0]++;
1398 a[25*i+1]++;
1399 a[25*i+2]++;
1400 a[25*i+3]++;
1401 a[25*i+4]++;
1402 a[25*i+5]++;
1403 a[25*i+6]++;
1404 a[25*i+7]++;
1405 a[25*i+8]++;
1406 a[25*i+9]++;
1407 a[25*i+10]++;
1408 a[25*i+11]++;
1409 a[25*i+12]++;
1410 a[25*i+13]++;
1411 a[25*i+14]++;
1412 a[25*i+15]++;
1413 }
1414 return new Object[]{ a };
1415 }
1416
// test15cB: in-place byte increment of 16 consecutive elements starting at index
// 19*i, for i in [0, RANGE/64-20). The induction variable advances by 1 but is
// multiplied by 19 in every index.
// There is deliberately no @IR rule (see the comment below), so no vectorization
// expectation is stated here.
// NOTE(review): without an IR rule this presumably relies on the harness
// comparing the returned array against a reference run - confirm.
// Returns a wrapped in an Object[].
1417 @Test
1418 // IR rules difficult because of modulo wrapping with offset after peeling.
1419 static Object[] test15cB(byte[] a) {
1420 // non-power-of-2 scale
1421 for (int i = 0; i < RANGE/64-20; i++) {
1422 a[19*i+0]++;
1423 a[19*i+1]++;
1424 a[19*i+2]++;
1425 a[19*i+3]++;
1426 a[19*i+4]++;
1427 a[19*i+5]++;
1428 a[19*i+6]++;
1429 a[19*i+7]++;
1430 a[19*i+8]++;
1431 a[19*i+9]++;
1432 a[19*i+10]++;
1433 a[19*i+11]++;
1434 a[19*i+12]++;
1435 a[19*i+13]++;
1436 a[19*i+14]++;
1437 a[19*i+15]++;
1438 }
1439 return new Object[]{ a };
1440 }
1441
// test16a: per iteration, increments 15 consecutive bytes a[2*i+0] .. a[2*i+14]
// and 4 consecutive shorts b[2*i+0] .. b[2*i+3], for i in [0, RANGE/2-20). Both
// arrays are indexed with scale 2, so successive iterations overlap heavily.
// No @IR rule is attached, so no vectorization expectation is stated here.
// NOTE(review): the "infinite loop issues" remark below presumably refers to a
// past compiler problem triggered by this shape; the block does not say which.
// Returns a and b wrapped in an Object[].
1442 @Test
1443 static Object[] test16a(byte[] a, short[] b) {
1444 // infinite loop issues
1445 for (int i = 0; i < RANGE/2-20; i++) {
1446 a[2*i+0]++;
1447 a[2*i+1]++;
1448 a[2*i+2]++;
1449 a[2*i+3]++;
1450 a[2*i+4]++;
1451 a[2*i+5]++;
1452 a[2*i+6]++;
1453 a[2*i+7]++;
1454 a[2*i+8]++;
1455 a[2*i+9]++;
1456 a[2*i+10]++;
1457 a[2*i+11]++;
1458 a[2*i+12]++;
1459 a[2*i+13]++;
1460 a[2*i+14]++;
1461
1462 b[2*i+0]++;
1463 b[2*i+1]++;
1464 b[2*i+2]++;
1465 b[2*i+3]++;
1466 }
1467 return new Object[]{ a, b };
1468 }
1469
// test16b: byte-only variant of the scale-2 kernel: per iteration, increments 15
// consecutive bytes a[2*i+0] .. a[2*i+14], for i in [0, RANGE/2-20). Successive
// iterations overlap heavily because the start index only advances by 2.
// No @IR rule is attached, so no vectorization expectation is stated here.
// NOTE(review): the "infinite loop issues" remark below presumably refers to a
// past compiler problem triggered by this shape; the block does not say which.
// Returns a wrapped in an Object[].
1470 @Test
1471 static Object[] test16b(byte[] a) {
1472 // infinite loop issues
1473 for (int i = 0; i < RANGE/2-20; i++) {
1474 a[2*i+0]++;
1475 a[2*i+1]++;
1476 a[2*i+2]++;
1477 a[2*i+3]++;
1478 a[2*i+4]++;
1479 a[2*i+5]++;
1480 a[2*i+6]++;
1481 a[2*i+7]++;
1482 a[2*i+8]++;
1483 a[2*i+9]++;
1484 a[2*i+10]++;
1485 a[2*i+11]++;
1486 a[2*i+12]++;
1487 a[2*i+13]++;
1488 a[2*i+14]++;
1489 }
1490 return new Object[]{ a };
1491 }
1492
// test17a: increments every element of a long array through Unsafe instead of
// array indexing. The address for element i is ARRAY_LONG_BASE_OFFSET + 8*i, read
// with getLongUnaligned and written back (+1) with putLongUnaligned.
// The single IR rule (64-bit platform; sse4.1, asimd or rvv) requires at least one
// vector long load, ADD_VL and vector store, with no AlignVector condition.
// Returns a wrapped in an Object[].
1493 @Test
1494 @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0",
1495 IRNode.ADD_VL, "> 0",
1496 IRNode.STORE_VECTOR, "> 0"},
1497 applyIfPlatform = {"64-bit", "true"},
1498 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
1499 static Object[] test17a(long[] a) {
1500 // Unsafe: vectorizes with profiling (not xcomp)
1501 for (int i = 0; i < RANGE; i++) {
1502 long adr = UNSAFE.ARRAY_LONG_BASE_OFFSET + 8L * i;
1503 long v = UNSAFE.getLongUnaligned(a, adr);
1504 UNSAFE.putLongUnaligned(a, adr, v + 1);
1505 }
1506 return new Object[]{ a };
1507 }
1508
// test17b: Unsafe long increment where every address is shifted by one byte:
// ARRAY_LONG_BASE_OFFSET + 8*i + 1, so each 8-byte access straddles two array
// elements. The loop stops at RANGE-1 so the last access stays inside the array
// payload.
// There is deliberately no @IR rule (see the comment below).
// Returns a wrapped in an Object[].
1509 @Test
1510 // Difficult to write good IR rule. Modulo calculus overflow can create non-power-of-2 packs.
1511 static Object[] test17b(long[] a) {
1512 // Not alignable
1513 for (int i = 0; i < RANGE-1; i++) {
1514 long adr = UNSAFE.ARRAY_LONG_BASE_OFFSET + 8L * i + 1;
1515 long v = UNSAFE.getLongUnaligned(a, adr);
1516 UNSAFE.putLongUnaligned(a, adr, v + 1);
1517 }
1518 return new Object[]{ a };
1519 }
1520
// test17c: Unsafe long increment of two adjacent longs per iteration (byte
// offsets adr+0 and adr+8), with i advancing by 4 elements, so elements i and i+1
// are incremented and elements i+2 and i+3 are skipped. Both loads are issued
// before both stores.
// The single IR rule (64-bit platform; sse4.1, asimd or rvv; MaxVectorSize >= 32)
// requires at least one vector long load and ADD_VL of size VECTOR_SIZE_2, plus at
// least one vector store. It has no AlignVector condition.
// Returns a wrapped in an Object[].
1521 @Test
1522 @IR(counts = {IRNode.LOAD_VECTOR_L, IRNode.VECTOR_SIZE_2, "> 0",
1523 IRNode.ADD_VL, IRNode.VECTOR_SIZE_2, "> 0",
1524 IRNode.STORE_VECTOR, "> 0"},
1525 applyIf = {"MaxVectorSize", ">=32"},
1526 applyIfPlatform = {"64-bit", "true"},
1527 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
1528 static Object[] test17c(long[] a) {
1529 // Unsafe: aligned vectorizes
1530 for (int i = 0; i < RANGE-1; i+=4) {
1531 long adr = UNSAFE.ARRAY_LONG_BASE_OFFSET + 8L * i;
1532 long v0 = UNSAFE.getLongUnaligned(a, adr + 0);
1533 long v1 = UNSAFE.getLongUnaligned(a, adr + 8);
1534 UNSAFE.putLongUnaligned(a, adr + 0, v0 + 1);
1535 UNSAFE.putLongUnaligned(a, adr + 8, v1 + 1);
1536 }
1537 return new Object[]{ a };
1538 }
1539
// test17d: same two-adjacent-longs Unsafe kernel with i advancing by 4, but every
// address is shifted by one byte (ARRAY_LONG_BASE_OFFSET + 8*i + 1), so each
// 8-byte access straddles two array elements.
// Two IR rules, both for 64-bit platforms:
//   - AlignVector=false and MaxVectorSize >= 64 (avx512, asimd or rvv): at least
//     one vector long load and ADD_VL of size VECTOR_SIZE_2, plus a vector store.
//   - AlignVector=true (sse4.1, asimd or rvv): none of those vector nodes may be
//     present.
// The comment between the two rules explains the MaxVectorSize >= 64 requirement.
// Returns a wrapped in an Object[].
1540 @Test
1541 @IR(counts = {IRNode.LOAD_VECTOR_L, IRNode.VECTOR_SIZE_2, "> 0",
1542 IRNode.ADD_VL, IRNode.VECTOR_SIZE_2, "> 0",
1543 IRNode.STORE_VECTOR, "> 0"},
1544 applyIfCPUFeatureOr = {"avx512", "true", "asimd", "true", "rvv", "true"},
1545 applyIfPlatform = {"64-bit", "true"},
1546 applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=64"})
1547 // Ensure vector width is large enough to fit 64 byte for longs:
1548 // The offsets are: 25, 33, 57, 65
1549 // In modulo 32: 25, 1, 25, 1 -> does not vectorize
1550 // In modulo 64: 25, 33, 57, 1 -> at least first pair vectorizes
1551 // This problem is because we compute modulo vector width in memory_alignment.
1552 @IR(counts = {IRNode.LOAD_VECTOR_L, "= 0",
1553 IRNode.ADD_VL, "= 0",
1554 IRNode.STORE_VECTOR, "= 0"},
1555 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
1556 applyIfPlatform = {"64-bit", "true"},
1557 applyIf = {"AlignVector", "true"})
1558 static Object[] test17d(long[] a) {
1559 // Not alignable
1560 for (int i = 0; i < RANGE-1; i+=4) {
1561 long adr = UNSAFE.ARRAY_LONG_BASE_OFFSET + 8L * i + 1;
1562 long v0 = UNSAFE.getLongUnaligned(a, adr + 0);
1563 long v1 = UNSAFE.getLongUnaligned(a, adr + 8);
1564 UNSAFE.putLongUnaligned(a, adr + 0, v0 + 1);
1565 UNSAFE.putLongUnaligned(a, adr + 8, v1 + 1);
1566 }
1567 return new Object[]{ a };
1568 }
1569
// test18a: each iteration stores the constant 1 to the fixed byte slots a[0] and
// a[1] (indices that do not depend on i) around a store of 2 to b[i], for i in
// [0, RANGE). The statement order a[0], b[i], a[1] is part of the test shape.
// No @IR rule is attached, so no vectorization expectation is stated here.
// Returns a and b wrapped in an Object[].
1570 @Test
1571 static Object[] test18a(byte[] a, int[] b) {
1572 // scale = 0 --> no iv
1573 for (int i = 0; i < RANGE; i++) {
1574 a[0] = 1;
1575 b[i] = 2;
1576 a[1] = 1;
1577 }
1578 return new Object[]{ a, b };
1579 }
1580
// test18b: same shape as the a[0]/a[1] variant but with the fixed byte slots
// shifted by one: each iteration stores 1 to a[1] and a[2] (indices that do not
// depend on i) around a store of 2 to b[i], for i in [0, RANGE).
// No @IR rule is attached, so no vectorization expectation is stated here.
// Returns a and b wrapped in an Object[].
1581 @Test
1582 static Object[] test18b(byte[] a, int[] b) {
1583 // scale = 0 --> no iv
1584 for (int i = 0; i < RANGE; i++) {
1585 a[1] = 1;
1586 b[i] = 2;
1587 a[2] = 1;
1588 }
1589 return new Object[]{ a, b };
1590 }
1591
// test19: int array copy driven by a down-counting loop. i runs from 5000 down to
// 1 and the accessed index is RANGE_FINAL - i, so the indices visited ascend from
// RANGE_FINAL-5000 to RANGE_FINAL-1.
// RANGE_FINAL is declared outside this block; both arrays must have at least
// RANGE_FINAL elements and RANGE_FINAL must be >= 5000 for the accesses to be in
// bounds - NOTE(review): confirm against the declaration.
// No @IR rule is attached, so no vectorization expectation is stated here.
// Returns a and b wrapped in an Object[].
1592 @Test
1593 static Object[] test19(int[] a, int[] b) {
1594 for (int i = 5000; i > 0; i--) {
1595 a[RANGE_FINAL - i] = b[RANGE_FINAL - i];
1596 }
1597 return new Object[]{ a, b };
1598 }
1599
// test20: in-place byte increment of the four consecutive elements
// a[2*i+30] .. a[2*i+33] per iteration, for i in [1, RANGE/2-50). The index uses
// scale 2 plus a constant offset of 30 on top of the per-statement offset, and
// successive iterations overlap by two elements.
// No @IR rule is attached, so no vectorization expectation is stated here.
// Returns a wrapped in an Object[].
1600 @Test
1601 static Object[] test20(byte[] a) {
1602 // Example where it is easy to pass alignment check,
1603 // but used to fail the alignment calculation
1604 for (int i = 1; i < RANGE/2-50; i++) {
1605 a[2*i+0+30]++;
1606 a[2*i+1+30]++;
1607 a[2*i+2+30]++;
1608 a[2*i+3+30]++;
1609 }
1610 return new Object[]{ a };
1611 }
1612 }