1 /*
2 * Copyright (c) 2024, 2025, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 */
23
24 package compiler.loopopts.superword;
25
26 import compiler.lib.ir_framework.*;
27 import jdk.test.lib.Utils;
28 import jdk.test.whitebox.WhiteBox;
29 import jdk.internal.misc.Unsafe;
30 import java.lang.reflect.Array;
31 import java.util.Map;
32 import java.util.HashMap;
33 import java.util.Random;
34 import java.nio.ByteOrder;
35
36 /*
37 * @test id=NoAlignVector
38 * @bug 8310190
39 * @summary Test AlignVector with various loop init, stride, scale, invar, etc.
40 * @modules java.base/jdk.internal.misc
41 * @library /test/lib /
42 * @run driver compiler.loopopts.superword.TestAlignVector NoAlignVector
43 */
44
45 /*
46 * @test id=AlignVector
47 * @bug 8310190
48 * @summary Test AlignVector with various loop init, stride, scale, invar, etc.
49 * @modules java.base/jdk.internal.misc
50 * @library /test/lib /
51 * @run driver compiler.loopopts.superword.TestAlignVector AlignVector
52 */
53
54 /*
55 * @test id=VerifyAlignVector
56 * @bug 8310190
57 * @summary Test AlignVector with various loop init, stride, scale, invar, etc.
58 * @modules java.base/jdk.internal.misc
59 * @library /test/lib /
60 * @run driver compiler.loopopts.superword.TestAlignVector VerifyAlignVector
61 */
62
63 /*
64 * @test id=NoAlignVector-COH
65 * @bug 8310190
66 * @summary Test AlignVector with various loop init, stride, scale, invar, etc.
67 * @modules java.base/jdk.internal.misc
68 * @library /test/lib /
69 * @run driver compiler.loopopts.superword.TestAlignVector NoAlignVector-COH
70 */
71
72 /*
73 * @test id=VerifyAlignVector-COH
74 * @bug 8310190
75 * @summary Test AlignVector with various loop init, stride, scale, invar, etc.
76 * @modules java.base/jdk.internal.misc
77 * @library /test/lib /
78 * @run driver compiler.loopopts.superword.TestAlignVector VerifyAlignVector-COH
79 */
80
81 public class TestAlignVector {
82 static int RANGE = 1024*8;
    static final int RANGE_FINAL = 1024*8;
84 private static final Unsafe UNSAFE = Unsafe.getUnsafe();
85 private static final Random RANDOM = Utils.getRandomInstance();
86
87 // Inputs
88 byte[] aB;
89 byte[] bB;
90 byte mB = (byte)31;
91 short[] aS;
92 short[] bS;
93 short mS = (short)0xF0F0;
94 int[] aI;
95 int[] bI;
96 int mI = 0xF0F0F0F0;
97 long[] aL;
98 long[] bL;
99 long mL = 0xF0F0F0F0F0F0F0F0L;
100
101 // List of tests
    Map<String,TestFunction> tests = new HashMap<>();
103
104 // List of gold, the results from the first run before compilation
    Map<String,Object[]> golds = new HashMap<>();
106
107 interface TestFunction {
108 Object[] run();
109 }
110
111 public static void main(String[] args) {
112 TestFramework framework = new TestFramework(TestAlignVector.class);
113 framework.addFlags("--add-modules", "java.base", "--add-exports", "java.base/jdk.internal.misc=ALL-UNNAMED",
114 "-XX:+IgnoreUnrecognizedVMOptions", "-XX:LoopUnrollLimit=250");
115
116 switch (args[0]) {
117 case "NoAlignVector" -> { framework.addFlags("-XX:-UseCompactObjectHeaders", "-XX:-AlignVector"); }
118 case "AlignVector" -> { framework.addFlags("-XX:-UseCompactObjectHeaders", "-XX:+AlignVector"); }
119 case "VerifyAlignVector" -> { framework.addFlags("-XX:-UseCompactObjectHeaders", "-XX:+AlignVector", "-XX:+IgnoreUnrecognizedVMOptions", "-XX:+VerifyAlignVector"); }
120 case "NoAlignVector-COH" -> { framework.addFlags("-XX:+UseCompactObjectHeaders", "-XX:-AlignVector"); }
121 case "VerifyAlignVector-COH" -> { framework.addFlags("-XX:+UseCompactObjectHeaders", "-XX:+AlignVector", "-XX:+IgnoreUnrecognizedVMOptions", "-XX:+VerifyAlignVector"); }
122 default -> { throw new RuntimeException("Test argument not recognized: " + args[0]); }
123 }
124 framework.start();
125 }
126
127 public TestAlignVector() {
128 // Generate input once
129 aB = generateB();
130 bB = generateB();
131 aS = generateS();
132 bS = generateS();
133 aI = generateI();
134 bI = generateI();
135 aL = generateL();
136 bL = generateL();
137
138 // Add all tests to list
139 tests.put("test0", () -> { return test0(aB.clone(), bB.clone(), mB); });
140 tests.put("test1a", () -> { return test1a(aB.clone(), bB.clone(), mB); });
141 tests.put("test1b", () -> { return test1b(aB.clone(), bB.clone(), mB); });
142 tests.put("test2", () -> { return test2(aB.clone(), bB.clone(), mB); });
143 tests.put("test3", () -> { return test3(aB.clone(), bB.clone(), mB); });
144 tests.put("test4", () -> { return test4(aB.clone(), bB.clone(), mB); });
145 tests.put("test5", () -> { return test5(aB.clone(), bB.clone(), mB, 0); });
146 tests.put("test6", () -> { return test6(aB.clone(), bB.clone(), mB); });
147 tests.put("test7", () -> { return test7(aS.clone(), bS.clone(), mS); });
148 tests.put("test8", () -> { return test8(aB.clone(), bB.clone(), mB, 0); });
149 tests.put("test8", () -> { return test8(aB.clone(), bB.clone(), mB, 1); });
150 tests.put("test9", () -> { return test9(aB.clone(), bB.clone(), mB); });
151
152 tests.put("test10a", () -> { return test10a(aB.clone(), bB.clone(), mB); });
153 tests.put("test10b", () -> { return test10b(aB.clone(), bB.clone(), mB); });
154 tests.put("test10c", () -> { return test10c(aS.clone(), bS.clone(), mS); });
155 tests.put("test10d", () -> { return test10d(aS.clone(), bS.clone(), mS); });
156 tests.put("test10e", () -> { return test10e(aS.clone(), bS.clone(), mS); });
157
158 tests.put("test11aB", () -> { return test11aB(aB.clone(), bB.clone(), mB); });
159 tests.put("test11aS", () -> { return test11aS(aS.clone(), bS.clone(), mS); });
160 tests.put("test11aI", () -> { return test11aI(aI.clone(), bI.clone(), mI); });
161 tests.put("test11aL", () -> { return test11aL(aL.clone(), bL.clone(), mL); });
162
163 tests.put("test11bB", () -> { return test11bB(aB.clone(), bB.clone(), mB); });
164 tests.put("test11bS", () -> { return test11bS(aS.clone(), bS.clone(), mS); });
165 tests.put("test11bI", () -> { return test11bI(aI.clone(), bI.clone(), mI); });
166 tests.put("test11bL", () -> { return test11bL(aL.clone(), bL.clone(), mL); });
167
168 tests.put("test11cB", () -> { return test11cB(aB.clone(), bB.clone(), mB); });
169 tests.put("test11cS", () -> { return test11cS(aS.clone(), bS.clone(), mS); });
170 tests.put("test11cI", () -> { return test11cI(aI.clone(), bI.clone(), mI); });
171 tests.put("test11cL", () -> { return test11cL(aL.clone(), bL.clone(), mL); });
172
173 tests.put("test11dB", () -> { return test11dB(aB.clone(), bB.clone(), mB, 0); });
174 tests.put("test11dS", () -> { return test11dS(aS.clone(), bS.clone(), mS, 0); });
175 tests.put("test11dI", () -> { return test11dI(aI.clone(), bI.clone(), mI, 0); });
176 tests.put("test11dL", () -> { return test11dL(aL.clone(), bL.clone(), mL, 0); });
177
178 tests.put("test12", () -> { return test12(aB.clone(), bB.clone(), mB); });
179
180 tests.put("test13aIL", () -> { return test13aIL(aI.clone(), aL.clone()); });
181 tests.put("test13aIB", () -> { return test13aIB(aI.clone(), aB.clone()); });
182 tests.put("test13aIS", () -> { return test13aIS(aI.clone(), aS.clone()); });
183 tests.put("test13aBSIL", () -> { return test13aBSIL(aB.clone(), aS.clone(), aI.clone(), aL.clone()); });
184
185 tests.put("test13bIL", () -> { return test13bIL(aI.clone(), aL.clone()); });
186 tests.put("test13bIB", () -> { return test13bIB(aI.clone(), aB.clone()); });
187 tests.put("test13bIS", () -> { return test13bIS(aI.clone(), aS.clone()); });
188 tests.put("test13bBSIL", () -> { return test13bBSIL(aB.clone(), aS.clone(), aI.clone(), aL.clone()); });
189
190 tests.put("test14aB", () -> { return test14aB(aB.clone()); });
191 tests.put("test14bB", () -> { return test14bB(aB.clone()); });
192 tests.put("test14cB", () -> { return test14cB(aB.clone()); });
193 tests.put("test14dB", () -> { return test14dB(aB.clone()); });
194 tests.put("test14eB", () -> { return test14eB(aB.clone()); });
195 tests.put("test14fB", () -> { return test14fB(aB.clone()); });
196
197 tests.put("test15aB", () -> { return test15aB(aB.clone()); });
198 tests.put("test15bB", () -> { return test15bB(aB.clone()); });
199 tests.put("test15cB", () -> { return test15cB(aB.clone()); });
200
201 tests.put("test16a", () -> { return test16a(aB.clone(), aS.clone()); });
202 tests.put("test16b", () -> { return test16b(aB.clone()); });
203
204 tests.put("test17a", () -> { return test17a(aL.clone()); });
205 tests.put("test17b", () -> { return test17b(aL.clone()); });
206 tests.put("test17c", () -> { return test17c(aL.clone()); });
207 tests.put("test17d", () -> { return test17d(aL.clone()); });
208
209 tests.put("test18a", () -> { return test18a(aB.clone(), aI.clone()); });
210 tests.put("test18b", () -> { return test18b(aB.clone(), aI.clone()); });
211
212 tests.put("test19", () -> { return test19(aI.clone(), bI.clone()); });
213 tests.put("test20", () -> { return test20(aB.clone()); });
214
215 // Compute gold value for all test methods before compilation
216 for (Map.Entry<String,TestFunction> entry : tests.entrySet()) {
217 String name = entry.getKey();
218 TestFunction test = entry.getValue();
219 Object[] gold = test.run();
220 golds.put(name, gold);
221 }
222 }
223
224 @Warmup(100)
225 @Run(test = {"test0",
226 "test1a",
227 "test1b",
228 "test2",
229 "test3",
230 "test4",
231 "test5",
232 "test6",
233 "test7",
234 "test8",
235 "test9",
236 "test10a",
237 "test10b",
238 "test10c",
239 "test10d",
240 "test10e",
241 "test11aB",
242 "test11aS",
243 "test11aI",
244 "test11aL",
245 "test11bB",
246 "test11bS",
247 "test11bI",
248 "test11bL",
249 "test11cB",
250 "test11cS",
251 "test11cI",
252 "test11cL",
253 "test11dB",
254 "test11dS",
255 "test11dI",
256 "test11dL",
257 "test12",
258 "test13aIL",
259 "test13aIB",
260 "test13aIS",
261 "test13aBSIL",
262 "test13bIL",
263 "test13bIB",
264 "test13bIS",
265 "test13bBSIL",
266 "test14aB",
267 "test14bB",
268 "test14cB",
269 "test14dB",
270 "test14eB",
271 "test14fB",
272 "test15aB",
273 "test15bB",
274 "test15cB",
275 "test16a",
276 "test16b",
277 "test17a",
278 "test17b",
279 "test17c",
280 "test17d",
281 "test18a",
282 "test18b",
283 "test19",
284 "test20"})
285 public void runTests() {
286 for (Map.Entry<String,TestFunction> entry : tests.entrySet()) {
287 String name = entry.getKey();
288 TestFunction test = entry.getValue();
289 // Recall gold value from before compilation
290 Object[] gold = golds.get(name);
291 // Compute new result
292 Object[] result = test.run();
293 // Compare gold and new result
294 verify(name, gold, result);
295 }
296 }
297
298 static byte[] generateB() {
299 byte[] a = new byte[RANGE];
300 for (int i = 0; i < a.length; i++) {
301 a[i] = (byte)RANDOM.nextInt();
302 }
303 return a;
304 }
305
306 static short[] generateS() {
307 short[] a = new short[RANGE];
308 for (int i = 0; i < a.length; i++) {
309 a[i] = (short)RANDOM.nextInt();
310 }
311 return a;
312 }
313
314 static int[] generateI() {
315 int[] a = new int[RANGE];
316 for (int i = 0; i < a.length; i++) {
317 a[i] = RANDOM.nextInt();
318 }
319 return a;
320 }
321
322 static long[] generateL() {
323 long[] a = new long[RANGE];
324 for (int i = 0; i < a.length; i++) {
325 a[i] = RANDOM.nextLong();
326 }
327 return a;
328 }
329
330 static void verify(String name, Object[] gold, Object[] result) {
331 if (gold.length != result.length) {
332 throw new RuntimeException("verify " + name + ": not the same number of outputs: gold.length = " +
333 gold.length + ", result.length = " + result.length);
334 }
335 for (int i = 0; i < gold.length; i++) {
336 Object g = gold[i];
337 Object r = result[i];
338 if (g.getClass() != r.getClass() || !g.getClass().isArray() || !r.getClass().isArray()) {
                throw new RuntimeException("verify " + name + ": must both be arrays of the same type:" +
340 " gold[" + i + "].getClass() = " + g.getClass().getSimpleName() +
341 " result[" + i + "].getClass() = " + r.getClass().getSimpleName());
342 }
343 if (g == r) {
344 throw new RuntimeException("verify " + name + ": should be two separate arrays (with identical content):" +
345 " gold[" + i + "] == result[" + i + "]");
346 }
347 if (Array.getLength(g) != Array.getLength(r)) {
348 throw new RuntimeException("verify " + name + ": arrays must have same length:" +
349 " gold[" + i + "].length = " + Array.getLength(g) +
350 " result[" + i + "].length = " + Array.getLength(r));
351 }
            Class<?> c = g.getClass().getComponentType();
353 if (c == byte.class) {
354 verifyB(name, i, (byte[])g, (byte[])r);
355 } else if (c == short.class) {
356 verifyS(name, i, (short[])g, (short[])r);
357 } else if (c == int.class) {
358 verifyI(name, i, (int[])g, (int[])r);
359 } else if (c == long.class) {
360 verifyL(name, i, (long[])g, (long[])r);
361 } else {
362 throw new RuntimeException("verify " + name + ": array type not supported for verify:" +
363 " gold[" + i + "].getClass() = " + g.getClass().getSimpleName() +
364 " result[" + i + "].getClass() = " + r.getClass().getSimpleName());
365 }
366 }
367 }
368
369 static void verifyB(String name, int i, byte[] g, byte[] r) {
370 for (int j = 0; j < g.length; j++) {
371 if (g[j] != r[j]) {
372 throw new RuntimeException("verify " + name + ": arrays must have same content:" +
373 " gold[" + i + "][" + j + "] = " + g[j] +
374 " result[" + i + "][" + j + "] = " + r[j]);
375 }
376 }
377 }
378
379 static void verifyS(String name, int i, short[] g, short[] r) {
380 for (int j = 0; j < g.length; j++) {
381 if (g[j] != r[j]) {
382 throw new RuntimeException("verify " + name + ": arrays must have same content:" +
383 " gold[" + i + "][" + j + "] = " + g[j] +
384 " result[" + i + "][" + j + "] = " + r[j]);
385 }
386 }
387 }
388
389 static void verifyI(String name, int i, int[] g, int[] r) {
390 for (int j = 0; j < g.length; j++) {
391 if (g[j] != r[j]) {
392 throw new RuntimeException("verify " + name + ": arrays must have same content:" +
393 " gold[" + i + "][" + j + "] = " + g[j] +
394 " result[" + i + "][" + j + "] = " + r[j]);
395 }
396 }
397 }
398
399 static void verifyL(String name, int i, long[] g, long[] r) {
400 for (int j = 0; j < g.length; j++) {
401 if (g[j] != r[j]) {
402 throw new RuntimeException("verify " + name + ": arrays must have same content:" +
403 " gold[" + i + "][" + j + "] = " + g[j] +
404 " result[" + i + "][" + j + "] = " + r[j]);
405 }
406 }
407 }
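
    // The four verify methods above are intentionally type-specialized. As a minimal sketch
    // (assuming it is acceptable to box the elements), the same check could be written once
    // with java.lang.reflect.Array; the method below is illustrative only and is not used by
    // the tests.
    static void verifyGenericSketch(String name, int i, Object g, Object r) {
        for (int j = 0; j < Array.getLength(g); j++) {
            Object gv = Array.get(g, j); // boxes byte/short/int/long elements uniformly
            Object rv = Array.get(r, j);
            if (!gv.equals(rv)) {
                throw new RuntimeException("verify " + name + ": arrays must have same content:" +
                                           " gold[" + i + "][" + j + "] = " + gv +
                                           " result[" + i + "][" + j + "] = " + rv);
            }
        }
    }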
408
409 @Test
410 @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
411 IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
412 IRNode.STORE_VECTOR, "> 0"},
413 applyIf = {"MaxVectorSize", ">=8"},
414 applyIfPlatform = {"64-bit", "true"},
415 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
416 static Object[] test0(byte[] a, byte[] b, byte mask) {
417 for (int i = 0; i < RANGE; i+=8) {
418 // Safe to vectorize with AlignVector
419 b[i+0] = (byte)(a[i+0] & mask); // offset 0, align 0
420 b[i+1] = (byte)(a[i+1] & mask);
421 b[i+2] = (byte)(a[i+2] & mask);
422 b[i+3] = (byte)(a[i+3] & mask);
423 }
424 return new Object[]{ a, b };
425 }
426
427 @Test
428 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
429 IRNode.AND_VB, "> 0",
430 IRNode.STORE_VECTOR, "> 0"},
431 applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
432 // UNSAFE.ARRAY_BYTE_BASE_OFFSET = 16, but with compact object headers UNSAFE.ARRAY_BYTE_BASE_OFFSET=12.
433 // If AlignVector=true, we need the offset to be 8-byte aligned, else the vectors are filtered out.
434 applyIfPlatform = {"64-bit", "true"},
435 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
436 static Object[] test1a(byte[] a, byte[] b, byte mask) {
437 for (int i = 0; i < RANGE; i+=8) {
438 b[i+0] = (byte)(a[i+0] & mask); // adr = base + UNSAFE.ARRAY_BYTE_BASE_OFFSET + 0 + iter*8
439 b[i+1] = (byte)(a[i+1] & mask);
440 b[i+2] = (byte)(a[i+2] & mask);
441 b[i+3] = (byte)(a[i+3] & mask);
442 b[i+4] = (byte)(a[i+4] & mask);
443 b[i+5] = (byte)(a[i+5] & mask);
444 b[i+6] = (byte)(a[i+6] & mask);
445 b[i+7] = (byte)(a[i+7] & mask);
446 }
447 return new Object[]{ a, b };
448 }
449
450 @Test
451 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
452 IRNode.AND_VB, "> 0",
453 IRNode.STORE_VECTOR, "> 0"},
454 applyIfOr = {"UseCompactObjectHeaders", "true", "AlignVector", "false"},
455 // UNSAFE.ARRAY_BYTE_BASE_OFFSET = 16, but with compact object headers UNSAFE.ARRAY_BYTE_BASE_OFFSET=12.
456 // If AlignVector=true, we need the offset to be 8-byte aligned, else the vectors are filtered out.
457 applyIfPlatform = {"64-bit", "true"},
458 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
459 static Object[] test1b(byte[] a, byte[] b, byte mask) {
460 for (int i = 4; i < RANGE-8; i+=8) {
461 b[i+0] = (byte)(a[i+0] & mask); // adr = base + UNSAFE.ARRAY_BYTE_BASE_OFFSET + 4 + iter*8
462 b[i+1] = (byte)(a[i+1] & mask);
463 b[i+2] = (byte)(a[i+2] & mask);
464 b[i+3] = (byte)(a[i+3] & mask);
465 b[i+4] = (byte)(a[i+4] & mask);
466 b[i+5] = (byte)(a[i+5] & mask);
467 b[i+6] = (byte)(a[i+6] & mask);
468 b[i+7] = (byte)(a[i+7] & mask);
469 }
470 return new Object[]{ a, b };
471 }
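
    // Worked example for the base-offset comments in test1a/test1b above (offset values taken
    // from those comments, not re-measured here):
    //   test1a: adr = base + ARRAY_BYTE_BASE_OFFSET + 8*iter
    //               = base + 16 + 8*iter (8-byte aligned for every iter)  without compact headers
    //               = base + 12 + 8*iter (never 8-byte aligned)           with compact headers
    //   test1b: adr = base + ARRAY_BYTE_BASE_OFFSET + 4 + 8*iter
    //               = base + 20 + 8*iter (never 8-byte aligned)           without compact headers
    //               = base + 16 + 8*iter (8-byte aligned for every iter)  with compact headers
    // This is why the IR rules above accept vectorization either when AlignVector is off or when
    // the respective UseCompactObjectHeaders setting makes the constant part 8-byte aligned.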
472
473 @Test
474 @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
475 IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
476 IRNode.STORE_VECTOR, "> 0"},
477 applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"},
478 applyIfPlatform = {"64-bit", "true"},
479 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
480 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
481 IRNode.AND_VB, "= 0",
482 IRNode.STORE_VECTOR, "= 0"},
483 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
484 applyIfPlatform = {"64-bit", "true"},
485 applyIf = {"AlignVector", "true"})
486 static Object[] test2(byte[] a, byte[] b, byte mask) {
487 for (int i = 0; i < RANGE; i+=8) {
            // Cannot align with AlignVector: (3 + x*8) % 8 = 3
489 b[i+3] = (byte)(a[i+3] & mask); // at alignment 3
490 b[i+4] = (byte)(a[i+4] & mask);
491 b[i+5] = (byte)(a[i+5] & mask);
492 b[i+6] = (byte)(a[i+6] & mask);
493 }
494 return new Object[]{ a, b };
495 }
496
497 @Test
498 @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
499 IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
500 IRNode.STORE_VECTOR, "> 0"},
501 applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"},
502 applyIfPlatform = {"64-bit", "true"},
503 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
504 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
505 IRNode.AND_VB, "= 0",
506 IRNode.STORE_VECTOR, "= 0"},
507 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
508 applyIfPlatform = {"64-bit", "true"},
509 applyIf = {"AlignVector", "true"})
510 static Object[] test3(byte[] a, byte[] b, byte mask) {
511 for (int i = 0; i < RANGE; i+=8) {
            // Cannot align with AlignVector: (3 + x*8) % 8 = 3
513
514 // Problematic for AlignVector
515 b[i+0] = (byte)(a[i+0] & mask); // best_memref, align 0
516
517 b[i+3] = (byte)(a[i+3] & mask); // pack at offset 3 bytes
518 b[i+4] = (byte)(a[i+4] & mask);
519 b[i+5] = (byte)(a[i+5] & mask);
520 b[i+6] = (byte)(a[i+6] & mask);
521 }
522 return new Object[]{ a, b };
523 }
524
525 @Test
526 @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
527 IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_8, "> 0",
528 IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
529 IRNode.AND_VB, IRNode.VECTOR_SIZE_8, "> 0",
530 IRNode.STORE_VECTOR, "> 0"},
531 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
532 applyIfPlatform = {"64-bit", "true"},
533 applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=16"})
534 @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
535 IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_8, "= 0",// unaligned
536 IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
537 IRNode.AND_VB, IRNode.VECTOR_SIZE_8, "= 0",// unaligned
538 IRNode.STORE_VECTOR, "> 0"},
539 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
540 applyIfPlatform = {"64-bit", "true"},
541 applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">=16"})
542 static Object[] test4(byte[] a, byte[] b, byte mask) {
543 for (int i = 0; i < RANGE/16; i++) {
544 // Problematic for AlignVector
545 b[i*16 + 0 ] = (byte)(a[i*16 + 0 ] & mask); // 4 pack, 0 aligned
546 b[i*16 + 1 ] = (byte)(a[i*16 + 1 ] & mask);
547 b[i*16 + 2 ] = (byte)(a[i*16 + 2 ] & mask);
548 b[i*16 + 3 ] = (byte)(a[i*16 + 3 ] & mask);
549
550 b[i*16 + 5 ] = (byte)(a[i*16 + 5 ] & mask); // 8 pack, 5 aligned
551 b[i*16 + 6 ] = (byte)(a[i*16 + 6 ] & mask);
552 b[i*16 + 7 ] = (byte)(a[i*16 + 7 ] & mask);
553 b[i*16 + 8 ] = (byte)(a[i*16 + 8 ] & mask);
554 b[i*16 + 9 ] = (byte)(a[i*16 + 9 ] & mask);
555 b[i*16 + 10] = (byte)(a[i*16 + 10] & mask);
556 b[i*16 + 11] = (byte)(a[i*16 + 11] & mask);
557 b[i*16 + 12] = (byte)(a[i*16 + 12] & mask);
558 }
559 return new Object[]{ a, b };
560 }
561
562 @Test
563 @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
564 IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
565 IRNode.STORE_VECTOR, "> 0"},
566 applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"},
567 applyIfPlatform = {"64-bit", "true"},
568 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
569 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
570 IRNode.AND_VB, "= 0",
571 IRNode.STORE_VECTOR, "= 0"},
572 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
573 applyIfPlatform = {"64-bit", "true"},
574 applyIf = {"AlignVector", "true"})
575 static Object[] test5(byte[] a, byte[] b, byte mask, int inv) {
576 for (int i = 0; i < RANGE; i+=8) {
577 // Cannot align with AlignVector because of invariant
578 b[i+inv+0] = (byte)(a[i+inv+0] & mask);
579
580 b[i+inv+3] = (byte)(a[i+inv+3] & mask);
581 b[i+inv+4] = (byte)(a[i+inv+4] & mask);
582 b[i+inv+5] = (byte)(a[i+inv+5] & mask);
583 b[i+inv+6] = (byte)(a[i+inv+6] & mask);
584 }
585 return new Object[]{ a, b };
586 }
587
588 @Test
589 @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
590 IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
591 IRNode.STORE_VECTOR, "> 0"},
592 applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"},
593 applyIfPlatform = {"64-bit", "true"},
594 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
595 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
596 IRNode.AND_VB, "= 0",
597 IRNode.STORE_VECTOR, "= 0"},
598 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
599 applyIfPlatform = {"64-bit", "true"},
600 applyIf = {"AlignVector", "true"})
601 static Object[] test6(byte[] a, byte[] b, byte mask) {
602 for (int i = 0; i < RANGE/8; i+=2) {
603 // Cannot align with AlignVector because offset is odd
604 b[i*4+0] = (byte)(a[i*4+0] & mask);
605
606 b[i*4+3] = (byte)(a[i*4+3] & mask);
607 b[i*4+4] = (byte)(a[i*4+4] & mask);
608 b[i*4+5] = (byte)(a[i*4+5] & mask);
609 b[i*4+6] = (byte)(a[i*4+6] & mask);
610 }
611 return new Object[]{ a, b };
612 }
613
614 @Test
615 @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
616 IRNode.AND_VS, IRNode.VECTOR_SIZE_4, "> 0",
617 IRNode.STORE_VECTOR, "> 0"},
618 applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=16"},
619 applyIfPlatform = {"64-bit", "true"},
620 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
621 @IR(counts = {IRNode.LOAD_VECTOR_S, "= 0",
622 IRNode.AND_VS, "= 0",
623 IRNode.STORE_VECTOR, "= 0"},
624 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
625 applyIfPlatform = {"64-bit", "true"},
626 applyIf = {"AlignVector", "true"})
627 static Object[] test7(short[] a, short[] b, short mask) {
628 for (int i = 0; i < RANGE/8; i+=2) {
629 // Cannot align with AlignVector because offset is odd
630 b[i*4+0] = (short)(a[i*4+0] & mask);
631
632 b[i*4+3] = (short)(a[i*4+3] & mask);
633 b[i*4+4] = (short)(a[i*4+4] & mask);
634 b[i*4+5] = (short)(a[i*4+5] & mask);
635 b[i*4+6] = (short)(a[i*4+6] & mask);
636 }
637 return new Object[]{ a, b };
638 }
639
640 @Test
641 @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
642 IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
643 IRNode.STORE_VECTOR, "> 0"},
644 applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"},
645 applyIfPlatform = {"64-bit", "true"},
646 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
647 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
648 IRNode.AND_VB, "= 0",
649 IRNode.STORE_VECTOR, "= 0"},
650 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
651 applyIfPlatform = {"64-bit", "true"},
652 applyIf = {"AlignVector", "true"})
653 static Object[] test8(byte[] a, byte[] b, byte mask, int init) {
654 for (int i = init; i < RANGE; i+=8) {
655 // Cannot align with AlignVector because of invariant (variable init becomes invar)
656 b[i+0] = (byte)(a[i+0] & mask);
657
658 b[i+3] = (byte)(a[i+3] & mask);
659 b[i+4] = (byte)(a[i+4] & mask);
660 b[i+5] = (byte)(a[i+5] & mask);
661 b[i+6] = (byte)(a[i+6] & mask);
662 }
663 return new Object[]{ a, b };
664 }
665
666 @Test
667 @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
668 IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
669 IRNode.STORE_VECTOR, "> 0"},
670 applyIf = {"MaxVectorSize", ">=8"},
671 applyIfPlatform = {"64-bit", "true"},
672 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
673 static Object[] test9(byte[] a, byte[] b, byte mask) {
        // A known non-zero init value does not affect the offset, but it has an implicit effect on the iv.
675 for (int i = 13; i < RANGE-8; i+=8) {
676 b[i+0] = (byte)(a[i+0] & mask);
677
678 b[i+3] = (byte)(a[i+3] & mask);
679 b[i+4] = (byte)(a[i+4] & mask);
680 b[i+5] = (byte)(a[i+5] & mask);
681 b[i+6] = (byte)(a[i+6] & mask);
682 }
683 return new Object[]{ a, b };
684 }
685
686 @Test
687 @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
688 IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
689 IRNode.STORE_VECTOR, "> 0"},
690 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
691 applyIfPlatform = {"64-bit", "true"},
692 applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"})
693 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
694 IRNode.AND_VB, "= 0",
695 IRNode.STORE_VECTOR, "= 0"},
696 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
697 applyIfPlatform = {"64-bit", "true"},
698 applyIf = {"AlignVector", "true"})
699 static Object[] test10a(byte[] a, byte[] b, byte mask) {
        // This is not alignable with the pre-loop because of the odd init.
701 for (int i = 3; i < RANGE-8; i+=8) {
702 b[i+0] = (byte)(a[i+0] & mask);
703 b[i+1] = (byte)(a[i+1] & mask);
704 b[i+2] = (byte)(a[i+2] & mask);
705 b[i+3] = (byte)(a[i+3] & mask);
706 }
707 return new Object[]{ a, b };
708 }
709
710 @Test
711 @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
712 IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
713 IRNode.STORE_VECTOR, "> 0"},
714 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
715 applyIfPlatform = {"64-bit", "true"},
716 applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"})
717 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
718 IRNode.AND_VB, "= 0",
719 IRNode.STORE_VECTOR, "= 0"},
720 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
721 applyIfPlatform = {"64-bit", "true"},
722 applyIf = {"AlignVector", "true"})
723 static Object[] test10b(byte[] a, byte[] b, byte mask) {
        // This is not alignable with the pre-loop because of the odd init.
        // This case does not seem to be handled correctly.
726 for (int i = 13; i < RANGE-8; i+=8) {
727 b[i+0] = (byte)(a[i+0] & mask);
728 b[i+1] = (byte)(a[i+1] & mask);
729 b[i+2] = (byte)(a[i+2] & mask);
730 b[i+3] = (byte)(a[i+3] & mask);
731 }
732 return new Object[]{ a, b };
733 }
734
735 @Test
736 @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
737 IRNode.AND_VS, IRNode.VECTOR_SIZE_4, "> 0",
738 IRNode.STORE_VECTOR, "> 0"},
739 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
740 applyIfPlatform = {"64-bit", "true"},
741 applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=16"})
742 @IR(counts = {IRNode.LOAD_VECTOR_S, "= 0",
743 IRNode.AND_VS, "= 0",
744 IRNode.STORE_VECTOR, "= 0"},
745 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
746 applyIfPlatform = {"64-bit", "true"},
747 applyIf = {"AlignVector", "true"})
748 static Object[] test10c(short[] a, short[] b, short mask) {
        // This is not alignable with the pre-loop because of the odd init.
        // This case does not seem to be handled correctly with MaxVectorSize >= 32.
751 for (int i = 13; i < RANGE-8; i+=8) {
752 b[i+0] = (short)(a[i+0] & mask);
753 b[i+1] = (short)(a[i+1] & mask);
754 b[i+2] = (short)(a[i+2] & mask);
755 b[i+3] = (short)(a[i+3] & mask);
756 }
757 return new Object[]{ a, b };
758 }
759
760 @Test
761 @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
762 IRNode.AND_VS, IRNode.VECTOR_SIZE_4, "> 0",
763 IRNode.STORE_VECTOR, "> 0"},
764 applyIfAnd = {"MaxVectorSize", ">=16", "UseCompactObjectHeaders", "false"},
        // UNSAFE.ARRAY_SHORT_BASE_OFFSET = 16, but with compact object headers UNSAFE.ARRAY_SHORT_BASE_OFFSET=12.
766 // If AlignVector=true, we need the offset to be 8-byte aligned, else the vectors are filtered out.
767 applyIfPlatform = {"64-bit", "true"},
768 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
769 static Object[] test10d(short[] a, short[] b, short mask) {
770 for (int i = 13; i < RANGE-16; i+=8) {
771 // adr = base + UNSAFE.ARRAY_SHORT_BASE_OFFSET + 2*(3 + 13) + iter*16
772 b[i+0+3] = (short)(a[i+0+3] & mask);
773 b[i+1+3] = (short)(a[i+1+3] & mask);
774 b[i+2+3] = (short)(a[i+2+3] & mask);
775 b[i+3+3] = (short)(a[i+3+3] & mask);
776 }
777 return new Object[]{ a, b };
778 }
779
780 @Test
781 @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
782 IRNode.AND_VS, IRNode.VECTOR_SIZE_4, "> 0",
783 IRNode.STORE_VECTOR, "> 0"},
784 applyIfAnd = {"MaxVectorSize", ">=16", "UseCompactObjectHeaders", "true"},
        // UNSAFE.ARRAY_SHORT_BASE_OFFSET = 16, but with compact object headers UNSAFE.ARRAY_SHORT_BASE_OFFSET=12.
786 // If AlignVector=true, we need the offset to be 8-byte aligned, else the vectors are filtered out.
787 applyIfPlatform = {"64-bit", "true"},
788 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
789 static Object[] test10e(short[] a, short[] b, short mask) {
790 for (int i = 11; i < RANGE-16; i+=8) {
791 // adr = base + UNSAFE.ARRAY_SHORT_BASE_OFFSET + 2*(3 + 11) + iter*16
792 b[i+0+3] = (short)(a[i+0+3] & mask);
793 b[i+1+3] = (short)(a[i+1+3] & mask);
794 b[i+2+3] = (short)(a[i+2+3] & mask);
795 b[i+3+3] = (short)(a[i+3+3] & mask);
796 }
797 return new Object[]{ a, b };
798 }
799
800 @Test
801 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
802 IRNode.AND_VB, "> 0",
803 IRNode.STORE_VECTOR, "> 0"},
804 applyIfPlatform = {"64-bit", "true"},
805 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
806 static Object[] test11aB(byte[] a, byte[] b, byte mask) {
807 for (int i = 0; i < RANGE; i++) {
808 // always alignable
809 b[i+0] = (byte)(a[i+0] & mask);
810 }
811 return new Object[]{ a, b };
812 }
813
814 @Test
815 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
816 IRNode.AND_VS, "> 0",
817 IRNode.STORE_VECTOR, "> 0"},
818 applyIfPlatform = {"64-bit", "true"},
819 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
820 static Object[] test11aS(short[] a, short[] b, short mask) {
821 for (int i = 0; i < RANGE; i++) {
822 // always alignable
823 b[i+0] = (short)(a[i+0] & mask);
824 }
825 return new Object[]{ a, b };
826 }
827
828 @Test
829 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
830 IRNode.AND_VI, "> 0",
831 IRNode.STORE_VECTOR, "> 0"},
832 applyIfPlatform = {"64-bit", "true"},
833 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
834 static Object[] test11aI(int[] a, int[] b, int mask) {
835 for (int i = 0; i < RANGE; i++) {
836 // always alignable
837 b[i+0] = (int)(a[i+0] & mask);
838 }
839 return new Object[]{ a, b };
840 }
841
842 @Test
843 @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0",
844 IRNode.AND_VL, "> 0",
845 IRNode.STORE_VECTOR, "> 0"},
846 applyIfPlatform = {"64-bit", "true"},
847 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
848 static Object[] test11aL(long[] a, long[] b, long mask) {
849 for (int i = 0; i < RANGE; i++) {
850 // always alignable
851 b[i+0] = (long)(a[i+0] & mask);
852 }
853 return new Object[]{ a, b };
854 }
855
856 @Test
857 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
858 IRNode.AND_VB, "> 0",
859 IRNode.STORE_VECTOR, "> 0"},
860 applyIfPlatform = {"64-bit", "true"},
861 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
862 static Object[] test11bB(byte[] a, byte[] b, byte mask) {
863 for (int i = 1; i < RANGE; i++) {
864 // always alignable
865 b[i+0] = (byte)(a[i+0] & mask);
866 }
867 return new Object[]{ a, b };
868 }
869
870 @Test
871 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
872 IRNode.AND_VS, "> 0",
873 IRNode.STORE_VECTOR, "> 0"},
874 applyIfPlatform = {"64-bit", "true"},
875 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
876 static Object[] test11bS(short[] a, short[] b, short mask) {
877 for (int i = 1; i < RANGE; i++) {
878 // always alignable
879 b[i+0] = (short)(a[i+0] & mask);
880 }
881 return new Object[]{ a, b };
882 }
883
884 @Test
885 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
886 IRNode.AND_VI, "> 0",
887 IRNode.STORE_VECTOR, "> 0"},
888 applyIfPlatform = {"64-bit", "true"},
889 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
890 static Object[] test11bI(int[] a, int[] b, int mask) {
891 for (int i = 1; i < RANGE; i++) {
892 // always alignable
893 b[i+0] = (int)(a[i+0] & mask);
894 }
895 return new Object[]{ a, b };
896 }
897
898 @Test
899 @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0",
900 IRNode.AND_VL, "> 0",
901 IRNode.STORE_VECTOR, "> 0"},
902 applyIfPlatform = {"64-bit", "true"},
903 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
904 static Object[] test11bL(long[] a, long[] b, long mask) {
905 for (int i = 1; i < RANGE; i++) {
906 // always alignable
907 b[i+0] = (long)(a[i+0] & mask);
908 }
909 return new Object[]{ a, b };
910 }
911
912 @Test
913 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
914 IRNode.AND_VB, "> 0",
915 IRNode.STORE_VECTOR, "> 0"},
916 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
917 applyIfPlatform = {"64-bit", "true"},
918 applyIf = {"AlignVector", "false"})
919 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
920 IRNode.AND_VB, "= 0",
921 IRNode.STORE_VECTOR, "= 0"},
922 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
923 applyIfPlatform = {"64-bit", "true"},
924 applyIf = {"AlignVector", "true"})
925 static Object[] test11cB(byte[] a, byte[] b, byte mask) {
926 for (int i = 1; i < RANGE-1; i++) {
927 // 1 byte offset -> not alignable with AlignVector
928 b[i+0] = (byte)(a[i+1] & mask);
929 }
930 return new Object[]{ a, b };
931 }
932
933 @Test
934 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
935 IRNode.AND_VS, "> 0",
936 IRNode.STORE_VECTOR, "> 0"},
937 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
938 applyIfPlatform = {"64-bit", "true"},
939 applyIf = {"AlignVector", "false"})
940 @IR(counts = {IRNode.LOAD_VECTOR_S, "= 0",
941 IRNode.AND_VS, "= 0",
942 IRNode.STORE_VECTOR, "= 0"},
943 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
944 applyIfPlatform = {"64-bit", "true"},
945 applyIf = {"AlignVector", "true"})
946 static Object[] test11cS(short[] a, short[] b, short mask) {
947 for (int i = 1; i < RANGE-1; i++) {
948 // 2 byte offset -> not alignable with AlignVector
949 b[i+0] = (short)(a[i+1] & mask);
950 }
951 return new Object[]{ a, b };
952 }
953
954 @Test
955 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
956 IRNode.AND_VI, "> 0",
957 IRNode.STORE_VECTOR, "> 0"},
958 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
959 applyIfPlatform = {"64-bit", "true"},
960 applyIf = {"AlignVector", "false"})
961 @IR(counts = {IRNode.LOAD_VECTOR_I, "= 0",
962 IRNode.AND_VI, "= 0",
963 IRNode.STORE_VECTOR, "= 0"},
964 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
965 applyIfPlatform = {"64-bit", "true"},
966 applyIf = {"AlignVector", "true"})
967 static Object[] test11cI(int[] a, int[] b, int mask) {
968 for (int i = 1; i < RANGE-1; i++) {
969 // 4 byte offset -> not alignable with AlignVector
970 b[i+0] = (int)(a[i+1] & mask);
971 }
972 return new Object[]{ a, b };
973 }
974
975 @Test
976 @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0",
977 IRNode.AND_VL, "> 0",
978 IRNode.STORE_VECTOR, "> 0"},
979 applyIfPlatform = {"64-bit", "true"},
980 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
981 static Object[] test11cL(long[] a, long[] b, long mask) {
982 for (int i = 1; i < RANGE-1; i++) {
983 // always alignable (8 byte offset)
984 b[i+0] = (long)(a[i+1] & mask);
985 }
986 return new Object[]{ a, b };
987 }
988
989 @Test
990 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
991 IRNode.AND_VB, "> 0",
992 IRNode.STORE_VECTOR, "> 0"},
993 applyIfPlatform = {"64-bit", "true"},
994 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
995 static Object[] test11dB(byte[] a, byte[] b, byte mask, int invar) {
996 for (int i = 0; i < RANGE; i++) {
997 b[i+0+invar] = (byte)(a[i+0+invar] & mask);
998 }
999 return new Object[]{ a, b };
1000 }
1001
1002 @Test
1003 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
1004 IRNode.AND_VS, "> 0",
1005 IRNode.STORE_VECTOR, "> 0"},
1006 applyIfPlatform = {"64-bit", "true"},
1007 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
1008 static Object[] test11dS(short[] a, short[] b, short mask, int invar) {
1009 for (int i = 0; i < RANGE; i++) {
1010 b[i+0+invar] = (short)(a[i+0+invar] & mask);
1011 }
1012 return new Object[]{ a, b };
1013 }
1014
1015 @Test
1016 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
1017 IRNode.AND_VI, "> 0",
1018 IRNode.STORE_VECTOR, "> 0"},
1019 applyIfPlatform = {"64-bit", "true"},
1020 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
1021 static Object[] test11dI(int[] a, int[] b, int mask, int invar) {
1022 for (int i = 0; i < RANGE; i++) {
1023 b[i+0+invar] = (int)(a[i+0+invar] & mask);
1024 }
1025 return new Object[]{ a, b };
1026 }
1027
1028 @Test
1029 @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0",
1030 IRNode.AND_VL, "> 0",
1031 IRNode.STORE_VECTOR, "> 0"},
1032 applyIfPlatform = {"64-bit", "true"},
1033 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
1034 static Object[] test11dL(long[] a, long[] b, long mask, int invar) {
1035 for (int i = 0; i < RANGE; i++) {
1036 b[i+0+invar] = (long)(a[i+0+invar] & mask);
1037 }
1038 return new Object[]{ a, b };
1039 }
1040
1041 @Test
1042 @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE + "min(max_byte, 4)", "> 0",
1043 IRNode.AND_VB, IRNode.VECTOR_SIZE + "min(max_byte, 4)", "> 0",
1044 IRNode.STORE_VECTOR, "> 0"},
1045 applyIfPlatform = {"64-bit", "true"},
1046 applyIf = {"AlignVector", "false"},
1047 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
1048 static Object[] test12(byte[] a, byte[] b, byte mask) {
1049 for (int i = 0; i < RANGE/16; i++) {
            // Non-power-of-2 stride. Vectorization of 4 bytes, then a 2-byte gap.
1051 b[i*6 + 0 ] = (byte)(a[i*6 + 0 ] & mask);
1052 b[i*6 + 1 ] = (byte)(a[i*6 + 1 ] & mask);
1053 b[i*6 + 2 ] = (byte)(a[i*6 + 2 ] & mask);
1054 b[i*6 + 3 ] = (byte)(a[i*6 + 3 ] & mask);
1055 }
1056 return new Object[]{ a, b };
1057 }
1058
1059 @Test
1060 @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
1061 IRNode.LOAD_VECTOR_L, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
1062 IRNode.ADD_VI, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
1063 IRNode.ADD_VL, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
1064 IRNode.STORE_VECTOR, "> 0"},
1065 applyIfPlatform = {"64-bit", "true"},
1066 applyIfCPUFeatureOr = {"avx2", "true", "rvv", "true"})
    // Require avx2/rvv to ensure the vectors are larger than what unrolling alone produces.
1068 static Object[] test13aIL(int[] a, long[] b) {
1069 for (int i = 0; i < RANGE; i++) {
1070 a[i]++;
1071 b[i]++;
1072 }
1073 return new Object[]{ a, b };
1074 }
1075
1076 @Test
1077 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
1078 IRNode.LOAD_VECTOR_I, "> 0",
1079 IRNode.ADD_VB, "> 0",
1080 IRNode.ADD_VI, "> 0",
1081 IRNode.STORE_VECTOR, "> 0"},
1082 applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
1083 applyIfPlatform = {"64-bit", "true"},
1084 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
1085 static Object[] test13aIB(int[] a, byte[] b) {
1086 for (int i = 0; i < RANGE; i++) {
            // adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 4*iter
            //     = 16 (or 12 if UseCompactObjectHeaders=true)
            a[i]++;
            // adr = base + UNSAFE.ARRAY_BYTE_BASE_OFFSET + 1*iter
            //     = 16 (or 12 if UseCompactObjectHeaders=true)
            b[i]++;
            // For AlignVector, all adr must be 8-byte aligned. Let's see for which iterations this can hold:
            // If UseCompactObjectHeaders=false:
            //   a: 0, 2, 4, 6, 8, ...
            //   b: 0, 8, 16, 24, 32, ...
            //   -> Ok, aligns every 8th iteration.
            // If UseCompactObjectHeaders=true:
            //   a: 1, 3, 5, 7, 9, ...
            //   b: 4, 12, 20, 28, 36, ...
            //   -> we can never align both vectors!
1102 }
1103 return new Object[]{ a, b };
1104 }
1105
1106 @Test
1107 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
1108 IRNode.LOAD_VECTOR_S, "> 0",
1109 IRNode.ADD_VI, "> 0",
1110 IRNode.ADD_VS, "> 0",
1111 IRNode.STORE_VECTOR, "> 0"},
1112 applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
1113 applyIfPlatform = {"64-bit", "true"},
1114 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
1115 static Object[] test13aIS(int[] a, short[] b) {
1116 for (int i = 0; i < RANGE; i++) {
            // adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 4*iter
1118 // = 16 (or 12 if UseCompactObjectHeaders=true)
1119 a[i]++;
1120 // adr = base + UNSAFE.ARRAY_SHORT_BASE_OFFSET + 2*iter
1121 // = 16 (or 12 if UseCompactObjectHeaders=true)
1122 b[i]++;
1123 // For AlignVector, all adr must be 8-byte aligned. Let's see for which iteration this can hold:
1124 // If UseCompactObjectHeaders=false:
1125 // a: iter % 2 == 0
1126 // b: iter % 4 == 0
1127 // -> Ok, aligns every 4th iteration.
1128 // If UseCompactObjectHeaders=true:
1129 // a: iter % 2 = 1
1130 // b: iter % 4 = 2
1131 // -> we can never align both vectors!
1132 }
1133 return new Object[]{ a, b };
1134 }
1135
1136 @Test
1137 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
1138 IRNode.LOAD_VECTOR_S, "> 0",
1139 IRNode.LOAD_VECTOR_I, "> 0",
1140 IRNode.LOAD_VECTOR_L, "> 0",
1141 IRNode.ADD_VB, "> 0",
1142 IRNode.ADD_VS, "> 0",
1143 IRNode.ADD_VI, "> 0",
1144 IRNode.ADD_VL, "> 0",
1145 IRNode.STORE_VECTOR, "> 0"},
1146 applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
1147 applyIfPlatform = {"64-bit", "true"},
1148 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
1149 static Object[] test13aBSIL(byte[] a, short[] b, int[] c, long[] d) {
1150 for (int i = 0; i < RANGE; i++) {
1151 // adr = base + UNSAFE.ARRAY_BYTE_BASE_OFFSET + 1*iter
1152 // = 16 (or 12 if UseCompactObjectHeaders=true)
1153 a[i]++;
1154 // adr = base + UNSAFE.ARRAY_SHORT_BASE_OFFSET + 2*iter
1155 // = 16 (or 12 if UseCompactObjectHeaders=true)
1156 b[i]++;
1157 // adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 4*iter
1158 // = 16 (or 12 if UseCompactObjectHeaders=true)
1159 c[i]++;
1160 // adr = base + UNSAFE.ARRAY_LONG_BASE_OFFSET + 8*iter
1161 // = 16 (always)
1162 d[i]++;
1163 // If AlignVector and UseCompactObjectHeaders, and we want all adr 8-byte aligned:
1164 // a: iter % 8 = 4
1165 // c: iter % 2 = 1
1166 // -> can never align both vectors!
1167 }
1168 return new Object[]{ a, b, c, d };
1169 }
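
    // The alignment reasoning in the test13* comments boils down to the following arithmetic:
    // an access at base + baseOffset + scale*iter is 8-byte aligned iff
    // (baseOffset + scale*iter) % 8 == 0, assuming the array base itself is 8-byte aligned.
    // A minimal sketch (the helper name is made up and the method is not used by the tests):
    static boolean isEightByteAlignedSketch(long baseOffset, long scale, long iter) {
        return (baseOffset + scale * iter) % 8 == 0;
    }
    // For example, isEightByteAlignedSketch(12, 4, iter) is true exactly for odd iter,
    // matching the UseCompactObjectHeaders=true case for the int array in test13aIB.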
1170
1171 @Test
1172 @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
1173 IRNode.LOAD_VECTOR_L, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
1174 IRNode.ADD_VI, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
1175 IRNode.ADD_VL, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
1176 IRNode.STORE_VECTOR, "> 0"},
1177 applyIfPlatform = {"64-bit", "true"},
1178 applyIfCPUFeatureOr = {"avx2", "true", "rvv", "true"})
    // Require avx2/rvv to ensure the vectors are larger than what unrolling alone produces.
1180 static Object[] test13bIL(int[] a, long[] b) {
1181 for (int i = 1; i < RANGE; i++) {
1182 a[i]++;
1183 b[i]++;
1184 }
1185 return new Object[]{ a, b };
1186 }
1187
1188 @Test
1189 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
1190 IRNode.LOAD_VECTOR_I, "> 0",
1191 IRNode.ADD_VB, "> 0",
1192 IRNode.ADD_VI, "> 0",
1193 IRNode.STORE_VECTOR, "> 0"},
1194 applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
1195 applyIfPlatform = {"64-bit", "true"},
1196 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
1197 static Object[] test13bIB(int[] a, byte[] b) {
1198 for (int i = 1; i < RANGE; i++) {
1199 // adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 4 + 4*iter
1200 // = 16 (or 12 if UseCompactObjectHeaders=true)
1201 a[i]++;
1202 // adr = base + UNSAFE.ARRAY_BYTE_BASE_OFFSET + 1 + 1*iter
1203 // = 16 (or 12 if UseCompactObjectHeaders=true)
1204 b[i]++;
1205 // If AlignVector and UseCompactObjectHeaders, and we want all adr 8-byte aligned:
1206 // a: iter % 2 = 0
1207 // b: iter % 8 = 3
1208 // -> can never align both vectors!
1209 }
1210 return new Object[]{ a, b };
1211 }
1212
1213 @Test
1214 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
1215 IRNode.LOAD_VECTOR_S, "> 0",
1216 IRNode.ADD_VI, "> 0",
1217 IRNode.ADD_VS, "> 0",
1218 IRNode.STORE_VECTOR, "> 0"},
1219 applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
1220 applyIfPlatform = {"64-bit", "true"},
1221 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
1222 static Object[] test13bIS(int[] a, short[] b) {
1223 for (int i = 1; i < RANGE; i++) {
1224 // adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 4 + 4*iter
1225 // = 16 (or 12 if UseCompactObjectHeaders=true)
1226 a[i]++;
1227 // adr = base + UNSAFE.ARRAY_SHORT_BASE_OFFSET + 2 + 2*iter
1228 // = 16 (or 12 if UseCompactObjectHeaders=true)
1229 b[i]++;
1230 // If AlignVector and UseCompactObjectHeaders, and we want all adr 8-byte aligned:
1231 // a: iter % 2 = 0
1232 // b: iter % 4 = 1
1233 // -> can never align both vectors!
1234 }
1235 return new Object[]{ a, b };
1236 }
1237
1238 @Test
1239 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
1240 IRNode.LOAD_VECTOR_S, "> 0",
1241 IRNode.LOAD_VECTOR_I, "> 0",
1242 IRNode.LOAD_VECTOR_L, "> 0",
1243 IRNode.ADD_VB, "> 0",
1244 IRNode.ADD_VS, "> 0",
1245 IRNode.ADD_VI, "> 0",
1246 IRNode.ADD_VL, "> 0",
1247 IRNode.STORE_VECTOR, "> 0"},
1248 applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
1249 applyIfPlatform = {"64-bit", "true"},
1250 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
1251 static Object[] test13bBSIL(byte[] a, short[] b, int[] c, long[] d) {
1252 for (int i = 1; i < RANGE; i++) {
1253 // adr = base + UNSAFE.ARRAY_BYTE_BASE_OFFSET + 1 + 1*iter
1254 // = 16 (or 12 if UseCompactObjectHeaders=true)
1255 a[i]++;
1256 // adr = base + UNSAFE.ARRAY_SHORT_BASE_OFFSET + 2 + 2*iter
1257 // = 16 (or 12 if UseCompactObjectHeaders=true)
1258 b[i]++;
1259 // adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 4 + 4*iter
1260 // = 16 (or 12 if UseCompactObjectHeaders=true)
1261 c[i]++;
1262 // adr = base + UNSAFE.ARRAY_LONG_BASE_OFFSET + 8 + 8*iter
1263 // = 16 (always)
1264 d[i]++;
1265 // If AlignVector and UseCompactObjectHeaders, and we want all adr 8-byte aligned:
1266 // a: iter % 8 = 3
1267 // c: iter % 2 = 0
1268 // -> can never align both vectors!
1269 }
1270 return new Object[]{ a, b, c, d };
1271 }
1272
1273 @Test
1274 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
1275 IRNode.ADD_VB, "= 0",
1276 IRNode.STORE_VECTOR, "= 0"},
1277 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
1278 applyIfPlatform = {"64-bit", "true"},
1279 applyIf = {"AlignVector", "false"})
1280 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
1281 IRNode.ADD_VB, "= 0",
1282 IRNode.STORE_VECTOR, "= 0"},
1283 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
1284 applyIfPlatform = {"64-bit", "true"},
1285 applyIf = {"AlignVector", "true"})
1286 static Object[] test14aB(byte[] a) {
1287 // non-power-of-2 stride
1288 for (int i = 0; i < RANGE-20; i+=9) {
            // Since the stride is shorter than the vector length, the loads always partially
            // overlap the previous stores. This leads to store-to-load-forwarding failures,
            // so vectorization is not profitable.
1292 a[i+0]++;
1293 a[i+1]++;
1294 a[i+2]++;
1295 a[i+3]++;
1296 a[i+4]++;
1297 a[i+5]++;
1298 a[i+6]++;
1299 a[i+7]++;
1300 a[i+8]++;
1301 a[i+9]++;
1302 a[i+10]++;
1303 a[i+11]++;
1304 a[i+12]++;
1305 a[i+13]++;
1306 a[i+14]++;
1307 a[i+15]++;
1308 }
1309 return new Object[]{ a };
1310 }
1311
1312 @Test
1313 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
1314 IRNode.ADD_VB, "= 0",
1315 IRNode.STORE_VECTOR, "= 0"},
1316 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
1317 applyIfPlatform = {"64-bit", "true"},
1318 applyIf = {"AlignVector", "false"})
1319 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
1320 IRNode.ADD_VB, "= 0",
1321 IRNode.STORE_VECTOR, "= 0"},
1322 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
1323 applyIfPlatform = {"64-bit", "true"},
1324 applyIf = {"AlignVector", "true"})
1325 static Object[] test14bB(byte[] a) {
1326 // non-power-of-2 stride
1327 for (int i = 0; i < RANGE-20; i+=3) {
            // Since the stride is shorter than the vector length, the loads always partially
            // overlap the previous stores. This leads to store-to-load-forwarding failures,
            // so vectorization is not profitable.
1331 a[i+0]++;
1332 a[i+1]++;
1333 a[i+2]++;
1334 a[i+3]++;
1335 a[i+4]++;
1336 a[i+5]++;
1337 a[i+6]++;
1338 a[i+7]++;
1339 a[i+8]++;
1340 a[i+9]++;
1341 a[i+10]++;
1342 a[i+11]++;
1343 a[i+12]++;
1344 a[i+13]++;
1345 a[i+14]++;
1346 a[i+15]++;
1347 }
1348 return new Object[]{ a };
1349 }
1350
1351 @Test
1352 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
1353 IRNode.ADD_VB, "= 0",
1354 IRNode.STORE_VECTOR, "= 0"},
1355 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
1356 applyIfPlatform = {"64-bit", "true"},
1357 applyIf = {"AlignVector", "false"})
1358 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
1359 IRNode.ADD_VB, "= 0",
1360 IRNode.STORE_VECTOR, "= 0"},
1361 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
1362 applyIfPlatform = {"64-bit", "true"},
1363 applyIf = {"AlignVector", "true"})
1364 static Object[] test14cB(byte[] a) {
1365 // non-power-of-2 stride
1366 for (int i = 0; i < RANGE-20; i+=5) {
            // Since the stride is shorter than the vector length, the loads always partially
            // overlap the previous stores. This leads to store-to-load-forwarding failures,
            // so vectorization is not profitable.
1370 a[i+0]++;
1371 a[i+1]++;
1372 a[i+2]++;
1373 a[i+3]++;
1374 a[i+4]++;
1375 a[i+5]++;
1376 a[i+6]++;
1377 a[i+7]++;
1378 a[i+8]++;
1379 a[i+9]++;
1380 a[i+10]++;
1381 a[i+11]++;
1382 a[i+12]++;
1383 a[i+13]++;
1384 a[i+14]++;
1385 a[i+15]++;
1386 }
1387 return new Object[]{ a };
1388 }
1389
1390 @Test
1391 @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE + "min(max_byte, 8)", "> 0",
1392 IRNode.ADD_VB, IRNode.VECTOR_SIZE + "min(max_byte, 8)", "> 0",
1393 IRNode.STORE_VECTOR, "> 0"},
1394 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
1395 applyIfPlatform = {"64-bit", "true"},
1396 applyIf = {"AlignVector", "false"})
1397 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
1398 IRNode.ADD_VB, "= 0",
1399 IRNode.STORE_VECTOR, "= 0"},
1400 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
1401 applyIfPlatform = {"64-bit", "true"},
1402 applyIf = {"AlignVector", "true"})
1403 static Object[] test14dB(byte[] a) {
1404 // non-power-of-2 stride
1405 for (int i = 0; i < RANGE-20; i+=9) {
1406 a[i+0]++;
1407 a[i+1]++;
1408 a[i+2]++;
1409 a[i+3]++;
1410 a[i+4]++;
1411 a[i+5]++;
1412 a[i+6]++;
1413 a[i+7]++;
1414 }
1415 return new Object[]{ a };
1416 }
1417
1418 @Test
1419 @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE + "min(max_byte, 8)", "> 0",
1420 IRNode.ADD_VB, IRNode.VECTOR_SIZE + "min(max_byte, 8)", "> 0",
1421 IRNode.STORE_VECTOR, "> 0"},
1422 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
1423 applyIfPlatform = {"64-bit", "true"},
1424 applyIf = {"AlignVector", "false"})
1425 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
1426 IRNode.ADD_VB, "= 0",
1427 IRNode.STORE_VECTOR, "= 0"},
1428 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
1429 applyIfPlatform = {"64-bit", "true"},
1430 applyIf = {"AlignVector", "true"})
1431 static Object[] test14eB(byte[] a) {
1432 // non-power-of-2 stride
1433 for (int i = 0; i < RANGE-32; i+=11) {
1434 a[i+0]++;
1435 a[i+1]++;
1436 a[i+2]++;
1437 a[i+3]++;
1438 a[i+4]++;
1439 a[i+5]++;
1440 a[i+6]++;
1441 a[i+7]++;
1442 }
1443 return new Object[]{ a };
1444 }
1445
1446 @Test
1447 @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE + "min(max_byte, 8)", "> 0",
1448 IRNode.ADD_VB, IRNode.VECTOR_SIZE + "min(max_byte, 8)", "> 0",
1449 IRNode.STORE_VECTOR, "> 0"},
1450 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
1451 applyIfPlatform = {"64-bit", "true"},
1452 applyIf = {"AlignVector", "false"})
1453 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
1454 IRNode.ADD_VB, "= 0",
1455 IRNode.STORE_VECTOR, "= 0"},
1456 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
1457 applyIfPlatform = {"64-bit", "true"},
1458 applyIf = {"AlignVector", "true"})
1459 static Object[] test14fB(byte[] a) {
1460 // non-power-of-2 stride
1461 for (int i = 0; i < RANGE-40; i+=12) {
1462 a[i+0]++;
1463 a[i+1]++;
1464 a[i+2]++;
1465 a[i+3]++;
1466 a[i+4]++;
1467 a[i+5]++;
1468 a[i+6]++;
1469 a[i+7]++;
1470 }
1471 return new Object[]{ a };
1472 }
1473
1474 @Test
1475 // IR rules difficult because of modulo wrapping with offset after peeling.
1476 static Object[] test15aB(byte[] a) {
1477 // non-power-of-2 scale
1478 for (int i = 0; i < RANGE/64-20; i++) {
1479 a[53*i+0]++;
1480 a[53*i+1]++;
1481 a[53*i+2]++;
1482 a[53*i+3]++;
1483 a[53*i+4]++;
1484 a[53*i+5]++;
1485 a[53*i+6]++;
1486 a[53*i+7]++;
1487 a[53*i+8]++;
1488 a[53*i+9]++;
1489 a[53*i+10]++;
1490 a[53*i+11]++;
1491 a[53*i+12]++;
1492 a[53*i+13]++;
1493 a[53*i+14]++;
1494 a[53*i+15]++;
1495 }
1496 return new Object[]{ a };
1497 }
1498
1499 @Test
1500 // IR rules difficult because of modulo wrapping with offset after peeling.
1501 static Object[] test15bB(byte[] a) {
1502 // non-power-of-2 scale
1503 for (int i = 0; i < RANGE/64-20; i++) {
1504 a[25*i+0]++;
1505 a[25*i+1]++;
1506 a[25*i+2]++;
1507 a[25*i+3]++;
1508 a[25*i+4]++;
1509 a[25*i+5]++;
1510 a[25*i+6]++;
1511 a[25*i+7]++;
1512 a[25*i+8]++;
1513 a[25*i+9]++;
1514 a[25*i+10]++;
1515 a[25*i+11]++;
1516 a[25*i+12]++;
1517 a[25*i+13]++;
1518 a[25*i+14]++;
1519 a[25*i+15]++;
1520 }
1521 return new Object[]{ a };
1522 }
1523
1524 @Test
1525 // IR rules difficult because of modulo wrapping with offset after peeling.
1526 static Object[] test15cB(byte[] a) {
1527 // non-power-of-2 scale
1528 for (int i = 0; i < RANGE/64-20; i++) {
1529 a[19*i+0]++;
1530 a[19*i+1]++;
1531 a[19*i+2]++;
1532 a[19*i+3]++;
1533 a[19*i+4]++;
1534 a[19*i+5]++;
1535 a[19*i+6]++;
1536 a[19*i+7]++;
1537 a[19*i+8]++;
1538 a[19*i+9]++;
1539 a[19*i+10]++;
1540 a[19*i+11]++;
1541 a[19*i+12]++;
1542 a[19*i+13]++;
1543 a[19*i+14]++;
1544 a[19*i+15]++;
1545 }
1546 return new Object[]{ a };
1547 }
1548
1549 @Test
1550 static Object[] test16a(byte[] a, short[] b) {
1551 // infinite loop issues
1552 for (int i = 0; i < RANGE/2-20; i++) {
1553 a[2*i+0]++;
1554 a[2*i+1]++;
1555 a[2*i+2]++;
1556 a[2*i+3]++;
1557 a[2*i+4]++;
1558 a[2*i+5]++;
1559 a[2*i+6]++;
1560 a[2*i+7]++;
1561 a[2*i+8]++;
1562 a[2*i+9]++;
1563 a[2*i+10]++;
1564 a[2*i+11]++;
1565 a[2*i+12]++;
1566 a[2*i+13]++;
1567 a[2*i+14]++;
1568
1569 b[2*i+0]++;
1570 b[2*i+1]++;
1571 b[2*i+2]++;
1572 b[2*i+3]++;
1573 }
1574 return new Object[]{ a, b };
1575 }
1576
1577 @Test
1578 static Object[] test16b(byte[] a) {
1579 // infinite loop issues
1580 for (int i = 0; i < RANGE/2-20; i++) {
1581 a[2*i+0]++;
1582 a[2*i+1]++;
1583 a[2*i+2]++;
1584 a[2*i+3]++;
1585 a[2*i+4]++;
1586 a[2*i+5]++;
1587 a[2*i+6]++;
1588 a[2*i+7]++;
1589 a[2*i+8]++;
1590 a[2*i+9]++;
1591 a[2*i+10]++;
1592 a[2*i+11]++;
1593 a[2*i+12]++;
1594 a[2*i+13]++;
1595 a[2*i+14]++;
1596 }
1597 return new Object[]{ a };
1598 }
1599
1600 @Test
1601 @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0",
1602 IRNode.ADD_VL, "> 0",
1603 IRNode.STORE_VECTOR, "> 0"},
1604 applyIfPlatform = {"64-bit", "true"},
1605 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
1606 static Object[] test17a(long[] a) {
1607 // Unsafe: vectorizes with profiling (not xcomp)
1608 for (int i = 0; i < RANGE; i++) {
1609 long adr = UNSAFE.ARRAY_LONG_BASE_OFFSET + 8L * i;
1610 long v = UNSAFE.getLongUnaligned(a, adr);
1611 UNSAFE.putLongUnaligned(a, adr, v + 1);
1612 }
1613 return new Object[]{ a };
1614 }
1615
1616 @Test
1617 // Difficult to write good IR rule. Modulo calculus overflow can create non-power-of-2 packs.
1618 static Object[] test17b(long[] a) {
1619 // Not alignable
1620 for (int i = 0; i < RANGE-1; i++) {
1621 long adr = UNSAFE.ARRAY_LONG_BASE_OFFSET + 8L * i + 1;
1622 long v = UNSAFE.getLongUnaligned(a, adr);
1623 UNSAFE.putLongUnaligned(a, adr, v + 1);
1624 }
1625 return new Object[]{ a };
1626 }
1627
1628 @Test
1629 @IR(counts = {IRNode.LOAD_VECTOR_L, IRNode.VECTOR_SIZE_2, "> 0",
1630 IRNode.ADD_VL, IRNode.VECTOR_SIZE_2, "> 0",
1631 IRNode.STORE_VECTOR, "> 0"},
1632 applyIf = {"MaxVectorSize", ">=32"},
1633 applyIfPlatform = {"64-bit", "true"},
1634 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
1635 static Object[] test17c(long[] a) {
        // Unsafe accesses, but aligned, so this vectorizes
1637 for (int i = 0; i < RANGE-1; i+=4) {
1638 long adr = UNSAFE.ARRAY_LONG_BASE_OFFSET + 8L * i;
1639 long v0 = UNSAFE.getLongUnaligned(a, adr + 0);
1640 long v1 = UNSAFE.getLongUnaligned(a, adr + 8);
1641 UNSAFE.putLongUnaligned(a, adr + 0, v0 + 1);
1642 UNSAFE.putLongUnaligned(a, adr + 8, v1 + 1);
1643 }
1644 return new Object[]{ a };
1645 }
1646
1647 @Test
1648 @IR(counts = {IRNode.LOAD_VECTOR_L, IRNode.VECTOR_SIZE_2, "> 0",
1649 IRNode.ADD_VL, IRNode.VECTOR_SIZE_2, "> 0",
1650 IRNode.STORE_VECTOR, "> 0"},
1651 applyIfCPUFeatureOr = {"avx512", "true", "asimd", "true", "rvv", "true"},
1652 applyIfPlatform = {"64-bit", "true"},
1653 applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=64"})
    // Ensure the vector width is large enough to fit 64 bytes of longs:
    // The offsets are: 25, 33, 57, 65
    // In modulo 32: 25, 1, 25, 1 -> does not vectorize
    // In modulo 64: 25, 33, 57, 1 -> at least the first pair vectorizes
    // This happens because memory_alignment computes the offset modulo the vector width.
1659 @IR(counts = {IRNode.LOAD_VECTOR_L, "= 0",
1660 IRNode.ADD_VL, "= 0",
1661 IRNode.STORE_VECTOR, "= 0"},
1662 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
1663 applyIfPlatform = {"64-bit", "true"},
1664 applyIf = {"AlignVector", "true"})
1665 static Object[] test17d(long[] a) {
1666 // Not alignable
1667 for (int i = 0; i < RANGE-1; i+=4) {
1668 long adr = UNSAFE.ARRAY_LONG_BASE_OFFSET + 8L * i + 1;
1669 long v0 = UNSAFE.getLongUnaligned(a, adr + 0);
1670 long v1 = UNSAFE.getLongUnaligned(a, adr + 8);
1671 UNSAFE.putLongUnaligned(a, adr + 0, v0 + 1);
1672 UNSAFE.putLongUnaligned(a, adr + 8, v1 + 1);
1673 }
1674 return new Object[]{ a };
1675 }
1676
1677 @Test
1678 static Object[] test18a(byte[] a, int[] b) {
1679 // scale = 0 --> no iv
1680 for (int i = 0; i < RANGE; i++) {
1681 a[0] = 1;
1682 b[i] = 2;
1683 a[1] = 1;
1684 }
1685 return new Object[]{ a, b };
1686 }
1687
1688 @Test
1689 static Object[] test18b(byte[] a, int[] b) {
1690 // scale = 0 --> no iv
1691 for (int i = 0; i < RANGE; i++) {
1692 a[1] = 1;
1693 b[i] = 2;
1694 a[2] = 1;
1695 }
1696 return new Object[]{ a, b };
1697 }
1698
1699 @Test
1700 static Object[] test19(int[] a, int[] b) {
1701 for (int i = 5000; i > 0; i--) {
1702 a[RANGE_FINAL - i] = b[RANGE_FINAL - i];
1703 }
1704 return new Object[]{ a, b };
1705 }
1706
1707 @Test
1708 static Object[] test20(byte[] a) {
        // Example where it is easy to pass the alignment check,
        // but where the alignment calculation used to fail.
1711 for (int i = 1; i < RANGE/2-50; i++) {
1712 a[2*i+0+30]++;
1713 a[2*i+1+30]++;
1714 a[2*i+2+30]++;
1715 a[2*i+3+30]++;
1716 }
1717 return new Object[]{ a };
1718 }
1719 }