1 /*
2 * Copyright (c) 2024, 2025, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 */
23
24 package compiler.loopopts.superword;
25
26 import compiler.lib.ir_framework.*;
27 import jdk.test.lib.Utils;
28 import jdk.test.whitebox.WhiteBox;
29 import jdk.internal.misc.Unsafe;
30 import java.lang.reflect.Array;
31 import java.util.Map;
32 import java.util.HashMap;
33 import java.util.Random;
34 import java.nio.ByteOrder;
35
36 /*
37 * @test id=NoAlignVector
38 * @bug 8310190
39 * @summary Test AlignVector with various loop init, stride, scale, invar, etc.
40 * @modules java.base/jdk.internal.misc
41 * @library /test/lib /
42 * @run driver compiler.loopopts.superword.TestAlignVector NoAlignVector
43 */
44
45 /*
46 * @test id=AlignVector
47 * @bug 8310190
48 * @summary Test AlignVector with various loop init, stride, scale, invar, etc.
49 * @modules java.base/jdk.internal.misc
50 * @library /test/lib /
51 * @run driver compiler.loopopts.superword.TestAlignVector AlignVector
52 */
53
54 /*
55 * @test id=VerifyAlignVector
56 * @bug 8310190
57 * @summary Test AlignVector with various loop init, stride, scale, invar, etc.
58 * @modules java.base/jdk.internal.misc
59 * @library /test/lib /
60 * @run driver compiler.loopopts.superword.TestAlignVector VerifyAlignVector
61 */
62
63 /*
64 * @test id=NoAlignVector-COH
65 * @bug 8310190
66 * @summary Test AlignVector with various loop init, stride, scale, invar, etc.
67 * @modules java.base/jdk.internal.misc
68 * @library /test/lib /
69 * @run driver compiler.loopopts.superword.TestAlignVector NoAlignVector-COH
70 */
71
72 /*
73 * @test id=VerifyAlignVector-COH
74 * @bug 8310190
75 * @summary Test AlignVector with various loop init, stride, scale, invar, etc.
76 * @modules java.base/jdk.internal.misc
77 * @library /test/lib /
78 * @run driver compiler.loopopts.superword.TestAlignVector VerifyAlignVector-COH
79 */
80
81 public class TestAlignVector {
82 static int RANGE = 1024*8;
83 static int RANGE_FINAL = 1024*8;
84 private static final Unsafe UNSAFE = Unsafe.getUnsafe();
85 private static final Random RANDOM = Utils.getRandomInstance();
86
87 // Inputs
88 byte[] aB;
89 byte[] bB;
90 byte mB = (byte)31;
91 short[] aS;
92 short[] bS;
93 short mS = (short)0xF0F0;
94 int[] aI;
95 int[] bI;
96 int mI = 0xF0F0F0F0;
97 long[] aL;
98 long[] bL;
99 long mL = 0xF0F0F0F0F0F0F0F0L;
100
101 // List of tests
102 Map<String,TestFunction> tests = new HashMap<String,TestFunction>();
103
104 // List of gold, the results from the first run before compilation
105 Map<String,Object[]> golds = new HashMap<String,Object[]>();
106
107 interface TestFunction {
108 Object[] run();
109 }
110
111 public static void main(String[] args) {
112 TestFramework framework = new TestFramework(TestAlignVector.class);
113 framework.addFlags("--add-modules", "java.base", "--add-exports", "java.base/jdk.internal.misc=ALL-UNNAMED",
114 "-XX:+IgnoreUnrecognizedVMOptions", "-XX:LoopUnrollLimit=250");
115
116 switch (args[0]) {
117 case "NoAlignVector" -> { framework.addFlags("-XX:-UseCompactObjectHeaders", "-XX:-AlignVector"); }
118 case "AlignVector" -> { framework.addFlags("-XX:-UseCompactObjectHeaders", "-XX:+AlignVector"); }
119 case "VerifyAlignVector" -> { framework.addFlags("-XX:-UseCompactObjectHeaders", "-XX:+AlignVector", "-XX:+IgnoreUnrecognizedVMOptions", "-XX:+VerifyAlignVector"); }
120 case "NoAlignVector-COH" -> { framework.addFlags("-XX:+UseCompactObjectHeaders", "-XX:-AlignVector"); }
121 case "VerifyAlignVector-COH" -> { framework.addFlags("-XX:+UseCompactObjectHeaders", "-XX:+AlignVector", "-XX:+IgnoreUnrecognizedVMOptions", "-XX:+VerifyAlignVector"); }
122 default -> { throw new RuntimeException("Test argument not recognized: " + args[0]); }
123 }
124 framework.start();
125 }
126
127 public TestAlignVector() {
128 // Generate input once
129 aB = generateB();
130 bB = generateB();
131 aS = generateS();
132 bS = generateS();
133 aI = generateI();
134 bI = generateI();
135 aL = generateL();
136 bL = generateL();
137
138 // Add all tests to list
139 tests.put("test0", () -> { return test0(aB.clone(), bB.clone(), mB); });
140 tests.put("test1a", () -> { return test1a(aB.clone(), bB.clone(), mB); });
141 tests.put("test1b", () -> { return test1b(aB.clone(), bB.clone(), mB); });
142 tests.put("test2", () -> { return test2(aB.clone(), bB.clone(), mB); });
143 tests.put("test3", () -> { return test3(aB.clone(), bB.clone(), mB); });
144 tests.put("test4", () -> { return test4(aB.clone(), bB.clone(), mB); });
145 tests.put("test5", () -> { return test5(aB.clone(), bB.clone(), mB, 0); });
146 tests.put("test6", () -> { return test6(aB.clone(), bB.clone(), mB); });
147 tests.put("test7", () -> { return test7(aS.clone(), bS.clone(), mS); });
148 tests.put("test8", () -> { return test8(aB.clone(), bB.clone(), mB, 0); });
149 tests.put("test8", () -> { return test8(aB.clone(), bB.clone(), mB, 1); });
150 tests.put("test9", () -> { return test9(aB.clone(), bB.clone(), mB); });
151
152 tests.put("test10a", () -> { return test10a(aB.clone(), bB.clone(), mB); });
153 tests.put("test10b", () -> { return test10b(aB.clone(), bB.clone(), mB); });
154 tests.put("test10c", () -> { return test10c(aS.clone(), bS.clone(), mS); });
155 tests.put("test10d", () -> { return test10d(aS.clone(), bS.clone(), mS); });
156 tests.put("test10e", () -> { return test10e(aS.clone(), bS.clone(), mS); });
157
158 tests.put("test11aB", () -> { return test11aB(aB.clone(), bB.clone(), mB); });
159 tests.put("test11aS", () -> { return test11aS(aS.clone(), bS.clone(), mS); });
160 tests.put("test11aI", () -> { return test11aI(aI.clone(), bI.clone(), mI); });
161 tests.put("test11aL", () -> { return test11aL(aL.clone(), bL.clone(), mL); });
162
163 tests.put("test11bB", () -> { return test11bB(aB.clone(), bB.clone(), mB); });
164 tests.put("test11bS", () -> { return test11bS(aS.clone(), bS.clone(), mS); });
165 tests.put("test11bI", () -> { return test11bI(aI.clone(), bI.clone(), mI); });
166 tests.put("test11bL", () -> { return test11bL(aL.clone(), bL.clone(), mL); });
167
168 tests.put("test11cB", () -> { return test11cB(aB.clone(), bB.clone(), mB); });
169 tests.put("test11cS", () -> { return test11cS(aS.clone(), bS.clone(), mS); });
170 tests.put("test11cI", () -> { return test11cI(aI.clone(), bI.clone(), mI); });
171 tests.put("test11cL", () -> { return test11cL(aL.clone(), bL.clone(), mL); });
172
173 tests.put("test11dB", () -> { return test11dB(aB.clone(), bB.clone(), mB, 0); });
174 tests.put("test11dS", () -> { return test11dS(aS.clone(), bS.clone(), mS, 0); });
175 tests.put("test11dI", () -> { return test11dI(aI.clone(), bI.clone(), mI, 0); });
176 tests.put("test11dL", () -> { return test11dL(aL.clone(), bL.clone(), mL, 0); });
177
178 tests.put("test12", () -> { return test12(aB.clone(), bB.clone(), mB); });
179
180 tests.put("test13aIL", () -> { return test13aIL(aI.clone(), aL.clone()); });
181 tests.put("test13aIB", () -> { return test13aIB(aI.clone(), aB.clone()); });
182 tests.put("test13aIS", () -> { return test13aIS(aI.clone(), aS.clone()); });
183 tests.put("test13aBSIL", () -> { return test13aBSIL(aB.clone(), aS.clone(), aI.clone(), aL.clone()); });
184
185 tests.put("test13bIL", () -> { return test13bIL(aI.clone(), aL.clone()); });
186 tests.put("test13bIB", () -> { return test13bIB(aI.clone(), aB.clone()); });
187 tests.put("test13bIS", () -> { return test13bIS(aI.clone(), aS.clone()); });
188 tests.put("test13bBSIL", () -> { return test13bBSIL(aB.clone(), aS.clone(), aI.clone(), aL.clone()); });
189
190 tests.put("test14aB", () -> { return test14aB(aB.clone()); });
191 tests.put("test14bB", () -> { return test14bB(aB.clone()); });
192 tests.put("test14cB", () -> { return test14cB(aB.clone()); });
193 tests.put("test14dB", () -> { return test14dB(aB.clone()); });
194 tests.put("test14eB", () -> { return test14eB(aB.clone()); });
195 tests.put("test14fB", () -> { return test14fB(aB.clone()); });
196
197 tests.put("test15aB", () -> { return test15aB(aB.clone()); });
198 tests.put("test15bB", () -> { return test15bB(aB.clone()); });
199 tests.put("test15cB", () -> { return test15cB(aB.clone()); });
200
201 tests.put("test16a", () -> { return test16a(aB.clone(), aS.clone()); });
202 tests.put("test16b", () -> { return test16b(aB.clone()); });
203
204 tests.put("test17a", () -> { return test17a(aL.clone()); });
205 tests.put("test17b", () -> { return test17b(aL.clone()); });
206 tests.put("test17c", () -> { return test17c(aL.clone()); });
207 tests.put("test17d", () -> { return test17d(aL.clone()); });
208
209 tests.put("test18a", () -> { return test18a(aB.clone(), aI.clone()); });
210 tests.put("test18b", () -> { return test18b(aB.clone(), aI.clone()); });
211
212 tests.put("test19", () -> { return test19(aI.clone(), bI.clone()); });
213 tests.put("test20", () -> { return test20(aB.clone()); });
214
215 // Compute gold value for all test methods before compilation
216 for (Map.Entry<String,TestFunction> entry : tests.entrySet()) {
217 String name = entry.getKey();
218 TestFunction test = entry.getValue();
219 Object[] gold = test.run();
220 golds.put(name, gold);
221 }
222 }
223
224 @Warmup(100)
225 @Run(test = {"test0",
226 "test1a",
227 "test1b",
228 "test2",
229 "test3",
230 "test4",
231 "test5",
232 "test6",
233 "test7",
234 "test8",
235 "test9",
236 "test10a",
237 "test10b",
238 "test10c",
239 "test10d",
240 "test10e",
241 "test11aB",
242 "test11aS",
243 "test11aI",
244 "test11aL",
245 "test11bB",
246 "test11bS",
247 "test11bI",
248 "test11bL",
249 "test11cB",
250 "test11cS",
251 "test11cI",
252 "test11cL",
253 "test11dB",
254 "test11dS",
255 "test11dI",
256 "test11dL",
257 "test12",
258 "test13aIL",
259 "test13aIB",
260 "test13aIS",
261 "test13aBSIL",
262 "test13bIL",
263 "test13bIB",
264 "test13bIS",
265 "test13bBSIL",
266 "test14aB",
267 "test14bB",
268 "test14cB",
269 "test14dB",
270 "test14eB",
271 "test14fB",
272 "test15aB",
273 "test15bB",
274 "test15cB",
275 "test16a",
276 "test16b",
277 "test17a",
278 "test17b",
279 "test17c",
280 "test17d",
281 "test18a",
282 "test18b",
283 "test19",
284 "test20"})
285 public void runTests() {
286 for (Map.Entry<String,TestFunction> entry : tests.entrySet()) {
287 String name = entry.getKey();
288 TestFunction test = entry.getValue();
289 // Recall gold value from before compilation
290 Object[] gold = golds.get(name);
291 // Compute new result
292 Object[] result = test.run();
293 // Compare gold and new result
294 verify(name, gold, result);
295 }
296 }
297
298 static byte[] generateB() {
299 byte[] a = new byte[RANGE];
300 for (int i = 0; i < a.length; i++) {
301 a[i] = (byte)RANDOM.nextInt();
302 }
303 return a;
304 }
305
306 static short[] generateS() {
307 short[] a = new short[RANGE];
308 for (int i = 0; i < a.length; i++) {
309 a[i] = (short)RANDOM.nextInt();
310 }
311 return a;
312 }
313
314 static int[] generateI() {
315 int[] a = new int[RANGE];
316 for (int i = 0; i < a.length; i++) {
317 a[i] = RANDOM.nextInt();
318 }
319 return a;
320 }
321
322 static long[] generateL() {
323 long[] a = new long[RANGE];
324 for (int i = 0; i < a.length; i++) {
325 a[i] = RANDOM.nextLong();
326 }
327 return a;
328 }
329
330 static void verify(String name, Object[] gold, Object[] result) {
331 if (gold.length != result.length) {
332 throw new RuntimeException("verify " + name + ": not the same number of outputs: gold.length = " +
333 gold.length + ", result.length = " + result.length);
334 }
335 for (int i = 0; i < gold.length; i++) {
336 Object g = gold[i];
337 Object r = result[i];
338 if (g.getClass() != r.getClass() || !g.getClass().isArray() || !r.getClass().isArray()) {
339 throw new RuntimeException("verify " + name + ": must both be array of same type:" +
340 " gold[" + i + "].getClass() = " + g.getClass().getSimpleName() +
341 " result[" + i + "].getClass() = " + r.getClass().getSimpleName());
342 }
343 if (g == r) {
344 throw new RuntimeException("verify " + name + ": should be two separate arrays (with identical content):" +
345 " gold[" + i + "] == result[" + i + "]");
346 }
347 if (Array.getLength(g) != Array.getLength(r)) {
348 throw new RuntimeException("verify " + name + ": arrays must have same length:" +
349 " gold[" + i + "].length = " + Array.getLength(g) +
350 " result[" + i + "].length = " + Array.getLength(r));
351 }
352 Class c = g.getClass().getComponentType();
353 if (c == byte.class) {
354 verifyB(name, i, (byte[])g, (byte[])r);
355 } else if (c == short.class) {
356 verifyS(name, i, (short[])g, (short[])r);
357 } else if (c == int.class) {
358 verifyI(name, i, (int[])g, (int[])r);
359 } else if (c == long.class) {
360 verifyL(name, i, (long[])g, (long[])r);
361 } else {
362 throw new RuntimeException("verify " + name + ": array type not supported for verify:" +
363 " gold[" + i + "].getClass() = " + g.getClass().getSimpleName() +
364 " result[" + i + "].getClass() = " + r.getClass().getSimpleName());
365 }
366 }
367 }
368
369 static void verifyB(String name, int i, byte[] g, byte[] r) {
370 for (int j = 0; j < g.length; j++) {
371 if (g[j] != r[j]) {
372 throw new RuntimeException("verify " + name + ": arrays must have same content:" +
373 " gold[" + i + "][" + j + "] = " + g[j] +
374 " result[" + i + "][" + j + "] = " + r[j]);
375 }
376 }
377 }
378
379 static void verifyS(String name, int i, short[] g, short[] r) {
380 for (int j = 0; j < g.length; j++) {
381 if (g[j] != r[j]) {
382 throw new RuntimeException("verify " + name + ": arrays must have same content:" +
383 " gold[" + i + "][" + j + "] = " + g[j] +
384 " result[" + i + "][" + j + "] = " + r[j]);
385 }
386 }
387 }
388
389 static void verifyI(String name, int i, int[] g, int[] r) {
390 for (int j = 0; j < g.length; j++) {
391 if (g[j] != r[j]) {
392 throw new RuntimeException("verify " + name + ": arrays must have same content:" +
393 " gold[" + i + "][" + j + "] = " + g[j] +
394 " result[" + i + "][" + j + "] = " + r[j]);
395 }
396 }
397 }
398
399 static void verifyL(String name, int i, long[] g, long[] r) {
400 for (int j = 0; j < g.length; j++) {
401 if (g[j] != r[j]) {
402 throw new RuntimeException("verify " + name + ": arrays must have same content:" +
403 " gold[" + i + "][" + j + "] = " + g[j] +
404 " result[" + i + "][" + j + "] = " + r[j]);
405 }
406 }
407 }
408
    // Hand-unrolled 4-wide byte AND at 8-byte iteration stride; the pack starts
    // at offset 0, so the IR rule expects 4-byte vectors under both +/-AlignVector.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIf = {"MaxVectorSize", ">=8"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    static Object[] test0(byte[] a, byte[] b, byte mask) {
        for (int i = 0; i < RANGE; i+=8) {
            // Safe to vectorize with AlignVector
            b[i+0] = (byte)(a[i+0] & mask); // offset 0, align 0
            b[i+1] = (byte)(a[i+1] & mask);
            b[i+2] = (byte)(a[i+2] & mask);
            b[i+3] = (byte)(a[i+3] & mask);
        }
        return new Object[]{ a, b };
    }
426
    // Full 8-wide unroll at offset 0. With AlignVector=true the array base
    // offset must be 8-byte aligned for vectorization; hence the rule only
    // applies when compact object headers are off or AlignVector is off.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
                  IRNode.AND_VB, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
        // UNSAFE.ARRAY_BYTE_BASE_OFFSET = 16, but with compact object headers UNSAFE.ARRAY_BYTE_BASE_OFFSET=12.
        // If AlignVector=true, we need the offset to be 8-byte aligned, else the vectors are filtered out.
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
    static Object[] test1a(byte[] a, byte[] b, byte mask) {
        for (int i = 0; i < RANGE; i+=8) {
            b[i+0] = (byte)(a[i+0] & mask); // adr = base + UNSAFE.ARRAY_BYTE_BASE_OFFSET + 0 + iter*8
            b[i+1] = (byte)(a[i+1] & mask);
            b[i+2] = (byte)(a[i+2] & mask);
            b[i+3] = (byte)(a[i+3] & mask);
            b[i+4] = (byte)(a[i+4] & mask);
            b[i+5] = (byte)(a[i+5] & mask);
            b[i+6] = (byte)(a[i+6] & mask);
            b[i+7] = (byte)(a[i+7] & mask);
        }
        return new Object[]{ a, b };
    }
449
    // Same 8-wide unroll as test1a but with loop init 4 (constant +4 offset).
    // The mirrored applyIfOr condition means the COH base offset case is the
    // one where the +4 makes the access 8-byte aligned.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
                  IRNode.AND_VB, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfOr = {"UseCompactObjectHeaders", "true", "AlignVector", "false"},
        // UNSAFE.ARRAY_BYTE_BASE_OFFSET = 16, but with compact object headers UNSAFE.ARRAY_BYTE_BASE_OFFSET=12.
        // If AlignVector=true, we need the offset to be 8-byte aligned, else the vectors are filtered out.
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
    static Object[] test1b(byte[] a, byte[] b, byte mask) {
        for (int i = 4; i < RANGE-8; i+=8) {
            b[i+0] = (byte)(a[i+0] & mask); // adr = base + UNSAFE.ARRAY_BYTE_BASE_OFFSET + 4 + iter*8
            b[i+1] = (byte)(a[i+1] & mask);
            b[i+2] = (byte)(a[i+2] & mask);
            b[i+3] = (byte)(a[i+3] & mask);
            b[i+4] = (byte)(a[i+4] & mask);
            b[i+5] = (byte)(a[i+5] & mask);
            b[i+6] = (byte)(a[i+6] & mask);
            b[i+7] = (byte)(a[i+7] & mask);
        }
        return new Object[]{ a, b };
    }
472
    // 4-wide pack starting at offset 3 within an 8-byte stride: vectorizes
    // only when AlignVector=false (first rule); with AlignVector=true the
    // misaligned pack must be rejected entirely (second rule counts "= 0").
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
                  IRNode.AND_VB, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "true"})
    static Object[] test2(byte[] a, byte[] b, byte mask) {
        for (int i = 0; i < RANGE; i+=8) {
            // Cannot align with AlignVector: 3 + x * 8 % 8 = 3
            b[i+3] = (byte)(a[i+3] & mask); // at alignment 3
            b[i+4] = (byte)(a[i+4] & mask);
            b[i+5] = (byte)(a[i+5] & mask);
            b[i+6] = (byte)(a[i+6] & mask);
        }
        return new Object[]{ a, b };
    }
496
    // Like test2, but with an additional lone store at offset 0 that becomes
    // the best-aligned memory reference, while the 4-wide pack sits at offset 3.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
                  IRNode.AND_VB, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "true"})
    static Object[] test3(byte[] a, byte[] b, byte mask) {
        for (int i = 0; i < RANGE; i+=8) {
            // Cannot align with AlignVector: 3 + x * 8 % 8 = 3

            // Problematic for AlignVector
            b[i+0] = (byte)(a[i+0] & mask); // best_memref, align 0

            b[i+3] = (byte)(a[i+3] & mask); // pack at offset 3 bytes
            b[i+4] = (byte)(a[i+4] & mask);
            b[i+5] = (byte)(a[i+5] & mask);
            b[i+6] = (byte)(a[i+6] & mask);
        }
        return new Object[]{ a, b };
    }
524
    // Two packs per 16-byte stride: a 4-wide pack at offset 0 (alignable) and
    // an 8-wide pack at offset 5 (not alignable). With AlignVector=true only
    // the 4-wide pack may survive; the 8-wide one must be filtered out.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_8, "> 0",
                  IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.AND_VB, IRNode.VECTOR_SIZE_8, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=16"})
    @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_8, "= 0",// unaligned
                  IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.AND_VB, IRNode.VECTOR_SIZE_8, "= 0",// unaligned
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">=16"})
    static Object[] test4(byte[] a, byte[] b, byte mask) {
        for (int i = 0; i < RANGE/16; i++) {
            // Problematic for AlignVector
            b[i*16 + 0 ] = (byte)(a[i*16 + 0 ] & mask); // 4 pack, 0 aligned
            b[i*16 + 1 ] = (byte)(a[i*16 + 1 ] & mask);
            b[i*16 + 2 ] = (byte)(a[i*16 + 2 ] & mask);
            b[i*16 + 3 ] = (byte)(a[i*16 + 3 ] & mask);

            b[i*16 + 5 ] = (byte)(a[i*16 + 5 ] & mask); // 8 pack, 5 aligned
            b[i*16 + 6 ] = (byte)(a[i*16 + 6 ] & mask);
            b[i*16 + 7 ] = (byte)(a[i*16 + 7 ] & mask);
            b[i*16 + 8 ] = (byte)(a[i*16 + 8 ] & mask);
            b[i*16 + 9 ] = (byte)(a[i*16 + 9 ] & mask);
            b[i*16 + 10] = (byte)(a[i*16 + 10] & mask);
            b[i*16 + 11] = (byte)(a[i*16 + 11] & mask);
            b[i*16 + 12] = (byte)(a[i*16 + 12] & mask);
        }
        return new Object[]{ a, b };
    }
561
    // Same shape as test3, but all offsets carry an unknown loop invariant
    // 'inv' (always 0 at runtime, see constructor). The compiler cannot prove
    // alignment for an unknown invariant, so AlignVector=true must reject it.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
                  IRNode.AND_VB, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "true"})
    static Object[] test5(byte[] a, byte[] b, byte mask, int inv) {
        for (int i = 0; i < RANGE; i+=8) {
            // Cannot align with AlignVector because of invariant
            b[i+inv+0] = (byte)(a[i+inv+0] & mask);

            b[i+inv+3] = (byte)(a[i+inv+3] & mask);
            b[i+inv+4] = (byte)(a[i+inv+4] & mask);
            b[i+inv+5] = (byte)(a[i+inv+5] & mask);
            b[i+inv+6] = (byte)(a[i+inv+6] & mask);
        }
        return new Object[]{ a, b };
    }
587
    // Same pack shape as test3/test5, but addressing is via a scaled iv
    // (i*4 with i stepping by 2). The misaligned pack at +3 cannot be aligned
    // by pre-loop adjustment, so AlignVector=true must reject vectorization.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
                  IRNode.AND_VB, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "true"})
    static Object[] test6(byte[] a, byte[] b, byte mask) {
        for (int i = 0; i < RANGE/8; i+=2) {
            // Cannot align with AlignVector because offset is odd
            b[i*4+0] = (byte)(a[i*4+0] & mask);

            b[i*4+3] = (byte)(a[i*4+3] & mask);
            b[i*4+4] = (byte)(a[i*4+4] & mask);
            b[i*4+5] = (byte)(a[i*4+5] & mask);
            b[i*4+6] = (byte)(a[i*4+6] & mask);
        }
        return new Object[]{ a, b };
    }
613
    // Short-element variant of test6 (2-byte elements double every byte offset).
    // NOTE(review): the positive rule requires avx2 while the negative rule
    // uses sse4.1 — presumably intentional (4 shorts need wider vectors); confirm.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.AND_VS, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=16"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
    @IR(counts = {IRNode.LOAD_VECTOR_S, "= 0",
                  IRNode.AND_VS, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "true"})
    static Object[] test7(short[] a, short[] b, short mask) {
        for (int i = 0; i < RANGE/8; i+=2) {
            // Cannot align with AlignVector because offset is odd
            b[i*4+0] = (short)(a[i*4+0] & mask);

            b[i*4+3] = (short)(a[i*4+3] & mask);
            b[i*4+4] = (short)(a[i*4+4] & mask);
            b[i*4+5] = (short)(a[i*4+5] & mask);
            b[i*4+6] = (short)(a[i*4+6] & mask);
        }
        return new Object[]{ a, b };
    }
639
    // Same shape as test5, but the unknown quantity is the loop's init value,
    // which the compiler turns into an invariant it cannot prove aligned.
    // Called with init = 0 and init = 1 (see constructor registrations).
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
                  IRNode.AND_VB, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "true"})
    static Object[] test8(byte[] a, byte[] b, byte mask, int init) {
        for (int i = init; i < RANGE; i+=8) {
            // Cannot align with AlignVector because of invariant (variable init becomes invar)
            b[i+0] = (byte)(a[i+0] & mask);

            b[i+3] = (byte)(a[i+3] & mask);
            b[i+4] = (byte)(a[i+4] & mask);
            b[i+5] = (byte)(a[i+5] & mask);
            b[i+6] = (byte)(a[i+6] & mask);
        }
        return new Object[]{ a, b };
    }
665
    // Constant (but non-zero) init value 13: the offsets are still plain i+k,
    // so the pre-loop can establish alignment — vectors expected under both
    // +/-AlignVector (the IR rule has no AlignVector precondition).
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIf = {"MaxVectorSize", ">=8"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    static Object[] test9(byte[] a, byte[] b, byte mask) {
        // known non-zero init value does not affect offset, but has implicit effect on iv
        for (int i = 13; i < RANGE-8; i+=8) {
            b[i+0] = (byte)(a[i+0] & mask);

            b[i+3] = (byte)(a[i+3] & mask);
            b[i+4] = (byte)(a[i+4] & mask);
            b[i+5] = (byte)(a[i+5] & mask);
            b[i+6] = (byte)(a[i+6] & mask);
        }
        return new Object[]{ a, b };
    }
685
    // Odd init (3) with stride 8: every access lands at i ≡ 3 (mod 8), which
    // the pre-loop cannot re-align — AlignVector=true must reject vectorization.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"})
    @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
                  IRNode.AND_VB, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "true"})
    static Object[] test10a(byte[] a, byte[] b, byte mask) {
        // This is not alignable with pre-loop, because of odd init.
        for (int i = 3; i < RANGE-8; i+=8) {
            b[i+0] = (byte)(a[i+0] & mask);
            b[i+1] = (byte)(a[i+1] & mask);
            b[i+2] = (byte)(a[i+2] & mask);
            b[i+3] = (byte)(a[i+3] & mask);
        }
        return new Object[]{ a, b };
    }
709
    // Variant of test10a with a different odd init (13); same expectation:
    // not alignable by the pre-loop, so AlignVector=true rejects it.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"})
    @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
                  IRNode.AND_VB, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "true"})
    static Object[] test10b(byte[] a, byte[] b, byte mask) {
        // This is not alignable with pre-loop, because of odd init.
        // Seems not correctly handled.
        for (int i = 13; i < RANGE-8; i+=8) {
            b[i+0] = (byte)(a[i+0] & mask);
            b[i+1] = (byte)(a[i+1] & mask);
            b[i+2] = (byte)(a[i+2] & mask);
            b[i+3] = (byte)(a[i+3] & mask);
        }
        return new Object[]{ a, b };
    }
734
    // Short-element variant of test10b: odd init 13 on a 2-byte element type,
    // again not alignable via the pre-loop when AlignVector=true.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.AND_VS, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=16"})
    @IR(counts = {IRNode.LOAD_VECTOR_S, "= 0",
                  IRNode.AND_VS, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "true"})
    static Object[] test10c(short[] a, short[] b, short mask) {
        // This is not alignable with pre-loop, because of odd init.
        // Seems not correctly handled with MaxVectorSize >= 32.
        for (int i = 13; i < RANGE-8; i+=8) {
            b[i+0] = (short)(a[i+0] & mask);
            b[i+1] = (short)(a[i+1] & mask);
            b[i+2] = (short)(a[i+2] & mask);
            b[i+3] = (short)(a[i+3] & mask);
        }
        return new Object[]{ a, b };
    }
759
    // Short loads at byte offset 2*(3+13)=32 from the base: 8-byte aligned only
    // with the 16-byte array base offset, hence restricted to COH=false.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.AND_VS, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfAnd = {"MaxVectorSize", ">=16", "UseCompactObjectHeaders", "false"},
        // UNSAFE.ARRAY_BYTE_BASE_OFFSET = 16, but with compact object headers UNSAFE.ARRAY_BYTE_BASE_OFFSET=12.
        // If AlignVector=true, we need the offset to be 8-byte aligned, else the vectors are filtered out.
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
    static Object[] test10d(short[] a, short[] b, short mask) {
        for (int i = 13; i < RANGE-16; i+=8) {
            // adr = base + UNSAFE.ARRAY_SHORT_BASE_OFFSET + 2*(3 + 13) + iter*16
            b[i+0+3] = (short)(a[i+0+3] & mask);
            b[i+1+3] = (short)(a[i+1+3] & mask);
            b[i+2+3] = (short)(a[i+2+3] & mask);
            b[i+3+3] = (short)(a[i+3+3] & mask);
        }
        return new Object[]{ a, b };
    }
779
    // Mirror of test10d with init 11 (byte offset 2*(3+11)=28): together with
    // the 12-byte COH base offset this is 8-byte aligned, hence COH=true only.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.AND_VS, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfAnd = {"MaxVectorSize", ">=16", "UseCompactObjectHeaders", "true"},
        // UNSAFE.ARRAY_BYTE_BASE_OFFSET = 16, but with compact object headers UNSAFE.ARRAY_BYTE_BASE_OFFSET=12.
        // If AlignVector=true, we need the offset to be 8-byte aligned, else the vectors are filtered out.
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
    static Object[] test10e(short[] a, short[] b, short mask) {
        for (int i = 11; i < RANGE-16; i+=8) {
            // adr = base + UNSAFE.ARRAY_SHORT_BASE_OFFSET + 2*(3 + 11) + iter*16
            b[i+0+3] = (short)(a[i+0+3] & mask);
            b[i+1+3] = (short)(a[i+1+3] & mask);
            b[i+2+3] = (short)(a[i+2+3] & mask);
            b[i+3+3] = (short)(a[i+3+3] & mask);
        }
        return new Object[]{ a, b };
    }
799
    // Simple stride-1 byte copy-and-mask: always alignable, vectors expected
    // unconditionally (no AlignVector precondition in the IR rule).
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
                  IRNode.AND_VB, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    static Object[] test11aB(byte[] a, byte[] b, byte mask) {
        for (int i = 0; i < RANGE; i++) {
            // always alignable
            b[i+0] = (byte)(a[i+0] & mask);
        }
        return new Object[]{ a, b };
    }
813
    // Short variant of test11aB: init=0, stride=1, no offset -> always alignable.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
                  IRNode.AND_VS, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    static Object[] test11aS(short[] a, short[] b, short mask) {
        for (int i = 0; i < RANGE; i++) {
            // always alignable
            b[i+0] = (short)(a[i+0] & mask);
        }
        return new Object[]{ a, b };
    }
827
    // Int variant of test11aB: init=0, stride=1, no offset -> always alignable.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
                  IRNode.AND_VI, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    static Object[] test11aI(int[] a, int[] b, int mask) {
        for (int i = 0; i < RANGE; i++) {
            // always alignable
            b[i+0] = (int)(a[i+0] & mask);
        }
        return new Object[]{ a, b };
    }
841
    // Long variant of test11aB: init=0, stride=1, no offset -> always alignable.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0",
                  IRNode.AND_VL, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    static Object[] test11aL(long[] a, long[] b, long mask) {
        for (int i = 0; i < RANGE; i++) {
            // always alignable
            b[i+0] = (long)(a[i+0] & mask);
        }
        return new Object[]{ a, b };
    }
855
    // Byte loop with init=1 (instead of 0): still always alignable since load and
    // store use the same index, so vectorization is expected.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
                  IRNode.AND_VB, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    static Object[] test11bB(byte[] a, byte[] b, byte mask) {
        for (int i = 1; i < RANGE; i++) {
            // always alignable
            b[i+0] = (byte)(a[i+0] & mask);
        }
        return new Object[]{ a, b };
    }
869
    // Short variant of test11bB: init=1, same index on load and store -> always alignable.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
                  IRNode.AND_VS, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    static Object[] test11bS(short[] a, short[] b, short mask) {
        for (int i = 1; i < RANGE; i++) {
            // always alignable
            b[i+0] = (short)(a[i+0] & mask);
        }
        return new Object[]{ a, b };
    }
883
    // Int variant of test11bB: init=1, same index on load and store -> always alignable.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
                  IRNode.AND_VI, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    static Object[] test11bI(int[] a, int[] b, int mask) {
        for (int i = 1; i < RANGE; i++) {
            // always alignable
            b[i+0] = (int)(a[i+0] & mask);
        }
        return new Object[]{ a, b };
    }
897
    // Long variant of test11bB: init=1, same index on load and store -> always alignable.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0",
                  IRNode.AND_VL, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    static Object[] test11bL(long[] a, long[] b, long mask) {
        for (int i = 1; i < RANGE; i++) {
            // always alignable
            b[i+0] = (long)(a[i+0] & mask);
        }
        return new Object[]{ a, b };
    }
911
    // Load index is i+1 while store index is i: the 1-byte relative offset means load
    // and store can never both be 8-byte aligned, so vectorization is expected only
    // with AlignVector=false (first rule) and must not happen with AlignVector=true
    // (second rule).
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
                  IRNode.AND_VB, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "false"})
    @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
                  IRNode.AND_VB, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "true"})
    static Object[] test11cB(byte[] a, byte[] b, byte mask) {
        for (int i = 1; i < RANGE-1; i++) {
            // 1 byte offset -> not alignable with AlignVector
            b[i+0] = (byte)(a[i+1] & mask);
        }
        return new Object[]{ a, b };
    }
932
    // Short variant of test11cB: 2-byte relative offset between load and store ->
    // not simultaneously 8-byte alignable, so no vectorization with AlignVector=true.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
                  IRNode.AND_VS, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "false"})
    @IR(counts = {IRNode.LOAD_VECTOR_S, "= 0",
                  IRNode.AND_VS, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "true"})
    static Object[] test11cS(short[] a, short[] b, short mask) {
        for (int i = 1; i < RANGE-1; i++) {
            // 2 byte offset -> not alignable with AlignVector
            b[i+0] = (short)(a[i+1] & mask);
        }
        return new Object[]{ a, b };
    }
953
    // Int variant of test11cB: 4-byte relative offset between load and store ->
    // not simultaneously 8-byte alignable, so no vectorization with AlignVector=true.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
                  IRNode.AND_VI, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "false"})
    @IR(counts = {IRNode.LOAD_VECTOR_I, "= 0",
                  IRNode.AND_VI, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "true"})
    static Object[] test11cI(int[] a, int[] b, int mask) {
        for (int i = 1; i < RANGE-1; i++) {
            // 4 byte offset -> not alignable with AlignVector
            b[i+0] = (int)(a[i+1] & mask);
        }
        return new Object[]{ a, b };
    }
974
    // Long variant of test11cB: the relative offset is a full 8 bytes, which keeps
    // both accesses 8-byte alignable -> vectorizes for any AlignVector setting,
    // hence only a single positive IR rule.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0",
                  IRNode.AND_VL, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    static Object[] test11cL(long[] a, long[] b, long mask) {
        for (int i = 1; i < RANGE-1; i++) {
            // always alignable (8 byte offset)
            b[i+0] = (long)(a[i+1] & mask);
        }
        return new Object[]{ a, b };
    }
988
    // Byte loop with the same runtime-invariant added to both load and store index:
    // the invariant cancels out in the relative alignment, so vectorization is expected.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
                  IRNode.AND_VB, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    static Object[] test11dB(byte[] a, byte[] b, byte mask, int invar) {
        for (int i = 0; i < RANGE; i++) {
            b[i+0+invar] = (byte)(a[i+0+invar] & mask);
        }
        return new Object[]{ a, b };
    }
1001
    // Short variant of test11dB: identical invariant on load and store index.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
                  IRNode.AND_VS, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    static Object[] test11dS(short[] a, short[] b, short mask, int invar) {
        for (int i = 0; i < RANGE; i++) {
            b[i+0+invar] = (short)(a[i+0+invar] & mask);
        }
        return new Object[]{ a, b };
    }
1014
    // Int variant of test11dB: identical invariant on load and store index.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
                  IRNode.AND_VI, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    static Object[] test11dI(int[] a, int[] b, int mask, int invar) {
        for (int i = 0; i < RANGE; i++) {
            b[i+0+invar] = (int)(a[i+0+invar] & mask);
        }
        return new Object[]{ a, b };
    }
1027
    // Long variant of test11dB: identical invariant on load and store index.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0",
                  IRNode.AND_VL, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    static Object[] test11dL(long[] a, long[] b, long mask, int invar) {
        for (int i = 0; i < RANGE; i++) {
            b[i+0+invar] = (long)(a[i+0+invar] & mask);
        }
        return new Object[]{ a, b };
    }
1040
    // Hand-unrolled body of 4 bytes per iteration with stride 6: expects packs of
    // exactly 4 byte-lanes (with a 2-byte gap between iterations). Only checked with
    // AlignVector=false, since the stride-6 address pattern is not generally alignable.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE + "min(max_byte, 4)", "> 0",
                  IRNode.AND_VB, IRNode.VECTOR_SIZE + "min(max_byte, 4)", "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "false"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    static Object[] test12(byte[] a, byte[] b, byte mask) {
        for (int i = 0; i < RANGE/16; i++) {
            // Non-power-of-2 stride. Vectorization of 4 bytes, then 2-bytes gap.
            b[i*6 + 0 ] = (byte)(a[i*6 + 0 ] & mask);
            b[i*6 + 1 ] = (byte)(a[i*6 + 1 ] & mask);
            b[i*6 + 2 ] = (byte)(a[i*6 + 2 ] & mask);
            b[i*6 + 3 ] = (byte)(a[i*6 + 3 ] & mask);
        }
        return new Object[]{ a, b };
    }
1058
    // Mixed int/long increment loop (init=0): both element types must vectorize with
    // the same lane count, min(max_int, max_long). Checked on avx2, and on riscv64
    // with rvv when MaxVectorSize is large enough.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
                  IRNode.LOAD_VECTOR_L, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
                  IRNode.ADD_VI, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
                  IRNode.ADD_VL, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeature = {"avx2", "true"})
    // require avx to ensure vectors are larger than what unrolling produces
    @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
                  IRNode.LOAD_VECTOR_L, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
                  IRNode.ADD_VI, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
                  IRNode.ADD_VL, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"riscv64", "true"},
        applyIfCPUFeature = {"rvv", "true"},
        applyIf = {"MaxVectorSize", ">=32"})
    static Object[] test13aIL(int[] a, long[] b) {
        for (int i = 0; i < RANGE; i++) {
            a[i]++;
            b[i]++;
        }
        return new Object[]{ a, b };
    }
1083
    // Mixed int/byte increment loop (init=0). With compact object headers (base
    // offset 12) the two arrays can never be 8-byte aligned in the same iteration,
    // so the rule only applies when UseCompactObjectHeaders=false or AlignVector=false.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
                  IRNode.LOAD_VECTOR_I, "> 0",
                  IRNode.ADD_VB, "> 0",
                  IRNode.ADD_VI, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
    static Object[] test13aIB(int[] a, byte[] b) {
        for (int i = 0; i < RANGE; i++) {
            // adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 4*iter
            //            = 16 (or 12 if UseCompactObjectHeaders=true)
            a[i]++;
            // adr = base + UNSAFE.ARRAY_BYTE_BASE_OFFSET + 1*iter
            //            = 16 (or 12 if UseCompactObjectHeaders=true)
            b[i]++;
            // For AlignVector, all adr must be 8-byte aligned. Let's see for which iteration this can hold:
            // If UseCompactObjectHeaders=false, the 8-byte aligned iterations are:
            //   a: 0, 2, 4, 6, 8, ...
            //   b: 0, 8, 16, 24, 32, ...
            //   -> Ok, aligns every 8th iteration.
            // If UseCompactObjectHeaders=true, the 8-byte aligned iterations are:
            //   a: 1, 3, 5, 7, 9, ...
            //   b: 4, 12, 20, 28, 36, ...
            //   -> we can never align both vectors!
        }
        return new Object[]{ a, b };
    }
1113
    // Mixed int/short increment loop (init=0). As in test13aIB, compact object
    // headers shift both base offsets to 12, which makes simultaneous 8-byte
    // alignment impossible.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
                  IRNode.LOAD_VECTOR_S, "> 0",
                  IRNode.ADD_VI, "> 0",
                  IRNode.ADD_VS, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
    static Object[] test13aIS(int[] a, short[] b) {
        for (int i = 0; i < RANGE; i++) {
            // adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 4*iter
            //            = 16 (or 12 if UseCompactObjectHeaders=true)
            a[i]++;
            // adr = base + UNSAFE.ARRAY_SHORT_BASE_OFFSET + 2*iter
            //            = 16 (or 12 if UseCompactObjectHeaders=true)
            b[i]++;
            // For AlignVector, all adr must be 8-byte aligned. Let's see for which iteration this can hold:
            // If UseCompactObjectHeaders=false:
            //   a: iter % 2 == 0
            //   b: iter % 4 == 0
            //   -> Ok, aligns every 4th iteration.
            // If UseCompactObjectHeaders=true:
            //   a: iter % 2 = 1
            //   b: iter % 4 = 2
            //   -> we can never align both vectors!
        }
        return new Object[]{ a, b };
    }
1143
    // Increment loop over all four element sizes (byte/short/int/long, init=0).
    // With compact object headers the byte and int arrays already have incompatible
    // alignment phases, so AlignVector=true cannot vectorize.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
                  IRNode.LOAD_VECTOR_S, "> 0",
                  IRNode.LOAD_VECTOR_I, "> 0",
                  IRNode.LOAD_VECTOR_L, "> 0",
                  IRNode.ADD_VB, "> 0",
                  IRNode.ADD_VS, "> 0",
                  IRNode.ADD_VI, "> 0",
                  IRNode.ADD_VL, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
    static Object[] test13aBSIL(byte[] a, short[] b, int[] c, long[] d) {
        for (int i = 0; i < RANGE; i++) {
            // adr = base + UNSAFE.ARRAY_BYTE_BASE_OFFSET + 1*iter
            //            = 16 (or 12 if UseCompactObjectHeaders=true)
            a[i]++;
            // adr = base + UNSAFE.ARRAY_SHORT_BASE_OFFSET + 2*iter
            //            = 16 (or 12 if UseCompactObjectHeaders=true)
            b[i]++;
            // adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 4*iter
            //            = 16 (or 12 if UseCompactObjectHeaders=true)
            c[i]++;
            // adr = base + UNSAFE.ARRAY_LONG_BASE_OFFSET + 8*iter
            //            = 16 (always)
            d[i]++;
            // If AlignVector and UseCompactObjectHeaders, and we want all adr 8-byte aligned:
            //   a: iter % 8 = 4
            //   c: iter % 2 = 1
            //   -> can never align both vectors!
        }
        return new Object[]{ a, b, c, d };
    }
1178
    // Like test13aIL but with init=1: int and long strides stay mutually compatible,
    // so the same lane count min(max_int, max_long) is still expected.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
                  IRNode.LOAD_VECTOR_L, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
                  IRNode.ADD_VI, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
                  IRNode.ADD_VL, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeature = {"avx2", "true"})
    // require avx to ensure vectors are larger than what unrolling produces
    @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
                  IRNode.LOAD_VECTOR_L, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
                  IRNode.ADD_VI, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
                  IRNode.ADD_VL, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"riscv64", "true"},
        applyIfCPUFeature = {"rvv", "true"},
        applyIf = {"MaxVectorSize", ">=32"})
    static Object[] test13bIL(int[] a, long[] b) {
        for (int i = 1; i < RANGE; i++) {
            a[i]++;
            b[i]++;
        }
        return new Object[]{ a, b };
    }
1203
    // Like test13aIB but with init=1: the extra constant offsets (+4 / +1) again make
    // simultaneous 8-byte alignment impossible with compact object headers.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
                  IRNode.LOAD_VECTOR_I, "> 0",
                  IRNode.ADD_VB, "> 0",
                  IRNode.ADD_VI, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
    static Object[] test13bIB(int[] a, byte[] b) {
        for (int i = 1; i < RANGE; i++) {
            // adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 4 + 4*iter
            //            = 16 (or 12 if UseCompactObjectHeaders=true)
            a[i]++;
            // adr = base + UNSAFE.ARRAY_BYTE_BASE_OFFSET + 1 + 1*iter
            //            = 16 (or 12 if UseCompactObjectHeaders=true)
            b[i]++;
            // If AlignVector and UseCompactObjectHeaders, and we want all adr 8-byte aligned:
            //   a: iter % 2 = 0
            //   b: iter % 8 = 3
            //   -> can never align both vectors!
        }
        return new Object[]{ a, b };
    }
1228
    // Like test13aIS but with init=1: with compact object headers the int and short
    // alignment phases (iter%2==0 vs iter%4==1) never coincide.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
                  IRNode.LOAD_VECTOR_S, "> 0",
                  IRNode.ADD_VI, "> 0",
                  IRNode.ADD_VS, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
    static Object[] test13bIS(int[] a, short[] b) {
        for (int i = 1; i < RANGE; i++) {
            // adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 4 + 4*iter
            //            = 16 (or 12 if UseCompactObjectHeaders=true)
            a[i]++;
            // adr = base + UNSAFE.ARRAY_SHORT_BASE_OFFSET + 2 + 2*iter
            //            = 16 (or 12 if UseCompactObjectHeaders=true)
            b[i]++;
            // If AlignVector and UseCompactObjectHeaders, and we want all adr 8-byte aligned:
            //   a: iter % 2 = 0
            //   b: iter % 4 = 1
            //   -> can never align both vectors!
        }
        return new Object[]{ a, b };
    }
1253
    // Like test13aBSIL but with init=1: byte (iter%8==3) and int (iter%2==0) phases
    // never coincide with compact object headers, blocking AlignVector=true.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
                  IRNode.LOAD_VECTOR_S, "> 0",
                  IRNode.LOAD_VECTOR_I, "> 0",
                  IRNode.LOAD_VECTOR_L, "> 0",
                  IRNode.ADD_VB, "> 0",
                  IRNode.ADD_VS, "> 0",
                  IRNode.ADD_VI, "> 0",
                  IRNode.ADD_VL, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
    static Object[] test13bBSIL(byte[] a, short[] b, int[] c, long[] d) {
        for (int i = 1; i < RANGE; i++) {
            // adr = base + UNSAFE.ARRAY_BYTE_BASE_OFFSET + 1 + 1*iter
            //            = 16 (or 12 if UseCompactObjectHeaders=true)
            a[i]++;
            // adr = base + UNSAFE.ARRAY_SHORT_BASE_OFFSET + 2 + 2*iter
            //            = 16 (or 12 if UseCompactObjectHeaders=true)
            b[i]++;
            // adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 4 + 4*iter
            //            = 16 (or 12 if UseCompactObjectHeaders=true)
            c[i]++;
            // adr = base + UNSAFE.ARRAY_LONG_BASE_OFFSET + 8 + 8*iter
            //            = 16 (always)
            d[i]++;
            // If AlignVector and UseCompactObjectHeaders, and we want all adr 8-byte aligned:
            //   a: iter % 8 = 3
            //   c: iter % 2 = 0
            //   -> can never align both vectors!
        }
        return new Object[]{ a, b, c, d };
    }
1288
    // 16 hand-unrolled byte increments with stride 9: loads of one iteration partially
    // overlap the stores of the previous one, so vectorization is not profitable and
    // both IR rules (AlignVector on and off) expect zero vector nodes.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
                  IRNode.ADD_VB, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "false"})
    @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
                  IRNode.ADD_VB, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "true"})
    static Object[] test14aB(byte[] a) {
        // non-power-of-2 stride
        for (int i = 0; i < RANGE-20; i+=9) {
            // Since the stride is shorter than the vector length, there will be always
            // partial overlap of loads with previous stores, this leads to failure in
            // store-to-load-forwarding -> vectorization not profitable.
            a[i+0]++;
            a[i+1]++;
            a[i+2]++;
            a[i+3]++;
            a[i+4]++;
            a[i+5]++;
            a[i+6]++;
            a[i+7]++;
            a[i+8]++;
            a[i+9]++;
            a[i+10]++;
            a[i+11]++;
            a[i+12]++;
            a[i+13]++;
            a[i+14]++;
            a[i+15]++;
        }
        return new Object[]{ a };
    }
1327
    // Same shape as test14aB but with stride 3: overlap of loads with previous stores
    // again defeats store-to-load-forwarding, so no vectorization is expected.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
                  IRNode.ADD_VB, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "false"})
    @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
                  IRNode.ADD_VB, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "true"})
    static Object[] test14bB(byte[] a) {
        // non-power-of-2 stride
        for (int i = 0; i < RANGE-20; i+=3) {
            // Since the stride is shorter than the vector length, there will be always
            // partial overlap of loads with previous stores, this leads to failure in
            // store-to-load-forwarding -> vectorization not profitable.
            a[i+0]++;
            a[i+1]++;
            a[i+2]++;
            a[i+3]++;
            a[i+4]++;
            a[i+5]++;
            a[i+6]++;
            a[i+7]++;
            a[i+8]++;
            a[i+9]++;
            a[i+10]++;
            a[i+11]++;
            a[i+12]++;
            a[i+13]++;
            a[i+14]++;
            a[i+15]++;
        }
        return new Object[]{ a };
    }
1366
    // Same shape as test14aB but with stride 5: overlap of loads with previous stores
    // again defeats store-to-load-forwarding, so no vectorization is expected.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
                  IRNode.ADD_VB, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "false"})
    @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
                  IRNode.ADD_VB, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "true"})
    static Object[] test14cB(byte[] a) {
        // non-power-of-2 stride
        for (int i = 0; i < RANGE-20; i+=5) {
            // Since the stride is shorter than the vector length, there will be always
            // partial overlap of loads with previous stores, this leads to failure in
            // store-to-load-forwarding -> vectorization not profitable.
            a[i+0]++;
            a[i+1]++;
            a[i+2]++;
            a[i+3]++;
            a[i+4]++;
            a[i+5]++;
            a[i+6]++;
            a[i+7]++;
            a[i+8]++;
            a[i+9]++;
            a[i+10]++;
            a[i+11]++;
            a[i+12]++;
            a[i+13]++;
            a[i+14]++;
            a[i+15]++;
        }
        return new Object[]{ a };
    }
1405
    // 8 byte increments with stride 9: no overlap between iterations, so packs of
    // up to 8 byte-lanes form with AlignVector=false; with AlignVector=true the
    // stride-9 address pattern cannot be aligned, so no vectors are expected.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE + "min(max_byte, 8)", "> 0",
                  IRNode.ADD_VB, IRNode.VECTOR_SIZE + "min(max_byte, 8)", "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "false"})
    @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
                  IRNode.ADD_VB, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "true"})
    static Object[] test14dB(byte[] a) {
        // non-power-of-2 stride
        for (int i = 0; i < RANGE-20; i+=9) {
            a[i+0]++;
            a[i+1]++;
            a[i+2]++;
            a[i+3]++;
            a[i+4]++;
            a[i+5]++;
            a[i+6]++;
            a[i+7]++;
        }
        return new Object[]{ a };
    }
1433
    // Same as test14dB but with stride 11: vectorizes (8 byte-lanes) only with
    // AlignVector=false; the odd stride prevents alignment otherwise.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE + "min(max_byte, 8)", "> 0",
                  IRNode.ADD_VB, IRNode.VECTOR_SIZE + "min(max_byte, 8)", "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "false"})
    @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
                  IRNode.ADD_VB, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "true"})
    static Object[] test14eB(byte[] a) {
        // non-power-of-2 stride
        for (int i = 0; i < RANGE-32; i+=11) {
            a[i+0]++;
            a[i+1]++;
            a[i+2]++;
            a[i+3]++;
            a[i+4]++;
            a[i+5]++;
            a[i+6]++;
            a[i+7]++;
        }
        return new Object[]{ a };
    }
1461
    // Same as test14dB but with stride 12: vectorizes (8 byte-lanes) only with
    // AlignVector=false; stride 12 is not a multiple of 8, so alignment fails otherwise.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE + "min(max_byte, 8)", "> 0",
                  IRNode.ADD_VB, IRNode.VECTOR_SIZE + "min(max_byte, 8)", "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "false"})
    @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
                  IRNode.ADD_VB, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "true"})
    static Object[] test14fB(byte[] a) {
        // non-power-of-2 stride
        for (int i = 0; i < RANGE-40; i+=12) {
            a[i+0]++;
            a[i+1]++;
            a[i+2]++;
            a[i+3]++;
            a[i+4]++;
            a[i+5]++;
            a[i+6]++;
            a[i+7]++;
        }
        return new Object[]{ a };
    }
1489
    // Non-power-of-2 index scale (53*i): executed for result verification only;
    // no IR rules because peeling changes the constant offset in ways that are hard
    // to predict modulo the vector width.
    @Test
    // IR rules difficult because of modulo wrapping with offset after peeling.
    static Object[] test15aB(byte[] a) {
        // non-power-of-2 scale
        for (int i = 0; i < RANGE/64-20; i++) {
            a[53*i+0]++;
            a[53*i+1]++;
            a[53*i+2]++;
            a[53*i+3]++;
            a[53*i+4]++;
            a[53*i+5]++;
            a[53*i+6]++;
            a[53*i+7]++;
            a[53*i+8]++;
            a[53*i+9]++;
            a[53*i+10]++;
            a[53*i+11]++;
            a[53*i+12]++;
            a[53*i+13]++;
            a[53*i+14]++;
            a[53*i+15]++;
        }
        return new Object[]{ a };
    }
1514
    // Like test15aB but with scale 25: result-verification only, no IR rules.
    @Test
    // IR rules difficult because of modulo wrapping with offset after peeling.
    static Object[] test15bB(byte[] a) {
        // non-power-of-2 scale
        for (int i = 0; i < RANGE/64-20; i++) {
            a[25*i+0]++;
            a[25*i+1]++;
            a[25*i+2]++;
            a[25*i+3]++;
            a[25*i+4]++;
            a[25*i+5]++;
            a[25*i+6]++;
            a[25*i+7]++;
            a[25*i+8]++;
            a[25*i+9]++;
            a[25*i+10]++;
            a[25*i+11]++;
            a[25*i+12]++;
            a[25*i+13]++;
            a[25*i+14]++;
            a[25*i+15]++;
        }
        return new Object[]{ a };
    }
1539
    // Like test15aB but with scale 19: result-verification only, no IR rules.
    @Test
    // IR rules difficult because of modulo wrapping with offset after peeling.
    static Object[] test15cB(byte[] a) {
        // non-power-of-2 scale
        for (int i = 0; i < RANGE/64-20; i++) {
            a[19*i+0]++;
            a[19*i+1]++;
            a[19*i+2]++;
            a[19*i+3]++;
            a[19*i+4]++;
            a[19*i+5]++;
            a[19*i+6]++;
            a[19*i+7]++;
            a[19*i+8]++;
            a[19*i+9]++;
            a[19*i+10]++;
            a[19*i+11]++;
            a[19*i+12]++;
            a[19*i+13]++;
            a[19*i+14]++;
            a[19*i+15]++;
        }
        return new Object[]{ a };
    }
1564
    // Stress shape with scale 2 over two arrays; no IR rules, only result
    // verification. NOTE(review): per the original comment this shape once provoked
    // "infinite loop issues" in the compiler — presumably during SuperWord analysis;
    // confirm against the bug referenced in the file header.
    @Test
    static Object[] test16a(byte[] a, short[] b) {
        // infinite loop issues
        for (int i = 0; i < RANGE/2-20; i++) {
            a[2*i+0]++;
            a[2*i+1]++;
            a[2*i+2]++;
            a[2*i+3]++;
            a[2*i+4]++;
            a[2*i+5]++;
            a[2*i+6]++;
            a[2*i+7]++;
            a[2*i+8]++;
            a[2*i+9]++;
            a[2*i+10]++;
            a[2*i+11]++;
            a[2*i+12]++;
            a[2*i+13]++;
            a[2*i+14]++;

            b[2*i+0]++;
            b[2*i+1]++;
            b[2*i+2]++;
            b[2*i+3]++;
        }
        return new Object[]{ a, b };
    }
1592
    // Single-array variant of test16a (scale 2, 15 unrolled increments); no IR rules,
    // only result verification of the same historical stress shape.
    @Test
    static Object[] test16b(byte[] a) {
        // infinite loop issues
        for (int i = 0; i < RANGE/2-20; i++) {
            a[2*i+0]++;
            a[2*i+1]++;
            a[2*i+2]++;
            a[2*i+3]++;
            a[2*i+4]++;
            a[2*i+5]++;
            a[2*i+6]++;
            a[2*i+7]++;
            a[2*i+8]++;
            a[2*i+9]++;
            a[2*i+10]++;
            a[2*i+11]++;
            a[2*i+12]++;
            a[2*i+13]++;
            a[2*i+14]++;
        }
        return new Object[]{ a };
    }
1615
    // Unsafe-based long increment at exactly the array element addresses
    // (8-byte aligned): expected to vectorize.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0",
                  IRNode.ADD_VL, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    static Object[] test17a(long[] a) {
        // Unsafe: vectorizes with profiling (not xcomp)
        for (int i = 0; i < RANGE; i++) {
            long adr = UNSAFE.ARRAY_LONG_BASE_OFFSET + 8L * i;
            long v = UNSAFE.getLongUnaligned(a, adr);
            UNSAFE.putLongUnaligned(a, adr, v + 1);
        }
        return new Object[]{ a };
    }
1631
    // Unsafe-based long increment at element address + 1 byte: never alignable.
    // No IR rule; only result verification.
    @Test
    // Difficult to write good IR rule. Modulo calculus overflow can create non-power-of-2 packs.
    static Object[] test17b(long[] a) {
        // Not alignable
        for (int i = 0; i < RANGE-1; i++) {
            long adr = UNSAFE.ARRAY_LONG_BASE_OFFSET + 8L * i + 1;
            long v = UNSAFE.getLongUnaligned(a, adr);
            UNSAFE.putLongUnaligned(a, adr, v + 1);
        }
        return new Object[]{ a };
    }
1643
    // Unsafe access with stride 4 elements but only 2 consecutive longs touched per
    // iteration: expects 2-lane long vectors when MaxVectorSize >= 32.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_L, IRNode.VECTOR_SIZE_2, "> 0",
                  IRNode.ADD_VL, IRNode.VECTOR_SIZE_2, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIf = {"MaxVectorSize", ">=32"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    static Object[] test17c(long[] a) {
        // Unsafe: aligned vectorizes
        for (int i = 0; i < RANGE-1; i+=4) {
            long adr = UNSAFE.ARRAY_LONG_BASE_OFFSET + 8L * i;
            long v0 = UNSAFE.getLongUnaligned(a, adr + 0);
            long v1 = UNSAFE.getLongUnaligned(a, adr + 8);
            UNSAFE.putLongUnaligned(a, adr + 0, v0 + 1);
            UNSAFE.putLongUnaligned(a, adr + 8, v1 + 1);
        }
        return new Object[]{ a };
    }
1662
    // Like test17c but at element address + 1 byte: with AlignVector=false and a
    // wide enough vector (>= 64 bytes) 2-lane packs still form; with
    // AlignVector=true nothing vectorizes because the accesses are unalignable.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_L, IRNode.VECTOR_SIZE_2, "> 0",
                  IRNode.ADD_VL, IRNode.VECTOR_SIZE_2, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfCPUFeatureOr = {"avx512", "true", "asimd", "true", "rvv", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=64"})
    // Ensure vector width is large enough to fit 64 byte for longs:
    // The offsets are: 25, 33, 57, 65
    // In modulo 32:    25,  1, 25,  1 -> does not vectorize
    // In modulo 64:    25, 33, 57,  1 -> at least first pair vectorizes
    // This problem is because we compute modulo vector width in memory_alignment.
    @IR(counts = {IRNode.LOAD_VECTOR_L, "= 0",
                  IRNode.ADD_VL, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "true"})
    static Object[] test17d(long[] a) {
        // Not alignable
        for (int i = 0; i < RANGE-1; i+=4) {
            long adr = UNSAFE.ARRAY_LONG_BASE_OFFSET + 8L * i + 1;
            long v0 = UNSAFE.getLongUnaligned(a, adr + 0);
            long v1 = UNSAFE.getLongUnaligned(a, adr + 8);
            UNSAFE.putLongUnaligned(a, adr + 0, v0 + 1);
            UNSAFE.putLongUnaligned(a, adr + 8, v1 + 1);
        }
        return new Object[]{ a };
    }
1692
    // Constant-index stores (a[0], a[1]) interleaved with an iv-dependent store:
    // exercises memory references with iv scale = 0. No IR rule; result-verified only.
    @Test
    static Object[] test18a(byte[] a, int[] b) {
        // scale = 0 --> no iv
        for (int i = 0; i < RANGE; i++) {
            a[0] = 1;
            b[i] = 2;
            a[1] = 1;
        }
        return new Object[]{ a, b };
    }
1703
    // Variant of test18a with constant indices 1 and 2 instead of 0 and 1.
    @Test
    static Object[] test18b(byte[] a, int[] b) {
        // scale = 0 --> no iv
        for (int i = 0; i < RANGE; i++) {
            a[1] = 1;
            b[i] = 2;
            a[2] = 1;
        }
        return new Object[]{ a, b };
    }
1714
    // Copy loop with a down-counting iv: i decreases but the accessed index
    // RANGE_FINAL - i increases, i.e. iv scale is effectively negative.
    // No IR rule; result-verified only.
    @Test
    static Object[] test19(int[] a, int[] b) {
        for (int i = 5000; i > 0; i--) {
            a[RANGE_FINAL - i] = b[RANGE_FINAL - i];
        }
        return new Object[]{ a, b };
    }
1722
    // Scale-2 loop with constant offset +30: regression shape whose alignment
    // check passes easily but whose alignment *calculation* used to be wrong.
    @Test
    static Object[] test20(byte[] a) {
        // Example where it is easy to pass alignment check,
        // but used to fail the alignment calculation
        for (int i = 1; i < RANGE/2-50; i++) {
            a[2*i+0+30]++;
            a[2*i+1+30]++;
            a[2*i+2+30]++;
            a[2*i+3+30]++;
        }
        return new Object[]{ a };
    }
1735 }