1 /*
2 * Copyright (c) 2024, 2025, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 */
23
24 package compiler.loopopts.superword;
25
26 import compiler.lib.ir_framework.*;
27 import jdk.test.lib.Utils;
28 import jdk.test.whitebox.WhiteBox;
29 import jdk.internal.misc.Unsafe;
30 import java.lang.reflect.Array;
31 import java.util.Map;
32 import java.util.HashMap;
33 import java.util.Random;
34 import java.nio.ByteOrder;
35
36 /*
37 * @test id=NoAlignVector
38 * @bug 8310190
39 * @key randomness
40 * @summary Test AlignVector with various loop init, stride, scale, invar, etc.
41 * @modules java.base/jdk.internal.misc
42 * @library /test/lib /
43 * @run driver compiler.loopopts.superword.TestAlignVector NoAlignVector
44 */
45
46 /*
47 * @test id=AlignVector
48 * @bug 8310190
49 * @summary Test AlignVector with various loop init, stride, scale, invar, etc.
50 * @modules java.base/jdk.internal.misc
51 * @library /test/lib /
52 * @run driver compiler.loopopts.superword.TestAlignVector AlignVector
53 */
54
55 /*
56 * @test id=VerifyAlignVector
57 * @bug 8310190
58 * @summary Test AlignVector with various loop init, stride, scale, invar, etc.
59 * @modules java.base/jdk.internal.misc
60 * @library /test/lib /
61 * @run driver compiler.loopopts.superword.TestAlignVector VerifyAlignVector
62 */
63
64 /*
65 * @test id=NoAlignVector-COH
66 * @bug 8310190
67 * @summary Test AlignVector with various loop init, stride, scale, invar, etc.
68 * @modules java.base/jdk.internal.misc
69 * @library /test/lib /
70 * @run driver compiler.loopopts.superword.TestAlignVector NoAlignVector-COH
71 */
72
73 /*
74 * @test id=VerifyAlignVector-COH
75 * @bug 8310190
76 * @summary Test AlignVector with various loop init, stride, scale, invar, etc.
77 * @modules java.base/jdk.internal.misc
78 * @library /test/lib /
79 * @run driver compiler.loopopts.superword.TestAlignVector VerifyAlignVector-COH
80 */
81
82 public class TestAlignVector {
83 static int RANGE = 1024*8;
84 static int RANGE_FINAL = 1024*8;
85 private static final Unsafe UNSAFE = Unsafe.getUnsafe();
86 private static final Random RANDOM = Utils.getRandomInstance();
87
88 // Inputs
89 byte[] aB;
90 byte[] bB;
91 byte mB = (byte)31;
92 short[] aS;
93 short[] bS;
94 short mS = (short)0xF0F0;
95 int[] aI;
96 int[] bI;
97 int mI = 0xF0F0F0F0;
98 long[] aL;
99 long[] bL;
100 long mL = 0xF0F0F0F0F0F0F0F0L;
101
102 // List of tests
103 Map<String,TestFunction> tests = new HashMap<String,TestFunction>();
104
105 // List of gold, the results from the first run before compilation
106 Map<String,Object[]> golds = new HashMap<String,Object[]>();
107
108 interface TestFunction {
109 Object[] run();
110 }
111
112 public static void main(String[] args) {
113 TestFramework framework = new TestFramework(TestAlignVector.class);
114 framework.addFlags("--add-modules", "java.base", "--add-exports", "java.base/jdk.internal.misc=ALL-UNNAMED",
115 "-XX:+IgnoreUnrecognizedVMOptions", "-XX:LoopUnrollLimit=250");
116
117 switch (args[0]) {
118 case "NoAlignVector" -> { framework.addFlags("-XX:-UseCompactObjectHeaders", "-XX:-AlignVector"); }
119 case "AlignVector" -> { framework.addFlags("-XX:-UseCompactObjectHeaders", "-XX:+AlignVector"); }
120 case "VerifyAlignVector" -> { framework.addFlags("-XX:-UseCompactObjectHeaders", "-XX:+AlignVector", "-XX:+IgnoreUnrecognizedVMOptions", "-XX:+VerifyAlignVector"); }
121 case "NoAlignVector-COH" -> { framework.addFlags("-XX:+UseCompactObjectHeaders", "-XX:-AlignVector"); }
122 case "VerifyAlignVector-COH" -> { framework.addFlags("-XX:+UseCompactObjectHeaders", "-XX:+AlignVector", "-XX:+IgnoreUnrecognizedVMOptions", "-XX:+VerifyAlignVector"); }
123 default -> { throw new RuntimeException("Test argument not recognized: " + args[0]); }
124 }
125 framework.start();
126 }
127
128 public TestAlignVector() {
129 // Generate input once
130 aB = generateB();
131 bB = generateB();
132 aS = generateS();
133 bS = generateS();
134 aI = generateI();
135 bI = generateI();
136 aL = generateL();
137 bL = generateL();
138
139 // Add all tests to list
140 tests.put("test0", () -> { return test0(aB.clone(), bB.clone(), mB); });
141 tests.put("test1a", () -> { return test1a(aB.clone(), bB.clone(), mB); });
142 tests.put("test1b", () -> { return test1b(aB.clone(), bB.clone(), mB); });
143 tests.put("test2", () -> { return test2(aB.clone(), bB.clone(), mB); });
144 tests.put("test3", () -> { return test3(aB.clone(), bB.clone(), mB); });
145 tests.put("test4", () -> { return test4(aB.clone(), bB.clone(), mB); });
146 tests.put("test5", () -> { return test5(aB.clone(), bB.clone(), mB, 0); });
147 tests.put("test6", () -> { return test6(aB.clone(), bB.clone(), mB); });
148 tests.put("test7", () -> { return test7(aS.clone(), bS.clone(), mS); });
149 tests.put("test8", () -> { return test8(aB.clone(), bB.clone(), mB, 0); });
150 tests.put("test8", () -> { return test8(aB.clone(), bB.clone(), mB, 1); });
151 tests.put("test9", () -> { return test9(aB.clone(), bB.clone(), mB); });
152
153 tests.put("test10a", () -> { return test10a(aB.clone(), bB.clone(), mB); });
154 tests.put("test10b", () -> { return test10b(aB.clone(), bB.clone(), mB); });
155 tests.put("test10c", () -> { return test10c(aS.clone(), bS.clone(), mS); });
156 tests.put("test10d", () -> { return test10d(aS.clone(), bS.clone(), mS); });
157 tests.put("test10e", () -> { return test10e(aS.clone(), bS.clone(), mS); });
158
159 tests.put("test11aB", () -> { return test11aB(aB.clone(), bB.clone(), mB); });
160 tests.put("test11aS", () -> { return test11aS(aS.clone(), bS.clone(), mS); });
161 tests.put("test11aI", () -> { return test11aI(aI.clone(), bI.clone(), mI); });
162 tests.put("test11aL", () -> { return test11aL(aL.clone(), bL.clone(), mL); });
163
164 tests.put("test11bB", () -> { return test11bB(aB.clone(), bB.clone(), mB); });
165 tests.put("test11bS", () -> { return test11bS(aS.clone(), bS.clone(), mS); });
166 tests.put("test11bI", () -> { return test11bI(aI.clone(), bI.clone(), mI); });
167 tests.put("test11bL", () -> { return test11bL(aL.clone(), bL.clone(), mL); });
168
169 tests.put("test11cB", () -> { return test11cB(aB.clone(), bB.clone(), mB); });
170 tests.put("test11cS", () -> { return test11cS(aS.clone(), bS.clone(), mS); });
171 tests.put("test11cI", () -> { return test11cI(aI.clone(), bI.clone(), mI); });
172 tests.put("test11cL", () -> { return test11cL(aL.clone(), bL.clone(), mL); });
173
174 tests.put("test11dB", () -> { return test11dB(aB.clone(), bB.clone(), mB, 0); });
175 tests.put("test11dS", () -> { return test11dS(aS.clone(), bS.clone(), mS, 0); });
176 tests.put("test11dI", () -> { return test11dI(aI.clone(), bI.clone(), mI, 0); });
177 tests.put("test11dL", () -> { return test11dL(aL.clone(), bL.clone(), mL, 0); });
178
179 tests.put("test12", () -> { return test12(aB.clone(), bB.clone(), mB); });
180
181 tests.put("test13aIL", () -> { return test13aIL(aI.clone(), aL.clone()); });
182 tests.put("test13aIB", () -> { return test13aIB(aI.clone(), aB.clone()); });
183 tests.put("test13aIS", () -> { return test13aIS(aI.clone(), aS.clone()); });
184 tests.put("test13aBSIL", () -> { return test13aBSIL(aB.clone(), aS.clone(), aI.clone(), aL.clone()); });
185
186 tests.put("test13bIL", () -> { return test13bIL(aI.clone(), aL.clone()); });
187 tests.put("test13bIB", () -> { return test13bIB(aI.clone(), aB.clone()); });
188 tests.put("test13bIS", () -> { return test13bIS(aI.clone(), aS.clone()); });
189 tests.put("test13bBSIL", () -> { return test13bBSIL(aB.clone(), aS.clone(), aI.clone(), aL.clone()); });
190
191 tests.put("test14aB", () -> { return test14aB(aB.clone()); });
192 tests.put("test14bB", () -> { return test14bB(aB.clone()); });
193 tests.put("test14cB", () -> { return test14cB(aB.clone()); });
194 tests.put("test14dB", () -> { return test14dB(aB.clone()); });
195 tests.put("test14eB", () -> { return test14eB(aB.clone()); });
196 tests.put("test14fB", () -> { return test14fB(aB.clone()); });
197
198 tests.put("test15aB", () -> { return test15aB(aB.clone()); });
199 tests.put("test15bB", () -> { return test15bB(aB.clone()); });
200 tests.put("test15cB", () -> { return test15cB(aB.clone()); });
201
202 tests.put("test16a", () -> { return test16a(aB.clone(), aS.clone()); });
203 tests.put("test16b", () -> { return test16b(aB.clone()); });
204
205 tests.put("test17a", () -> { return test17a(aL.clone()); });
206 tests.put("test17b", () -> { return test17b(aL.clone()); });
207 tests.put("test17c", () -> { return test17c(aL.clone()); });
208 tests.put("test17d", () -> { return test17d(aL.clone()); });
209
210 tests.put("test18a", () -> { return test18a(aB.clone(), aI.clone()); });
211 tests.put("test18b", () -> { return test18b(aB.clone(), aI.clone()); });
212
213 tests.put("test19", () -> { return test19(aI.clone(), bI.clone()); });
214 tests.put("test20", () -> { return test20(aB.clone()); });
215
216 // Compute gold value for all test methods before compilation
217 for (Map.Entry<String,TestFunction> entry : tests.entrySet()) {
218 String name = entry.getKey();
219 TestFunction test = entry.getValue();
220 Object[] gold = test.run();
221 golds.put(name, gold);
222 }
223 }
224
225 @Warmup(100)
226 @Run(test = {"test0",
227 "test1a",
228 "test1b",
229 "test2",
230 "test3",
231 "test4",
232 "test5",
233 "test6",
234 "test7",
235 "test8",
236 "test9",
237 "test10a",
238 "test10b",
239 "test10c",
240 "test10d",
241 "test10e",
242 "test11aB",
243 "test11aS",
244 "test11aI",
245 "test11aL",
246 "test11bB",
247 "test11bS",
248 "test11bI",
249 "test11bL",
250 "test11cB",
251 "test11cS",
252 "test11cI",
253 "test11cL",
254 "test11dB",
255 "test11dS",
256 "test11dI",
257 "test11dL",
258 "test12",
259 "test13aIL",
260 "test13aIB",
261 "test13aIS",
262 "test13aBSIL",
263 "test13bIL",
264 "test13bIB",
265 "test13bIS",
266 "test13bBSIL",
267 "test14aB",
268 "test14bB",
269 "test14cB",
270 "test14dB",
271 "test14eB",
272 "test14fB",
273 "test15aB",
274 "test15bB",
275 "test15cB",
276 "test16a",
277 "test16b",
278 "test17a",
279 "test17b",
280 "test17c",
281 "test17d",
282 "test18a",
283 "test18b",
284 "test19",
285 "test20"})
286 public void runTests() {
287 for (Map.Entry<String,TestFunction> entry : tests.entrySet()) {
288 String name = entry.getKey();
289 TestFunction test = entry.getValue();
290 // Recall gold value from before compilation
291 Object[] gold = golds.get(name);
292 // Compute new result
293 Object[] result = test.run();
294 // Compare gold and new result
295 verify(name, gold, result);
296 }
297 }
298
299 static byte[] generateB() {
300 byte[] a = new byte[RANGE];
301 for (int i = 0; i < a.length; i++) {
302 a[i] = (byte)RANDOM.nextInt();
303 }
304 return a;
305 }
306
307 static short[] generateS() {
308 short[] a = new short[RANGE];
309 for (int i = 0; i < a.length; i++) {
310 a[i] = (short)RANDOM.nextInt();
311 }
312 return a;
313 }
314
315 static int[] generateI() {
316 int[] a = new int[RANGE];
317 for (int i = 0; i < a.length; i++) {
318 a[i] = RANDOM.nextInt();
319 }
320 return a;
321 }
322
323 static long[] generateL() {
324 long[] a = new long[RANGE];
325 for (int i = 0; i < a.length; i++) {
326 a[i] = RANDOM.nextLong();
327 }
328 return a;
329 }
330
331 static void verify(String name, Object[] gold, Object[] result) {
332 if (gold.length != result.length) {
333 throw new RuntimeException("verify " + name + ": not the same number of outputs: gold.length = " +
334 gold.length + ", result.length = " + result.length);
335 }
336 for (int i = 0; i < gold.length; i++) {
337 Object g = gold[i];
338 Object r = result[i];
339 if (g.getClass() != r.getClass() || !g.getClass().isArray() || !r.getClass().isArray()) {
340 throw new RuntimeException("verify " + name + ": must both be array of same type:" +
341 " gold[" + i + "].getClass() = " + g.getClass().getSimpleName() +
342 " result[" + i + "].getClass() = " + r.getClass().getSimpleName());
343 }
344 if (g == r) {
345 throw new RuntimeException("verify " + name + ": should be two separate arrays (with identical content):" +
346 " gold[" + i + "] == result[" + i + "]");
347 }
348 if (Array.getLength(g) != Array.getLength(r)) {
349 throw new RuntimeException("verify " + name + ": arrays must have same length:" +
350 " gold[" + i + "].length = " + Array.getLength(g) +
351 " result[" + i + "].length = " + Array.getLength(r));
352 }
353 Class c = g.getClass().getComponentType();
354 if (c == byte.class) {
355 verifyB(name, i, (byte[])g, (byte[])r);
356 } else if (c == short.class) {
357 verifyS(name, i, (short[])g, (short[])r);
358 } else if (c == int.class) {
359 verifyI(name, i, (int[])g, (int[])r);
360 } else if (c == long.class) {
361 verifyL(name, i, (long[])g, (long[])r);
362 } else {
363 throw new RuntimeException("verify " + name + ": array type not supported for verify:" +
364 " gold[" + i + "].getClass() = " + g.getClass().getSimpleName() +
365 " result[" + i + "].getClass() = " + r.getClass().getSimpleName());
366 }
367 }
368 }
369
370 static void verifyB(String name, int i, byte[] g, byte[] r) {
371 for (int j = 0; j < g.length; j++) {
372 if (g[j] != r[j]) {
373 throw new RuntimeException("verify " + name + ": arrays must have same content:" +
374 " gold[" + i + "][" + j + "] = " + g[j] +
375 " result[" + i + "][" + j + "] = " + r[j]);
376 }
377 }
378 }
379
380 static void verifyS(String name, int i, short[] g, short[] r) {
381 for (int j = 0; j < g.length; j++) {
382 if (g[j] != r[j]) {
383 throw new RuntimeException("verify " + name + ": arrays must have same content:" +
384 " gold[" + i + "][" + j + "] = " + g[j] +
385 " result[" + i + "][" + j + "] = " + r[j]);
386 }
387 }
388 }
389
390 static void verifyI(String name, int i, int[] g, int[] r) {
391 for (int j = 0; j < g.length; j++) {
392 if (g[j] != r[j]) {
393 throw new RuntimeException("verify " + name + ": arrays must have same content:" +
394 " gold[" + i + "][" + j + "] = " + g[j] +
395 " result[" + i + "][" + j + "] = " + r[j]);
396 }
397 }
398 }
399
400 static void verifyL(String name, int i, long[] g, long[] r) {
401 for (int j = 0; j < g.length; j++) {
402 if (g[j] != r[j]) {
403 throw new RuntimeException("verify " + name + ": arrays must have same content:" +
404 " gold[" + i + "][" + j + "] = " + g[j] +
405 " result[" + i + "][" + j + "] = " + r[j]);
406 }
407 }
408 }
409
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIf = {"MaxVectorSize", ">=8"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    // Baseline case: a 4-element pack starting at offset 0 with stride 8, so the
    // pack address is alignable; vectorization is expected with or without AlignVector.
    static Object[] test0(byte[] a, byte[] b, byte mask) {
        for (int i = 0; i < RANGE; i+=8) {
            // Safe to vectorize with AlignVector
            b[i+0] = (byte)(a[i+0] & mask); // offset 0, align 0
            b[i+1] = (byte)(a[i+1] & mask);
            b[i+2] = (byte)(a[i+2] & mask);
            b[i+3] = (byte)(a[i+3] & mask);
        }
        return new Object[]{ a, b };
    }
427
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
                  IRNode.AND_VB, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
        // UNSAFE.ARRAY_BYTE_BASE_OFFSET = 16, but with compact object headers UNSAFE.ARRAY_BYTE_BASE_OFFSET=12.
        // If AlignVector=true, we need the offset to be 8-byte aligned, else the vectors are filtered out.
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
    // Full 8-element pack starting at index 0: alignable when the array base offset
    // itself is 8-byte aligned (i.e. without compact object headers).
    static Object[] test1a(byte[] a, byte[] b, byte mask) {
        for (int i = 0; i < RANGE; i+=8) {
            b[i+0] = (byte)(a[i+0] & mask); // adr = base + UNSAFE.ARRAY_BYTE_BASE_OFFSET + 0 + iter*8
            b[i+1] = (byte)(a[i+1] & mask);
            b[i+2] = (byte)(a[i+2] & mask);
            b[i+3] = (byte)(a[i+3] & mask);
            b[i+4] = (byte)(a[i+4] & mask);
            b[i+5] = (byte)(a[i+5] & mask);
            b[i+6] = (byte)(a[i+6] & mask);
            b[i+7] = (byte)(a[i+7] & mask);
        }
        return new Object[]{ a, b };
    }

    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
                  IRNode.AND_VB, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfOr = {"UseCompactObjectHeaders", "true", "AlignVector", "false"},
        // UNSAFE.ARRAY_BYTE_BASE_OFFSET = 16, but with compact object headers UNSAFE.ARRAY_BYTE_BASE_OFFSET=12.
        // If AlignVector=true, we need the offset to be 8-byte aligned, else the vectors are filtered out.
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
    // Mirror of test1a with init = 4: the extra +4 compensates for the smaller
    // 12-byte base offset under compact object headers, making 12 + 4 = 16 aligned.
    static Object[] test1b(byte[] a, byte[] b, byte mask) {
        for (int i = 4; i < RANGE-8; i+=8) {
            b[i+0] = (byte)(a[i+0] & mask); // adr = base + UNSAFE.ARRAY_BYTE_BASE_OFFSET + 4 + iter*8
            b[i+1] = (byte)(a[i+1] & mask);
            b[i+2] = (byte)(a[i+2] & mask);
            b[i+3] = (byte)(a[i+3] & mask);
            b[i+4] = (byte)(a[i+4] & mask);
            b[i+5] = (byte)(a[i+5] & mask);
            b[i+6] = (byte)(a[i+6] & mask);
            b[i+7] = (byte)(a[i+7] & mask);
        }
        return new Object[]{ a, b };
    }
473
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
                  IRNode.AND_VB, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "true"})
    // Pack starts at constant offset 3: vectorizes without AlignVector, but with
    // AlignVector the misaligned pack must be rejected (second @IR rule).
    static Object[] test2(byte[] a, byte[] b, byte mask) {
        for (int i = 0; i < RANGE; i+=8) {
            // Cannot align with AlignVector: 3 + x * 8 % 8 = 3
            b[i+3] = (byte)(a[i+3] & mask); // at alignment 3
            b[i+4] = (byte)(a[i+4] & mask);
            b[i+5] = (byte)(a[i+5] & mask);
            b[i+6] = (byte)(a[i+6] & mask);
        }
        return new Object[]{ a, b };
    }

    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
                  IRNode.AND_VB, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "true"})
    // Like test2, but with an additional lone store at offset 0 that competes as
    // the best alignment reference against the 4-pack at offset 3.
    static Object[] test3(byte[] a, byte[] b, byte mask) {
        for (int i = 0; i < RANGE; i+=8) {
            // Cannot align with AlignVector: 3 + x * 8 % 8 = 3

            // Problematic for AlignVector
            b[i+0] = (byte)(a[i+0] & mask); // best_memref, align 0

            b[i+3] = (byte)(a[i+3] & mask); // pack at offset 3 bytes
            b[i+4] = (byte)(a[i+4] & mask);
            b[i+5] = (byte)(a[i+5] & mask);
            b[i+6] = (byte)(a[i+6] & mask);
        }
        return new Object[]{ a, b };
    }
525
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_8, "> 0",
                  IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.AND_VB, IRNode.VECTOR_SIZE_8, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=16"})
    @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_8, "= 0",// unaligned
                  IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.AND_VB, IRNode.VECTOR_SIZE_8, "= 0",// unaligned
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">=16"})
    // Two packs per 16-byte iteration: a 4-pack at offset 0 (alignable) and an
    // 8-pack at offset 5 (unalignable). With AlignVector only the 8-pack is dropped.
    static Object[] test4(byte[] a, byte[] b, byte mask) {
        for (int i = 0; i < RANGE/16; i++) {
            // Problematic for AlignVector
            b[i*16 + 0 ] = (byte)(a[i*16 + 0 ] & mask); // 4 pack, 0 aligned
            b[i*16 + 1 ] = (byte)(a[i*16 + 1 ] & mask);
            b[i*16 + 2 ] = (byte)(a[i*16 + 2 ] & mask);
            b[i*16 + 3 ] = (byte)(a[i*16 + 3 ] & mask);

            b[i*16 + 5 ] = (byte)(a[i*16 + 5 ] & mask); // 8 pack, 5 aligned
            b[i*16 + 6 ] = (byte)(a[i*16 + 6 ] & mask);
            b[i*16 + 7 ] = (byte)(a[i*16 + 7 ] & mask);
            b[i*16 + 8 ] = (byte)(a[i*16 + 8 ] & mask);
            b[i*16 + 9 ] = (byte)(a[i*16 + 9 ] & mask);
            b[i*16 + 10] = (byte)(a[i*16 + 10] & mask);
            b[i*16 + 11] = (byte)(a[i*16 + 11] & mask);
            b[i*16 + 12] = (byte)(a[i*16 + 12] & mask);
        }
        return new Object[]{ a, b };
    }
562
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
                  IRNode.AND_VB, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "true"})
    // The loop-invariant 'inv' enters the address expression, so alignment cannot
    // be proven at compile time; with AlignVector the packs must be rejected.
    static Object[] test5(byte[] a, byte[] b, byte mask, int inv) {
        for (int i = 0; i < RANGE; i+=8) {
            // Cannot align with AlignVector because of invariant
            b[i+inv+0] = (byte)(a[i+inv+0] & mask);

            b[i+inv+3] = (byte)(a[i+inv+3] & mask);
            b[i+inv+4] = (byte)(a[i+inv+4] & mask);
            b[i+inv+5] = (byte)(a[i+inv+5] & mask);
            b[i+inv+6] = (byte)(a[i+inv+6] & mask);
        }
        return new Object[]{ a, b };
    }

    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
                  IRNode.AND_VB, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "true"})
    // Scale 4 with stride 2 on i produces an effective odd byte offset pattern,
    // which AlignVector cannot align; without it the 4-packs vectorize.
    static Object[] test6(byte[] a, byte[] b, byte mask) {
        for (int i = 0; i < RANGE/8; i+=2) {
            // Cannot align with AlignVector because offset is odd
            b[i*4+0] = (byte)(a[i*4+0] & mask);

            b[i*4+3] = (byte)(a[i*4+3] & mask);
            b[i*4+4] = (byte)(a[i*4+4] & mask);
            b[i*4+5] = (byte)(a[i*4+5] & mask);
            b[i*4+6] = (byte)(a[i*4+6] & mask);
        }
        return new Object[]{ a, b };
    }
614
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.AND_VS, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=16"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
    @IR(counts = {IRNode.LOAD_VECTOR_S, "= 0",
                  IRNode.AND_VS, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        // NOTE(review): the positive rule above requires avx2 while this negative
        // rule applies from sse4.1 — confirm the asymmetry is intended.
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "true"})
    // Short-element variant of test6: same odd-offset pattern, scaled by 2 bytes.
    static Object[] test7(short[] a, short[] b, short mask) {
        for (int i = 0; i < RANGE/8; i+=2) {
            // Cannot align with AlignVector because offset is odd
            b[i*4+0] = (short)(a[i*4+0] & mask);

            b[i*4+3] = (short)(a[i*4+3] & mask);
            b[i*4+4] = (short)(a[i*4+4] & mask);
            b[i*4+5] = (short)(a[i*4+5] & mask);
            b[i*4+6] = (short)(a[i*4+6] & mask);
        }
        return new Object[]{ a, b };
    }
640
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
                  IRNode.AND_VB, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "true"})
    // The loop init is a parameter, so it becomes a loop invariant in the address;
    // AlignVector cannot prove alignment and must reject the packs.
    static Object[] test8(byte[] a, byte[] b, byte mask, int init) {
        for (int i = init; i < RANGE; i+=8) {
            // Cannot align with AlignVector because of invariant (variable init becomes invar)
            b[i+0] = (byte)(a[i+0] & mask);

            b[i+3] = (byte)(a[i+3] & mask);
            b[i+4] = (byte)(a[i+4] & mask);
            b[i+5] = (byte)(a[i+5] & mask);
            b[i+6] = (byte)(a[i+6] & mask);
        }
        return new Object[]{ a, b };
    }

    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIf = {"MaxVectorSize", ">=8"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    // Contrast to test8: a known constant init, so the pre-loop can adjust the
    // iv and the packs stay alignable even with AlignVector.
    static Object[] test9(byte[] a, byte[] b, byte mask) {
        // known non-zero init value does not affect offset, but has implicit effect on iv
        for (int i = 13; i < RANGE-8; i+=8) {
            b[i+0] = (byte)(a[i+0] & mask);

            b[i+3] = (byte)(a[i+3] & mask);
            b[i+4] = (byte)(a[i+4] & mask);
            b[i+5] = (byte)(a[i+5] & mask);
            b[i+6] = (byte)(a[i+6] & mask);
        }
        return new Object[]{ a, b };
    }
686
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"})
    @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
                  IRNode.AND_VB, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "true"})
    static Object[] test10a(byte[] a, byte[] b, byte mask) {
        // This is not alignable with pre-loop, because of odd init.
        for (int i = 3; i < RANGE-8; i+=8) {
            b[i+0] = (byte)(a[i+0] & mask);
            b[i+1] = (byte)(a[i+1] & mask);
            b[i+2] = (byte)(a[i+2] & mask);
            b[i+3] = (byte)(a[i+3] & mask);
        }
        return new Object[]{ a, b };
    }

    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"})
    @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
                  IRNode.AND_VB, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "true"})
    // Same shape as test10a but with a larger odd init (13 instead of 3).
    static Object[] test10b(byte[] a, byte[] b, byte mask) {
        // This is not alignable with pre-loop, because of odd init.
        // Seems not correctly handled.
        for (int i = 13; i < RANGE-8; i+=8) {
            b[i+0] = (byte)(a[i+0] & mask);
            b[i+1] = (byte)(a[i+1] & mask);
            b[i+2] = (byte)(a[i+2] & mask);
            b[i+3] = (byte)(a[i+3] & mask);
        }
        return new Object[]{ a, b };
    }
735
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.AND_VS, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=16"})
    @IR(counts = {IRNode.LOAD_VECTOR_S, "= 0",
                  IRNode.AND_VS, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "true"})
    // Short-element version of test10b: odd init, not alignable by the pre-loop.
    static Object[] test10c(short[] a, short[] b, short mask) {
        // This is not alignable with pre-loop, because of odd init.
        // Seems not correctly handled with MaxVectorSize >= 32.
        for (int i = 13; i < RANGE-8; i+=8) {
            b[i+0] = (short)(a[i+0] & mask);
            b[i+1] = (short)(a[i+1] & mask);
            b[i+2] = (short)(a[i+2] & mask);
            b[i+3] = (short)(a[i+3] & mask);
        }
        return new Object[]{ a, b };
    }

    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.AND_VS, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfAnd = {"MaxVectorSize", ">=16", "UseCompactObjectHeaders", "false"},
        // UNSAFE.ARRAY_BYTE_BASE_OFFSET = 16, but with compact object headers UNSAFE.ARRAY_BYTE_BASE_OFFSET=12.
        // If AlignVector=true, we need the offset to be 8-byte aligned, else the vectors are filtered out.
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
    // Constant +3 added to an init of 13: the combined offset is alignable when
    // the array base offset is 16 (i.e. without compact object headers).
    static Object[] test10d(short[] a, short[] b, short mask) {
        for (int i = 13; i < RANGE-16; i+=8) {
            // adr = base + UNSAFE.ARRAY_SHORT_BASE_OFFSET + 2*(3 + 13) + iter*16
            b[i+0+3] = (short)(a[i+0+3] & mask);
            b[i+1+3] = (short)(a[i+1+3] & mask);
            b[i+2+3] = (short)(a[i+2+3] & mask);
            b[i+3+3] = (short)(a[i+3+3] & mask);
        }
        return new Object[]{ a, b };
    }

    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.AND_VS, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfAnd = {"MaxVectorSize", ">=16", "UseCompactObjectHeaders", "true"},
        // UNSAFE.ARRAY_BYTE_BASE_OFFSET = 16, but with compact object headers UNSAFE.ARRAY_BYTE_BASE_OFFSET=12.
        // If AlignVector=true, we need the offset to be 8-byte aligned, else the vectors are filtered out.
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
    // Counterpart of test10d for compact object headers: init 11 instead of 13
    // compensates for the smaller 12-byte base offset.
    static Object[] test10e(short[] a, short[] b, short mask) {
        for (int i = 11; i < RANGE-16; i+=8) {
            // adr = base + UNSAFE.ARRAY_SHORT_BASE_OFFSET + 2*(3 + 11) + iter*16
            b[i+0+3] = (short)(a[i+0+3] & mask);
            b[i+1+3] = (short)(a[i+1+3] & mask);
            b[i+2+3] = (short)(a[i+2+3] & mask);
            b[i+3+3] = (short)(a[i+3+3] & mask);
        }
        return new Object[]{ a, b };
    }
800
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
                  IRNode.AND_VB, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    // Simple byte "and mask" copy with init 0, scale 1, no offset:
    // alignable regardless of AlignVector, so vectorization is always expected.
    static Object[] test11aB(byte[] a, byte[] b, byte mask) {
        for (int i = 0; i < RANGE; i++) {
            // always alignable
            b[i+0] = (byte)(a[i+0] & mask);
        }
        return new Object[]{ a, b };
    }
814
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
                  IRNode.AND_VS, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    // Short variant of test11aB: init 0, no offset -> always alignable.
    static Object[] test11aS(short[] a, short[] b, short mask) {
        for (int i = 0; i < RANGE; i++) {
            // always alignable
            b[i+0] = (short)(a[i+0] & mask);
        }
        return new Object[]{ a, b };
    }
828
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
                  IRNode.AND_VI, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    // Int variant of test11aB: init 0, no offset -> always alignable.
    static Object[] test11aI(int[] a, int[] b, int mask) {
        for (int i = 0; i < RANGE; i++) {
            // always alignable
            b[i+0] = (int)(a[i+0] & mask);
        }
        return new Object[]{ a, b };
    }
842
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0",
                  IRNode.AND_VL, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    // Long variant of test11aB: init 0, no offset -> always alignable.
    static Object[] test11aL(long[] a, long[] b, long mask) {
        for (int i = 0; i < RANGE; i++) {
            // always alignable
            b[i+0] = (long)(a[i+0] & mask);
        }
        return new Object[]{ a, b };
    }
856
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
                  IRNode.AND_VB, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    // Like test11aB but with loop init 1: load and store share the same index,
    // so alignment can still be reached -> vectorization always expected.
    static Object[] test11bB(byte[] a, byte[] b, byte mask) {
        for (int i = 1; i < RANGE; i++) {
            // always alignable
            b[i+0] = (byte)(a[i+0] & mask);
        }
        return new Object[]{ a, b };
    }
870
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
                  IRNode.AND_VS, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    // Short variant of test11bB: init 1, same index on load/store -> alignable.
    static Object[] test11bS(short[] a, short[] b, short mask) {
        for (int i = 1; i < RANGE; i++) {
            // always alignable
            b[i+0] = (short)(a[i+0] & mask);
        }
        return new Object[]{ a, b };
    }
884
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
                  IRNode.AND_VI, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    // Int variant of test11bB: init 1, same index on load/store -> alignable.
    static Object[] test11bI(int[] a, int[] b, int mask) {
        for (int i = 1; i < RANGE; i++) {
            // always alignable
            b[i+0] = (int)(a[i+0] & mask);
        }
        return new Object[]{ a, b };
    }
898
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0",
                  IRNode.AND_VL, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    // Long variant of test11bB: init 1, same index on load/store -> alignable.
    static Object[] test11bL(long[] a, long[] b, long mask) {
        for (int i = 1; i < RANGE; i++) {
            // always alignable
            b[i+0] = (long)(a[i+0] & mask);
        }
        return new Object[]{ a, b };
    }
912
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
                  IRNode.AND_VB, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "false"})
    @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
                  IRNode.AND_VB, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "true"})
    // Load index is shifted by 1 relative to the store index: load and store
    // addresses differ by 1 byte, so they can never both be 8-byte aligned.
    // Expect vectorization only with AlignVector=false.
    static Object[] test11cB(byte[] a, byte[] b, byte mask) {
        for (int i = 1; i < RANGE-1; i++) {
            // 1 byte offset -> not alignable with AlignVector
            b[i+0] = (byte)(a[i+1] & mask);
        }
        return new Object[]{ a, b };
    }
933
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
                  IRNode.AND_VS, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "false"})
    @IR(counts = {IRNode.LOAD_VECTOR_S, "= 0",
                  IRNode.AND_VS, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "true"})
    // Short variant of test11cB: load/store addresses differ by 2 bytes, which
    // cannot both be 8-byte aligned -> no vectors with AlignVector=true.
    static Object[] test11cS(short[] a, short[] b, short mask) {
        for (int i = 1; i < RANGE-1; i++) {
            // 2 byte offset -> not alignable with AlignVector
            b[i+0] = (short)(a[i+1] & mask);
        }
        return new Object[]{ a, b };
    }
954
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
                  IRNode.AND_VI, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "false"})
    @IR(counts = {IRNode.LOAD_VECTOR_I, "= 0",
                  IRNode.AND_VI, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "true"})
    // Int variant of test11cB: load/store addresses differ by 4 bytes, which
    // cannot both be 8-byte aligned -> no vectors with AlignVector=true.
    static Object[] test11cI(int[] a, int[] b, int mask) {
        for (int i = 1; i < RANGE-1; i++) {
            // 4 byte offset -> not alignable with AlignVector
            b[i+0] = (int)(a[i+1] & mask);
        }
        return new Object[]{ a, b };
    }
975
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0",
                  IRNode.ADD_VL + "", "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    static Object[] test11cL(long[] a, long[] b, long mask) {
        for (int i = 1; i < RANGE-1; i++) {
            // always alignable (8 byte offset)
            b[i+0] = (long)(a[i+1] & mask);
        }
        return new Object[]{ a, b };
    }
989
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
                  IRNode.AND_VB, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    // Invariant offset applied equally to load and store index: relative
    // alignment is unchanged, so vectorization is always expected.
    static Object[] test11dB(byte[] a, byte[] b, byte mask, int invar) {
        for (int i = 0; i < RANGE; i++) {
            b[i+0+invar] = (byte)(a[i+0+invar] & mask);
        }
        return new Object[]{ a, b };
    }
1002
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
                  IRNode.AND_VS, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    // Short variant of test11dB: shared invariant offset on load and store.
    static Object[] test11dS(short[] a, short[] b, short mask, int invar) {
        for (int i = 0; i < RANGE; i++) {
            b[i+0+invar] = (short)(a[i+0+invar] & mask);
        }
        return new Object[]{ a, b };
    }
1015
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
                  IRNode.AND_VI, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    // Int variant of test11dB: shared invariant offset on load and store.
    static Object[] test11dI(int[] a, int[] b, int mask, int invar) {
        for (int i = 0; i < RANGE; i++) {
            b[i+0+invar] = (int)(a[i+0+invar] & mask);
        }
        return new Object[]{ a, b };
    }
1028
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0",
                  IRNode.AND_VL, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    // Long variant of test11dB: shared invariant offset on load and store.
    static Object[] test11dL(long[] a, long[] b, long mask, int invar) {
        for (int i = 0; i < RANGE; i++) {
            b[i+0+invar] = (long)(a[i+0+invar] & mask);
        }
        return new Object[]{ a, b };
    }
1041
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE + "min(max_byte, 4)", "> 0",
                  IRNode.AND_VB, IRNode.VECTOR_SIZE + "min(max_byte, 4)", "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "false"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    // Hand-unrolled 4x body with stride 6 (non-power-of-2): only size-4 byte
    // vectors can form, the remaining 2-byte gap per iteration stays scalar.
    static Object[] test12(byte[] a, byte[] b, byte mask) {
        for (int i = 0; i < RANGE/16; i++) {
            // Non-power-of-2 stride. Vectorization of 4 bytes, then 2-bytes gap.
            b[i*6 + 0 ] = (byte)(a[i*6 + 0 ] & mask);
            b[i*6 + 1 ] = (byte)(a[i*6 + 1 ] & mask);
            b[i*6 + 2 ] = (byte)(a[i*6 + 2 ] & mask);
            b[i*6 + 3 ] = (byte)(a[i*6 + 3 ] & mask);
        }
        return new Object[]{ a, b };
    }
1059
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
                  IRNode.LOAD_VECTOR_L, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
                  IRNode.ADD_VI, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
                  IRNode.ADD_VL, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeature = {"avx2", "true"})
    // require avx to ensure vectors are larger than what unrolling produces
    @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
                  IRNode.LOAD_VECTOR_L, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
                  IRNode.ADD_VI, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
                  IRNode.ADD_VL, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"riscv64", "true"},
        applyIfCPUFeature = {"rvv", "true"},
        applyIf = {"MaxVectorSize", ">=32"})
    // Mixed int/long increments in one loop: both types must vectorize with
    // matching element counts (min of the two max vector lengths).
    static Object[] test13aIL(int[] a, long[] b) {
        for (int i = 0; i < RANGE; i++) {
            a[i]++;
            b[i]++;
        }
        return new Object[]{ a, b };
    }
1084
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
                  IRNode.LOAD_VECTOR_I, "> 0",
                  IRNode.ADD_VB, "> 0",
                  IRNode.ADD_VI, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
    // Mixed int/byte increments. NOTE: comments below were fixed to match the
    // parameter types (a is int[], b is byte[]); they were previously swapped.
    static Object[] test13aIB(int[] a, byte[] b) {
        for (int i = 0; i < RANGE; i++) {
            // adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 4*iter
            //     = 16 (or 12 if UseCompactObjectHeaders=true)
            a[i]++;
            // adr = base + UNSAFE.ARRAY_BYTE_BASE_OFFSET + 1*iter
            //     = 16 (or 12 if UseCompactObjectHeaders=true)
            b[i]++;
            // For AlignVector, all adr must be 8-byte aligned. Let's see for which iteration this can hold:
            // If UseCompactObjectHeaders=false:
            //   a (int, 4*iter):  aligned for iter = 0, 2, 4, 6, 8, ...
            //   b (byte, 1*iter): aligned for iter = 0, 8, 16, 24, 32, ...
            //   -> Ok, aligns every 8th iteration.
            // If UseCompactObjectHeaders=true:
            //   a: aligned for iter = 1, 3, 5, 7, 9, ...
            //   b: aligned for iter = 4, 12, 20, 28, 36, ...
            //   -> we can never align both vectors!
        }
        return new Object[]{ a, b };
    }
1114
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
                  IRNode.LOAD_VECTOR_S, "> 0",
                  IRNode.ADD_VI, "> 0",
                  IRNode.ADD_VS, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
    // Mixed int/short increments: with compact headers + AlignVector the two
    // arrays can never be simultaneously 8-byte aligned, so no vectors then.
    static Object[] test13aIS(int[] a, short[] b) {
        for (int i = 0; i < RANGE; i++) {
            // adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 4*iter
            //     = 16 (or 12 if UseCompactObjectHeaders=true)
            a[i]++;
            // adr = base + UNSAFE.ARRAY_SHORT_BASE_OFFSET + 2*iter
            //     = 16 (or 12 if UseCompactObjectHeaders=true)
            b[i]++;
            // For AlignVector, all adr must be 8-byte aligned. Let's see for which iteration this can hold:
            // If UseCompactObjectHeaders=false:
            //   a: iter % 2 == 0
            //   b: iter % 4 == 0
            //   -> Ok, aligns every 4th iteration.
            // If UseCompactObjectHeaders=true:
            //   a: iter % 2 = 1
            //   b: iter % 4 = 2
            //   -> we can never align both vectors!
        }
        return new Object[]{ a, b };
    }
1144
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
                  IRNode.LOAD_VECTOR_S, "> 0",
                  IRNode.LOAD_VECTOR_I, "> 0",
                  IRNode.LOAD_VECTOR_L, "> 0",
                  IRNode.ADD_VB, "> 0",
                  IRNode.ADD_VS, "> 0",
                  IRNode.ADD_VI, "> 0",
                  IRNode.ADD_VL, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
    // All four element sizes incremented in one loop; with compact headers +
    // AlignVector the byte and int arrays can never align in the same iteration.
    static Object[] test13aBSIL(byte[] a, short[] b, int[] c, long[] d) {
        for (int i = 0; i < RANGE; i++) {
            // adr = base + UNSAFE.ARRAY_BYTE_BASE_OFFSET + 1*iter
            //     = 16 (or 12 if UseCompactObjectHeaders=true)
            a[i]++;
            // adr = base + UNSAFE.ARRAY_SHORT_BASE_OFFSET + 2*iter
            //     = 16 (or 12 if UseCompactObjectHeaders=true)
            b[i]++;
            // adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 4*iter
            //     = 16 (or 12 if UseCompactObjectHeaders=true)
            c[i]++;
            // adr = base + UNSAFE.ARRAY_LONG_BASE_OFFSET + 8*iter
            //     = 16 (always)
            d[i]++;
            // If AlignVector and UseCompactObjectHeaders, and we want all adr 8-byte aligned:
            // a: iter % 8 = 4
            // c: iter % 2 = 1
            // -> can never align both vectors!
        }
        return new Object[]{ a, b, c, d };
    }
1179
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
                  IRNode.LOAD_VECTOR_L, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
                  IRNode.ADD_VI, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
                  IRNode.ADD_VL, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeature = {"avx2", "true"})
    // require avx to ensure vectors are larger than what unrolling produces
    @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
                  IRNode.LOAD_VECTOR_L, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
                  IRNode.ADD_VI, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
                  IRNode.ADD_VL, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"riscv64", "true"},
        applyIfCPUFeature = {"rvv", "true"},
        applyIf = {"MaxVectorSize", ">=32"})
    // Like test13aIL but with loop init 1 instead of 0.
    static Object[] test13bIL(int[] a, long[] b) {
        for (int i = 1; i < RANGE; i++) {
            a[i]++;
            b[i]++;
        }
        return new Object[]{ a, b };
    }
1204
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
                  IRNode.LOAD_VECTOR_I, "> 0",
                  IRNode.ADD_VB, "> 0",
                  IRNode.ADD_VI, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
    // Like test13aIB but with loop init 1 (constant +4 / +1 byte offsets below).
    static Object[] test13bIB(int[] a, byte[] b) {
        for (int i = 1; i < RANGE; i++) {
            // adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 4 + 4*iter
            //     = 16 (or 12 if UseCompactObjectHeaders=true)
            a[i]++;
            // adr = base + UNSAFE.ARRAY_BYTE_BASE_OFFSET + 1 + 1*iter
            //     = 16 (or 12 if UseCompactObjectHeaders=true)
            b[i]++;
            // If AlignVector and UseCompactObjectHeaders, and we want all adr 8-byte aligned:
            // a: iter % 2 = 0
            // b: iter % 8 = 3
            // -> can never align both vectors!
        }
        return new Object[]{ a, b };
    }
1229
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
                  IRNode.LOAD_VECTOR_S, "> 0",
                  IRNode.ADD_VI, "> 0",
                  IRNode.ADD_VS, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
    // Like test13aIS but with loop init 1 (constant +4 / +2 byte offsets below).
    static Object[] test13bIS(int[] a, short[] b) {
        for (int i = 1; i < RANGE; i++) {
            // adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 4 + 4*iter
            //     = 16 (or 12 if UseCompactObjectHeaders=true)
            a[i]++;
            // adr = base + UNSAFE.ARRAY_SHORT_BASE_OFFSET + 2 + 2*iter
            //     = 16 (or 12 if UseCompactObjectHeaders=true)
            b[i]++;
            // If AlignVector and UseCompactObjectHeaders, and we want all adr 8-byte aligned:
            // a: iter % 2 = 0
            // b: iter % 4 = 1
            // -> can never align both vectors!
        }
        return new Object[]{ a, b };
    }
1254
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
                  IRNode.LOAD_VECTOR_S, "> 0",
                  IRNode.LOAD_VECTOR_I, "> 0",
                  IRNode.LOAD_VECTOR_L, "> 0",
                  IRNode.ADD_VB, "> 0",
                  IRNode.ADD_VS, "> 0",
                  IRNode.ADD_VI, "> 0",
                  IRNode.ADD_VL, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
    // Like test13aBSIL but with loop init 1.
    static Object[] test13bBSIL(byte[] a, short[] b, int[] c, long[] d) {
        for (int i = 1; i < RANGE; i++) {
            // adr = base + UNSAFE.ARRAY_BYTE_BASE_OFFSET + 1 + 1*iter
            //     = 16 (or 12 if UseCompactObjectHeaders=true)
            a[i]++;
            // adr = base + UNSAFE.ARRAY_SHORT_BASE_OFFSET + 2 + 2*iter
            //     = 16 (or 12 if UseCompactObjectHeaders=true)
            b[i]++;
            // adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 4 + 4*iter
            //     = 16 (or 12 if UseCompactObjectHeaders=true)
            c[i]++;
            // adr = base + UNSAFE.ARRAY_LONG_BASE_OFFSET + 8 + 8*iter
            //     = 16 (always)
            d[i]++;
            // If AlignVector and UseCompactObjectHeaders, and we want all adr 8-byte aligned:
            // a: iter % 8 = 3
            // c: iter % 2 = 0
            // -> can never align both vectors!
        }
        return new Object[]{ a, b, c, d };
    }
1289
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
                  IRNode.ADD_VB, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "false"})
    @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
                  IRNode.ADD_VB, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "true"})
    // 16-element unrolled body with stride 9: iterations overlap, so no
    // vectorization is expected regardless of AlignVector.
    static Object[] test14aB(byte[] a) {
        // non-power-of-2 stride
        for (int i = 0; i < RANGE-20; i+=9) {
            // Since the stride is shorter than the vector length, there will be always
            // partial overlap of loads with previous stores, this leads to failure in
            // store-to-load-forwarding -> vectorization not profitable.
            a[i+0]++;
            a[i+1]++;
            a[i+2]++;
            a[i+3]++;
            a[i+4]++;
            a[i+5]++;
            a[i+6]++;
            a[i+7]++;
            a[i+8]++;
            a[i+9]++;
            a[i+10]++;
            a[i+11]++;
            a[i+12]++;
            a[i+13]++;
            a[i+14]++;
            a[i+15]++;
        }
        return new Object[]{ a };
    }
1328
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
                  IRNode.ADD_VB, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "false"})
    @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
                  IRNode.ADD_VB, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "true"})
    // Same as test14aB but with stride 3: even heavier overlap, no vectors.
    static Object[] test14bB(byte[] a) {
        // non-power-of-2 stride
        for (int i = 0; i < RANGE-20; i+=3) {
            // Since the stride is shorter than the vector length, there will be always
            // partial overlap of loads with previous stores, this leads to failure in
            // store-to-load-forwarding -> vectorization not profitable.
            a[i+0]++;
            a[i+1]++;
            a[i+2]++;
            a[i+3]++;
            a[i+4]++;
            a[i+5]++;
            a[i+6]++;
            a[i+7]++;
            a[i+8]++;
            a[i+9]++;
            a[i+10]++;
            a[i+11]++;
            a[i+12]++;
            a[i+13]++;
            a[i+14]++;
            a[i+15]++;
        }
        return new Object[]{ a };
    }
1367
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
                  IRNode.ADD_VB, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "false"})
    @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
                  IRNode.ADD_VB, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "true"})
    // Same as test14aB but with stride 5: overlapping iterations, no vectors.
    static Object[] test14cB(byte[] a) {
        // non-power-of-2 stride
        for (int i = 0; i < RANGE-20; i+=5) {
            // Since the stride is shorter than the vector length, there will be always
            // partial overlap of loads with previous stores, this leads to failure in
            // store-to-load-forwarding -> vectorization not profitable.
            a[i+0]++;
            a[i+1]++;
            a[i+2]++;
            a[i+3]++;
            a[i+4]++;
            a[i+5]++;
            a[i+6]++;
            a[i+7]++;
            a[i+8]++;
            a[i+9]++;
            a[i+10]++;
            a[i+11]++;
            a[i+12]++;
            a[i+13]++;
            a[i+14]++;
            a[i+15]++;
        }
        return new Object[]{ a };
    }
1406
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE + "min(max_byte, 8)", "> 0",
                  IRNode.ADD_VB, IRNode.VECTOR_SIZE + "min(max_byte, 8)", "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "false"})
    @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
                  IRNode.ADD_VB, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "true"})
    // 8-element unrolled body with stride 9: no overlap between iterations, so
    // size-8 byte vectors form with AlignVector=false; the odd stride defeats
    // alignment, so no vectors with AlignVector=true.
    static Object[] test14dB(byte[] a) {
        // non-power-of-2 stride
        for (int i = 0; i < RANGE-20; i+=9) {
            a[i+0]++;
            a[i+1]++;
            a[i+2]++;
            a[i+3]++;
            a[i+4]++;
            a[i+5]++;
            a[i+6]++;
            a[i+7]++;
        }
        return new Object[]{ a };
    }
1434
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE + "min(max_byte, 8)", "> 0",
                  IRNode.ADD_VB, IRNode.VECTOR_SIZE + "min(max_byte, 8)", "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "false"})
    @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
                  IRNode.ADD_VB, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "true"})
    // Same shape as test14dB but with stride 11.
    static Object[] test14eB(byte[] a) {
        // non-power-of-2 stride
        for (int i = 0; i < RANGE-32; i+=11) {
            a[i+0]++;
            a[i+1]++;
            a[i+2]++;
            a[i+3]++;
            a[i+4]++;
            a[i+5]++;
            a[i+6]++;
            a[i+7]++;
        }
        return new Object[]{ a };
    }
1462
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE + "min(max_byte, 8)", "> 0",
                  IRNode.ADD_VB, IRNode.VECTOR_SIZE + "min(max_byte, 8)", "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "false"})
    @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
                  IRNode.ADD_VB, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "true"})
    // Same shape as test14dB but with stride 12 (non-power-of-2, no overlap).
    static Object[] test14fB(byte[] a) {
        // non-power-of-2 stride
        for (int i = 0; i < RANGE-40; i+=12) {
            a[i+0]++;
            a[i+1]++;
            a[i+2]++;
            a[i+3]++;
            a[i+4]++;
            a[i+5]++;
            a[i+6]++;
            a[i+7]++;
        }
        return new Object[]{ a };
    }
1490
    @Test
    // IR rules difficult because of modulo wrapping with offset after peeling.
    // Non-power-of-2 scale 53: exercises address computation, no IR checks.
    static Object[] test15aB(byte[] a) {
        // non-power-of-2 scale
        for (int i = 0; i < RANGE/64-20; i++) {
            a[53*i+0]++;
            a[53*i+1]++;
            a[53*i+2]++;
            a[53*i+3]++;
            a[53*i+4]++;
            a[53*i+5]++;
            a[53*i+6]++;
            a[53*i+7]++;
            a[53*i+8]++;
            a[53*i+9]++;
            a[53*i+10]++;
            a[53*i+11]++;
            a[53*i+12]++;
            a[53*i+13]++;
            a[53*i+14]++;
            a[53*i+15]++;
        }
        return new Object[]{ a };
    }
1515
    @Test
    // IR rules difficult because of modulo wrapping with offset after peeling.
    // Non-power-of-2 scale 25: exercises address computation, no IR checks.
    static Object[] test15bB(byte[] a) {
        // non-power-of-2 scale
        for (int i = 0; i < RANGE/64-20; i++) {
            a[25*i+0]++;
            a[25*i+1]++;
            a[25*i+2]++;
            a[25*i+3]++;
            a[25*i+4]++;
            a[25*i+5]++;
            a[25*i+6]++;
            a[25*i+7]++;
            a[25*i+8]++;
            a[25*i+9]++;
            a[25*i+10]++;
            a[25*i+11]++;
            a[25*i+12]++;
            a[25*i+13]++;
            a[25*i+14]++;
            a[25*i+15]++;
        }
        return new Object[]{ a };
    }
1540
    @Test
    // IR rules difficult because of modulo wrapping with offset after peeling.
    // Non-power-of-2 scale 19: exercises address computation, no IR checks.
    static Object[] test15cB(byte[] a) {
        // non-power-of-2 scale
        for (int i = 0; i < RANGE/64-20; i++) {
            a[19*i+0]++;
            a[19*i+1]++;
            a[19*i+2]++;
            a[19*i+3]++;
            a[19*i+4]++;
            a[19*i+5]++;
            a[19*i+6]++;
            a[19*i+7]++;
            a[19*i+8]++;
            a[19*i+9]++;
            a[19*i+10]++;
            a[19*i+11]++;
            a[19*i+12]++;
            a[19*i+13]++;
            a[19*i+14]++;
            a[19*i+15]++;
        }
        return new Object[]{ a };
    }
1565
    @Test
    // Regression shape: scale-2 accesses over byte and short arrays that once
    // triggered infinite loop issues in the compiler. No IR checks.
    static Object[] test16a(byte[] a, short[] b) {
        // infinite loop issues
        for (int i = 0; i < RANGE/2-20; i++) {
            a[2*i+0]++;
            a[2*i+1]++;
            a[2*i+2]++;
            a[2*i+3]++;
            a[2*i+4]++;
            a[2*i+5]++;
            a[2*i+6]++;
            a[2*i+7]++;
            a[2*i+8]++;
            a[2*i+9]++;
            a[2*i+10]++;
            a[2*i+11]++;
            a[2*i+12]++;
            a[2*i+13]++;
            a[2*i+14]++;

            b[2*i+0]++;
            b[2*i+1]++;
            b[2*i+2]++;
            b[2*i+3]++;
        }
        return new Object[]{ a, b };
    }
1593
    @Test
    // Regression shape: scale-2 byte accesses that once triggered infinite
    // loop issues in the compiler. No IR checks.
    static Object[] test16b(byte[] a) {
        // infinite loop issues
        for (int i = 0; i < RANGE/2-20; i++) {
            a[2*i+0]++;
            a[2*i+1]++;
            a[2*i+2]++;
            a[2*i+3]++;
            a[2*i+4]++;
            a[2*i+5]++;
            a[2*i+6]++;
            a[2*i+7]++;
            a[2*i+8]++;
            a[2*i+9]++;
            a[2*i+10]++;
            a[2*i+11]++;
            a[2*i+12]++;
            a[2*i+13]++;
            a[2*i+14]++;
        }
        return new Object[]{ a };
    }
1616
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0",
                  IRNode.ADD_VL, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    // Unsafe access at exactly the long-element positions (offset base + 8*i):
    // equivalent to a[i]++, so long vectors are expected.
    static Object[] test17a(long[] a) {
        // Unsafe: vectorizes with profiling (not xcomp)
        for (int i = 0; i < RANGE; i++) {
            long adr = UNSAFE.ARRAY_LONG_BASE_OFFSET + 8L * i;
            long v = UNSAFE.getLongUnaligned(a, adr);
            UNSAFE.putLongUnaligned(a, adr, v + 1);
        }
        return new Object[]{ a };
    }
1632
    @Test
    // Difficult to write good IR rule. Modulo calculus overflow can create non-power-of-2 packs.
    // Unsafe access misaligned by +1 byte relative to the long elements.
    static Object[] test17b(long[] a) {
        // Not alignable
        for (int i = 0; i < RANGE-1; i++) {
            long adr = UNSAFE.ARRAY_LONG_BASE_OFFSET + 8L * i + 1;
            long v = UNSAFE.getLongUnaligned(a, adr);
            UNSAFE.putLongUnaligned(a, adr, v + 1);
        }
        return new Object[]{ a };
    }
1644
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_L, IRNode.VECTOR_SIZE_2, "> 0",
                  IRNode.ADD_VL, IRNode.VECTOR_SIZE_2, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIf = {"MaxVectorSize", ">=32"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    // Unsafe access to element pairs with stride 4 (gap of 2 elements):
    // only size-2 long vectors can form.
    static Object[] test17c(long[] a) {
        // Unsafe: aligned vectorizes
        for (int i = 0; i < RANGE-1; i+=4) {
            long adr = UNSAFE.ARRAY_LONG_BASE_OFFSET + 8L * i;
            long v0 = UNSAFE.getLongUnaligned(a, adr + 0);
            long v1 = UNSAFE.getLongUnaligned(a, adr + 8);
            UNSAFE.putLongUnaligned(a, adr + 0, v0 + 1);
            UNSAFE.putLongUnaligned(a, adr + 8, v1 + 1);
        }
        return new Object[]{ a };
    }
1663
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_L, IRNode.VECTOR_SIZE_2, "> 0",
                  IRNode.ADD_VL, IRNode.VECTOR_SIZE_2, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfCPUFeatureOr = {"avx512", "true", "asimd", "true", "rvv", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=64"})
    // Ensure vector width is large enough to fit 64 byte for longs:
    // The offsets are: 25, 33, 57, 65
    // In modulo 32: 25, 1, 25, 1  -> does not vectorize
    // In modulo 64: 25, 33, 57, 1 -> at least first pair vectorizes
    // This problem is because we compute modulo vector width in memory_alignment.
    @IR(counts = {IRNode.LOAD_VECTOR_L, "= 0",
                  IRNode.ADD_VL, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "true"})
    // Like test17c but misaligned by +1 byte -> never alignable with AlignVector.
    static Object[] test17d(long[] a) {
        // Not alignable
        for (int i = 0; i < RANGE-1; i+=4) {
            long adr = UNSAFE.ARRAY_LONG_BASE_OFFSET + 8L * i + 1;
            long v0 = UNSAFE.getLongUnaligned(a, adr + 0);
            long v1 = UNSAFE.getLongUnaligned(a, adr + 8);
            UNSAFE.putLongUnaligned(a, adr + 0, v0 + 1);
            UNSAFE.putLongUnaligned(a, adr + 8, v1 + 1);
        }
        return new Object[]{ a };
    }
1693
    @Test
    // Stores to fixed indices a[0]/a[1] (scale 0, no induction variable in the
    // address) interleaved with an iv-dependent store. No IR checks.
    static Object[] test18a(byte[] a, int[] b) {
        // scale = 0 --> no iv
        for (int i = 0; i < RANGE; i++) {
            a[0] = 1;
            b[i] = 2;
            a[1] = 1;
        }
        return new Object[]{ a, b };
    }
1704
    @Test
    // Variant of test18a with fixed indices a[1]/a[2]. No IR checks.
    static Object[] test18b(byte[] a, int[] b) {
        // scale = 0 --> no iv
        for (int i = 0; i < RANGE; i++) {
            a[1] = 1;
            b[i] = 2;
            a[2] = 1;
        }
        return new Object[]{ a, b };
    }
1715
    @Test
    // Downward-counting loop copying b into a at index RANGE_FINAL - i,
    // i.e. a forward copy expressed with a negative-stride iv. No IR checks.
    static Object[] test19(int[] a, int[] b) {
        for (int i = 5000; i > 0; i--) {
            a[RANGE_FINAL - i] = b[RANGE_FINAL - i];
        }
        return new Object[]{ a, b };
    }
1723
    @Test
    // Scale 2 with constant offset 30 and init 1: passes the alignment check
    // but historically broke the alignment calculation. No IR checks.
    static Object[] test20(byte[] a) {
        // Example where it is easy to pass alignment check,
        // but used to fail the alignment calculation
        for (int i = 1; i < RANGE/2-50; i++) {
            a[2*i+0+30]++;
            a[2*i+1+30]++;
            a[2*i+2+30]++;
            a[2*i+3+30]++;
        }
        return new Object[]{ a };
    }
1736 }