1 /*
2 * Copyright (c) 2024, 2025, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 */
23
24 package compiler.loopopts.superword;
25
26 import compiler.lib.ir_framework.*;
27 import jdk.test.lib.Utils;
28 import jdk.test.whitebox.WhiteBox;
29 import jdk.internal.misc.Unsafe;
30 import java.lang.reflect.Array;
31 import java.util.Map;
32 import java.util.HashMap;
33 import java.util.Random;
34 import java.nio.ByteOrder;
35
36 /*
37 * @test id=NoAlignVector
38 * @bug 8310190
39 * @summary Test AlignVector with various loop init, stride, scale, invar, etc.
40 * @modules java.base/jdk.internal.misc
41 * @library /test/lib /
42 * @run driver compiler.loopopts.superword.TestAlignVector NoAlignVector
43 */
44
45 /*
46 * @test id=AlignVector
47 * @bug 8310190
48 * @summary Test AlignVector with various loop init, stride, scale, invar, etc.
49 * @modules java.base/jdk.internal.misc
50 * @library /test/lib /
51 * @run driver compiler.loopopts.superword.TestAlignVector AlignVector
52 */
53
54 /*
55 * @test id=VerifyAlignVector
56 * @bug 8310190
57 * @summary Test AlignVector with various loop init, stride, scale, invar, etc.
58 * @modules java.base/jdk.internal.misc
59 * @library /test/lib /
60 * @run driver compiler.loopopts.superword.TestAlignVector VerifyAlignVector
61 */
62
63 /*
64 * @test id=NoAlignVector-COH
65 * @bug 8310190
66 * @summary Test AlignVector with various loop init, stride, scale, invar, etc.
67 * @modules java.base/jdk.internal.misc
68 * @library /test/lib /
69 * @run driver compiler.loopopts.superword.TestAlignVector NoAlignVector-COH
70 */
71
72 /*
73 * @test id=VerifyAlignVector-COH
74 * @bug 8310190
75 * @summary Test AlignVector with various loop init, stride, scale, invar, etc.
76 * @modules java.base/jdk.internal.misc
77 * @library /test/lib /
78 * @run driver compiler.loopopts.superword.TestAlignVector VerifyAlignVector-COH
79 */
80
81 public class TestAlignVector {
82 static int RANGE = 1024*8;
83 static int RANGE_FINAL = 1024*8;
84 private static final Unsafe UNSAFE = Unsafe.getUnsafe();
85 private static final Random RANDOM = Utils.getRandomInstance();
86
87 // Inputs
88 byte[] aB;
89 byte[] bB;
90 byte mB = (byte)31;
91 short[] aS;
92 short[] bS;
93 short mS = (short)0xF0F0;
94 int[] aI;
95 int[] bI;
96 int mI = 0xF0F0F0F0;
97 long[] aL;
98 long[] bL;
99 long mL = 0xF0F0F0F0F0F0F0F0L;
100
101 // List of tests
102 Map<String,TestFunction> tests = new HashMap<String,TestFunction>();
103
104 // List of gold, the results from the first run before compilation
105 Map<String,Object[]> golds = new HashMap<String,Object[]>();
106
107 interface TestFunction {
108 Object[] run();
109 }
110
111 public static void main(String[] args) {
112 TestFramework framework = new TestFramework(TestAlignVector.class);
113 framework.addFlags("--add-modules", "java.base", "--add-exports", "java.base/jdk.internal.misc=ALL-UNNAMED",
114 "-XX:+IgnoreUnrecognizedVMOptions", "-XX:LoopUnrollLimit=250");
115
116 switch (args[0]) {
117 case "NoAlignVector" -> { framework.addFlags("-XX:-UseCompactObjectHeaders", "-XX:-AlignVector"); }
118 case "AlignVector" -> { framework.addFlags("-XX:-UseCompactObjectHeaders", "-XX:+AlignVector"); }
119 case "VerifyAlignVector" -> { framework.addFlags("-XX:-UseCompactObjectHeaders", "-XX:+AlignVector", "-XX:+IgnoreUnrecognizedVMOptions", "-XX:+VerifyAlignVector"); }
120 case "NoAlignVector-COH" -> { framework.addFlags("-XX:+UseCompactObjectHeaders", "-XX:-AlignVector"); }
121 case "VerifyAlignVector-COH" -> { framework.addFlags("-XX:+UseCompactObjectHeaders", "-XX:+AlignVector", "-XX:+IgnoreUnrecognizedVMOptions", "-XX:+VerifyAlignVector"); }
122 default -> { throw new RuntimeException("Test argument not recognized: " + args[0]); }
123 }
124 framework.start();
125 }
126
/**
 * Generates the input arrays once, registers every test invocation under a
 * unique name, and records each invocation's result as its gold value before
 * compilation.
 */
public TestAlignVector() {
    // Generate input once
    aB = generateB();
    bB = generateB();
    aS = generateS();
    bS = generateS();
    aI = generateI();
    bI = generateI();
    aL = generateL();
    bL = generateL();

    // Add all tests to list. Every invocation works on fresh clones of the inputs.
    tests.put("test0", () -> test0(aB.clone(), bB.clone(), mB));
    tests.put("test1", () -> test1(aB.clone(), bB.clone(), mB));
    tests.put("test2", () -> test2(aB.clone(), bB.clone(), mB));
    tests.put("test3", () -> test3(aB.clone(), bB.clone(), mB));
    tests.put("test4", () -> test4(aB.clone(), bB.clone(), mB));
    tests.put("test5", () -> test5(aB.clone(), bB.clone(), mB, 0));
    tests.put("test6", () -> test6(aB.clone(), bB.clone(), mB));
    tests.put("test7", () -> test7(aS.clone(), bS.clone(), mS));
    // test8 is registered with two different init values. The keys must differ:
    // Map.put with a repeated key replaces the earlier entry, which would
    // silently drop the init = 0 invocation.
    tests.put("test8_init0", () -> test8(aB.clone(), bB.clone(), mB, 0));
    tests.put("test8_init1", () -> test8(aB.clone(), bB.clone(), mB, 1));
    tests.put("test9", () -> test9(aB.clone(), bB.clone(), mB));

    tests.put("test10a", () -> test10a(aB.clone(), bB.clone(), mB));
    tests.put("test10b", () -> test10b(aB.clone(), bB.clone(), mB));
    tests.put("test10c", () -> test10c(aS.clone(), bS.clone(), mS));
    tests.put("test10d", () -> test10d(aS.clone(), bS.clone(), mS));

    tests.put("test11aB", () -> test11aB(aB.clone(), bB.clone(), mB));
    tests.put("test11aS", () -> test11aS(aS.clone(), bS.clone(), mS));
    tests.put("test11aI", () -> test11aI(aI.clone(), bI.clone(), mI));
    tests.put("test11aL", () -> test11aL(aL.clone(), bL.clone(), mL));

    tests.put("test11bB", () -> test11bB(aB.clone(), bB.clone(), mB));
    tests.put("test11bS", () -> test11bS(aS.clone(), bS.clone(), mS));
    tests.put("test11bI", () -> test11bI(aI.clone(), bI.clone(), mI));
    tests.put("test11bL", () -> test11bL(aL.clone(), bL.clone(), mL));

    tests.put("test11cB", () -> test11cB(aB.clone(), bB.clone(), mB));
    tests.put("test11cS", () -> test11cS(aS.clone(), bS.clone(), mS));
    tests.put("test11cI", () -> test11cI(aI.clone(), bI.clone(), mI));
    tests.put("test11cL", () -> test11cL(aL.clone(), bL.clone(), mL));

    tests.put("test11dB", () -> test11dB(aB.clone(), bB.clone(), mB, 0));
    tests.put("test11dS", () -> test11dS(aS.clone(), bS.clone(), mS, 0));
    tests.put("test11dI", () -> test11dI(aI.clone(), bI.clone(), mI, 0));
    tests.put("test11dL", () -> test11dL(aL.clone(), bL.clone(), mL, 0));

    tests.put("test12", () -> test12(aB.clone(), bB.clone(), mB));

    tests.put("test13aIL", () -> test13aIL(aI.clone(), aL.clone()));
    tests.put("test13aIB", () -> test13aIB(aI.clone(), aB.clone()));
    tests.put("test13aIS", () -> test13aIS(aI.clone(), aS.clone()));
    tests.put("test13aBSIL", () -> test13aBSIL(aB.clone(), aS.clone(), aI.clone(), aL.clone()));

    tests.put("test13bIL", () -> test13bIL(aI.clone(), aL.clone()));
    tests.put("test13bIB", () -> test13bIB(aI.clone(), aB.clone()));
    tests.put("test13bIS", () -> test13bIS(aI.clone(), aS.clone()));
    tests.put("test13bBSIL", () -> test13bBSIL(aB.clone(), aS.clone(), aI.clone(), aL.clone()));

    tests.put("test14aB", () -> test14aB(aB.clone()));
    tests.put("test14bB", () -> test14bB(aB.clone()));
    tests.put("test14cB", () -> test14cB(aB.clone()));
    tests.put("test14dB", () -> test14dB(aB.clone()));
    tests.put("test14eB", () -> test14eB(aB.clone()));
    tests.put("test14fB", () -> test14fB(aB.clone()));

    tests.put("test15aB", () -> test15aB(aB.clone()));
    tests.put("test15bB", () -> test15bB(aB.clone()));
    tests.put("test15cB", () -> test15cB(aB.clone()));

    tests.put("test16a", () -> test16a(aB.clone(), aS.clone()));
    tests.put("test16b", () -> test16b(aB.clone()));

    tests.put("test17a", () -> test17a(aL.clone()));
    tests.put("test17b", () -> test17b(aL.clone()));
    tests.put("test17c", () -> test17c(aL.clone()));
    tests.put("test17d", () -> test17d(aL.clone()));

    tests.put("test18a", () -> test18a(aB.clone(), aI.clone()));
    tests.put("test18b", () -> test18b(aB.clone(), aI.clone()));

    tests.put("test19", () -> test19(aI.clone(), bI.clone()));
    tests.put("test20", () -> test20(aB.clone()));

    // Compute gold value for all test methods before compilation
    for (Map.Entry<String,TestFunction> entry : tests.entrySet()) {
        String name = entry.getKey();
        TestFunction test = entry.getValue();
        Object[] gold = test.run();
        golds.put(name, gold);
    }
}
221
222 @Warmup(100)
223 @Run(test = {"test0",
224 "test1",
225 "test2",
226 "test3",
227 "test4",
228 "test5",
229 "test6",
230 "test7",
231 "test8",
232 "test9",
233 "test10a",
234 "test10b",
235 "test10c",
236 "test10d",
237 "test11aB",
238 "test11aS",
239 "test11aI",
240 "test11aL",
241 "test11bB",
242 "test11bS",
243 "test11bI",
244 "test11bL",
245 "test11cB",
246 "test11cS",
247 "test11cI",
248 "test11cL",
249 "test11dB",
250 "test11dS",
251 "test11dI",
252 "test11dL",
253 "test12",
254 "test13aIL",
255 "test13aIB",
256 "test13aIS",
257 "test13aBSIL",
258 "test13bIL",
259 "test13bIB",
260 "test13bIS",
261 "test13bBSIL",
262 "test14aB",
263 "test14bB",
264 "test14cB",
265 "test14dB",
266 "test14eB",
267 "test14fB",
268 "test15aB",
269 "test15bB",
270 "test15cB",
271 "test16a",
272 "test16b",
273 "test17a",
274 "test17b",
275 "test17c",
276 "test17d",
277 "test18a",
278 "test18b",
279 "test19",
280 "test20"})
281 public void runTests() {
282 for (Map.Entry<String,TestFunction> entry : tests.entrySet()) {
283 String name = entry.getKey();
284 TestFunction test = entry.getValue();
285 // Recall gold value from before compilation
286 Object[] gold = golds.get(name);
287 // Compute new result
288 Object[] result = test.run();
289 // Compare gold and new result
290 verify(name, gold, result);
291 }
292 }
293
294 static byte[] generateB() {
295 byte[] a = new byte[RANGE];
296 for (int i = 0; i < a.length; i++) {
297 a[i] = (byte)RANDOM.nextInt();
298 }
299 return a;
300 }
301
302 static short[] generateS() {
303 short[] a = new short[RANGE];
304 for (int i = 0; i < a.length; i++) {
305 a[i] = (short)RANDOM.nextInt();
306 }
307 return a;
308 }
309
310 static int[] generateI() {
311 int[] a = new int[RANGE];
312 for (int i = 0; i < a.length; i++) {
313 a[i] = RANDOM.nextInt();
314 }
315 return a;
316 }
317
318 static long[] generateL() {
319 long[] a = new long[RANGE];
320 for (int i = 0; i < a.length; i++) {
321 a[i] = RANDOM.nextLong();
322 }
323 return a;
324 }
325
326 static void verify(String name, Object[] gold, Object[] result) {
327 if (gold.length != result.length) {
328 throw new RuntimeException("verify " + name + ": not the same number of outputs: gold.length = " +
329 gold.length + ", result.length = " + result.length);
330 }
331 for (int i = 0; i < gold.length; i++) {
332 Object g = gold[i];
333 Object r = result[i];
334 if (g.getClass() != r.getClass() || !g.getClass().isArray() || !r.getClass().isArray()) {
335 throw new RuntimeException("verify " + name + ": must both be array of same type:" +
336 " gold[" + i + "].getClass() = " + g.getClass().getSimpleName() +
337 " result[" + i + "].getClass() = " + r.getClass().getSimpleName());
338 }
339 if (g == r) {
340 throw new RuntimeException("verify " + name + ": should be two separate arrays (with identical content):" +
341 " gold[" + i + "] == result[" + i + "]");
342 }
343 if (Array.getLength(g) != Array.getLength(r)) {
344 throw new RuntimeException("verify " + name + ": arrays must have same length:" +
345 " gold[" + i + "].length = " + Array.getLength(g) +
346 " result[" + i + "].length = " + Array.getLength(r));
347 }
348 Class c = g.getClass().getComponentType();
349 if (c == byte.class) {
350 verifyB(name, i, (byte[])g, (byte[])r);
351 } else if (c == short.class) {
352 verifyS(name, i, (short[])g, (short[])r);
353 } else if (c == int.class) {
354 verifyI(name, i, (int[])g, (int[])r);
355 } else if (c == long.class) {
356 verifyL(name, i, (long[])g, (long[])r);
357 } else {
358 throw new RuntimeException("verify " + name + ": array type not supported for verify:" +
359 " gold[" + i + "].getClass() = " + g.getClass().getSimpleName() +
360 " result[" + i + "].getClass() = " + r.getClass().getSimpleName());
361 }
362 }
363 }
364
365 static void verifyB(String name, int i, byte[] g, byte[] r) {
366 for (int j = 0; j < g.length; j++) {
367 if (g[j] != r[j]) {
368 throw new RuntimeException("verify " + name + ": arrays must have same content:" +
369 " gold[" + i + "][" + j + "] = " + g[j] +
370 " result[" + i + "][" + j + "] = " + r[j]);
371 }
372 }
373 }
374
375 static void verifyS(String name, int i, short[] g, short[] r) {
376 for (int j = 0; j < g.length; j++) {
377 if (g[j] != r[j]) {
378 throw new RuntimeException("verify " + name + ": arrays must have same content:" +
379 " gold[" + i + "][" + j + "] = " + g[j] +
380 " result[" + i + "][" + j + "] = " + r[j]);
381 }
382 }
383 }
384
385 static void verifyI(String name, int i, int[] g, int[] r) {
386 for (int j = 0; j < g.length; j++) {
387 if (g[j] != r[j]) {
388 throw new RuntimeException("verify " + name + ": arrays must have same content:" +
389 " gold[" + i + "][" + j + "] = " + g[j] +
390 " result[" + i + "][" + j + "] = " + r[j]);
391 }
392 }
393 }
394
395 static void verifyL(String name, int i, long[] g, long[] r) {
396 for (int j = 0; j < g.length; j++) {
397 if (g[j] != r[j]) {
398 throw new RuntimeException("verify " + name + ": arrays must have same content:" +
399 " gold[" + i + "][" + j + "] = " + g[j] +
400 " result[" + i + "][" + j + "] = " + r[j]);
401 }
402 }
403 }
404
// test0: for every group of 8 bytes, stores (a[k] & mask) into b[k] for the
// first 4 bytes of the group; bytes 4..7 of each group are not touched.
// The IR rule has no AlignVector condition: on 64-bit platforms with
// sse4.1/asimd/rvv and MaxVectorSize >= 8 it expects 4-element byte vector
// loads and ANDs, plus vector stores.
// Returns { a, b } for comparison against the gold result.
405 @Test
406 @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
407 IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
408 IRNode.STORE_VECTOR, "> 0"},
409 applyIf = {"MaxVectorSize", ">=8"},
410 applyIfPlatform = {"64-bit", "true"},
411 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
412 static Object[] test0(byte[] a, byte[] b, byte mask) {
413 for (int i = 0; i < RANGE; i+=8) {
414 // Safe to vectorize with AlignVector
415 b[i+0] = (byte)(a[i+0] & mask); // offset 0, align 0
416 b[i+1] = (byte)(a[i+1] & mask);
417 b[i+2] = (byte)(a[i+2] & mask);
418 b[i+3] = (byte)(a[i+3] & mask);
419 }
420 return new Object[]{ a, b };
421 }
422
// test1: stores (a[k] & mask) into b[k] for all 8 bytes of every 8-byte group.
// The IR rule has neither an AlignVector nor a MaxVectorSize condition and does
// not constrain the vector size; it applies on 64-bit platforms with
// avx2/asimd/rvv and expects byte vector loads, ANDs and vector stores.
// Returns { a, b } for comparison against the gold result.
423 @Test
424 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
425 IRNode.AND_VB, "> 0",
426 IRNode.STORE_VECTOR, "> 0"},
427 applyIfPlatform = {"64-bit", "true"},
428 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
429 static Object[] test1(byte[] a, byte[] b, byte mask) {
430 for (int i = 0; i < RANGE; i+=8) {
431 b[i+0] = (byte)(a[i+0] & mask); // adr = base + UNSAFE.ARRAY_BYTE_BASE_OFFSET + 0 + iter*8
432 b[i+1] = (byte)(a[i+1] & mask);
433 b[i+2] = (byte)(a[i+2] & mask);
434 b[i+3] = (byte)(a[i+3] & mask);
435 b[i+4] = (byte)(a[i+4] & mask);
436 b[i+5] = (byte)(a[i+5] & mask);
437 b[i+6] = (byte)(a[i+6] & mask);
438 b[i+7] = (byte)(a[i+7] & mask);
439 }
440 return new Object[]{ a, b };
441 }
442
// test2: for every group of 8 bytes, stores (a[k] & mask) into b[k] for the
// bytes at offsets 3..6 of the group.
// Two IR rules (64-bit, sse4.1/asimd/rvv):
//  - AlignVector off and MaxVectorSize >= 8: expects 4-element byte vectors.
//  - AlignVector on: expects no byte vector loads, ANDs or vector stores.
// Returns { a, b } for comparison against the gold result.
443 @Test
444 @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
445 IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
446 IRNode.STORE_VECTOR, "> 0"},
447 applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"},
448 applyIfPlatform = {"64-bit", "true"},
449 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
450 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
451 IRNode.AND_VB, "= 0",
452 IRNode.STORE_VECTOR, "= 0"},
453 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
454 applyIfPlatform = {"64-bit", "true"},
455 applyIf = {"AlignVector", "true"})
456 static Object[] test2(byte[] a, byte[] b, byte mask) {
457 for (int i = 0; i < RANGE; i+=8) {
458 // Cannot align with AlignVector: 3 + x * 8 % 8 = 3
459 b[i+3] = (byte)(a[i+3] & mask); // at alignment 3
460 b[i+4] = (byte)(a[i+4] & mask);
461 b[i+5] = (byte)(a[i+5] & mask);
462 b[i+6] = (byte)(a[i+6] & mask);
463 }
464 return new Object[]{ a, b };
465 }
466
// test3: like test2 (bytes at offsets 3..6 of every 8-byte group), plus one
// additional single-byte access at offset 0 of the group.
// Two IR rules (64-bit, sse4.1/asimd/rvv):
//  - AlignVector off and MaxVectorSize >= 8: expects 4-element byte vectors.
//  - AlignVector on: expects no byte vector loads, ANDs or vector stores.
// Returns { a, b } for comparison against the gold result.
467 @Test
468 @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
469 IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
470 IRNode.STORE_VECTOR, "> 0"},
471 applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"},
472 applyIfPlatform = {"64-bit", "true"},
473 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
474 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
475 IRNode.AND_VB, "= 0",
476 IRNode.STORE_VECTOR, "= 0"},
477 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
478 applyIfPlatform = {"64-bit", "true"},
479 applyIf = {"AlignVector", "true"})
480 static Object[] test3(byte[] a, byte[] b, byte mask) {
481 for (int i = 0; i < RANGE; i+=8) {
482 // Cannot align with AlignVector: 3 + x * 8 % 8 = 3
483
484 // Problematic for AlignVector
485 b[i+0] = (byte)(a[i+0] & mask); // best_memref, align 0
486
487 b[i+3] = (byte)(a[i+3] & mask); // pack at offset 3 bytes
488 b[i+4] = (byte)(a[i+4] & mask);
489 b[i+5] = (byte)(a[i+5] & mask);
490 b[i+6] = (byte)(a[i+6] & mask);
491 }
492 return new Object[]{ a, b };
493 }
494
// test4: iterates RANGE/16 times with unit stride and indexes with i*16 + k.
// Per iteration it processes a run of 4 bytes (offsets 0..3) and a run of
// 8 bytes (offsets 5..12); offsets 4 and 13..15 are not touched.
// Two IR rules (64-bit, sse4.1/asimd/rvv, MaxVectorSize >= 16):
//  - AlignVector off: expects both 4-element and 8-element byte vectors.
//  - AlignVector on: expects 4-element byte vectors and no 8-element ones.
// Returns { a, b } for comparison against the gold result.
495 @Test
496 @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
497 IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_8, "> 0",
498 IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
499 IRNode.AND_VB, IRNode.VECTOR_SIZE_8, "> 0",
500 IRNode.STORE_VECTOR, "> 0"},
501 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
502 applyIfPlatform = {"64-bit", "true"},
503 applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=16"})
504 @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
505 IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_8, "= 0",// unaligned
506 IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
507 IRNode.AND_VB, IRNode.VECTOR_SIZE_8, "= 0",// unaligned
508 IRNode.STORE_VECTOR, "> 0"},
509 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
510 applyIfPlatform = {"64-bit", "true"},
511 applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">=16"})
512 static Object[] test4(byte[] a, byte[] b, byte mask) {
513 for (int i = 0; i < RANGE/16; i++) {
514 // Problematic for AlignVector
515 b[i*16 + 0 ] = (byte)(a[i*16 + 0 ] & mask); // 4 pack, 0 aligned
516 b[i*16 + 1 ] = (byte)(a[i*16 + 1 ] & mask);
517 b[i*16 + 2 ] = (byte)(a[i*16 + 2 ] & mask);
518 b[i*16 + 3 ] = (byte)(a[i*16 + 3 ] & mask);
519
520 b[i*16 + 5 ] = (byte)(a[i*16 + 5 ] & mask); // 8 pack, 5 aligned
521 b[i*16 + 6 ] = (byte)(a[i*16 + 6 ] & mask);
522 b[i*16 + 7 ] = (byte)(a[i*16 + 7 ] & mask);
523 b[i*16 + 8 ] = (byte)(a[i*16 + 8 ] & mask);
524 b[i*16 + 9 ] = (byte)(a[i*16 + 9 ] & mask);
525 b[i*16 + 10] = (byte)(a[i*16 + 10] & mask);
526 b[i*16 + 11] = (byte)(a[i*16 + 11] & mask);
527 b[i*16 + 12] = (byte)(a[i*16 + 12] & mask);
528 }
529 return new Object[]{ a, b };
530 }
531
// test5: same access pattern as test3 (offset 0 plus offsets 3..6 of every
// 8-byte group), but every index additionally adds the loop-invariant
// parameter "inv".
// Two IR rules (64-bit, sse4.1/asimd/rvv):
//  - AlignVector off and MaxVectorSize >= 8: expects 4-element byte vectors.
//  - AlignVector on: expects no byte vector loads, ANDs or vector stores.
// Returns { a, b } for comparison against the gold result.
532 @Test
533 @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
534 IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
535 IRNode.STORE_VECTOR, "> 0"},
536 applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"},
537 applyIfPlatform = {"64-bit", "true"},
538 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
539 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
540 IRNode.AND_VB, "= 0",
541 IRNode.STORE_VECTOR, "= 0"},
542 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
543 applyIfPlatform = {"64-bit", "true"},
544 applyIf = {"AlignVector", "true"})
545 static Object[] test5(byte[] a, byte[] b, byte mask, int inv) {
546 for (int i = 0; i < RANGE; i+=8) {
547 // Cannot align with AlignVector because of invariant
548 b[i+inv+0] = (byte)(a[i+inv+0] & mask);
549
550 b[i+inv+3] = (byte)(a[i+inv+3] & mask);
551 b[i+inv+4] = (byte)(a[i+inv+4] & mask);
552 b[i+inv+5] = (byte)(a[i+inv+5] & mask);
553 b[i+inv+6] = (byte)(a[i+inv+6] & mask);
554 }
555 return new Object[]{ a, b };
556 }
557
// test6: the loop variable advances by 2 up to RANGE/8 and indexes are formed
// as i*4 + k, so each iteration addresses a group of 8 bytes; it processes
// offset 0 plus offsets 3..6 of that group.
// Two IR rules (64-bit, sse4.1/asimd/rvv):
//  - AlignVector off and MaxVectorSize >= 8: expects 4-element byte vectors.
//  - AlignVector on: expects no byte vector loads, ANDs or vector stores.
// Returns { a, b } for comparison against the gold result.
558 @Test
559 @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
560 IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
561 IRNode.STORE_VECTOR, "> 0"},
562 applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"},
563 applyIfPlatform = {"64-bit", "true"},
564 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
565 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
566 IRNode.AND_VB, "= 0",
567 IRNode.STORE_VECTOR, "= 0"},
568 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
569 applyIfPlatform = {"64-bit", "true"},
570 applyIf = {"AlignVector", "true"})
571 static Object[] test6(byte[] a, byte[] b, byte mask) {
572 for (int i = 0; i < RANGE/8; i+=2) {
573 // Cannot align with AlignVector because offset is odd
574 b[i*4+0] = (byte)(a[i*4+0] & mask);
575
576 b[i*4+3] = (byte)(a[i*4+3] & mask);
577 b[i*4+4] = (byte)(a[i*4+4] & mask);
578 b[i*4+5] = (byte)(a[i*4+5] & mask);
579 b[i*4+6] = (byte)(a[i*4+6] & mask);
580 }
581 return new Object[]{ a, b };
582 }
583
// test7: short-array counterpart of test6 (stride 2, index i*4 + k, element
// offset 0 plus element offsets 3..6 of each group of 8 elements).
// Two IR rules (64-bit):
//  - AlignVector off and MaxVectorSize >= 16, on avx2/asimd/rvv: expects
//    4-element short vectors.
//  - AlignVector on, on sse4.1/asimd/rvv: expects no short vector loads, ANDs
//    or vector stores.
// Returns { a, b } for comparison against the gold result.
584 @Test
585 @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
586 IRNode.AND_VS, IRNode.VECTOR_SIZE_4, "> 0",
587 IRNode.STORE_VECTOR, "> 0"},
588 applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=16"},
589 applyIfPlatform = {"64-bit", "true"},
590 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
591 @IR(counts = {IRNode.LOAD_VECTOR_S, "= 0",
592 IRNode.AND_VS, "= 0",
593 IRNode.STORE_VECTOR, "= 0"},
594 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
595 applyIfPlatform = {"64-bit", "true"},
596 applyIf = {"AlignVector", "true"})
597 static Object[] test7(short[] a, short[] b, short mask) {
598 for (int i = 0; i < RANGE/8; i+=2) {
599 // Cannot align with AlignVector because offset is odd
600 b[i*4+0] = (short)(a[i*4+0] & mask);
601
602 b[i*4+3] = (short)(a[i*4+3] & mask);
603 b[i*4+4] = (short)(a[i*4+4] & mask);
604 b[i*4+5] = (short)(a[i*4+5] & mask);
605 b[i*4+6] = (short)(a[i*4+6] & mask);
606 }
607 return new Object[]{ a, b };
608 }
609
// test8: same access pattern as test3 (offset 0 plus offsets 3..6 relative to
// i, stride 8), but the loop starts at the parameter "init" instead of a
// constant.
// Two IR rules (64-bit, sse4.1/asimd/rvv):
//  - AlignVector off and MaxVectorSize >= 8: expects 4-element byte vectors.
//  - AlignVector on: expects no byte vector loads, ANDs or vector stores.
// Returns { a, b } for comparison against the gold result.
610 @Test
611 @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
612 IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
613 IRNode.STORE_VECTOR, "> 0"},
614 applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"},
615 applyIfPlatform = {"64-bit", "true"},
616 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
617 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
618 IRNode.AND_VB, "= 0",
619 IRNode.STORE_VECTOR, "= 0"},
620 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
621 applyIfPlatform = {"64-bit", "true"},
622 applyIf = {"AlignVector", "true"})
623 static Object[] test8(byte[] a, byte[] b, byte mask, int init) {
624 for (int i = init; i < RANGE; i+=8) {
625 // Cannot align with AlignVector because of invariant (variable init becomes invar)
626 b[i+0] = (byte)(a[i+0] & mask);
627
628 b[i+3] = (byte)(a[i+3] & mask);
629 b[i+4] = (byte)(a[i+4] & mask);
630 b[i+5] = (byte)(a[i+5] & mask);
631 b[i+6] = (byte)(a[i+6] & mask);
632 }
633 return new Object[]{ a, b };
634 }
635
// test9: same access pattern as test3 (offset 0 plus offsets 3..6 relative to
// i, stride 8), but the loop starts at the constant 13 and stops before
// RANGE-8.
// The single IR rule has no AlignVector condition: on 64-bit platforms with
// sse4.1/asimd/rvv and MaxVectorSize >= 8 it expects 4-element byte vector
// loads and ANDs, plus vector stores.
// Returns { a, b } for comparison against the gold result.
636 @Test
637 @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
638 IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
639 IRNode.STORE_VECTOR, "> 0"},
640 applyIf = {"MaxVectorSize", ">=8"},
641 applyIfPlatform = {"64-bit", "true"},
642 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
643 static Object[] test9(byte[] a, byte[] b, byte mask) {
644 // known non-zero init value does not affect offset, but has implicit effect on iv
645 for (int i = 13; i < RANGE-8; i+=8) {
646 b[i+0] = (byte)(a[i+0] & mask);
647
648 b[i+3] = (byte)(a[i+3] & mask);
649 b[i+4] = (byte)(a[i+4] & mask);
650 b[i+5] = (byte)(a[i+5] & mask);
651 b[i+6] = (byte)(a[i+6] & mask);
652 }
653 return new Object[]{ a, b };
654 }
655
// test10a: processes 4 consecutive bytes (i .. i+3) per iteration with
// stride 8, starting at the constant 3 and stopping before RANGE-8.
// Two IR rules (64-bit, sse4.1/asimd/rvv):
//  - AlignVector off and MaxVectorSize >= 8: expects 4-element byte vectors.
//  - AlignVector on: expects no byte vector loads, ANDs or vector stores.
// Returns { a, b } for comparison against the gold result.
656 @Test
657 @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
658 IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
659 IRNode.STORE_VECTOR, "> 0"},
660 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
661 applyIfPlatform = {"64-bit", "true"},
662 applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"})
663 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
664 IRNode.AND_VB, "= 0",
665 IRNode.STORE_VECTOR, "= 0"},
666 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
667 applyIfPlatform = {"64-bit", "true"},
668 applyIf = {"AlignVector", "true"})
669 static Object[] test10a(byte[] a, byte[] b, byte mask) {
670 // This is not alignable with pre-loop, because of odd init.
671 for (int i = 3; i < RANGE-8; i+=8) {
672 b[i+0] = (byte)(a[i+0] & mask);
673 b[i+1] = (byte)(a[i+1] & mask);
674 b[i+2] = (byte)(a[i+2] & mask);
675 b[i+3] = (byte)(a[i+3] & mask);
676 }
677 return new Object[]{ a, b };
678 }
679
// test10b: same as test10a (4 consecutive bytes per iteration, stride 8,
// bound RANGE-8), but the loop starts at the constant 13.
// Two IR rules (64-bit, sse4.1/asimd/rvv):
//  - AlignVector off and MaxVectorSize >= 8: expects 4-element byte vectors.
//  - AlignVector on: expects no byte vector loads, ANDs or vector stores.
// Returns { a, b } for comparison against the gold result.
680 @Test
681 @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
682 IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
683 IRNode.STORE_VECTOR, "> 0"},
684 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
685 applyIfPlatform = {"64-bit", "true"},
686 applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"})
687 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
688 IRNode.AND_VB, "= 0",
689 IRNode.STORE_VECTOR, "= 0"},
690 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
691 applyIfPlatform = {"64-bit", "true"},
692 applyIf = {"AlignVector", "true"})
693 static Object[] test10b(byte[] a, byte[] b, byte mask) {
694 // This is not alignable with pre-loop, because of odd init.
695 // Seems not correctly handled.
696 for (int i = 13; i < RANGE-8; i+=8) {
697 b[i+0] = (byte)(a[i+0] & mask);
698 b[i+1] = (byte)(a[i+1] & mask);
699 b[i+2] = (byte)(a[i+2] & mask);
700 b[i+3] = (byte)(a[i+3] & mask);
701 }
702 return new Object[]{ a, b };
703 }
704
// test10c: short-array counterpart of test10b (4 consecutive elements per
// iteration, stride 8, start 13, bound RANGE-8).
// Two IR rules (64-bit, sse4.1/asimd/rvv):
//  - AlignVector off and MaxVectorSize >= 16: expects 4-element short vectors.
//  - AlignVector on: expects no short vector loads, ANDs or vector stores.
// Returns { a, b } for comparison against the gold result.
705 @Test
706 @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
707 IRNode.AND_VS, IRNode.VECTOR_SIZE_4, "> 0",
708 IRNode.STORE_VECTOR, "> 0"},
709 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
710 applyIfPlatform = {"64-bit", "true"},
711 applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=16"})
712 @IR(counts = {IRNode.LOAD_VECTOR_S, "= 0",
713 IRNode.AND_VS, "= 0",
714 IRNode.STORE_VECTOR, "= 0"},
715 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
716 applyIfPlatform = {"64-bit", "true"},
717 applyIf = {"AlignVector", "true"})
718 static Object[] test10c(short[] a, short[] b, short mask) {
719 // This is not alignable with pre-loop, because of odd init.
720 // Seems not correctly handled with MaxVectorSize >= 32.
721 for (int i = 13; i < RANGE-8; i+=8) {
722 b[i+0] = (short)(a[i+0] & mask);
723 b[i+1] = (short)(a[i+1] & mask);
724 b[i+2] = (short)(a[i+2] & mask);
725 b[i+3] = (short)(a[i+3] & mask);
726 }
727 return new Object[]{ a, b };
728 }
729
// test10d: processes 4 consecutive shorts per iteration at element offsets
// i+3 .. i+6, with stride 8, start 13 and bound RANGE-16 (so the first
// accessed element index is 16).
// The single IR rule has no AlignVector condition: on 64-bit platforms with
// avx2/asimd/rvv and MaxVectorSize >= 16 it expects 4-element short vector
// loads and ANDs, plus vector stores.
// Returns { a, b } for comparison against the gold result.
730 @Test
731 @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
732 IRNode.AND_VS, IRNode.VECTOR_SIZE_4, "> 0",
733 IRNode.STORE_VECTOR, "> 0"},
734 applyIf = {"MaxVectorSize", ">=16"},
735 applyIfPlatform = {"64-bit", "true"},
736 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
737 static Object[] test10d(short[] a, short[] b, short mask) {
738 for (int i = 13; i < RANGE-16; i+=8) {
739 // adr = base + UNSAFE.ARRAY_SHORT_BASE_OFFSET + 2*(3 + 13) + iter*16
740 b[i+0+3] = (short)(a[i+0+3] & mask);
741 b[i+1+3] = (short)(a[i+1+3] & mask);
742 b[i+2+3] = (short)(a[i+2+3] & mask);
743 b[i+3+3] = (short)(a[i+3+3] & mask);
744 }
745 return new Object[]{ a, b };
746 }
747
// test11aB: unit-stride loop over all RANGE bytes starting at 0, storing
// (a[i] & mask) into b[i].
// The single IR rule has no AlignVector or MaxVectorSize condition: on 64-bit
// platforms with sse4.1/asimd/rvv it expects byte vector loads, ANDs and
// vector stores.
// Returns { a, b } for comparison against the gold result.
748 @Test
749 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
750 IRNode.AND_VB, "> 0",
751 IRNode.STORE_VECTOR, "> 0"},
752 applyIfPlatform = {"64-bit", "true"},
753 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
754 static Object[] test11aB(byte[] a, byte[] b, byte mask) {
755 for (int i = 0; i < RANGE; i++) {
756 // always alignable
757 b[i+0] = (byte)(a[i+0] & mask);
758 }
759 return new Object[]{ a, b };
760 }
761
// test11aS: unit-stride loop over all RANGE shorts starting at 0, storing
// (a[i] & mask) into b[i].
// The single IR rule has no AlignVector or MaxVectorSize condition: on 64-bit
// platforms with sse4.1/asimd/rvv it expects short vector loads, ANDs and
// vector stores.
// Returns { a, b } for comparison against the gold result.
762 @Test
763 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
764 IRNode.AND_VS, "> 0",
765 IRNode.STORE_VECTOR, "> 0"},
766 applyIfPlatform = {"64-bit", "true"},
767 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
768 static Object[] test11aS(short[] a, short[] b, short mask) {
769 for (int i = 0; i < RANGE; i++) {
770 // always alignable
771 b[i+0] = (short)(a[i+0] & mask);
772 }
773 return new Object[]{ a, b };
774 }
775
// test11aI: unit-stride loop over all RANGE ints starting at 0, storing
// (a[i] & mask) into b[i].
// The single IR rule has no AlignVector or MaxVectorSize condition: on 64-bit
// platforms with sse4.1/asimd/rvv it expects int vector loads, ANDs and
// vector stores.
// Returns { a, b } for comparison against the gold result.
776 @Test
777 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
778 IRNode.AND_VI, "> 0",
779 IRNode.STORE_VECTOR, "> 0"},
780 applyIfPlatform = {"64-bit", "true"},
781 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
782 static Object[] test11aI(int[] a, int[] b, int mask) {
783 for (int i = 0; i < RANGE; i++) {
784 // always alignable
785 b[i+0] = (int)(a[i+0] & mask);
786 }
787 return new Object[]{ a, b };
788 }
789
// test11aL: unit-stride loop over all RANGE longs starting at 0, storing
// (a[i] & mask) into b[i].
// The single IR rule has no AlignVector or MaxVectorSize condition: on 64-bit
// platforms with sse4.1/asimd/rvv it expects long vector loads, ANDs and
// vector stores.
// Returns { a, b } for comparison against the gold result.
790 @Test
791 @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0",
792 IRNode.AND_VL, "> 0",
793 IRNode.STORE_VECTOR, "> 0"},
794 applyIfPlatform = {"64-bit", "true"},
795 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
796 static Object[] test11aL(long[] a, long[] b, long mask) {
797 for (int i = 0; i < RANGE; i++) {
798 // always alignable
799 b[i+0] = (long)(a[i+0] & mask);
800 }
801 return new Object[]{ a, b };
802 }
803
// test11bB: like test11aB, but the unit-stride loop starts at index 1, so
// element 0 is not touched.
// The single IR rule has no AlignVector or MaxVectorSize condition: on 64-bit
// platforms with sse4.1/asimd/rvv it expects byte vector loads, ANDs and
// vector stores.
// Returns { a, b } for comparison against the gold result.
804 @Test
805 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
806 IRNode.AND_VB, "> 0",
807 IRNode.STORE_VECTOR, "> 0"},
808 applyIfPlatform = {"64-bit", "true"},
809 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
810 static Object[] test11bB(byte[] a, byte[] b, byte mask) {
811 for (int i = 1; i < RANGE; i++) {
812 // always alignable
813 b[i+0] = (byte)(a[i+0] & mask);
814 }
815 return new Object[]{ a, b };
816 }
817
// test11bS: like test11aS, but the unit-stride loop starts at index 1, so
// element 0 is not touched.
// The single IR rule has no AlignVector or MaxVectorSize condition: on 64-bit
// platforms with sse4.1/asimd/rvv it expects short vector loads, ANDs and
// vector stores.
// Returns { a, b } for comparison against the gold result.
818 @Test
819 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
820 IRNode.AND_VS, "> 0",
821 IRNode.STORE_VECTOR, "> 0"},
822 applyIfPlatform = {"64-bit", "true"},
823 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
824 static Object[] test11bS(short[] a, short[] b, short mask) {
825 for (int i = 1; i < RANGE; i++) {
826 // always alignable
827 b[i+0] = (short)(a[i+0] & mask);
828 }
829 return new Object[]{ a, b };
830 }
831
// test11bI: like test11aI, but the unit-stride loop starts at index 1, so
// element 0 is not touched.
// The single IR rule has no AlignVector or MaxVectorSize condition: on 64-bit
// platforms with sse4.1/asimd/rvv it expects int vector loads, ANDs and
// vector stores.
// Returns { a, b } for comparison against the gold result.
832 @Test
833 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
834 IRNode.AND_VI, "> 0",
835 IRNode.STORE_VECTOR, "> 0"},
836 applyIfPlatform = {"64-bit", "true"},
837 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
838 static Object[] test11bI(int[] a, int[] b, int mask) {
839 for (int i = 1; i < RANGE; i++) {
840 // always alignable
841 b[i+0] = (int)(a[i+0] & mask);
842 }
843 return new Object[]{ a, b };
844 }
845
// test11bL: element-wise mask kernel on long arrays, b[i] = a[i] & mask,
// with the loop starting at i = 1 (not 0) and running to RANGE (exclusive).
// Returns { a, b }.
// IR rule (64-bit platform, and sse4.1 or asimd or rvv): the counts of
// LOAD_VECTOR_L, AND_VL and STORE_VECTOR must all be > 0.
846 @Test
847 @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0",
848 IRNode.AND_VL, "> 0",
849 IRNode.STORE_VECTOR, "> 0"},
850 applyIfPlatform = {"64-bit", "true"},
851 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
852 static Object[] test11bL(long[] a, long[] b, long mask) {
853 for (int i = 1; i < RANGE; i++) {
854 // always alignable
855 b[i+0] = (long)(a[i+0] & mask);
856 }
857 return new Object[]{ a, b };
858 }
859
// test11cB: byte kernel where the load is one element ahead of the store,
// b[i] = a[i+1] & mask for i in [1, RANGE-1). Returns { a, b }.
// Two IR rules, both limited to 64-bit and (sse4.1 or asimd or rvv):
//  - AlignVector == false: LOAD_VECTOR_B, AND_VB, STORE_VECTOR counts > 0.
//  - AlignVector == true:  the same three counts must be exactly 0.
860 @Test
861 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
862 IRNode.AND_VB, "> 0",
863 IRNode.STORE_VECTOR, "> 0"},
864 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
865 applyIfPlatform = {"64-bit", "true"},
866 applyIf = {"AlignVector", "false"})
867 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
868 IRNode.AND_VB, "= 0",
869 IRNode.STORE_VECTOR, "= 0"},
870 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
871 applyIfPlatform = {"64-bit", "true"},
872 applyIf = {"AlignVector", "true"})
873 static Object[] test11cB(byte[] a, byte[] b, byte mask) {
// Upper bound is RANGE-1 because the body reads a[i+1].
874 for (int i = 1; i < RANGE-1; i++) {
875 // 1 byte offset -> not alignable with AlignVector
876 b[i+0] = (byte)(a[i+1] & mask);
877 }
878 return new Object[]{ a, b };
879 }
880
// test11cS: short kernel where the load is one element ahead of the store,
// b[i] = a[i+1] & mask for i in [1, RANGE-1). Returns { a, b }.
// Two IR rules, both limited to 64-bit and (sse4.1 or asimd or rvv):
//  - AlignVector == false: LOAD_VECTOR_S, AND_VS, STORE_VECTOR counts > 0.
//  - AlignVector == true:  the same three counts must be exactly 0.
881 @Test
882 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
883 IRNode.AND_VS, "> 0",
884 IRNode.STORE_VECTOR, "> 0"},
885 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
886 applyIfPlatform = {"64-bit", "true"},
887 applyIf = {"AlignVector", "false"})
888 @IR(counts = {IRNode.LOAD_VECTOR_S, "= 0",
889 IRNode.AND_VS, "= 0",
890 IRNode.STORE_VECTOR, "= 0"},
891 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
892 applyIfPlatform = {"64-bit", "true"},
893 applyIf = {"AlignVector", "true"})
894 static Object[] test11cS(short[] a, short[] b, short mask) {
// Upper bound is RANGE-1 because the body reads a[i+1].
895 for (int i = 1; i < RANGE-1; i++) {
896 // 2 byte offset -> not alignable with AlignVector
897 b[i+0] = (short)(a[i+1] & mask);
898 }
899 return new Object[]{ a, b };
900 }
901
// test11cI: int kernel where the load is one element ahead of the store,
// b[i] = a[i+1] & mask for i in [1, RANGE-1). Returns { a, b }.
// Two IR rules, both limited to 64-bit and (sse4.1 or asimd or rvv):
//  - AlignVector == false: LOAD_VECTOR_I, AND_VI, STORE_VECTOR counts > 0.
//  - AlignVector == true:  the same three counts must be exactly 0.
902 @Test
903 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
904 IRNode.AND_VI, "> 0",
905 IRNode.STORE_VECTOR, "> 0"},
906 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
907 applyIfPlatform = {"64-bit", "true"},
908 applyIf = {"AlignVector", "false"})
909 @IR(counts = {IRNode.LOAD_VECTOR_I, "= 0",
910 IRNode.AND_VI, "= 0",
911 IRNode.STORE_VECTOR, "= 0"},
912 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
913 applyIfPlatform = {"64-bit", "true"},
914 applyIf = {"AlignVector", "true"})
915 static Object[] test11cI(int[] a, int[] b, int mask) {
// Upper bound is RANGE-1 because the body reads a[i+1].
916 for (int i = 1; i < RANGE-1; i++) {
917 // 4 byte offset -> not alignable with AlignVector
918 b[i+0] = (int)(a[i+1] & mask);
919 }
920 return new Object[]{ a, b };
921 }
922
// test11cL: long kernel where the load is one element ahead of the store,
// b[i] = a[i+1] & mask for i in [1, RANGE-1). Returns { a, b }.
// Unlike the byte/short/int variants of this kernel shape, there is a single
// IR rule with no AlignVector condition (64-bit, and sse4.1 or asimd or rvv):
// LOAD_VECTOR_L, AND_VL and STORE_VECTOR counts must all be > 0.
923 @Test
924 @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0",
925 IRNode.AND_VL, "> 0",
926 IRNode.STORE_VECTOR, "> 0"},
927 applyIfPlatform = {"64-bit", "true"},
928 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
929 static Object[] test11cL(long[] a, long[] b, long mask) {
// Upper bound is RANGE-1 because the body reads a[i+1].
930 for (int i = 1; i < RANGE-1; i++) {
931 // always alignable (8 byte offset)
932 b[i+0] = (long)(a[i+1] & mask);
933 }
934 return new Object[]{ a, b };
935 }
936
// test11dB: byte mask kernel with a loop-invariant offset in the index:
// b[i+invar] = a[i+invar] & mask for i in [0, RANGE). Returns { a, b }.
// NOTE(review): the arrays must be long enough for index RANGE-1+invar;
// the valid range of invar is decided by the caller, not visible here.
// IR rule (64-bit platform, and sse4.1 or asimd or rvv): the counts of
// LOAD_VECTOR_B, AND_VB and STORE_VECTOR must all be > 0.
937 @Test
938 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
939 IRNode.AND_VB, "> 0",
940 IRNode.STORE_VECTOR, "> 0"},
941 applyIfPlatform = {"64-bit", "true"},
942 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
943 static Object[] test11dB(byte[] a, byte[] b, byte mask, int invar) {
944 for (int i = 0; i < RANGE; i++) {
// Load and store use the same index, so both are shifted equally by invar.
945 b[i+0+invar] = (byte)(a[i+0+invar] & mask);
946 }
947 return new Object[]{ a, b };
948 }
949
// test11dS: short mask kernel with a loop-invariant offset in the index:
// b[i+invar] = a[i+invar] & mask for i in [0, RANGE). Returns { a, b }.
// NOTE(review): the arrays must be long enough for index RANGE-1+invar;
// the valid range of invar is decided by the caller, not visible here.
// IR rule (64-bit platform, and sse4.1 or asimd or rvv): the counts of
// LOAD_VECTOR_S, AND_VS and STORE_VECTOR must all be > 0.
950 @Test
951 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
952 IRNode.AND_VS, "> 0",
953 IRNode.STORE_VECTOR, "> 0"},
954 applyIfPlatform = {"64-bit", "true"},
955 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
956 static Object[] test11dS(short[] a, short[] b, short mask, int invar) {
957 for (int i = 0; i < RANGE; i++) {
// Load and store use the same index, so both are shifted equally by invar.
958 b[i+0+invar] = (short)(a[i+0+invar] & mask);
959 }
960 return new Object[]{ a, b };
961 }
962
// test11dI: int mask kernel with a loop-invariant offset in the index:
// b[i+invar] = a[i+invar] & mask for i in [0, RANGE). Returns { a, b }.
// NOTE(review): the arrays must be long enough for index RANGE-1+invar;
// the valid range of invar is decided by the caller, not visible here.
// IR rule (64-bit platform, and sse4.1 or asimd or rvv): the counts of
// LOAD_VECTOR_I, AND_VI and STORE_VECTOR must all be > 0.
963 @Test
964 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
965 IRNode.AND_VI, "> 0",
966 IRNode.STORE_VECTOR, "> 0"},
967 applyIfPlatform = {"64-bit", "true"},
968 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
969 static Object[] test11dI(int[] a, int[] b, int mask, int invar) {
970 for (int i = 0; i < RANGE; i++) {
// Load and store use the same index, so both are shifted equally by invar.
971 b[i+0+invar] = (int)(a[i+0+invar] & mask);
972 }
973 return new Object[]{ a, b };
974 }
975
// test11dL: long mask kernel with a loop-invariant offset in the index:
// b[i+invar] = a[i+invar] & mask for i in [0, RANGE). Returns { a, b }.
// NOTE(review): the arrays must be long enough for index RANGE-1+invar;
// the valid range of invar is decided by the caller, not visible here.
// IR rule (64-bit platform, and sse4.1 or asimd or rvv): the counts of
// LOAD_VECTOR_L, AND_VL and STORE_VECTOR must all be > 0.
976 @Test
977 @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0",
978 IRNode.AND_VL, "> 0",
979 IRNode.STORE_VECTOR, "> 0"},
980 applyIfPlatform = {"64-bit", "true"},
981 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
982 static Object[] test11dL(long[] a, long[] b, long mask, int invar) {
983 for (int i = 0; i < RANGE; i++) {
// Load and store use the same index, so both are shifted equally by invar.
984 b[i+0+invar] = (long)(a[i+0+invar] & mask);
985 }
986 return new Object[]{ a, b };
987 }
988
// test12: byte mask kernel with index scale 6: each of the RANGE/16
// iterations masks the 4 consecutive bytes at 6*i .. 6*i+3 and skips the
// next 2. Returns { a, b }.
// IR rule (only when AlignVector == false; 64-bit, and sse4.1 or asimd or
// rvv): LOAD_VECTOR_B and AND_VB with vector size min(max_byte, 4) and
// STORE_VECTOR must all have count > 0. No rule is given for
// AlignVector == true.
989 @Test
990 @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE + "min(max_byte, 4)", "> 0",
991 IRNode.AND_VB, IRNode.VECTOR_SIZE + "min(max_byte, 4)", "> 0",
992 IRNode.STORE_VECTOR, "> 0"},
993 applyIfPlatform = {"64-bit", "true"},
994 applyIf = {"AlignVector", "false"},
995 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
996 static Object[] test12(byte[] a, byte[] b, byte mask) {
997 for (int i = 0; i < RANGE/16; i++) {
998 // Non-power-of-2 stride. Vectorization of 4 bytes, then 2-bytes gap.
999 b[i*6 + 0 ] = (byte)(a[i*6 + 0 ] & mask);
1000 b[i*6 + 1 ] = (byte)(a[i*6 + 1 ] & mask);
1001 b[i*6 + 2 ] = (byte)(a[i*6 + 2 ] & mask);
1002 b[i*6 + 3 ] = (byte)(a[i*6 + 3 ] & mask);
1003 }
1004 return new Object[]{ a, b };
1005 }
1006
// test13aIL: increments an int array and a long array in the same loop,
// a[i]++ and b[i]++ for i in [0, RANGE). Returns { a, b }.
// IR rule (64-bit platform, and avx2 or rvv): LOAD_VECTOR_I, LOAD_VECTOR_L,
// ADD_VI and ADD_VL, each with vector size min(max_int, max_long), and
// STORE_VECTOR must all have count > 0.
1007 @Test
1008 @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
1009 IRNode.LOAD_VECTOR_L, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
1010 IRNode.ADD_VI, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
1011 IRNode.ADD_VL, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
1012 IRNode.STORE_VECTOR, "> 0"},
1013 applyIfPlatform = {"64-bit", "true"},
1014 applyIfCPUFeatureOr = {"avx2", "true", "rvv", "true"})
1015 // require avx2 (or rvv) to ensure vectors are larger than what unrolling produces
1016 static Object[] test13aIL(int[] a, long[] b) {
1017 for (int i = 0; i < RANGE; i++) {
1018 a[i]++;
1019 b[i]++;
1020 }
1021 return new Object[]{ a, b };
1022 }
1023
// test13aIB: increments an int array and a byte array in the same loop,
// a[i]++ and b[i]++ for i in [0, RANGE). Returns { a, b }.
// IR rule (64-bit platform, and avx2 or asimd or rvv): LOAD_VECTOR_B,
// LOAD_VECTOR_I, ADD_VB, ADD_VI and STORE_VECTOR counts must all be > 0.
1024 @Test
1025 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
1026 IRNode.LOAD_VECTOR_I, "> 0",
1027 IRNode.ADD_VB, "> 0",
1028 IRNode.ADD_VI, "> 0",
1029 IRNode.STORE_VECTOR, "> 0"},
1030 applyIfPlatform = {"64-bit", "true"},
1031 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
1032 static Object[] test13aIB(int[] a, byte[] b) {
1033 for (int i = 0; i < RANGE; i++) {
1034 a[i]++;
1035 b[i]++;
1036 }
1037 return new Object[]{ a, b };
1038 }
1039
// test13aIS: increments an int array and a short array in the same loop,
// a[i]++ and b[i]++ for i in [0, RANGE). Returns { a, b }.
// IR rule (64-bit platform, and avx2 or asimd or rvv): LOAD_VECTOR_I,
// LOAD_VECTOR_S, ADD_VI, ADD_VS and STORE_VECTOR counts must all be > 0.
1040 @Test
1041 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
1042 IRNode.LOAD_VECTOR_S, "> 0",
1043 IRNode.ADD_VI, "> 0",
1044 IRNode.ADD_VS, "> 0",
1045 IRNode.STORE_VECTOR, "> 0"},
1046 applyIfPlatform = {"64-bit", "true"},
1047 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
1048 static Object[] test13aIS(int[] a, short[] b) {
1049 for (int i = 0; i < RANGE; i++) {
1050 a[i]++;
1051 b[i]++;
1052 }
1053 return new Object[]{ a, b };
1054 }
1055
// test13aBSIL: increments a byte, a short, an int and a long array in the
// same loop (a[i]++ .. d[i]++) for i in [0, RANGE). Returns { a, b, c, d }.
// IR rule (64-bit platform, and avx2 or asimd or rvv): vector loads and
// vector adds of all four element types, plus STORE_VECTOR, must each have
// count > 0.
1056 @Test
1057 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
1058 IRNode.LOAD_VECTOR_S, "> 0",
1059 IRNode.LOAD_VECTOR_I, "> 0",
1060 IRNode.LOAD_VECTOR_L, "> 0",
1061 IRNode.ADD_VB, "> 0",
1062 IRNode.ADD_VS, "> 0",
1063 IRNode.ADD_VI, "> 0",
1064 IRNode.ADD_VL, "> 0",
1065 IRNode.STORE_VECTOR, "> 0"},
1066 applyIfPlatform = {"64-bit", "true"},
1067 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
1068 static Object[] test13aBSIL(byte[] a, short[] b, int[] c, long[] d) {
1069 for (int i = 0; i < RANGE; i++) {
1070 a[i]++;
1071 b[i]++;
1072 c[i]++;
1073 d[i]++;
1074 }
1075 return new Object[]{ a, b, c, d };
1076 }
1077
// test13bIL: increments an int array and a long array in the same loop,
// a[i]++ and b[i]++, with the loop starting at i = 1 (not 0) and running to
// RANGE (exclusive). Returns { a, b }.
// IR rule (64-bit platform, and avx2 or rvv): LOAD_VECTOR_I, LOAD_VECTOR_L,
// ADD_VI and ADD_VL, each with vector size min(max_int, max_long), and
// STORE_VECTOR must all have count > 0.
1078 @Test
1079 @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
1080 IRNode.LOAD_VECTOR_L, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
1081 IRNode.ADD_VI, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
1082 IRNode.ADD_VL, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
1083 IRNode.STORE_VECTOR, "> 0"},
1084 applyIfPlatform = {"64-bit", "true"},
1085 applyIfCPUFeatureOr = {"avx2", "true", "rvv", "true"})
1086 // require avx2 (or rvv) to ensure vectors are larger than what unrolling produces
1087 static Object[] test13bIL(int[] a, long[] b) {
1088 for (int i = 1; i < RANGE; i++) {
1089 a[i]++;
1090 b[i]++;
1091 }
1092 return new Object[]{ a, b };
1093 }
1094
// test13bIB: increments an int array and a byte array in the same loop,
// a[i]++ and b[i]++, with the loop starting at i = 1 (not 0) and running to
// RANGE (exclusive). Returns { a, b }.
// IR rule (64-bit platform, and avx2 or asimd or rvv): LOAD_VECTOR_B,
// LOAD_VECTOR_I, ADD_VB, ADD_VI and STORE_VECTOR counts must all be > 0.
1095 @Test
1096 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
1097 IRNode.LOAD_VECTOR_I, "> 0",
1098 IRNode.ADD_VB, "> 0",
1099 IRNode.ADD_VI, "> 0",
1100 IRNode.STORE_VECTOR, "> 0"},
1101 applyIfPlatform = {"64-bit", "true"},
1102 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
1103 static Object[] test13bIB(int[] a, byte[] b) {
1104 for (int i = 1; i < RANGE; i++) {
1105 a[i]++;
1106 b[i]++;
1107 }
1108 return new Object[]{ a, b };
1109 }
1110
// test13bIS: increments an int array and a short array in the same loop,
// a[i]++ and b[i]++, with the loop starting at i = 1 (not 0) and running to
// RANGE (exclusive). Returns { a, b }.
// IR rule (64-bit platform, and avx2 or asimd or rvv): LOAD_VECTOR_I,
// LOAD_VECTOR_S, ADD_VI, ADD_VS and STORE_VECTOR counts must all be > 0.
1111 @Test
1112 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
1113 IRNode.LOAD_VECTOR_S, "> 0",
1114 IRNode.ADD_VI, "> 0",
1115 IRNode.ADD_VS, "> 0",
1116 IRNode.STORE_VECTOR, "> 0"},
1117 applyIfPlatform = {"64-bit", "true"},
1118 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
1119 static Object[] test13bIS(int[] a, short[] b) {
1120 for (int i = 1; i < RANGE; i++) {
1121 a[i]++;
1122 b[i]++;
1123 }
1124 return new Object[]{ a, b };
1125 }
1126
// test13bBSIL: increments a byte, a short, an int and a long array in the
// same loop (a[i]++ .. d[i]++), with the loop starting at i = 1 (not 0) and
// running to RANGE (exclusive). Returns { a, b, c, d }.
// IR rule (64-bit platform, and avx2 or asimd or rvv): vector loads and
// vector adds of all four element types, plus STORE_VECTOR, must each have
// count > 0.
1127 @Test
1128 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
1129 IRNode.LOAD_VECTOR_S, "> 0",
1130 IRNode.LOAD_VECTOR_I, "> 0",
1131 IRNode.LOAD_VECTOR_L, "> 0",
1132 IRNode.ADD_VB, "> 0",
1133 IRNode.ADD_VS, "> 0",
1134 IRNode.ADD_VI, "> 0",
1135 IRNode.ADD_VL, "> 0",
1136 IRNode.STORE_VECTOR, "> 0"},
1137 applyIfPlatform = {"64-bit", "true"},
1138 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
1139 static Object[] test13bBSIL(byte[] a, short[] b, int[] c, long[] d) {
1140 for (int i = 1; i < RANGE; i++) {
1141 a[i]++;
1142 b[i]++;
1143 c[i]++;
1144 d[i]++;
1145 }
1146 return new Object[]{ a, b, c, d };
1147 }
1148
// test14aB: increments 16 consecutive bytes a[i] .. a[i+15] per iteration
// while i advances by 9, so each iteration re-touches bytes i+9 .. i+15 of
// the previous one. Returns { a }.
// Two IR rules (64-bit, and sse4.1 or asimd or rvv), one for
// AlignVector == false and one for AlignVector == true; both require
// LOAD_VECTOR_B, ADD_VB and STORE_VECTOR counts to be exactly 0.
1149 @Test
1150 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
1151 IRNode.ADD_VB, "= 0",
1152 IRNode.STORE_VECTOR, "= 0"},
1153 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
1154 applyIfPlatform = {"64-bit", "true"},
1155 applyIf = {"AlignVector", "false"})
1156 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
1157 IRNode.ADD_VB, "= 0",
1158 IRNode.STORE_VECTOR, "= 0"},
1159 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
1160 applyIfPlatform = {"64-bit", "true"},
1161 applyIf = {"AlignVector", "true"})
1162 static Object[] test14aB(byte[] a) {
1163 // non-power-of-2 stride
1164 for (int i = 0; i < RANGE-20; i+=9) {
1165 // Since the stride is shorter than the vector length, there will always be a
1166 // partial overlap of loads with previous stores; this leads to failure in
1167 // store-to-load-forwarding -> vectorization not profitable.
1168 a[i+0]++;
1169 a[i+1]++;
1170 a[i+2]++;
1171 a[i+3]++;
1172 a[i+4]++;
1173 a[i+5]++;
1174 a[i+6]++;
1175 a[i+7]++;
1176 a[i+8]++;
1177 a[i+9]++;
1178 a[i+10]++;
1179 a[i+11]++;
1180 a[i+12]++;
1181 a[i+13]++;
1182 a[i+14]++;
1183 a[i+15]++;
1184 }
1185 return new Object[]{ a };
1186 }
1187
// test14bB: increments 16 consecutive bytes a[i] .. a[i+15] per iteration
// while i advances by 3, so each iteration re-touches bytes i+3 .. i+15 of
// the previous one. Returns { a }.
// Two IR rules (64-bit, and sse4.1 or asimd or rvv), one for
// AlignVector == false and one for AlignVector == true; both require
// LOAD_VECTOR_B, ADD_VB and STORE_VECTOR counts to be exactly 0.
1188 @Test
1189 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
1190 IRNode.ADD_VB, "= 0",
1191 IRNode.STORE_VECTOR, "= 0"},
1192 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
1193 applyIfPlatform = {"64-bit", "true"},
1194 applyIf = {"AlignVector", "false"})
1195 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
1196 IRNode.ADD_VB, "= 0",
1197 IRNode.STORE_VECTOR, "= 0"},
1198 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
1199 applyIfPlatform = {"64-bit", "true"},
1200 applyIf = {"AlignVector", "true"})
1201 static Object[] test14bB(byte[] a) {
1202 // non-power-of-2 stride
1203 for (int i = 0; i < RANGE-20; i+=3) {
1204 // Since the stride is shorter than the vector length, there will always be a
1205 // partial overlap of loads with previous stores; this leads to failure in
1206 // store-to-load-forwarding -> vectorization not profitable.
1207 a[i+0]++;
1208 a[i+1]++;
1209 a[i+2]++;
1210 a[i+3]++;
1211 a[i+4]++;
1212 a[i+5]++;
1213 a[i+6]++;
1214 a[i+7]++;
1215 a[i+8]++;
1216 a[i+9]++;
1217 a[i+10]++;
1218 a[i+11]++;
1219 a[i+12]++;
1220 a[i+13]++;
1221 a[i+14]++;
1222 a[i+15]++;
1223 }
1224 return new Object[]{ a };
1225 }
1226
// test14cB: increments 16 consecutive bytes a[i] .. a[i+15] per iteration
// while i advances by 5, so each iteration re-touches bytes i+5 .. i+15 of
// the previous one. Returns { a }.
// Two IR rules (64-bit, and sse4.1 or asimd or rvv), one for
// AlignVector == false and one for AlignVector == true; both require
// LOAD_VECTOR_B, ADD_VB and STORE_VECTOR counts to be exactly 0.
1227 @Test
1228 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
1229 IRNode.ADD_VB, "= 0",
1230 IRNode.STORE_VECTOR, "= 0"},
1231 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
1232 applyIfPlatform = {"64-bit", "true"},
1233 applyIf = {"AlignVector", "false"})
1234 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
1235 IRNode.ADD_VB, "= 0",
1236 IRNode.STORE_VECTOR, "= 0"},
1237 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
1238 applyIfPlatform = {"64-bit", "true"},
1239 applyIf = {"AlignVector", "true"})
1240 static Object[] test14cB(byte[] a) {
1241 // non-power-of-2 stride
1242 for (int i = 0; i < RANGE-20; i+=5) {
1243 // Since the stride is shorter than the vector length, there will always be a
1244 // partial overlap of loads with previous stores; this leads to failure in
1245 // store-to-load-forwarding -> vectorization not profitable.
1246 a[i+0]++;
1247 a[i+1]++;
1248 a[i+2]++;
1249 a[i+3]++;
1250 a[i+4]++;
1251 a[i+5]++;
1252 a[i+6]++;
1253 a[i+7]++;
1254 a[i+8]++;
1255 a[i+9]++;
1256 a[i+10]++;
1257 a[i+11]++;
1258 a[i+12]++;
1259 a[i+13]++;
1260 a[i+14]++;
1261 a[i+15]++;
1262 }
1263 return new Object[]{ a };
1264 }
1265
// test14dB: increments 8 consecutive bytes a[i] .. a[i+7] per iteration while
// i advances by 9, so consecutive iterations touch disjoint bytes (one byte
// is skipped between them). Returns { a }.
// Two IR rules, both limited to 64-bit and (sse4.1 or asimd or rvv):
//  - AlignVector == false: LOAD_VECTOR_B and ADD_VB with vector size
//    min(max_byte, 8), and STORE_VECTOR, must have count > 0.
//  - AlignVector == true:  LOAD_VECTOR_B, ADD_VB, STORE_VECTOR counts = 0.
1266 @Test
1267 @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE + "min(max_byte, 8)", "> 0",
1268 IRNode.ADD_VB, IRNode.VECTOR_SIZE + "min(max_byte, 8)", "> 0",
1269 IRNode.STORE_VECTOR, "> 0"},
1270 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
1271 applyIfPlatform = {"64-bit", "true"},
1272 applyIf = {"AlignVector", "false"})
1273 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
1274 IRNode.ADD_VB, "= 0",
1275 IRNode.STORE_VECTOR, "= 0"},
1276 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
1277 applyIfPlatform = {"64-bit", "true"},
1278 applyIf = {"AlignVector", "true"})
1279 static Object[] test14dB(byte[] a) {
1280 // non-power-of-2 stride
1281 for (int i = 0; i < RANGE-20; i+=9) {
1282 a[i+0]++;
1283 a[i+1]++;
1284 a[i+2]++;
1285 a[i+3]++;
1286 a[i+4]++;
1287 a[i+5]++;
1288 a[i+6]++;
1289 a[i+7]++;
1290 }
1291 return new Object[]{ a };
1292 }
1293
// test14eB: increments 8 consecutive bytes a[i] .. a[i+7] per iteration while
// i advances by 11 (loop bound RANGE-32), so consecutive iterations touch
// disjoint bytes. Returns { a }.
// Two IR rules, both limited to 64-bit and (sse4.1 or asimd or rvv):
//  - AlignVector == false: LOAD_VECTOR_B and ADD_VB with vector size
//    min(max_byte, 8), and STORE_VECTOR, must have count > 0.
//  - AlignVector == true:  LOAD_VECTOR_B, ADD_VB, STORE_VECTOR counts = 0.
1294 @Test
1295 @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE + "min(max_byte, 8)", "> 0",
1296 IRNode.ADD_VB, IRNode.VECTOR_SIZE + "min(max_byte, 8)", "> 0",
1297 IRNode.STORE_VECTOR, "> 0"},
1298 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
1299 applyIfPlatform = {"64-bit", "true"},
1300 applyIf = {"AlignVector", "false"})
1301 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
1302 IRNode.ADD_VB, "= 0",
1303 IRNode.STORE_VECTOR, "= 0"},
1304 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
1305 applyIfPlatform = {"64-bit", "true"},
1306 applyIf = {"AlignVector", "true"})
1307 static Object[] test14eB(byte[] a) {
1308 // non-power-of-2 stride
1309 for (int i = 0; i < RANGE-32; i+=11) {
1310 a[i+0]++;
1311 a[i+1]++;
1312 a[i+2]++;
1313 a[i+3]++;
1314 a[i+4]++;
1315 a[i+5]++;
1316 a[i+6]++;
1317 a[i+7]++;
1318 }
1319 return new Object[]{ a };
1320 }
1321
// test14fB: increments 8 consecutive bytes a[i] .. a[i+7] per iteration while
// i advances by 12 (loop bound RANGE-40), so consecutive iterations touch
// disjoint bytes. Returns { a }.
// Two IR rules, both limited to 64-bit and (sse4.1 or asimd or rvv):
//  - AlignVector == false: LOAD_VECTOR_B and ADD_VB with vector size
//    min(max_byte, 8), and STORE_VECTOR, must have count > 0.
//  - AlignVector == true:  LOAD_VECTOR_B, ADD_VB, STORE_VECTOR counts = 0.
1322 @Test
1323 @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE + "min(max_byte, 8)", "> 0",
1324 IRNode.ADD_VB, IRNode.VECTOR_SIZE + "min(max_byte, 8)", "> 0",
1325 IRNode.STORE_VECTOR, "> 0"},
1326 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
1327 applyIfPlatform = {"64-bit", "true"},
1328 applyIf = {"AlignVector", "false"})
1329 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
1330 IRNode.ADD_VB, "= 0",
1331 IRNode.STORE_VECTOR, "= 0"},
1332 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
1333 applyIfPlatform = {"64-bit", "true"},
1334 applyIf = {"AlignVector", "true"})
1335 static Object[] test14fB(byte[] a) {
1336 // non-power-of-2 stride
1337 for (int i = 0; i < RANGE-40; i+=12) {
1338 a[i+0]++;
1339 a[i+1]++;
1340 a[i+2]++;
1341 a[i+3]++;
1342 a[i+4]++;
1343 a[i+5]++;
1344 a[i+6]++;
1345 a[i+7]++;
1346 }
1347 return new Object[]{ a };
1348 }
1349
// test15aB: increments the 16 consecutive bytes a[53*i] .. a[53*i+15] for
// i in [0, RANGE/64-20). The index scale 53 is larger than 16, so consecutive
// iterations touch disjoint bytes. Returns { a }.
// No @IR rule is attached (see the comment below the @Test annotation).
1350 @Test
1351 // IR rules difficult because of modulo wrapping with offset after peeling.
1352 static Object[] test15aB(byte[] a) {
1353 // non-power-of-2 scale
1354 for (int i = 0; i < RANGE/64-20; i++) {
1355 a[53*i+0]++;
1356 a[53*i+1]++;
1357 a[53*i+2]++;
1358 a[53*i+3]++;
1359 a[53*i+4]++;
1360 a[53*i+5]++;
1361 a[53*i+6]++;
1362 a[53*i+7]++;
1363 a[53*i+8]++;
1364 a[53*i+9]++;
1365 a[53*i+10]++;
1366 a[53*i+11]++;
1367 a[53*i+12]++;
1368 a[53*i+13]++;
1369 a[53*i+14]++;
1370 a[53*i+15]++;
1371 }
1372 return new Object[]{ a };
1373 }
1374
// test15bB: increments the 16 consecutive bytes a[25*i] .. a[25*i+15] for
// i in [0, RANGE/64-20). The index scale 25 is larger than 16, so consecutive
// iterations touch disjoint bytes. Returns { a }.
// No @IR rule is attached (see the comment below the @Test annotation).
1375 @Test
1376 // IR rules difficult because of modulo wrapping with offset after peeling.
1377 static Object[] test15bB(byte[] a) {
1378 // non-power-of-2 scale
1379 for (int i = 0; i < RANGE/64-20; i++) {
1380 a[25*i+0]++;
1381 a[25*i+1]++;
1382 a[25*i+2]++;
1383 a[25*i+3]++;
1384 a[25*i+4]++;
1385 a[25*i+5]++;
1386 a[25*i+6]++;
1387 a[25*i+7]++;
1388 a[25*i+8]++;
1389 a[25*i+9]++;
1390 a[25*i+10]++;
1391 a[25*i+11]++;
1392 a[25*i+12]++;
1393 a[25*i+13]++;
1394 a[25*i+14]++;
1395 a[25*i+15]++;
1396 }
1397 return new Object[]{ a };
1398 }
1399
// test15cB: increments the 16 consecutive bytes a[19*i] .. a[19*i+15] for
// i in [0, RANGE/64-20). The index scale 19 is larger than 16, so consecutive
// iterations touch disjoint bytes. Returns { a }.
// No @IR rule is attached (see the comment below the @Test annotation).
1400 @Test
1401 // IR rules difficult because of modulo wrapping with offset after peeling.
1402 static Object[] test15cB(byte[] a) {
1403 // non-power-of-2 scale
1404 for (int i = 0; i < RANGE/64-20; i++) {
1405 a[19*i+0]++;
1406 a[19*i+1]++;
1407 a[19*i+2]++;
1408 a[19*i+3]++;
1409 a[19*i+4]++;
1410 a[19*i+5]++;
1411 a[19*i+6]++;
1412 a[19*i+7]++;
1413 a[19*i+8]++;
1414 a[19*i+9]++;
1415 a[19*i+10]++;
1416 a[19*i+11]++;
1417 a[19*i+12]++;
1418 a[19*i+13]++;
1419 a[19*i+14]++;
1420 a[19*i+15]++;
1421 }
1422 return new Object[]{ a };
1423 }
1424
// test16a: for i in [0, RANGE/2-20), increments the 15 bytes
// a[2*i] .. a[2*i+14] and the 4 shorts b[2*i] .. b[2*i+3]. The index scale is
// 2, so each iteration overlaps the elements touched by the previous one.
// Returns { a, b }. No @IR rule is attached.
// NOTE(review): the "infinite loop issues" comment does not say which issue
// this kernel reproduces — confirm against the originating bug report.
1425 @Test
1426 static Object[] test16a(byte[] a, short[] b) {
1427 // infinite loop issues
1428 for (int i = 0; i < RANGE/2-20; i++) {
1429 a[2*i+0]++;
1430 a[2*i+1]++;
1431 a[2*i+2]++;
1432 a[2*i+3]++;
1433 a[2*i+4]++;
1434 a[2*i+5]++;
1435 a[2*i+6]++;
1436 a[2*i+7]++;
1437 a[2*i+8]++;
1438 a[2*i+9]++;
1439 a[2*i+10]++;
1440 a[2*i+11]++;
1441 a[2*i+12]++;
1442 a[2*i+13]++;
1443 a[2*i+14]++;
1444
1445 b[2*i+0]++;
1446 b[2*i+1]++;
1447 b[2*i+2]++;
1448 b[2*i+3]++;
1449 }
1450 return new Object[]{ a, b };
1451 }
1452
// test16b: for i in [0, RANGE/2-20), increments the 15 bytes
// a[2*i] .. a[2*i+14]. The index scale is 2, so each iteration overlaps the
// bytes touched by the previous one. Returns { a }. No @IR rule is attached.
// NOTE(review): the "infinite loop issues" comment does not say which issue
// this kernel reproduces — confirm against the originating bug report.
1453 @Test
1454 static Object[] test16b(byte[] a) {
1455 // infinite loop issues
1456 for (int i = 0; i < RANGE/2-20; i++) {
1457 a[2*i+0]++;
1458 a[2*i+1]++;
1459 a[2*i+2]++;
1460 a[2*i+3]++;
1461 a[2*i+4]++;
1462 a[2*i+5]++;
1463 a[2*i+6]++;
1464 a[2*i+7]++;
1465 a[2*i+8]++;
1466 a[2*i+9]++;
1467 a[2*i+10]++;
1468 a[2*i+11]++;
1469 a[2*i+12]++;
1470 a[2*i+13]++;
1471 a[2*i+14]++;
1472 }
1473 return new Object[]{ a };
1474 }
1475
// test17a: increments every element of a long array through Unsafe instead of
// array indexing, for i in [0, RANGE). Returns { a }.
// IR rule (64-bit platform, and sse4.1 or asimd or rvv): the counts of
// LOAD_VECTOR_L, ADD_VL and STORE_VECTOR must all be > 0.
1476 @Test
1477 @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0",
1478 IRNode.ADD_VL, "> 0",
1479 IRNode.STORE_VECTOR, "> 0"},
1480 applyIfPlatform = {"64-bit", "true"},
1481 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
1482 static Object[] test17a(long[] a) {
1483 // Unsafe: vectorizes with profiling (not xcomp)
1484 for (int i = 0; i < RANGE; i++) {
// Byte offset of element i: array base offset plus 8 bytes per long.
1485 long adr = UNSAFE.ARRAY_LONG_BASE_OFFSET + 8L * i;
1486 long v = UNSAFE.getLongUnaligned(a, adr);
1487 UNSAFE.putLongUnaligned(a, adr, v + 1);
1488 }
1489 return new Object[]{ a };
1490 }
1491
// test17b: like an Unsafe long-increment loop, but every 8-byte access starts
// one byte past an element boundary (offset base + 8*i + 1), for
// i in [0, RANGE-1). Returns { a }.
// No @IR rule is attached (see the comment below the @Test annotation).
1492 @Test
1493 // Difficult to write good IR rule. Modulo calculus overflow can create non-power-of-2 packs.
1494 static Object[] test17b(long[] a) {
1495 // Not alignable
// NOTE(review): the bound RANGE-1 presumably keeps the shifted 8-byte access
// inside the array — confirm against the array length used by the caller.
1496 for (int i = 0; i < RANGE-1; i++) {
1497 long adr = UNSAFE.ARRAY_LONG_BASE_OFFSET + 8L * i + 1;
1498 long v = UNSAFE.getLongUnaligned(a, adr);
1499 UNSAFE.putLongUnaligned(a, adr, v + 1);
1500 }
1501 return new Object[]{ a };
1502 }
1503
// test17c: Unsafe long-increment loop with stride 4: each iteration
// increments the two longs at byte offsets adr and adr+8 (elements i and
// i+1) and leaves elements i+2 and i+3 untouched. Returns { a }.
// IR rule (MaxVectorSize >= 32; 64-bit; sse4.1 or asimd or rvv):
// LOAD_VECTOR_L and ADD_VL with vector size 2, and STORE_VECTOR, must have
// count > 0.
1504 @Test
1505 @IR(counts = {IRNode.LOAD_VECTOR_L, IRNode.VECTOR_SIZE_2, "> 0",
1506 IRNode.ADD_VL, IRNode.VECTOR_SIZE_2, "> 0",
1507 IRNode.STORE_VECTOR, "> 0"},
1508 applyIf = {"MaxVectorSize", ">=32"},
1509 applyIfPlatform = {"64-bit", "true"},
1510 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
1511 static Object[] test17c(long[] a) {
1512 // Unsafe: aligned vectorizes
1513 for (int i = 0; i < RANGE-1; i+=4) {
1514 long adr = UNSAFE.ARRAY_LONG_BASE_OFFSET + 8L * i;
// Both loads are issued before either store.
1515 long v0 = UNSAFE.getLongUnaligned(a, adr + 0);
1516 long v1 = UNSAFE.getLongUnaligned(a, adr + 8);
1517 UNSAFE.putLongUnaligned(a, adr + 0, v0 + 1);
1518 UNSAFE.putLongUnaligned(a, adr + 8, v1 + 1);
1519 }
1520 return new Object[]{ a };
1521 }
1522
// test17d: Unsafe long-increment loop with stride 4 where every access is
// shifted by one byte: each iteration increments the 8-byte values at byte
// offsets adr and adr+8, with adr = base + 8*i + 1. Returns { a }.
// Two IR rules, both limited to 64-bit platforms:
//  - AlignVector == false and MaxVectorSize >= 64 (avx512 or asimd or rvv):
//    LOAD_VECTOR_L and ADD_VL with vector size 2, and STORE_VECTOR, must have
//    count > 0.
//  - AlignVector == true (sse4.1 or asimd or rvv): LOAD_VECTOR_L, ADD_VL and
//    STORE_VECTOR counts must be exactly 0.
1523 @Test
1524 @IR(counts = {IRNode.LOAD_VECTOR_L, IRNode.VECTOR_SIZE_2, "> 0",
1525 IRNode.ADD_VL, IRNode.VECTOR_SIZE_2, "> 0",
1526 IRNode.STORE_VECTOR, "> 0"},
1527 applyIfCPUFeatureOr = {"avx512", "true", "asimd", "true", "rvv", "true"},
1528 applyIfPlatform = {"64-bit", "true"},
1529 applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=64"})
1530 // Ensure vector width is large enough to fit 64 byte for longs:
1531 // The offsets are: 25, 33, 57, 65
1532 // In modulo 32: 25, 1, 25, 1 -> does not vectorize
1533 // In modulo 64: 25, 33, 57, 1 -> at least first pair vectorizes
1534 // This problem is because we compute modulo vector width in memory_alignment.
1535 @IR(counts = {IRNode.LOAD_VECTOR_L, "= 0",
1536 IRNode.ADD_VL, "= 0",
1537 IRNode.STORE_VECTOR, "= 0"},
1538 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
1539 applyIfPlatform = {"64-bit", "true"},
1540 applyIf = {"AlignVector", "true"})
1541 static Object[] test17d(long[] a) {
1542 // Not alignable
1543 for (int i = 0; i < RANGE-1; i+=4) {
1544 long adr = UNSAFE.ARRAY_LONG_BASE_OFFSET + 8L * i + 1;
// Both loads are issued before either store.
1545 long v0 = UNSAFE.getLongUnaligned(a, adr + 0);
1546 long v1 = UNSAFE.getLongUnaligned(a, adr + 8);
1547 UNSAFE.putLongUnaligned(a, adr + 0, v0 + 1);
1548 UNSAFE.putLongUnaligned(a, adr + 8, v1 + 1);
1549 }
1550 return new Object[]{ a };
1551 }
1552
// test18a: each of the RANGE iterations stores 1 to the fixed bytes a[0] and
// a[1] (indices that do not depend on i) around a store of 2 to b[i].
// Returns { a, b }. No @IR rule is attached.
1553 @Test
1554 static Object[] test18a(byte[] a, int[] b) {
1555 // scale = 0 --> no iv
1556 for (int i = 0; i < RANGE; i++) {
1557 a[0] = 1;
1558 b[i] = 2;
1559 a[1] = 1;
1560 }
1561 return new Object[]{ a, b };
1562 }
1563
// test18b: each of the RANGE iterations stores 1 to the fixed bytes a[1] and
// a[2] (indices that do not depend on i) around a store of 2 to b[i].
// Returns { a, b }. No @IR rule is attached.
1564 @Test
1565 static Object[] test18b(byte[] a, int[] b) {
1566 // scale = 0 --> no iv
1567 for (int i = 0; i < RANGE; i++) {
1568 a[1] = 1;
1569 b[i] = 2;
1570 a[2] = 1;
1571 }
1572 return new Object[]{ a, b };
1573 }
1574
// test19: copies b into a with a loop counter that counts down from 5000 to
// 1; the array index RANGE_FINAL - i therefore counts up, from
// RANGE_FINAL-5000 to RANGE_FINAL-1. Returns { a, b }.
// No @IR rule is attached.
// NOTE(review): assumes RANGE_FINAL >= 5000 so the index is never negative —
// confirm against the constant's definition.
1575 @Test
1576 static Object[] test19(int[] a, int[] b) {
1577 for (int i = 5000; i > 0; i--) {
1578 a[RANGE_FINAL - i] = b[RANGE_FINAL - i];
1579 }
1580 return new Object[]{ a, b };
1581 }
1582
// test20: for i in [1, RANGE/2-50), increments the 4 consecutive bytes
// a[2*i+30] .. a[2*i+33]. The index scale is 2, so each iteration overlaps
// the last two bytes touched by the previous one. Returns { a }.
// No @IR rule is attached.
1583 @Test
1584 static Object[] test20(byte[] a) {
1585 // Example where it is easy to pass alignment check,
1586 // but used to fail the alignment calculation
1587 for (int i = 1; i < RANGE/2-50; i++) {
1588 a[2*i+0+30]++;
1589 a[2*i+1+30]++;
1590 a[2*i+2+30]++;
1591 a[2*i+3+30]++;
1592 }
1593 return new Object[]{ a };
1594 }
1595 }