1 /*
2 * Copyright (c) 2024, 2025, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 */
23
24 package compiler.loopopts.superword;
25
26 import compiler.lib.ir_framework.*;
27 import jdk.test.lib.Utils;
28 import jdk.test.whitebox.WhiteBox;
29 import jdk.internal.misc.Unsafe;
30 import java.lang.reflect.Array;
31 import java.util.Map;
32 import java.util.HashMap;
33 import java.util.Random;
34 import java.nio.ByteOrder;
35
36 /*
37 * @test id=NoAlignVector
38 * @bug 8310190
39 * @summary Test AlignVector with various loop init, stride, scale, invar, etc.
40 * @modules java.base/jdk.internal.misc
41 * @library /test/lib /
42 * @run driver compiler.loopopts.superword.TestAlignVector NoAlignVector
43 */
44
45 /*
46 * @test id=AlignVector
47 * @bug 8310190
48 * @summary Test AlignVector with various loop init, stride, scale, invar, etc.
49 * @modules java.base/jdk.internal.misc
50 * @library /test/lib /
51 * @run driver compiler.loopopts.superword.TestAlignVector AlignVector
52 */
53
54 /*
55 * @test id=VerifyAlignVector
56 * @bug 8310190
57 * @summary Test AlignVector with various loop init, stride, scale, invar, etc.
58 * @modules java.base/jdk.internal.misc
59 * @library /test/lib /
60 * @run driver compiler.loopopts.superword.TestAlignVector VerifyAlignVector
61 */
62
63 /*
64 * @test id=NoAlignVector-COH
65 * @bug 8310190
66 * @summary Test AlignVector with various loop init, stride, scale, invar, etc.
67 * @modules java.base/jdk.internal.misc
68 * @library /test/lib /
69 * @run driver compiler.loopopts.superword.TestAlignVector NoAlignVector-COH
70 */
71
72 /*
73 * @test id=VerifyAlignVector-COH
74 * @bug 8310190
75 * @summary Test AlignVector with various loop init, stride, scale, invar, etc.
76 * @modules java.base/jdk.internal.misc
77 * @library /test/lib /
78 * @run driver compiler.loopopts.superword.TestAlignVector VerifyAlignVector-COH
79 */
80
81 public class TestAlignVector {
// Length of every generated input array; also used as the loop bound by the test kernels.
82 static int RANGE = 1024*8;
83 static int RANGE_FINAL = 1024*8;
// Unsafe handle; main() exports java.base/jdk.internal.misc so that this import resolves in the test VM.
84 private static final Unsafe UNSAFE = Unsafe.getUnsafe();
// Random source shared by the generate* methods that fill the input arrays.
85 private static final Random RANDOM = Utils.getRandomInstance();
86
87 // Inputs: per element type, two arrays (a*, b*) and one mask value (m*).
88 byte[] aB;
89 byte[] bB;
90 byte mB = (byte)31;
91 short[] aS;
92 short[] bS;
93 short mS = (short)0xF0F0;
94 int[] aI;
95 int[] bI;
96 int mI = 0xF0F0F0F0;
97 long[] aL;
98 long[] bL;
99 long mL = 0xF0F0F0F0F0F0F0F0L;
100
101 // Registered tests: name -> closure that calls the test method on fresh clones of the inputs.
102 Map<String,TestFunction> tests = new HashMap<String,TestFunction>();
103
104 // Gold results: name -> outputs of the first run of that closure, taken before compilation.
105 Map<String,Object[]> golds = new HashMap<String,Object[]>();
106
/**
 * One registered test case. Implementations are the lambdas stored in {@code tests}:
 * each one calls a test method on fresh clones of the inputs.
 */
@FunctionalInterface
interface TestFunction {
    /**
     * Runs the test case once.
     *
     * @return the arrays produced by the test method; {@code verify} compares them
     *         element by element against the gold result of the same test
     */
    Object[] run();
}
110
111 public static void main(String[] args) {
112 TestFramework framework = new TestFramework(TestAlignVector.class);
113 framework.addFlags("--add-modules", "java.base", "--add-exports", "java.base/jdk.internal.misc=ALL-UNNAMED",
114 "-XX:+IgnoreUnrecognizedVMOptions", "-XX:LoopUnrollLimit=250");
115
116 switch (args[0]) {
117 case "NoAlignVector" -> { framework.addFlags("-XX:-UseCompactObjectHeaders", "-XX:-AlignVector"); }
118 case "AlignVector" -> { framework.addFlags("-XX:-UseCompactObjectHeaders", "-XX:+AlignVector"); }
119 case "VerifyAlignVector" -> { framework.addFlags("-XX:-UseCompactObjectHeaders", "-XX:+AlignVector", "-XX:+IgnoreUnrecognizedVMOptions", "-XX:+VerifyAlignVector"); }
120 case "NoAlignVector-COH" -> { framework.addFlags("-XX:+UseCompactObjectHeaders", "-XX:-AlignVector"); }
121 case "VerifyAlignVector-COH" -> { framework.addFlags("-XX:+UseCompactObjectHeaders", "-XX:+AlignVector", "-XX:+IgnoreUnrecognizedVMOptions", "-XX:+VerifyAlignVector"); }
122 default -> { throw new RuntimeException("Test argument not recognized: " + args[0]); }
123 }
124 framework.start();
125 }
126
/**
 * Generates the input arrays once, registers every test case under a unique name,
 * and records the gold result of each test case by running it once.
 *
 * Each registered closure passes fresh clones of the inputs, so no run can observe
 * the stores of another run.
 */
public TestAlignVector() {
    // Generate input once
    aB = generateB();
    bB = generateB();
    aS = generateS();
    bS = generateS();
    aI = generateI();
    bI = generateI();
    aL = generateL();
    bL = generateL();

    // Add all tests to list
    tests.put("test0", () -> test0(aB.clone(), bB.clone(), mB));
    tests.put("test1", () -> test1(aB.clone(), bB.clone(), mB));
    tests.put("test2", () -> test2(aB.clone(), bB.clone(), mB));
    tests.put("test3", () -> test3(aB.clone(), bB.clone(), mB));
    tests.put("test4", () -> test4(aB.clone(), bB.clone(), mB));
    tests.put("test5", () -> test5(aB.clone(), bB.clone(), mB, 0));
    tests.put("test6", () -> test6(aB.clone(), bB.clone(), mB));
    tests.put("test7", () -> test7(aS.clone(), bS.clone(), mS));
    // The two test8 variants need distinct keys: registering both under "test8" made the
    // second put() replace the first, so the init = 0 variant was never run or verified.
    tests.put("test8_init0", () -> test8(aB.clone(), bB.clone(), mB, 0));
    tests.put("test8_init1", () -> test8(aB.clone(), bB.clone(), mB, 1));
    tests.put("test9", () -> test9(aB.clone(), bB.clone(), mB));

    tests.put("test10a", () -> test10a(aB.clone(), bB.clone(), mB));
    tests.put("test10b", () -> test10b(aB.clone(), bB.clone(), mB));
    tests.put("test10c", () -> test10c(aS.clone(), bS.clone(), mS));
    tests.put("test10d", () -> test10d(aS.clone(), bS.clone(), mS));

    tests.put("test11aB", () -> test11aB(aB.clone(), bB.clone(), mB));
    tests.put("test11aS", () -> test11aS(aS.clone(), bS.clone(), mS));
    tests.put("test11aI", () -> test11aI(aI.clone(), bI.clone(), mI));
    tests.put("test11aL", () -> test11aL(aL.clone(), bL.clone(), mL));

    tests.put("test11bB", () -> test11bB(aB.clone(), bB.clone(), mB));
    tests.put("test11bS", () -> test11bS(aS.clone(), bS.clone(), mS));
    tests.put("test11bI", () -> test11bI(aI.clone(), bI.clone(), mI));
    tests.put("test11bL", () -> test11bL(aL.clone(), bL.clone(), mL));

    tests.put("test11cB", () -> test11cB(aB.clone(), bB.clone(), mB));
    tests.put("test11cS", () -> test11cS(aS.clone(), bS.clone(), mS));
    tests.put("test11cI", () -> test11cI(aI.clone(), bI.clone(), mI));
    tests.put("test11cL", () -> test11cL(aL.clone(), bL.clone(), mL));

    tests.put("test11dB", () -> test11dB(aB.clone(), bB.clone(), mB, 0));
    tests.put("test11dS", () -> test11dS(aS.clone(), bS.clone(), mS, 0));
    tests.put("test11dI", () -> test11dI(aI.clone(), bI.clone(), mI, 0));
    tests.put("test11dL", () -> test11dL(aL.clone(), bL.clone(), mL, 0));

    tests.put("test12", () -> test12(aB.clone(), bB.clone(), mB));

    tests.put("test13aIL", () -> test13aIL(aI.clone(), aL.clone()));
    tests.put("test13aIB", () -> test13aIB(aI.clone(), aB.clone()));
    tests.put("test13aIS", () -> test13aIS(aI.clone(), aS.clone()));
    tests.put("test13aBSIL", () -> test13aBSIL(aB.clone(), aS.clone(), aI.clone(), aL.clone()));

    tests.put("test13bIL", () -> test13bIL(aI.clone(), aL.clone()));
    tests.put("test13bIB", () -> test13bIB(aI.clone(), aB.clone()));
    tests.put("test13bIS", () -> test13bIS(aI.clone(), aS.clone()));
    tests.put("test13bBSIL", () -> test13bBSIL(aB.clone(), aS.clone(), aI.clone(), aL.clone()));

    tests.put("test14aB", () -> test14aB(aB.clone()));
    tests.put("test14bB", () -> test14bB(aB.clone()));
    tests.put("test14cB", () -> test14cB(aB.clone()));
    tests.put("test14dB", () -> test14dB(aB.clone()));
    tests.put("test14eB", () -> test14eB(aB.clone()));
    tests.put("test14fB", () -> test14fB(aB.clone()));

    tests.put("test15aB", () -> test15aB(aB.clone()));
    tests.put("test15bB", () -> test15bB(aB.clone()));
    tests.put("test15cB", () -> test15cB(aB.clone()));

    tests.put("test16a", () -> test16a(aB.clone(), aS.clone()));
    tests.put("test16b", () -> test16b(aB.clone()));

    tests.put("test17a", () -> test17a(aL.clone()));
    tests.put("test17b", () -> test17b(aL.clone()));
    tests.put("test17c", () -> test17c(aL.clone()));
    tests.put("test17d", () -> test17d(aL.clone()));

    tests.put("test18a", () -> test18a(aB.clone(), aI.clone()));
    tests.put("test18b", () -> test18b(aB.clone(), aI.clone()));

    tests.put("test19", () -> test19(aI.clone(), bI.clone()));
    tests.put("test20", () -> test20(aB.clone()));

    // Compute gold value for all test methods before compilation
    tests.forEach((name, test) -> golds.put(name, test.run()));
}
221
222 @Warmup(100)
223 @Run(test = {"test0",
224 "test1",
225 "test2",
226 "test3",
227 "test4",
228 "test5",
229 "test6",
230 "test7",
231 "test8",
232 "test9",
233 "test10a",
234 "test10b",
235 "test10c",
236 "test10d",
237 "test11aB",
238 "test11aS",
239 "test11aI",
240 "test11aL",
241 "test11bB",
242 "test11bS",
243 "test11bI",
244 "test11bL",
245 "test11cB",
246 "test11cS",
247 "test11cI",
248 "test11cL",
249 "test11dB",
250 "test11dS",
251 "test11dI",
252 "test11dL",
253 "test12",
254 "test13aIL",
255 "test13aIB",
256 "test13aIS",
257 "test13aBSIL",
258 "test13bIL",
259 "test13bIB",
260 "test13bIS",
261 "test13bBSIL",
262 "test14aB",
263 "test14bB",
264 "test14cB",
265 "test14dB",
266 "test14eB",
267 "test14fB",
268 "test15aB",
269 "test15bB",
270 "test15cB",
271 "test16a",
272 "test16b",
273 "test17a",
274 "test17b",
275 "test17c",
276 "test17d",
277 "test18a",
278 "test18b",
279 "test19",
280 "test20"})
281 public void runTests() {
282 for (Map.Entry<String,TestFunction> entry : tests.entrySet()) {
283 String name = entry.getKey();
284 TestFunction test = entry.getValue();
285 // Recall gold value from before compilation
286 Object[] gold = golds.get(name);
287 // Compute new result
288 Object[] result = test.run();
289 // Compare gold and new result
290 verify(name, gold, result);
291 }
292 }
293
294 static byte[] generateB() {
295 byte[] a = new byte[RANGE];
296 for (int i = 0; i < a.length; i++) {
297 a[i] = (byte)RANDOM.nextInt();
298 }
299 return a;
300 }
301
302 static short[] generateS() {
303 short[] a = new short[RANGE];
304 for (int i = 0; i < a.length; i++) {
305 a[i] = (short)RANDOM.nextInt();
306 }
307 return a;
308 }
309
310 static int[] generateI() {
311 int[] a = new int[RANGE];
312 for (int i = 0; i < a.length; i++) {
313 a[i] = RANDOM.nextInt();
314 }
315 return a;
316 }
317
318 static long[] generateL() {
319 long[] a = new long[RANGE];
320 for (int i = 0; i < a.length; i++) {
321 a[i] = RANDOM.nextLong();
322 }
323 return a;
324 }
325
326 static void verify(String name, Object[] gold, Object[] result) {
327 if (gold.length != result.length) {
328 throw new RuntimeException("verify " + name + ": not the same number of outputs: gold.length = " +
329 gold.length + ", result.length = " + result.length);
330 }
331 for (int i = 0; i < gold.length; i++) {
332 Object g = gold[i];
333 Object r = result[i];
334 if (g.getClass() != r.getClass() || !g.getClass().isArray() || !r.getClass().isArray()) {
335 throw new RuntimeException("verify " + name + ": must both be array of same type:" +
336 " gold[" + i + "].getClass() = " + g.getClass().getSimpleName() +
337 " result[" + i + "].getClass() = " + r.getClass().getSimpleName());
338 }
339 if (g == r) {
340 throw new RuntimeException("verify " + name + ": should be two separate arrays (with identical content):" +
341 " gold[" + i + "] == result[" + i + "]");
342 }
343 if (Array.getLength(g) != Array.getLength(r)) {
344 throw new RuntimeException("verify " + name + ": arrays must have same length:" +
345 " gold[" + i + "].length = " + Array.getLength(g) +
346 " result[" + i + "].length = " + Array.getLength(r));
347 }
348 Class c = g.getClass().getComponentType();
349 if (c == byte.class) {
350 verifyB(name, i, (byte[])g, (byte[])r);
351 } else if (c == short.class) {
352 verifyS(name, i, (short[])g, (short[])r);
353 } else if (c == int.class) {
354 verifyI(name, i, (int[])g, (int[])r);
355 } else if (c == long.class) {
356 verifyL(name, i, (long[])g, (long[])r);
357 } else {
358 throw new RuntimeException("verify " + name + ": array type not supported for verify:" +
359 " gold[" + i + "].getClass() = " + g.getClass().getSimpleName() +
360 " result[" + i + "].getClass() = " + r.getClass().getSimpleName());
361 }
362 }
363 }
364
365 static void verifyB(String name, int i, byte[] g, byte[] r) {
366 for (int j = 0; j < g.length; j++) {
367 if (g[j] != r[j]) {
368 throw new RuntimeException("verify " + name + ": arrays must have same content:" +
369 " gold[" + i + "][" + j + "] = " + g[j] +
370 " result[" + i + "][" + j + "] = " + r[j]);
371 }
372 }
373 }
374
375 static void verifyS(String name, int i, short[] g, short[] r) {
376 for (int j = 0; j < g.length; j++) {
377 if (g[j] != r[j]) {
378 throw new RuntimeException("verify " + name + ": arrays must have same content:" +
379 " gold[" + i + "][" + j + "] = " + g[j] +
380 " result[" + i + "][" + j + "] = " + r[j]);
381 }
382 }
383 }
384
385 static void verifyI(String name, int i, int[] g, int[] r) {
386 for (int j = 0; j < g.length; j++) {
387 if (g[j] != r[j]) {
388 throw new RuntimeException("verify " + name + ": arrays must have same content:" +
389 " gold[" + i + "][" + j + "] = " + g[j] +
390 " result[" + i + "][" + j + "] = " + r[j]);
391 }
392 }
393 }
394
395 static void verifyL(String name, int i, long[] g, long[] r) {
396 for (int j = 0; j < g.length; j++) {
397 if (g[j] != r[j]) {
398 throw new RuntimeException("verify " + name + ": arrays must have same content:" +
399 " gold[" + i + "][" + j + "] = " + g[j] +
400 " result[" + i + "][" + j + "] = " + r[j]);
401 }
402 }
403 }
404
// test0: for i = 0, 8, 16, ... masks the first 4 bytes of each 8-byte group:
// b[i+k] = a[i+k] & mask for k = 0..3.
// IR rule (64-bit, sse4.1/asimd/rvv, MaxVectorSize >= 8): requires 4-element byte vector
// loads and ANDs and at least one vector store. The rule has no AlignVector condition,
// so it is checked with the flag on and off.
// Returns { a, b } so the caller can compare them with the gold run.
405 @Test
406 @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
407 IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
408 IRNode.STORE_VECTOR, "> 0"},
409 applyIf = {"MaxVectorSize", ">=8"},
410 applyIfPlatform = {"64-bit", "true"},
411 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
412 static Object[] test0(byte[] a, byte[] b, byte mask) {
413 for (int i = 0; i < RANGE; i+=8) {
414 // Safe to vectorize with AlignVector
415 b[i+0] = (byte)(a[i+0] & mask); // offset 0, align 0
416 b[i+1] = (byte)(a[i+1] & mask);
417 b[i+2] = (byte)(a[i+2] & mask);
418 b[i+3] = (byte)(a[i+3] & mask);
419 }
420 return new Object[]{ a, b };
421 }
422
// test1: for i = 0, 8, 16, ... masks all 8 bytes of each 8-byte group:
// b[i+k] = a[i+k] & mask for k = 0..7.
// IR rule (64-bit, avx2/asimd/rvv): requires byte vector loads, ANDs and vector stores.
// It has no vector-size, MaxVectorSize or AlignVector condition.
// Returns { a, b } so the caller can compare them with the gold run.
423 @Test
424 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
425 IRNode.AND_VB, "> 0",
426 IRNode.STORE_VECTOR, "> 0"},
427 applyIfPlatform = {"64-bit", "true"},
428 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
429 static Object[] test1(byte[] a, byte[] b, byte mask) {
430 for (int i = 0; i < RANGE; i+=8) {
431 b[i+0] = (byte)(a[i+0] & mask); // adr = base + UNSAFE.ARRAY_BYTE_BASE_OFFSET + 0 + iter*8
432 b[i+1] = (byte)(a[i+1] & mask);
433 b[i+2] = (byte)(a[i+2] & mask);
434 b[i+3] = (byte)(a[i+3] & mask);
435 b[i+4] = (byte)(a[i+4] & mask);
436 b[i+5] = (byte)(a[i+5] & mask);
437 b[i+6] = (byte)(a[i+6] & mask);
438 b[i+7] = (byte)(a[i+7] & mask);
439 }
440 return new Object[]{ a, b };
441 }
442
// test2: for i = 0, 8, 16, ... masks bytes 3..6 of each 8-byte group:
// b[i+k] = a[i+k] & mask for k = 3..6.
// IR rules (64-bit, sse4.1/asimd/rvv):
//   AlignVector off and MaxVectorSize >= 8: requires 4-element byte vector loads/ANDs and vector stores.
//   AlignVector on: requires that no byte vector load, AND or vector store is present.
// Returns { a, b } so the caller can compare them with the gold run.
443 @Test
444 @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
445 IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
446 IRNode.STORE_VECTOR, "> 0"},
447 applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"},
448 applyIfPlatform = {"64-bit", "true"},
449 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
450 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
451 IRNode.AND_VB, "= 0",
452 IRNode.STORE_VECTOR, "= 0"},
453 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
454 applyIfPlatform = {"64-bit", "true"},
455 applyIf = {"AlignVector", "true"})
456 static Object[] test2(byte[] a, byte[] b, byte mask) {
457 for (int i = 0; i < RANGE; i+=8) {
458 // Cannot align with AlignVector: 3 + x * 8 % 8 = 3
459 b[i+3] = (byte)(a[i+3] & mask); // at alignment 3
460 b[i+4] = (byte)(a[i+4] & mask);
461 b[i+5] = (byte)(a[i+5] & mask);
462 b[i+6] = (byte)(a[i+6] & mask);
463 }
464 return new Object[]{ a, b };
465 }
466
// test3: same accesses as test2 (bytes 3..6 of each 8-byte group) plus one extra
// access at offset 0 of the group.
// IR rules (64-bit, sse4.1/asimd/rvv):
//   AlignVector off and MaxVectorSize >= 8: requires 4-element byte vector loads/ANDs and vector stores.
//   AlignVector on: requires that no byte vector load, AND or vector store is present.
// Returns { a, b } so the caller can compare them with the gold run.
467 @Test
468 @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
469 IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
470 IRNode.STORE_VECTOR, "> 0"},
471 applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"},
472 applyIfPlatform = {"64-bit", "true"},
473 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
474 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
475 IRNode.AND_VB, "= 0",
476 IRNode.STORE_VECTOR, "= 0"},
477 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
478 applyIfPlatform = {"64-bit", "true"},
479 applyIf = {"AlignVector", "true"})
480 static Object[] test3(byte[] a, byte[] b, byte mask) {
481 for (int i = 0; i < RANGE; i+=8) {
482 // Cannot align with AlignVector: 3 + x * 8 % 8 = 3
483
484 // Problematic for AlignVector
485 b[i+0] = (byte)(a[i+0] & mask); // best_memref, align 0
486
487 b[i+3] = (byte)(a[i+3] & mask); // pack at offset 3 bytes
488 b[i+4] = (byte)(a[i+4] & mask);
489 b[i+5] = (byte)(a[i+5] & mask);
490 b[i+6] = (byte)(a[i+6] & mask);
491 }
492 return new Object[]{ a, b };
493 }
494
// test4: for i = 0 .. RANGE/16 - 1, works on the 16-byte group starting at i*16 and masks
// two runs inside it: offsets 0..3 (4 bytes) and offsets 5..12 (8 bytes).
// IR rules (64-bit, sse4.1/asimd/rvv, MaxVectorSize >= 16):
//   AlignVector off: requires both 4-element and 8-element byte vector loads/ANDs, and vector stores.
//   AlignVector on: requires 4-element vectors and vector stores, and no 8-element vector loads/ANDs.
// Returns { a, b } so the caller can compare them with the gold run.
495 @Test
496 @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
497 IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_8, "> 0",
498 IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
499 IRNode.AND_VB, IRNode.VECTOR_SIZE_8, "> 0",
500 IRNode.STORE_VECTOR, "> 0"},
501 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
502 applyIfPlatform = {"64-bit", "true"},
503 applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=16"})
504 @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
505 IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_8, "= 0",// unaligned
506 IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
507 IRNode.AND_VB, IRNode.VECTOR_SIZE_8, "= 0",// unaligned
508 IRNode.STORE_VECTOR, "> 0"},
509 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
510 applyIfPlatform = {"64-bit", "true"},
511 applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">=16"})
512 static Object[] test4(byte[] a, byte[] b, byte mask) {
513 for (int i = 0; i < RANGE/16; i++) {
514 // Problematic for AlignVector
515 b[i*16 + 0 ] = (byte)(a[i*16 + 0 ] & mask); // 4 pack, 0 aligned
516 b[i*16 + 1 ] = (byte)(a[i*16 + 1 ] & mask);
517 b[i*16 + 2 ] = (byte)(a[i*16 + 2 ] & mask);
518 b[i*16 + 3 ] = (byte)(a[i*16 + 3 ] & mask);
519
520 b[i*16 + 5 ] = (byte)(a[i*16 + 5 ] & mask); // 8 pack, 5 aligned
521 b[i*16 + 6 ] = (byte)(a[i*16 + 6 ] & mask);
522 b[i*16 + 7 ] = (byte)(a[i*16 + 7 ] & mask);
523 b[i*16 + 8 ] = (byte)(a[i*16 + 8 ] & mask);
524 b[i*16 + 9 ] = (byte)(a[i*16 + 9 ] & mask);
525 b[i*16 + 10] = (byte)(a[i*16 + 10] & mask);
526 b[i*16 + 11] = (byte)(a[i*16 + 11] & mask);
527 b[i*16 + 12] = (byte)(a[i*16 + 12] & mask);
528 }
529 return new Object[]{ a, b };
530 }
531
// test5: same access pattern as test3 (offsets 0 and 3..6 of each 8-byte group), but every
// index additionally contains the loop-invariant parameter inv.
// IR rules (64-bit, sse4.1/asimd/rvv):
//   AlignVector off and MaxVectorSize >= 8: requires 4-element byte vector loads/ANDs and vector stores.
//   AlignVector on: requires that no byte vector load, AND or vector store is present.
// Returns { a, b } so the caller can compare them with the gold run.
532 @Test
533 @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
534 IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
535 IRNode.STORE_VECTOR, "> 0"},
536 applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"},
537 applyIfPlatform = {"64-bit", "true"},
538 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
539 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
540 IRNode.AND_VB, "= 0",
541 IRNode.STORE_VECTOR, "= 0"},
542 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
543 applyIfPlatform = {"64-bit", "true"},
544 applyIf = {"AlignVector", "true"})
545 static Object[] test5(byte[] a, byte[] b, byte mask, int inv) {
546 for (int i = 0; i < RANGE; i+=8) {
547 // Cannot align with AlignVector because of invariant
548 b[i+inv+0] = (byte)(a[i+inv+0] & mask);
549
550 b[i+inv+3] = (byte)(a[i+inv+3] & mask);
551 b[i+inv+4] = (byte)(a[i+inv+4] & mask);
552 b[i+inv+5] = (byte)(a[i+inv+5] & mask);
553 b[i+inv+6] = (byte)(a[i+inv+6] & mask);
554 }
555 return new Object[]{ a, b };
556 }
557
// test6: i runs over 0, 2, 4, ... below RANGE/8 and the index is scaled by 4, so each
// iteration starts a new 8-byte group at i*4 and accesses offsets 0 and 3..6 in it.
// IR rules (64-bit, sse4.1/asimd/rvv):
//   AlignVector off and MaxVectorSize >= 8: requires 4-element byte vector loads/ANDs and vector stores.
//   AlignVector on: requires that no byte vector load, AND or vector store is present.
// Returns { a, b } so the caller can compare them with the gold run.
558 @Test
559 @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
560 IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
561 IRNode.STORE_VECTOR, "> 0"},
562 applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"},
563 applyIfPlatform = {"64-bit", "true"},
564 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
565 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
566 IRNode.AND_VB, "= 0",
567 IRNode.STORE_VECTOR, "= 0"},
568 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
569 applyIfPlatform = {"64-bit", "true"},
570 applyIf = {"AlignVector", "true"})
571 static Object[] test6(byte[] a, byte[] b, byte mask) {
572 for (int i = 0; i < RANGE/8; i+=2) {
573 // Cannot align with AlignVector because offset is odd
574 b[i*4+0] = (byte)(a[i*4+0] & mask);
575
576 b[i*4+3] = (byte)(a[i*4+3] & mask);
577 b[i*4+4] = (byte)(a[i*4+4] & mask);
578 b[i*4+5] = (byte)(a[i*4+5] & mask);
579 b[i*4+6] = (byte)(a[i*4+6] & mask);
580 }
581 return new Object[]{ a, b };
582 }
583
// test7: short[] variant of test6: i = 0, 2, 4, ... below RANGE/8, index scaled by 4,
// accesses at element offsets 0 and 3..6 of each group of 8 shorts.
// IR rules (64-bit):
//   AlignVector off and MaxVectorSize >= 16 (avx2/asimd/rvv): requires 4-element short vector
//   loads/ANDs and vector stores.
//   AlignVector on (sse4.1/asimd/rvv): requires that no short vector load, AND or vector store is present.
// Returns { a, b } so the caller can compare them with the gold run.
584 @Test
585 @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
586 IRNode.AND_VS, IRNode.VECTOR_SIZE_4, "> 0",
587 IRNode.STORE_VECTOR, "> 0"},
588 applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=16"},
589 applyIfPlatform = {"64-bit", "true"},
590 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
591 @IR(counts = {IRNode.LOAD_VECTOR_S, "= 0",
592 IRNode.AND_VS, "= 0",
593 IRNode.STORE_VECTOR, "= 0"},
594 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
595 applyIfPlatform = {"64-bit", "true"},
596 applyIf = {"AlignVector", "true"})
597 static Object[] test7(short[] a, short[] b, short mask) {
598 for (int i = 0; i < RANGE/8; i+=2) {
599 // Cannot align with AlignVector because offset is odd
600 b[i*4+0] = (short)(a[i*4+0] & mask);
601
602 b[i*4+3] = (short)(a[i*4+3] & mask);
603 b[i*4+4] = (short)(a[i*4+4] & mask);
604 b[i*4+5] = (short)(a[i*4+5] & mask);
605 b[i*4+6] = (short)(a[i*4+6] & mask);
606 }
607 return new Object[]{ a, b };
608 }
609
// test8: same access pattern as test3 (offsets 0 and 3..6, stride 8), but the loop starts
// at the parameter init instead of a constant.
// IR rules (64-bit, sse4.1/asimd/rvv):
//   AlignVector off and MaxVectorSize >= 8: requires 4-element byte vector loads/ANDs and vector stores.
//   AlignVector on: requires that no byte vector load, AND or vector store is present.
// Returns { a, b } so the caller can compare them with the gold run.
610 @Test
611 @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
612 IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
613 IRNode.STORE_VECTOR, "> 0"},
614 applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"},
615 applyIfPlatform = {"64-bit", "true"},
616 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
617 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
618 IRNode.AND_VB, "= 0",
619 IRNode.STORE_VECTOR, "= 0"},
620 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
621 applyIfPlatform = {"64-bit", "true"},
622 applyIf = {"AlignVector", "true"})
623 static Object[] test8(byte[] a, byte[] b, byte mask, int init) {
624 for (int i = init; i < RANGE; i+=8) {
625 // Cannot align with AlignVector because of invariant (variable init becomes invar)
626 b[i+0] = (byte)(a[i+0] & mask);
627
628 b[i+3] = (byte)(a[i+3] & mask);
629 b[i+4] = (byte)(a[i+4] & mask);
630 b[i+5] = (byte)(a[i+5] & mask);
631 b[i+6] = (byte)(a[i+6] & mask);
632 }
633 return new Object[]{ a, b };
634 }
635
// test9: same access pattern as test3 (offsets 0 and 3..6, stride 8), but the loop starts
// at the constant 13 and stops below RANGE-8. With i = 13 the 4-byte run starts at index 16.
// IR rule (64-bit, sse4.1/asimd/rvv, MaxVectorSize >= 8): requires 4-element byte vector
// loads/ANDs and vector stores. The rule has no AlignVector condition.
// Returns { a, b } so the caller can compare them with the gold run.
636 @Test
637 @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
638 IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
639 IRNode.STORE_VECTOR, "> 0"},
640 applyIf = {"MaxVectorSize", ">=8"},
641 applyIfPlatform = {"64-bit", "true"},
642 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
643 static Object[] test9(byte[] a, byte[] b, byte mask) {
644 // known non-zero init value does not affect offset, but has implicit effect on iv
645 for (int i = 13; i < RANGE-8; i+=8) {
646 b[i+0] = (byte)(a[i+0] & mask);
647
648 b[i+3] = (byte)(a[i+3] & mask);
649 b[i+4] = (byte)(a[i+4] & mask);
650 b[i+5] = (byte)(a[i+5] & mask);
651 b[i+6] = (byte)(a[i+6] & mask);
652 }
653 return new Object[]{ a, b };
654 }
655
// test10a: for i = 3, 11, 19, ... below RANGE-8, masks the 4 bytes b[i..i+3] = a[i..i+3] & mask.
// IR rules (64-bit, sse4.1/asimd/rvv):
//   AlignVector off and MaxVectorSize >= 8: requires 4-element byte vector loads/ANDs and vector stores.
//   AlignVector on: requires that no byte vector load, AND or vector store is present.
// Returns { a, b } so the caller can compare them with the gold run.
656 @Test
657 @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
658 IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
659 IRNode.STORE_VECTOR, "> 0"},
660 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
661 applyIfPlatform = {"64-bit", "true"},
662 applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"})
663 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
664 IRNode.AND_VB, "= 0",
665 IRNode.STORE_VECTOR, "= 0"},
666 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
667 applyIfPlatform = {"64-bit", "true"},
668 applyIf = {"AlignVector", "true"})
669 static Object[] test10a(byte[] a, byte[] b, byte mask) {
670 // This is not alignable with pre-loop, because of odd init.
671 for (int i = 3; i < RANGE-8; i+=8) {
672 b[i+0] = (byte)(a[i+0] & mask);
673 b[i+1] = (byte)(a[i+1] & mask);
674 b[i+2] = (byte)(a[i+2] & mask);
675 b[i+3] = (byte)(a[i+3] & mask);
676 }
677 return new Object[]{ a, b };
678 }
679
// test10b: same as test10a but with loop start 13: for i = 13, 21, 29, ... below RANGE-8,
// masks the 4 bytes b[i..i+3] = a[i..i+3] & mask.
// IR rules (64-bit, sse4.1/asimd/rvv):
//   AlignVector off and MaxVectorSize >= 8: requires 4-element byte vector loads/ANDs and vector stores.
//   AlignVector on: requires that no byte vector load, AND or vector store is present.
// Returns { a, b } so the caller can compare them with the gold run.
680 @Test
681 @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
682 IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
683 IRNode.STORE_VECTOR, "> 0"},
684 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
685 applyIfPlatform = {"64-bit", "true"},
686 applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"})
687 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
688 IRNode.AND_VB, "= 0",
689 IRNode.STORE_VECTOR, "= 0"},
690 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
691 applyIfPlatform = {"64-bit", "true"},
692 applyIf = {"AlignVector", "true"})
693 static Object[] test10b(byte[] a, byte[] b, byte mask) {
694 // This is not alignable with pre-loop, because of odd init.
695 // Seems not correctly handled.
696 for (int i = 13; i < RANGE-8; i+=8) {
697 b[i+0] = (byte)(a[i+0] & mask);
698 b[i+1] = (byte)(a[i+1] & mask);
699 b[i+2] = (byte)(a[i+2] & mask);
700 b[i+3] = (byte)(a[i+3] & mask);
701 }
702 return new Object[]{ a, b };
703 }
704
// test10c: short[] variant of test10b: for i = 13, 21, 29, ... below RANGE-8,
// masks the 4 shorts b[i..i+3] = a[i..i+3] & mask.
// IR rules (64-bit, sse4.1/asimd/rvv):
//   AlignVector off and MaxVectorSize >= 16: requires 4-element short vector loads/ANDs and vector stores.
//   AlignVector on: requires that no short vector load, AND or vector store is present.
// Returns { a, b } so the caller can compare them with the gold run.
705 @Test
706 @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
707 IRNode.AND_VS, IRNode.VECTOR_SIZE_4, "> 0",
708 IRNode.STORE_VECTOR, "> 0"},
709 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
710 applyIfPlatform = {"64-bit", "true"},
711 applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=16"})
712 @IR(counts = {IRNode.LOAD_VECTOR_S, "= 0",
713 IRNode.AND_VS, "= 0",
714 IRNode.STORE_VECTOR, "= 0"},
715 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
716 applyIfPlatform = {"64-bit", "true"},
717 applyIf = {"AlignVector", "true"})
718 static Object[] test10c(short[] a, short[] b, short mask) {
719 // This is not alignable with pre-loop, because of odd init.
720 // Seems not correctly handled with MaxVectorSize >= 32.
721 for (int i = 13; i < RANGE-8; i+=8) {
722 b[i+0] = (short)(a[i+0] & mask);
723 b[i+1] = (short)(a[i+1] & mask);
724 b[i+2] = (short)(a[i+2] & mask);
725 b[i+3] = (short)(a[i+3] & mask);
726 }
727 return new Object[]{ a, b };
728 }
729
// test10d: like test10c, but every index has an extra constant +3, so with i = 13 the
// 4-short run starts at element 16; the loop stops below RANGE-16.
// IR rule (64-bit, avx2/asimd/rvv, MaxVectorSize >= 16): requires 4-element short vector
// loads/ANDs and vector stores. The rule has no AlignVector condition.
// Returns { a, b } so the caller can compare them with the gold run.
730 @Test
731 @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
732 IRNode.AND_VS, IRNode.VECTOR_SIZE_4, "> 0",
733 IRNode.STORE_VECTOR, "> 0"},
734 applyIf = {"MaxVectorSize", ">=16"},
735 applyIfPlatform = {"64-bit", "true"},
736 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
737 static Object[] test10d(short[] a, short[] b, short mask) {
738 for (int i = 13; i < RANGE-16; i+=8) {
739 // adr = base + UNSAFE.ARRAY_SHORT_BASE_OFFSET + 2*(3 + 13) + iter*16
740 b[i+0+3] = (short)(a[i+0+3] & mask);
741 b[i+1+3] = (short)(a[i+1+3] & mask);
742 b[i+2+3] = (short)(a[i+2+3] & mask);
743 b[i+3+3] = (short)(a[i+3+3] & mask);
744 }
745 return new Object[]{ a, b };
746 }
747
// test11aB: unit-stride loop over all of b, starting at 0: b[i] = a[i] & mask (byte).
// IR rule (64-bit, sse4.1/asimd/rvv): requires byte vector loads, ANDs and vector stores.
// The rule has no AlignVector condition.
// Returns { a, b } so the caller can compare them with the gold run.
748 @Test
749 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
750 IRNode.AND_VB, "> 0",
751 IRNode.STORE_VECTOR, "> 0"},
752 applyIfPlatform = {"64-bit", "true"},
753 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
754 static Object[] test11aB(byte[] a, byte[] b, byte mask) {
755 for (int i = 0; i < RANGE; i++) {
756 // always alignable
757 b[i+0] = (byte)(a[i+0] & mask);
758 }
759 return new Object[]{ a, b };
760 }
761
// test11aS: unit-stride loop over all of b, starting at 0: b[i] = a[i] & mask (short).
// IR rule (64-bit, sse4.1/asimd/rvv): requires short vector loads, ANDs and vector stores.
// The rule has no AlignVector condition.
// Returns { a, b } so the caller can compare them with the gold run.
762 @Test
763 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
764 IRNode.AND_VS, "> 0",
765 IRNode.STORE_VECTOR, "> 0"},
766 applyIfPlatform = {"64-bit", "true"},
767 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
768 static Object[] test11aS(short[] a, short[] b, short mask) {
769 for (int i = 0; i < RANGE; i++) {
770 // always alignable
771 b[i+0] = (short)(a[i+0] & mask);
772 }
773 return new Object[]{ a, b };
774 }
775
// test11aI: unit-stride loop over all of b, starting at 0: b[i] = a[i] & mask (int).
// IR rule (64-bit, sse4.1/asimd/rvv): requires int vector loads, ANDs and vector stores.
// The rule has no AlignVector condition.
// Returns { a, b } so the caller can compare them with the gold run.
776 @Test
777 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
778 IRNode.AND_VI, "> 0",
779 IRNode.STORE_VECTOR, "> 0"},
780 applyIfPlatform = {"64-bit", "true"},
781 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
782 static Object[] test11aI(int[] a, int[] b, int mask) {
783 for (int i = 0; i < RANGE; i++) {
784 // always alignable
785 b[i+0] = (int)(a[i+0] & mask);
786 }
787 return new Object[]{ a, b };
788 }
789
// test11aL: unit-stride loop over all of b, starting at 0: b[i] = a[i] & mask (long).
// IR rule (64-bit, sse4.1/asimd/rvv): requires long vector loads, ANDs and vector stores.
// The rule has no AlignVector condition.
// Returns { a, b } so the caller can compare them with the gold run.
790 @Test
791 @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0",
792 IRNode.AND_VL, "> 0",
793 IRNode.STORE_VECTOR, "> 0"},
794 applyIfPlatform = {"64-bit", "true"},
795 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
796 static Object[] test11aL(long[] a, long[] b, long mask) {
797 for (int i = 0; i < RANGE; i++) {
798 // always alignable
799 b[i+0] = (long)(a[i+0] & mask);
800 }
801 return new Object[]{ a, b };
802 }
803
// test11bB: like test11aB, but the loop starts at 1: b[i] = a[i] & mask (byte) for i = 1 .. RANGE-1.
// IR rule (64-bit, sse4.1/asimd/rvv): requires byte vector loads, ANDs and vector stores.
// The rule has no AlignVector condition.
// Returns { a, b } so the caller can compare them with the gold run.
804 @Test
805 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
806 IRNode.AND_VB, "> 0",
807 IRNode.STORE_VECTOR, "> 0"},
808 applyIfPlatform = {"64-bit", "true"},
809 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
810 static Object[] test11bB(byte[] a, byte[] b, byte mask) {
811 for (int i = 1; i < RANGE; i++) {
812 // always alignable
813 b[i+0] = (byte)(a[i+0] & mask);
814 }
815 return new Object[]{ a, b };
816 }
817
// test11bS: like test11aS, but the loop starts at 1: b[i] = a[i] & mask (short) for i = 1 .. RANGE-1.
// IR rule (64-bit, sse4.1/asimd/rvv): requires short vector loads, ANDs and vector stores.
// The rule has no AlignVector condition.
// Returns { a, b } so the caller can compare them with the gold run.
818 @Test
819 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
820 IRNode.AND_VS, "> 0",
821 IRNode.STORE_VECTOR, "> 0"},
822 applyIfPlatform = {"64-bit", "true"},
823 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
824 static Object[] test11bS(short[] a, short[] b, short mask) {
825 for (int i = 1; i < RANGE; i++) {
826 // always alignable
827 b[i+0] = (short)(a[i+0] & mask);
828 }
829 return new Object[]{ a, b };
830 }
831
// test11bI: like test11aI, but the loop starts at 1: b[i] = a[i] & mask (int) for i = 1 .. RANGE-1.
// IR rule (64-bit, sse4.1/asimd/rvv): requires int vector loads, ANDs and vector stores.
// The rule has no AlignVector condition.
// Returns { a, b } so the caller can compare them with the gold run.
832 @Test
833 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
834 IRNode.AND_VI, "> 0",
835 IRNode.STORE_VECTOR, "> 0"},
836 applyIfPlatform = {"64-bit", "true"},
837 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
838 static Object[] test11bI(int[] a, int[] b, int mask) {
839 for (int i = 1; i < RANGE; i++) {
840 // always alignable
841 b[i+0] = (int)(a[i+0] & mask);
842 }
843 return new Object[]{ a, b };
844 }
845
// Element-wise AND of a long array with a loop-invariant mask:
// b[i] = a[i] & mask for every i in [1, RANGE).
// The loop starts at 1, so element 0 of both arrays is never touched.
// IR rule (64-bit platform, CPU with at least one of sse4.1 / asimd / rvv):
// expects at least one long vector load, one long vector AND and one vector store.
// Returns the two arrays it was given, packed into an Object[].
@Test
@IR(counts = {IRNode.LOAD_VECTOR_L, "> 0",
              IRNode.AND_VL, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
static Object[] test11bL(long[] a, long[] b, long mask) {
    for (int i = 1; i < RANGE; i++) {
        // always alignable
        b[i+0] = (long)(a[i+0] & mask);
    }
    return new Object[]{ a, b };
}
859
// Shifted element-wise AND on byte arrays: b[i] = (byte)(a[i+1] & mask)
// for every i in [1, RANGE-1). The load index is one element (1 byte) ahead of
// the store index, so the load and the store cannot both sit on the same alignment.
// Two IR rules, both for 64-bit platforms with sse4.1 / asimd / rvv:
//  - AlignVector=false: expects byte vector load, vector AND and vector store nodes.
//  - AlignVector=true:  expects none of these vector nodes.
// Returns the two arrays it was given, packed into an Object[].
@Test
@IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
              IRNode.AND_VB, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
    applyIfPlatform = {"64-bit", "true"},
    applyIf = {"AlignVector", "false"})
@IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
              IRNode.AND_VB, "= 0",
              IRNode.STORE_VECTOR, "= 0"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
    applyIfPlatform = {"64-bit", "true"},
    applyIf = {"AlignVector", "true"})
static Object[] test11cB(byte[] a, byte[] b, byte mask) {
    for (int i = 1; i < RANGE-1; i++) {
        // 1 byte offset -> not alignable with AlignVector
        b[i+0] = (byte)(a[i+1] & mask);
    }
    return new Object[]{ a, b };
}
880
// Shifted element-wise AND on short arrays: b[i] = (short)(a[i+1] & mask)
// for every i in [1, RANGE-1). The load index is one element (2 bytes) ahead of
// the store index, so the load and the store cannot both sit on the same alignment.
// Two IR rules, both for 64-bit platforms with sse4.1 / asimd / rvv:
//  - AlignVector=false: expects short vector load, vector AND and vector store nodes.
//  - AlignVector=true:  expects none of these vector nodes.
// Returns the two arrays it was given, packed into an Object[].
@Test
@IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
              IRNode.AND_VS, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
    applyIfPlatform = {"64-bit", "true"},
    applyIf = {"AlignVector", "false"})
@IR(counts = {IRNode.LOAD_VECTOR_S, "= 0",
              IRNode.AND_VS, "= 0",
              IRNode.STORE_VECTOR, "= 0"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
    applyIfPlatform = {"64-bit", "true"},
    applyIf = {"AlignVector", "true"})
static Object[] test11cS(short[] a, short[] b, short mask) {
    for (int i = 1; i < RANGE-1; i++) {
        // 2 byte offset -> not alignable with AlignVector
        b[i+0] = (short)(a[i+1] & mask);
    }
    return new Object[]{ a, b };
}
901
// Shifted element-wise AND on int arrays: b[i] = a[i+1] & mask
// for every i in [1, RANGE-1). The load index is one element (4 bytes) ahead of
// the store index, so the load and the store cannot both sit on the same alignment.
// Two IR rules, both for 64-bit platforms with sse4.1 / asimd / rvv:
//  - AlignVector=false: expects int vector load, vector AND and vector store nodes.
//  - AlignVector=true:  expects none of these vector nodes.
// Returns the two arrays it was given, packed into an Object[].
@Test
@IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
              IRNode.AND_VI, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
    applyIfPlatform = {"64-bit", "true"},
    applyIf = {"AlignVector", "false"})
@IR(counts = {IRNode.LOAD_VECTOR_I, "= 0",
              IRNode.AND_VI, "= 0",
              IRNode.STORE_VECTOR, "= 0"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
    applyIfPlatform = {"64-bit", "true"},
    applyIf = {"AlignVector", "true"})
static Object[] test11cI(int[] a, int[] b, int mask) {
    for (int i = 1; i < RANGE-1; i++) {
        // 4 byte offset -> not alignable with AlignVector
        b[i+0] = (int)(a[i+1] & mask);
    }
    return new Object[]{ a, b };
}
922
// Shifted element-wise AND on long arrays: b[i] = a[i+1] & mask
// for every i in [1, RANGE-1). The load index is one element (8 bytes) ahead of
// the store index.
// Unlike the byte/short/int variants of this pattern, there is a single IR rule
// with no AlignVector condition: on 64-bit platforms with sse4.1 / asimd / rvv
// it expects long vector load, vector AND and vector store nodes.
// Returns the two arrays it was given, packed into an Object[].
@Test
@IR(counts = {IRNode.LOAD_VECTOR_L, "> 0",
              IRNode.AND_VL, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
static Object[] test11cL(long[] a, long[] b, long mask) {
    for (int i = 1; i < RANGE-1; i++) {
        // always alignable (8 byte offset)
        b[i+0] = (long)(a[i+1] & mask);
    }
    return new Object[]{ a, b };
}
936
// Element-wise AND on byte arrays with a loop-invariant index offset:
// b[i+invar] = (byte)(a[i+invar] & mask) for every i in [0, RANGE).
// 'invar' is a method parameter that is not modified inside the loop; the same
// offset is applied to the load and to the store.
// IR rule (64-bit platform, CPU with at least one of sse4.1 / asimd / rvv):
// expects at least one byte vector load, one byte vector AND and one vector store.
// Returns the two arrays it was given, packed into an Object[].
@Test
@IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
              IRNode.AND_VB, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
static Object[] test11dB(byte[] a, byte[] b, byte mask, int invar) {
    for (int i = 0; i < RANGE; i++) {
        b[i+0+invar] = (byte)(a[i+0+invar] & mask);
    }
    return new Object[]{ a, b };
}
949
// Element-wise AND on short arrays with a loop-invariant index offset:
// b[i+invar] = (short)(a[i+invar] & mask) for every i in [0, RANGE).
// 'invar' is a method parameter that is not modified inside the loop; the same
// offset is applied to the load and to the store.
// IR rule (64-bit platform, CPU with at least one of sse4.1 / asimd / rvv):
// expects at least one short vector load, one short vector AND and one vector store.
// Returns the two arrays it was given, packed into an Object[].
@Test
@IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
              IRNode.AND_VS, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
static Object[] test11dS(short[] a, short[] b, short mask, int invar) {
    for (int i = 0; i < RANGE; i++) {
        b[i+0+invar] = (short)(a[i+0+invar] & mask);
    }
    return new Object[]{ a, b };
}
962
// Element-wise AND on int arrays with a loop-invariant index offset:
// b[i+invar] = a[i+invar] & mask for every i in [0, RANGE).
// 'invar' is a method parameter that is not modified inside the loop; the same
// offset is applied to the load and to the store.
// IR rule (64-bit platform, CPU with at least one of sse4.1 / asimd / rvv):
// expects at least one int vector load, one int vector AND and one vector store.
// Returns the two arrays it was given, packed into an Object[].
@Test
@IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
              IRNode.AND_VI, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
static Object[] test11dI(int[] a, int[] b, int mask, int invar) {
    for (int i = 0; i < RANGE; i++) {
        b[i+0+invar] = (int)(a[i+0+invar] & mask);
    }
    return new Object[]{ a, b };
}
975
// Element-wise AND on long arrays with a loop-invariant index offset:
// b[i+invar] = a[i+invar] & mask for every i in [0, RANGE).
// 'invar' is a method parameter that is not modified inside the loop; the same
// offset is applied to the load and to the store.
// IR rule (64-bit platform, CPU with at least one of sse4.1 / asimd / rvv):
// expects at least one long vector load, one long vector AND and one vector store.
// Returns the two arrays it was given, packed into an Object[].
@Test
@IR(counts = {IRNode.LOAD_VECTOR_L, "> 0",
              IRNode.AND_VL, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
static Object[] test11dL(long[] a, long[] b, long mask, int invar) {
    for (int i = 0; i < RANGE; i++) {
        b[i+0+invar] = (long)(a[i+0+invar] & mask);
    }
    return new Object[]{ a, b };
}
988
// Byte AND with an index scale of 6: each iteration i in [0, RANGE/16) processes
// the four consecutive elements 6*i .. 6*i+3 and skips 6*i+4 and 6*i+5.
// IR rule (64-bit platform, sse4.1 / asimd / rvv, only when AlignVector=false):
// expects byte vector load and vector AND nodes under the vector-size constraint
// "min(max_byte, 4)", plus at least one vector store.
// No rule is given for AlignVector=true.
// Returns the two arrays it was given, packed into an Object[].
@Test
@IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE + "min(max_byte, 4)", "> 0",
              IRNode.AND_VB, IRNode.VECTOR_SIZE + "min(max_byte, 4)", "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIfPlatform = {"64-bit", "true"},
    applyIf = {"AlignVector", "false"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
static Object[] test12(byte[] a, byte[] b, byte mask) {
    for (int i = 0; i < RANGE/16; i++) {
        // Non-power-of-2 stride. Vectorization of 4 bytes, then 2-bytes gap.
        b[i*6 + 0 ] = (byte)(a[i*6 + 0 ] & mask);
        b[i*6 + 1 ] = (byte)(a[i*6 + 1 ] & mask);
        b[i*6 + 2 ] = (byte)(a[i*6 + 2 ] & mask);
        b[i*6 + 3 ] = (byte)(a[i*6 + 3 ] & mask);
    }
    return new Object[]{ a, b };
}
1006
// Increments every element of an int array and of a long array in the same loop,
// i in [0, RANGE). The two arrays have different element sizes (int vs. long).
// Two IR rules with identical expectations (int and long vector loads and adds
// under the vector-size constraint "min(max_int, max_long)", plus a vector store):
//  - 64-bit platform with avx2;
//  - riscv64 with rvv and MaxVectorSize >= 32.
// Returns the two arrays it was given, packed into an Object[].
@Test
@IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
              IRNode.LOAD_VECTOR_L, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
              IRNode.ADD_VI, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
              IRNode.ADD_VL, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeature = {"avx2", "true"})
// require avx to ensure vectors are larger than what unrolling produces
@IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
              IRNode.LOAD_VECTOR_L, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
              IRNode.ADD_VI, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
              IRNode.ADD_VL, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIfPlatform = {"riscv64", "true"},
    applyIfCPUFeature = {"rvv", "true"},
    applyIf = {"MaxVectorSize", ">=32"})
static Object[] test13aIL(int[] a, long[] b) {
    for (int i = 0; i < RANGE; i++) {
        a[i]++;
        b[i]++;
    }
    return new Object[]{ a, b };
}
1031
// Increments every element of an int array and of a byte array in the same loop,
// i in [0, RANGE). The two arrays have different element sizes (int vs. byte).
// IR rule (64-bit platform, CPU with at least one of avx2 / asimd / rvv):
// expects byte and int vector loads, byte and int vector adds, and a vector store.
// Returns the two arrays it was given, packed into an Object[].
@Test
@IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
              IRNode.LOAD_VECTOR_I, "> 0",
              IRNode.ADD_VB, "> 0",
              IRNode.ADD_VI, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
static Object[] test13aIB(int[] a, byte[] b) {
    for (int i = 0; i < RANGE; i++) {
        a[i]++;
        b[i]++;
    }
    return new Object[]{ a, b };
}
1047
// Increments every element of an int array and of a short array in the same loop,
// i in [0, RANGE). The two arrays have different element sizes (int vs. short).
// IR rule (64-bit platform, CPU with at least one of avx2 / asimd / rvv):
// expects int and short vector loads, int and short vector adds, and a vector store.
// Returns the two arrays it was given, packed into an Object[].
@Test
@IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
              IRNode.LOAD_VECTOR_S, "> 0",
              IRNode.ADD_VI, "> 0",
              IRNode.ADD_VS, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
static Object[] test13aIS(int[] a, short[] b) {
    for (int i = 0; i < RANGE; i++) {
        a[i]++;
        b[i]++;
    }
    return new Object[]{ a, b };
}
1063
// Increments every element of a byte, a short, an int and a long array in the
// same loop, i in [0, RANGE). All four element sizes are mixed in one loop body.
// IR rule (64-bit platform, CPU with at least one of avx2 / asimd / rvv):
// expects vector loads and vector adds for each of the four element types,
// and at least one vector store.
// Returns the four arrays it was given, packed into an Object[].
@Test
@IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
              IRNode.LOAD_VECTOR_S, "> 0",
              IRNode.LOAD_VECTOR_I, "> 0",
              IRNode.LOAD_VECTOR_L, "> 0",
              IRNode.ADD_VB, "> 0",
              IRNode.ADD_VS, "> 0",
              IRNode.ADD_VI, "> 0",
              IRNode.ADD_VL, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
static Object[] test13aBSIL(byte[] a, short[] b, int[] c, long[] d) {
    for (int i = 0; i < RANGE; i++) {
        a[i]++;
        b[i]++;
        c[i]++;
        d[i]++;
    }
    return new Object[]{ a, b, c, d };
}
1085
// Increments elements of an int array and of a long array in the same loop,
// i in [1, RANGE). The loop starts at 1, so element 0 of both arrays is untouched.
// Two IR rules with identical expectations (int and long vector loads and adds
// under the vector-size constraint "min(max_int, max_long)", plus a vector store):
//  - 64-bit platform with avx2;
//  - riscv64 with rvv and MaxVectorSize >= 32.
// Returns the two arrays it was given, packed into an Object[].
@Test
@IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
              IRNode.LOAD_VECTOR_L, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
              IRNode.ADD_VI, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
              IRNode.ADD_VL, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeature = {"avx2", "true"})
// require avx to ensure vectors are larger than what unrolling produces
@IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
              IRNode.LOAD_VECTOR_L, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
              IRNode.ADD_VI, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
              IRNode.ADD_VL, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIfPlatform = {"riscv64", "true"},
    applyIfCPUFeature = {"rvv", "true"},
    applyIf = {"MaxVectorSize", ">=32"})
static Object[] test13bIL(int[] a, long[] b) {
    for (int i = 1; i < RANGE; i++) {
        a[i]++;
        b[i]++;
    }
    return new Object[]{ a, b };
}
1110
// Increments elements of an int array and of a byte array in the same loop,
// i in [1, RANGE). The loop starts at 1, so element 0 of both arrays is untouched.
// IR rule (64-bit platform, CPU with at least one of avx2 / asimd / rvv):
// expects byte and int vector loads, byte and int vector adds, and a vector store.
// Returns the two arrays it was given, packed into an Object[].
@Test
@IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
              IRNode.LOAD_VECTOR_I, "> 0",
              IRNode.ADD_VB, "> 0",
              IRNode.ADD_VI, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
static Object[] test13bIB(int[] a, byte[] b) {
    for (int i = 1; i < RANGE; i++) {
        a[i]++;
        b[i]++;
    }
    return new Object[]{ a, b };
}
1126
// Increments elements of an int array and of a short array in the same loop,
// i in [1, RANGE). The loop starts at 1, so element 0 of both arrays is untouched.
// IR rule (64-bit platform, CPU with at least one of avx2 / asimd / rvv):
// expects int and short vector loads, int and short vector adds, and a vector store.
// Returns the two arrays it was given, packed into an Object[].
@Test
@IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
              IRNode.LOAD_VECTOR_S, "> 0",
              IRNode.ADD_VI, "> 0",
              IRNode.ADD_VS, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
static Object[] test13bIS(int[] a, short[] b) {
    for (int i = 1; i < RANGE; i++) {
        a[i]++;
        b[i]++;
    }
    return new Object[]{ a, b };
}
1142
// Increments elements of a byte, a short, an int and a long array in the same
// loop, i in [1, RANGE). The loop starts at 1, so element 0 of every array is
// untouched.
// IR rule (64-bit platform, CPU with at least one of avx2 / asimd / rvv):
// expects vector loads and vector adds for each of the four element types,
// and at least one vector store.
// Returns the four arrays it was given, packed into an Object[].
@Test
@IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
              IRNode.LOAD_VECTOR_S, "> 0",
              IRNode.LOAD_VECTOR_I, "> 0",
              IRNode.LOAD_VECTOR_L, "> 0",
              IRNode.ADD_VB, "> 0",
              IRNode.ADD_VS, "> 0",
              IRNode.ADD_VI, "> 0",
              IRNode.ADD_VL, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
static Object[] test13bBSIL(byte[] a, short[] b, int[] c, long[] d) {
    for (int i = 1; i < RANGE; i++) {
        a[i]++;
        b[i]++;
        c[i]++;
        d[i]++;
    }
    return new Object[]{ a, b, c, d };
}
1164
// Increments 16 consecutive bytes a[i] .. a[i+15] per iteration while i advances
// by 9 (i in [0, RANGE-20)). Because 9 < 16, iteration i+9 touches a[i+9] .. a[i+15]
// again, i.e. each iteration re-reads 7 bytes written by the previous one.
// Both IR rules (AlignVector=false and AlignVector=true; 64-bit platform with
// sse4.1 / asimd / rvv) expect NO byte vector load, vector add or vector store.
// Returns the array it was given, packed into an Object[].
@Test
@IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
              IRNode.ADD_VB, "= 0",
              IRNode.STORE_VECTOR, "= 0"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
    applyIfPlatform = {"64-bit", "true"},
    applyIf = {"AlignVector", "false"})
@IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
              IRNode.ADD_VB, "= 0",
              IRNode.STORE_VECTOR, "= 0"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
    applyIfPlatform = {"64-bit", "true"},
    applyIf = {"AlignVector", "true"})
static Object[] test14aB(byte[] a) {
    // non-power-of-2 stride
    for (int i = 0; i < RANGE-20; i+=9) {
        // Since the stride is shorter than the vector length, there will always be
        // partial overlap of loads with previous stores, this leads to failure in
        // store-to-load-forwarding -> vectorization not profitable.
        a[i+0]++;
        a[i+1]++;
        a[i+2]++;
        a[i+3]++;
        a[i+4]++;
        a[i+5]++;
        a[i+6]++;
        a[i+7]++;
        a[i+8]++;
        a[i+9]++;
        a[i+10]++;
        a[i+11]++;
        a[i+12]++;
        a[i+13]++;
        a[i+14]++;
        a[i+15]++;
    }
    return new Object[]{ a };
}
1203
// Increments 16 consecutive bytes a[i] .. a[i+15] per iteration while i advances
// by 3 (i in [0, RANGE-20)). Because 3 < 16, iteration i+3 touches a[i+3] .. a[i+15]
// again, i.e. each iteration re-reads 13 bytes written by the previous one.
// Both IR rules (AlignVector=false and AlignVector=true; 64-bit platform with
// sse4.1 / asimd / rvv) expect NO byte vector load, vector add or vector store.
// Returns the array it was given, packed into an Object[].
@Test
@IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
              IRNode.ADD_VB, "= 0",
              IRNode.STORE_VECTOR, "= 0"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
    applyIfPlatform = {"64-bit", "true"},
    applyIf = {"AlignVector", "false"})
@IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
              IRNode.ADD_VB, "= 0",
              IRNode.STORE_VECTOR, "= 0"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
    applyIfPlatform = {"64-bit", "true"},
    applyIf = {"AlignVector", "true"})
static Object[] test14bB(byte[] a) {
    // non-power-of-2 stride
    for (int i = 0; i < RANGE-20; i+=3) {
        // Since the stride is shorter than the vector length, there will always be
        // partial overlap of loads with previous stores, this leads to failure in
        // store-to-load-forwarding -> vectorization not profitable.
        a[i+0]++;
        a[i+1]++;
        a[i+2]++;
        a[i+3]++;
        a[i+4]++;
        a[i+5]++;
        a[i+6]++;
        a[i+7]++;
        a[i+8]++;
        a[i+9]++;
        a[i+10]++;
        a[i+11]++;
        a[i+12]++;
        a[i+13]++;
        a[i+14]++;
        a[i+15]++;
    }
    return new Object[]{ a };
}
1242
// Increments 16 consecutive bytes a[i] .. a[i+15] per iteration while i advances
// by 5 (i in [0, RANGE-20)). Because 5 < 16, iteration i+5 touches a[i+5] .. a[i+15]
// again, i.e. each iteration re-reads 11 bytes written by the previous one.
// Both IR rules (AlignVector=false and AlignVector=true; 64-bit platform with
// sse4.1 / asimd / rvv) expect NO byte vector load, vector add or vector store.
// Returns the array it was given, packed into an Object[].
@Test
@IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
              IRNode.ADD_VB, "= 0",
              IRNode.STORE_VECTOR, "= 0"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
    applyIfPlatform = {"64-bit", "true"},
    applyIf = {"AlignVector", "false"})
@IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
              IRNode.ADD_VB, "= 0",
              IRNode.STORE_VECTOR, "= 0"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
    applyIfPlatform = {"64-bit", "true"},
    applyIf = {"AlignVector", "true"})
static Object[] test14cB(byte[] a) {
    // non-power-of-2 stride
    for (int i = 0; i < RANGE-20; i+=5) {
        // Since the stride is shorter than the vector length, there will always be
        // partial overlap of loads with previous stores, this leads to failure in
        // store-to-load-forwarding -> vectorization not profitable.
        a[i+0]++;
        a[i+1]++;
        a[i+2]++;
        a[i+3]++;
        a[i+4]++;
        a[i+5]++;
        a[i+6]++;
        a[i+7]++;
        a[i+8]++;
        a[i+9]++;
        a[i+10]++;
        a[i+11]++;
        a[i+12]++;
        a[i+13]++;
        a[i+14]++;
        a[i+15]++;
    }
    return new Object[]{ a };
}
1281
// Increments 8 consecutive bytes a[i] .. a[i+7] per iteration while i advances
// by 9 (i in [0, RANGE-20)). The 8-byte groups of successive iterations do not
// overlap; a[i+8] is skipped between them.
// Two IR rules, both for 64-bit platforms with sse4.1 / asimd / rvv:
//  - AlignVector=false: expects byte vector load and add nodes under the
//    vector-size constraint "min(max_byte, 8)", plus a vector store.
//  - AlignVector=true:  expects none of these vector nodes.
// Returns the array it was given, packed into an Object[].
@Test
@IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE + "min(max_byte, 8)", "> 0",
              IRNode.ADD_VB, IRNode.VECTOR_SIZE + "min(max_byte, 8)", "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
    applyIfPlatform = {"64-bit", "true"},
    applyIf = {"AlignVector", "false"})
@IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
              IRNode.ADD_VB, "= 0",
              IRNode.STORE_VECTOR, "= 0"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
    applyIfPlatform = {"64-bit", "true"},
    applyIf = {"AlignVector", "true"})
static Object[] test14dB(byte[] a) {
    // non-power-of-2 stride
    for (int i = 0; i < RANGE-20; i+=9) {
        a[i+0]++;
        a[i+1]++;
        a[i+2]++;
        a[i+3]++;
        a[i+4]++;
        a[i+5]++;
        a[i+6]++;
        a[i+7]++;
    }
    return new Object[]{ a };
}
1309
// Increments 8 consecutive bytes a[i] .. a[i+7] per iteration while i advances
// by 11 (i in [0, RANGE-32)). The 8-byte groups of successive iterations do not
// overlap; a[i+8] .. a[i+10] are skipped between them.
// Two IR rules, both for 64-bit platforms with sse4.1 / asimd / rvv:
//  - AlignVector=false: expects byte vector load and add nodes under the
//    vector-size constraint "min(max_byte, 8)", plus a vector store.
//  - AlignVector=true:  expects none of these vector nodes.
// Returns the array it was given, packed into an Object[].
@Test
@IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE + "min(max_byte, 8)", "> 0",
              IRNode.ADD_VB, IRNode.VECTOR_SIZE + "min(max_byte, 8)", "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
    applyIfPlatform = {"64-bit", "true"},
    applyIf = {"AlignVector", "false"})
@IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
              IRNode.ADD_VB, "= 0",
              IRNode.STORE_VECTOR, "= 0"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
    applyIfPlatform = {"64-bit", "true"},
    applyIf = {"AlignVector", "true"})
static Object[] test14eB(byte[] a) {
    // non-power-of-2 stride
    for (int i = 0; i < RANGE-32; i+=11) {
        a[i+0]++;
        a[i+1]++;
        a[i+2]++;
        a[i+3]++;
        a[i+4]++;
        a[i+5]++;
        a[i+6]++;
        a[i+7]++;
    }
    return new Object[]{ a };
}
1337
// Increments 8 consecutive bytes a[i] .. a[i+7] per iteration while i advances
// by 12 (i in [0, RANGE-40)). The 8-byte groups of successive iterations do not
// overlap; a[i+8] .. a[i+11] are skipped between them.
// Two IR rules, both for 64-bit platforms with sse4.1 / asimd / rvv:
//  - AlignVector=false: expects byte vector load and add nodes under the
//    vector-size constraint "min(max_byte, 8)", plus a vector store.
//  - AlignVector=true:  expects none of these vector nodes.
// Returns the array it was given, packed into an Object[].
@Test
@IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE + "min(max_byte, 8)", "> 0",
              IRNode.ADD_VB, IRNode.VECTOR_SIZE + "min(max_byte, 8)", "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
    applyIfPlatform = {"64-bit", "true"},
    applyIf = {"AlignVector", "false"})
@IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
              IRNode.ADD_VB, "= 0",
              IRNode.STORE_VECTOR, "= 0"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
    applyIfPlatform = {"64-bit", "true"},
    applyIf = {"AlignVector", "true"})
static Object[] test14fB(byte[] a) {
    // non-power-of-2 stride
    for (int i = 0; i < RANGE-40; i+=12) {
        a[i+0]++;
        a[i+1]++;
        a[i+2]++;
        a[i+3]++;
        a[i+4]++;
        a[i+5]++;
        a[i+6]++;
        a[i+7]++;
    }
    return new Object[]{ a };
}
1365
// Increments 16 consecutive bytes a[53*i] .. a[53*i+15] per iteration, with unit
// loop stride (i in [0, RANGE/64-20)) but an index scale of 53. Since 53 > 16,
// the 16-byte groups of successive iterations do not overlap.
// No @IR rule is attached (see the remark below); the method only has to run
// and return the array it was given, packed into an Object[].
@Test
// IR rules difficult because of modulo wrapping with offset after peeling.
static Object[] test15aB(byte[] a) {
    // non-power-of-2 scale
    for (int i = 0; i < RANGE/64-20; i++) {
        a[53*i+0]++;
        a[53*i+1]++;
        a[53*i+2]++;
        a[53*i+3]++;
        a[53*i+4]++;
        a[53*i+5]++;
        a[53*i+6]++;
        a[53*i+7]++;
        a[53*i+8]++;
        a[53*i+9]++;
        a[53*i+10]++;
        a[53*i+11]++;
        a[53*i+12]++;
        a[53*i+13]++;
        a[53*i+14]++;
        a[53*i+15]++;
    }
    return new Object[]{ a };
}
1390
// Increments 16 consecutive bytes a[25*i] .. a[25*i+15] per iteration, with unit
// loop stride (i in [0, RANGE/64-20)) but an index scale of 25. Since 25 > 16,
// the 16-byte groups of successive iterations do not overlap.
// No @IR rule is attached (see the remark below); the method only has to run
// and return the array it was given, packed into an Object[].
@Test
// IR rules difficult because of modulo wrapping with offset after peeling.
static Object[] test15bB(byte[] a) {
    // non-power-of-2 scale
    for (int i = 0; i < RANGE/64-20; i++) {
        a[25*i+0]++;
        a[25*i+1]++;
        a[25*i+2]++;
        a[25*i+3]++;
        a[25*i+4]++;
        a[25*i+5]++;
        a[25*i+6]++;
        a[25*i+7]++;
        a[25*i+8]++;
        a[25*i+9]++;
        a[25*i+10]++;
        a[25*i+11]++;
        a[25*i+12]++;
        a[25*i+13]++;
        a[25*i+14]++;
        a[25*i+15]++;
    }
    return new Object[]{ a };
}
1415
// Increments 16 consecutive bytes a[19*i] .. a[19*i+15] per iteration, with unit
// loop stride (i in [0, RANGE/64-20)) but an index scale of 19. Since 19 > 16,
// the 16-byte groups of successive iterations do not overlap.
// No @IR rule is attached (see the remark below); the method only has to run
// and return the array it was given, packed into an Object[].
@Test
// IR rules difficult because of modulo wrapping with offset after peeling.
static Object[] test15cB(byte[] a) {
    // non-power-of-2 scale
    for (int i = 0; i < RANGE/64-20; i++) {
        a[19*i+0]++;
        a[19*i+1]++;
        a[19*i+2]++;
        a[19*i+3]++;
        a[19*i+4]++;
        a[19*i+5]++;
        a[19*i+6]++;
        a[19*i+7]++;
        a[19*i+8]++;
        a[19*i+9]++;
        a[19*i+10]++;
        a[19*i+11]++;
        a[19*i+12]++;
        a[19*i+13]++;
        a[19*i+14]++;
        a[19*i+15]++;
    }
    return new Object[]{ a };
}
1440
// Per iteration (i in [0, RANGE/2-20)) increments 15 consecutive bytes
// a[2*i] .. a[2*i+14] and 4 consecutive shorts b[2*i] .. b[2*i+3]. The index scale
// is 2, so the element groups of successive iterations overlap in both arrays.
// No @IR rule is attached; the method only has to compile and run.
// NOTE(review): "infinite loop issues" presumably refers to a past compiler
// problem that this shape reproduced; not verifiable from this chunk.
// Returns the two arrays it was given, packed into an Object[].
@Test
static Object[] test16a(byte[] a, short[] b) {
    // infinite loop issues
    for (int i = 0; i < RANGE/2-20; i++) {
        a[2*i+0]++;
        a[2*i+1]++;
        a[2*i+2]++;
        a[2*i+3]++;
        a[2*i+4]++;
        a[2*i+5]++;
        a[2*i+6]++;
        a[2*i+7]++;
        a[2*i+8]++;
        a[2*i+9]++;
        a[2*i+10]++;
        a[2*i+11]++;
        a[2*i+12]++;
        a[2*i+13]++;
        a[2*i+14]++;

        b[2*i+0]++;
        b[2*i+1]++;
        b[2*i+2]++;
        b[2*i+3]++;
    }
    return new Object[]{ a, b };
}
1468
// Per iteration (i in [0, RANGE/2-20)) increments 15 consecutive bytes
// a[2*i] .. a[2*i+14]. The index scale is 2, so the element groups of successive
// iterations overlap.
// No @IR rule is attached; the method only has to compile and run.
// NOTE(review): "infinite loop issues" presumably refers to a past compiler
// problem that this shape reproduced; not verifiable from this chunk.
// Returns the array it was given, packed into an Object[].
@Test
static Object[] test16b(byte[] a) {
    // infinite loop issues
    for (int i = 0; i < RANGE/2-20; i++) {
        a[2*i+0]++;
        a[2*i+1]++;
        a[2*i+2]++;
        a[2*i+3]++;
        a[2*i+4]++;
        a[2*i+5]++;
        a[2*i+6]++;
        a[2*i+7]++;
        a[2*i+8]++;
        a[2*i+9]++;
        a[2*i+10]++;
        a[2*i+11]++;
        a[2*i+12]++;
        a[2*i+13]++;
        a[2*i+14]++;
    }
    return new Object[]{ a };
}
1491
// Increments a long array through Unsafe instead of array accesses: for every
// i in [0, RANGE) it reads 8 bytes at ARRAY_LONG_BASE_OFFSET + 8*i with
// getLongUnaligned, adds 1, and writes the result back to the same offset.
// IR rule (64-bit platform, CPU with at least one of sse4.1 / asimd / rvv):
// expects at least one long vector load, one long vector add and one vector store.
// Returns the array it was given, packed into an Object[].
@Test
@IR(counts = {IRNode.LOAD_VECTOR_L, "> 0",
              IRNode.ADD_VL, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
static Object[] test17a(long[] a) {
    // Unsafe: vectorizes with profiling (not xcomp)
    for (int i = 0; i < RANGE; i++) {
        long adr = UNSAFE.ARRAY_LONG_BASE_OFFSET + 8L * i;
        long v = UNSAFE.getLongUnaligned(a, adr);
        UNSAFE.putLongUnaligned(a, adr, v + 1);
    }
    return new Object[]{ a };
}
1507
// Unsafe read-modify-write of 8-byte values at ARRAY_LONG_BASE_OFFSET + 8*i + 1,
// i.e. every access is shifted by one byte relative to the 8*i offsets, for
// i in [0, RANGE-1). Each value is read with getLongUnaligned, incremented by 1
// and written back to the same offset.
// No @IR rule is attached (see the remark below).
// Returns the array it was given, packed into an Object[].
@Test
// Difficult to write good IR rule. Modulo calculus overflow can create non-power-of-2 packs.
static Object[] test17b(long[] a) {
    // Not alignable
    for (int i = 0; i < RANGE-1; i++) {
        long adr = UNSAFE.ARRAY_LONG_BASE_OFFSET + 8L * i + 1;
        long v = UNSAFE.getLongUnaligned(a, adr);
        UNSAFE.putLongUnaligned(a, adr, v + 1);
    }
    return new Object[]{ a };
}
1519
// Unsafe read-modify-write of two adjacent 8-byte values per iteration: with
// adr = ARRAY_LONG_BASE_OFFSET + 8*i, both values (adr+0 and adr+8) are loaded
// first, then both are stored back incremented by 1. i advances by 4
// (i in [0, RANGE-1)), so the 8-byte slots at 8*(i+2) and 8*(i+3) are skipped.
// IR rule (64-bit platform, sse4.1 / asimd / rvv, MaxVectorSize >= 32):
// expects long vector load and add nodes under the VECTOR_SIZE_2 constraint,
// plus at least one vector store.
// Returns the array it was given, packed into an Object[].
@Test
@IR(counts = {IRNode.LOAD_VECTOR_L, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.ADD_VL, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIf = {"MaxVectorSize", ">=32"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
static Object[] test17c(long[] a) {
    // Unsafe: aligned vectorizes
    for (int i = 0; i < RANGE-1; i+=4) {
        long adr = UNSAFE.ARRAY_LONG_BASE_OFFSET + 8L * i;
        long v0 = UNSAFE.getLongUnaligned(a, adr + 0);
        long v1 = UNSAFE.getLongUnaligned(a, adr + 8);
        UNSAFE.putLongUnaligned(a, adr + 0, v0 + 1);
        UNSAFE.putLongUnaligned(a, adr + 8, v1 + 1);
    }
    return new Object[]{ a };
}
1538
// Unsafe read-modify-write of two adjacent 8-byte values per iteration, shifted
// by one byte: with adr = ARRAY_LONG_BASE_OFFSET + 8*i + 1, both values (adr+0
// and adr+8) are loaded first, then both are stored back incremented by 1.
// i advances by 4 (i in [0, RANGE-1)).
// Two IR rules, both for 64-bit platforms:
//  - avx512 / asimd / rvv with AlignVector=false AND MaxVectorSize >= 64:
//    expects long vector load and add nodes under the VECTOR_SIZE_2 constraint,
//    plus a vector store.
//  - sse4.1 / asimd / rvv with AlignVector=true: expects none of these nodes.
// Returns the array it was given, packed into an Object[].
@Test
@IR(counts = {IRNode.LOAD_VECTOR_L, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.ADD_VL, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIfCPUFeatureOr = {"avx512", "true", "asimd", "true", "rvv", "true"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=64"})
// Ensure vector width is large enough to fit 64 byte for longs:
// The offsets are: 25, 33, 57, 65
// In modulo 32: 25, 1, 25, 1 -> does not vectorize
// In modulo 64: 25, 33, 57, 1 -> at least first pair vectorizes
// This problem is because we compute modulo vector width in memory_alignment.
@IR(counts = {IRNode.LOAD_VECTOR_L, "= 0",
              IRNode.ADD_VL, "= 0",
              IRNode.STORE_VECTOR, "= 0"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
    applyIfPlatform = {"64-bit", "true"},
    applyIf = {"AlignVector", "true"})
static Object[] test17d(long[] a) {
    // Not alignable
    for (int i = 0; i < RANGE-1; i+=4) {
        long adr = UNSAFE.ARRAY_LONG_BASE_OFFSET + 8L * i + 1;
        long v0 = UNSAFE.getLongUnaligned(a, adr + 0);
        long v1 = UNSAFE.getLongUnaligned(a, adr + 8);
        UNSAFE.putLongUnaligned(a, adr + 0, v0 + 1);
        UNSAFE.putLongUnaligned(a, adr + 8, v1 + 1);
    }
    return new Object[]{ a };
}
1568
// For every i in [0, RANGE): stores the constant 1 to a[0], the constant 2 to
// b[i], then the constant 1 to a[1]. The byte-array stores use constant indices
// that do not depend on the loop variable; only the int-array store does.
// No @IR rule is attached; the method only has to compile and run.
// Returns the two arrays it was given, packed into an Object[].
@Test
static Object[] test18a(byte[] a, int[] b) {
    // scale = 0 --> no iv
    for (int i = 0; i < RANGE; i++) {
        a[0] = 1;
        b[i] = 2;
        a[1] = 1;
    }
    return new Object[]{ a, b };
}
1579
// For every i in [0, RANGE): stores the constant 1 to a[1], the constant 2 to
// b[i], then the constant 1 to a[2]. The byte-array stores use constant indices
// that do not depend on the loop variable; only the int-array store does.
// No @IR rule is attached; the method only has to compile and run.
// Returns the two arrays it was given, packed into an Object[].
@Test
static Object[] test18b(byte[] a, int[] b) {
    // scale = 0 --> no iv
    for (int i = 0; i < RANGE; i++) {
        a[1] = 1;
        b[i] = 2;
        a[2] = 1;
    }
    return new Object[]{ a, b };
}
1590
// Copies b[RANGE_FINAL - i] into a[RANGE_FINAL - i] while i counts DOWN from 5000
// to 1, so the array index itself rises from RANGE_FINAL - 5000 to RANGE_FINAL - 1.
// No @IR rule is attached; the method only has to compile and run.
// Returns the two arrays it was given, packed into an Object[].
@Test
static Object[] test19(int[] a, int[] b) {
    for (int i = 5000; i > 0; i--) {
        a[RANGE_FINAL - i] = b[RANGE_FINAL - i];
    }
    return new Object[]{ a, b };
}
1598
// Per iteration (i in [1, RANGE/2-50)) increments the 4 consecutive bytes
// a[2*i+30] .. a[2*i+33]. The index scale is 2, so the 4-byte groups of
// successive iterations overlap by two elements.
// No @IR rule is attached; the method only has to compile and run.
// Returns the array it was given, packed into an Object[].
@Test
static Object[] test20(byte[] a) {
    // Example where it is easy to pass alignment check,
    // but used to fail the alignment calculation
    for (int i = 1; i < RANGE/2-50; i++) {
        a[2*i+0+30]++;
        a[2*i+1+30]++;
        a[2*i+2+30]++;
        a[2*i+3+30]++;
    }
    return new Object[]{ a };
}
1611 }