1 /*
2 * Copyright (c) 2024, 2025, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 */
23
24 package compiler.loopopts.superword;
25
26 import compiler.lib.ir_framework.*;
27 import jdk.test.lib.Utils;
28 import jdk.test.whitebox.WhiteBox;
29 import java.lang.reflect.Array;
30 import java.util.Map;
31 import java.util.HashMap;
32 import java.util.Random;
33 import java.nio.ByteOrder;
34
35 /*
36 * @test
37 * @bug 8326139 8348659
38 * @key randomness
39 * @summary Test splitting packs in SuperWord
40 * @library /test/lib /
41 * @run driver compiler.loopopts.superword.TestSplitPacks nCOH_nAV_ySAC
42 * @run driver compiler.loopopts.superword.TestSplitPacks nCOH_yAV_ySAC
43 * @run driver compiler.loopopts.superword.TestSplitPacks yCOH_nAV_ySAC
44 * @run driver compiler.loopopts.superword.TestSplitPacks yCOH_yAV_ySAC
45 * @run driver compiler.loopopts.superword.TestSplitPacks nCOH_nAV_nSAC
46 * @run driver compiler.loopopts.superword.TestSplitPacks nCOH_yAV_nSAC
47 * @run driver compiler.loopopts.superword.TestSplitPacks yCOH_nAV_nSAC
48 * @run driver compiler.loopopts.superword.TestSplitPacks yCOH_yAV_nSAC
49 */
50
51 public class TestSplitPacks {
// Length of every generated input array; also the loop bound of the test kernels.
static int RANGE = 8 * 1024;
// Same value as RANGE.
// NOTE(review): not declared final despite its name - confirm this is intended.
static int RANGE_FINAL = 8 * 1024;
private static final Random RANDOM = Utils.getRandomInstance();

// Input arrays (a* / b*), filled once by the constructor.
byte[] aB, bB;
short[] aS, bS;
int[] aI, bI;
long[] aL, bL;

// Mask / fill values handed to the test kernels.
byte mB = 31;
short mS = (short) 0xF0F0;
int mI = 0xF0F0F0F0;
long mL = 0xF0F0F0F0F0F0F0F0L;

// Test name -> closure that invokes the test on fresh clones of the inputs.
Map<String, TestFunction> tests = new HashMap<>();

// Test name -> reference ("gold") output, recorded by the first run before compilation.
Map<String, Object[]> golds = new HashMap<>();
75
/**
 * A parameterless invocation of one test kernel.
 * The returned array holds the arrays the kernel produced, so that two runs can be compared.
 */
@FunctionalInterface
interface TestFunction {
    Object[] run();
}
79
80 public static void main(String[] args) {
81 TestFramework framework = new TestFramework(TestSplitPacks.class);
82 framework.addFlags("-XX:+IgnoreUnrecognizedVMOptions", "-XX:LoopUnrollLimit=1000");
83 switch (args[0]) {
84 case "nCOH_nAV_ySAC" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:-UseCompactObjectHeaders", "-XX:-AlignVector", "-XX:+UseAutoVectorizationSpeculativeAliasingChecks"); }
85 case "nCOH_yAV_ySAC" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:-UseCompactObjectHeaders", "-XX:+AlignVector", "-XX:+UseAutoVectorizationSpeculativeAliasingChecks"); }
86 case "yCOH_nAV_ySAC" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:+UseCompactObjectHeaders", "-XX:-AlignVector", "-XX:+UseAutoVectorizationSpeculativeAliasingChecks"); }
87 case "yCOH_yAV_ySAC" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:+UseCompactObjectHeaders", "-XX:+AlignVector", "-XX:+UseAutoVectorizationSpeculativeAliasingChecks"); }
88 case "nCOH_nAV_nSAC" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:-UseCompactObjectHeaders", "-XX:-AlignVector", "-XX:-UseAutoVectorizationSpeculativeAliasingChecks"); }
89 case "nCOH_yAV_nSAC" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:-UseCompactObjectHeaders", "-XX:+AlignVector", "-XX:-UseAutoVectorizationSpeculativeAliasingChecks"); }
90 case "yCOH_nAV_nSAC" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:+UseCompactObjectHeaders", "-XX:-AlignVector", "-XX:-UseAutoVectorizationSpeculativeAliasingChecks"); }
91 case "yCOH_yAV_nSAC" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:+UseCompactObjectHeaders", "-XX:+AlignVector", "-XX:-UseAutoVectorizationSpeculativeAliasingChecks"); }
92 default -> { throw new RuntimeException("Test argument not recognized: " + args[0]); }
93 };
94 framework.start();
95 }
96
97 public TestSplitPacks() {
98 // Generate input once
99 aB = generateB();
100 bB = generateB();
101 aS = generateS();
102 bS = generateS();
103 aI = generateI();
104 bI = generateI();
105 aL = generateL();
106 bL = generateL();
107
108 // Add all tests to list
109 tests.put("test0", () -> { return test0(aI.clone(), bI.clone(), mI); });
110 tests.put("test1a", () -> { return test1a(aI.clone(), bI.clone(), mI); });
111 tests.put("test1b", () -> { return test1b(aI.clone(), bI.clone(), mI); });
112 tests.put("test1c", () -> { return test1c(aI.clone(), bI.clone(), mI); });
113 tests.put("test1d", () -> { return test1d(aI.clone(), bI.clone(), mI); });
114 tests.put("test2a", () -> { return test2a(aI.clone(), bI.clone(), mI); });
115 tests.put("test2b", () -> { return test2b(aI.clone(), bI.clone(), mI); });
116 tests.put("test2c", () -> { return test2c(aI.clone(), bI.clone(), mI); });
117 tests.put("test2d", () -> { return test2d(aI.clone(), bI.clone(), mI); });
118 tests.put("test3a", () -> { return test3a(aS.clone(), bS.clone(), mS); });
119 tests.put("test4a", () -> { return test4a(aS.clone(), bS.clone()); });
120 tests.put("test4b", () -> { return test4b(aS.clone(), bS.clone()); });
121 tests.put("test4c", () -> { return test4c(aS.clone(), bS.clone()); });
122 tests.put("test4d", () -> { return test4d(aS.clone(), bS.clone()); });
123 tests.put("test4e", () -> { return test4e(aS.clone(), bS.clone()); });
124 tests.put("test4f", () -> { return test4f(aS.clone(), bS.clone()); });
125 tests.put("test4g", () -> { return test4g(aS.clone(), bS.clone()); });
126 tests.put("test4a_alias",() -> { short[] x = aS.clone(); return test4a_alias(x, x); });
127 tests.put("test4b_alias",() -> { short[] x = aS.clone(); return test4b_alias(x, x); });
128 tests.put("test4c_alias",() -> { short[] x = aS.clone(); return test4c_alias(x, x); });
129 tests.put("test4d_alias",() -> { short[] x = aS.clone(); return test4d_alias(x, x); });
130 tests.put("test4e_alias",() -> { short[] x = aS.clone(); return test4e_alias(x, x); });
131 tests.put("test4f_alias",() -> { short[] x = aS.clone(); return test4f_alias(x, x); });
132 tests.put("test4g_alias",() -> { short[] x = aS.clone(); return test4g_alias(x, x); });
133 tests.put("test5a", () -> { return test5a(aS.clone(), bS.clone(), mS); });
134 tests.put("test6a", () -> { return test6a(aI.clone(), bI.clone()); });
135 tests.put("test7a", () -> { return test7a(aI.clone(), bI.clone()); });
136
137 // Compute gold value for all test methods before compilation
138 for (Map.Entry<String,TestFunction> entry : tests.entrySet()) {
139 String name = entry.getKey();
140 TestFunction test = entry.getValue();
141 Object[] gold = test.run();
142 golds.put(name, gold);
143 }
144 }
145
146 @Warmup(100)
147 @Run(test = {"test0",
148 "test1a",
149 "test1b",
150 "test1c",
151 "test1d",
152 "test2a",
153 "test2b",
154 "test2c",
155 "test2d",
156 "test3a",
157 "test4a",
158 "test4b",
159 "test4c",
160 "test4d",
161 "test4e",
162 "test4f",
163 "test4g",
164 "test4a_alias",
165 "test4b_alias",
166 "test4c_alias",
167 "test4d_alias",
168 "test4e_alias",
169 "test4f_alias",
170 "test4g_alias",
171 "test5a",
172 "test6a",
173 "test7a"})
174 public void runTests() {
175 for (Map.Entry<String,TestFunction> entry : tests.entrySet()) {
176 String name = entry.getKey();
177 TestFunction test = entry.getValue();
178 // Recall gold value from before compilation
179 Object[] gold = golds.get(name);
180 // Compute new result
181 Object[] result = test.run();
182 // Compare gold and new result
183 verify(name, gold, result);
184 }
185 }
186
187 static byte[] generateB() {
188 byte[] a = new byte[RANGE];
189 for (int i = 0; i < a.length; i++) {
190 a[i] = (byte)RANDOM.nextInt();
191 }
192 return a;
193 }
194
195 static short[] generateS() {
196 short[] a = new short[RANGE];
197 for (int i = 0; i < a.length; i++) {
198 a[i] = (short)RANDOM.nextInt();
199 }
200 return a;
201 }
202
203 static int[] generateI() {
204 int[] a = new int[RANGE];
205 for (int i = 0; i < a.length; i++) {
206 a[i] = RANDOM.nextInt();
207 }
208 return a;
209 }
210
211 static long[] generateL() {
212 long[] a = new long[RANGE];
213 for (int i = 0; i < a.length; i++) {
214 a[i] = RANDOM.nextLong();
215 }
216 return a;
217 }
218
219 static void verify(String name, Object[] gold, Object[] result) {
220 if (gold.length != result.length) {
221 throw new RuntimeException("verify " + name + ": not the same number of outputs: gold.length = " +
222 gold.length + ", result.length = " + result.length);
223 }
224 for (int i = 0; i < gold.length; i++) {
225 Object g = gold[i];
226 Object r = result[i];
227 if (g.getClass() != r.getClass() || !g.getClass().isArray() || !r.getClass().isArray()) {
228 throw new RuntimeException("verify " + name + ": must both be array of same type:" +
229 " gold[" + i + "].getClass() = " + g.getClass().getSimpleName() +
230 " result[" + i + "].getClass() = " + r.getClass().getSimpleName());
231 }
232 if (g == r) {
233 throw new RuntimeException("verify " + name + ": should be two separate arrays (with identical content):" +
234 " gold[" + i + "] == result[" + i + "]");
235 }
236 if (Array.getLength(g) != Array.getLength(r)) {
237 throw new RuntimeException("verify " + name + ": arrays must have same length:" +
238 " gold[" + i + "].length = " + Array.getLength(g) +
239 " result[" + i + "].length = " + Array.getLength(r));
240 }
241 Class c = g.getClass().getComponentType();
242 if (c == byte.class) {
243 verifyB(name, i, (byte[])g, (byte[])r);
244 } else if (c == short.class) {
245 verifyS(name, i, (short[])g, (short[])r);
246 } else if (c == int.class) {
247 verifyI(name, i, (int[])g, (int[])r);
248 } else if (c == long.class) {
249 verifyL(name, i, (long[])g, (long[])r);
250 } else {
251 throw new RuntimeException("verify " + name + ": array type not supported for verify:" +
252 " gold[" + i + "].getClass() = " + g.getClass().getSimpleName() +
253 " result[" + i + "].getClass() = " + r.getClass().getSimpleName());
254 }
255 }
256 }
257
258 static void verifyB(String name, int i, byte[] g, byte[] r) {
259 for (int j = 0; j < g.length; j++) {
260 if (g[j] != r[j]) {
261 throw new RuntimeException("verify " + name + ": arrays must have same content:" +
262 " gold[" + i + "][" + j + "] = " + g[j] +
263 " result[" + i + "][" + j + "] = " + r[j]);
264 }
265 }
266 }
267
268 static void verifyS(String name, int i, short[] g, short[] r) {
269 for (int j = 0; j < g.length; j++) {
270 if (g[j] != r[j]) {
271 throw new RuntimeException("verify " + name + ": arrays must have same content:" +
272 " gold[" + i + "][" + j + "] = " + g[j] +
273 " result[" + i + "][" + j + "] = " + r[j]);
274 }
275 }
276 }
277
278 static void verifyI(String name, int i, int[] g, int[] r) {
279 for (int j = 0; j < g.length; j++) {
280 if (g[j] != r[j]) {
281 throw new RuntimeException("verify " + name + ": arrays must have same content:" +
282 " gold[" + i + "][" + j + "] = " + g[j] +
283 " result[" + i + "][" + j + "] = " + r[j]);
284 }
285 }
286 }
287
288 static void verifyL(String name, int i, long[] g, long[] r) {
289 for (int j = 0; j < g.length; j++) {
290 if (g[j] != r[j]) {
291 throw new RuntimeException("verify " + name + ": arrays must have same content:" +
292 " gold[" + i + "][" + j + "] = " + g[j] +
293 " result[" + i + "][" + j + "] = " + r[j]);
294 }
295 }
296 }
297
// test0: for every stride of 8 ints, ANDs a[i+0..1] and a[i+4..7] with mask and stores
// each result to the same offset of b. Offsets 2 and 3 are neither read nor written.
// The IR rule expects 2- and 4-element int vector loads and ANDs plus at least one vector store.
// Returns { a, b } so the caller can compare them with a reference run.
298 @Test
299 @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
300 IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
301 IRNode.AND_VI, IRNode.VECTOR_SIZE_2, "> 0",
302 IRNode.AND_VI, IRNode.VECTOR_SIZE_4, "> 0",
303 IRNode.STORE_VECTOR, "> 0"},
304 applyIf = {"MaxVectorSize", ">=32"},
305 applyIfPlatform = {"64-bit", "true"},
306 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
307 // Load and store are already split
308 //
309 // 0 1 - - 4 5 6 7
310 // | | | | | |
311 // 0 1 - - 4 5 6 7
312 static Object[] test0(int[] a, int[] b, int mask) {
313 for (int i = 0; i < RANGE; i+=8) {
// Loads: one group of two (offsets 0..1) and one group of four (offsets 4..7).
314 int b0 = a[i+0] & mask;
315 int b1 = a[i+1] & mask;
316
317 int b4 = a[i+4] & mask;
318 int b5 = a[i+5] & mask;
319 int b6 = a[i+6] & mask;
320 int b7 = a[i+7] & mask;
321
// Stores: same grouping and same offsets as the loads.
322 b[i+0] = b0;
323 b[i+1] = b1;
324
325 b[i+4] = b4;
326 b[i+5] = b5;
327 b[i+6] = b6;
328 b[i+7] = b7;
329 }
330 return new Object[]{ a, b };
331 }
332
// test1a: for every stride of 8 ints, writes a + mask to b[i+0..3] and a * mask to b[i+4..5].
// Offsets 6 and 7 are untouched. The six loads and six stores are adjacent, but the operation
// differs between the first four and the last two lanes.
// The IR rule expects a 4-element AddVI and a 2-element MulVI.
// Returns { a, b } so the caller can compare them with a reference run.
333 @Test
334 @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
335 IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
336 IRNode.ADD_VI, IRNode.VECTOR_SIZE_4, "> 0",
337 IRNode.MUL_VI, IRNode.VECTOR_SIZE_2, "> 0",
338 IRNode.STORE_VECTOR, "> 0"},
339 applyIf = {"MaxVectorSize", ">=32"},
340 applyIfPlatform = {"64-bit", "true"},
341 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
342 // Adjacent Load and Store, but split by Add/Mul
343 static Object[] test1a(int[] a, int[] b, int mask) {
344 for (int i = 0; i < RANGE; i+=8) {
345 b[i+0] = a[i+0] + mask; // Add
346 b[i+1] = a[i+1] + mask;
347 b[i+2] = a[i+2] + mask;
348 b[i+3] = a[i+3] + mask;
349
350 b[i+4] = a[i+4] * mask; // Mul
351 b[i+5] = a[i+5] * mask;
352 }
353 return new Object[]{ a, b };
354 }
355
// test1b: for every stride of 8 ints, writes a * mask to b[i+0..3] and a + mask to b[i+4..5].
// Offsets 6 and 7 are untouched. Mirror image of test1a: four multiplies followed by two adds.
// The IR rule expects a 4-element MulVI and a 2-element AddVI.
// Returns { a, b } so the caller can compare them with a reference run.
356 @Test
357 @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
358 IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
359 IRNode.ADD_VI, IRNode.VECTOR_SIZE_2, "> 0",
360 IRNode.MUL_VI, IRNode.VECTOR_SIZE_4, "> 0",
361 IRNode.STORE_VECTOR, "> 0"},
362 applyIf = {"MaxVectorSize", ">=32"},
363 applyIfPlatform = {"64-bit", "true"},
364 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
365 // Adjacent Load and Store, but split by Add/Mul
366 static Object[] test1b(int[] a, int[] b, int mask) {
367 for (int i = 0; i < RANGE; i+=8) {
368 b[i+0] = a[i+0] * mask; // Mul
369 b[i+1] = a[i+1] * mask;
370 b[i+2] = a[i+2] * mask;
371 b[i+3] = a[i+3] * mask;
372
373 b[i+4] = a[i+4] + mask; // Add
374 b[i+5] = a[i+5] + mask;
375 }
376 return new Object[]{ a, b };
377 }
378
// test1c: for every stride of 8 ints, writes a + mask to b[i+0..1] and a * mask to b[i+2..5].
// Offsets 6 and 7 are untouched. Two adds followed by four multiplies on adjacent elements.
// The IR rule expects a 2-element AddVI and a 4-element MulVI.
// Returns { a, b } so the caller can compare them with a reference run.
379 @Test
380 @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
381 IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
382 IRNode.ADD_VI, IRNode.VECTOR_SIZE_2, "> 0",
383 IRNode.MUL_VI, IRNode.VECTOR_SIZE_4, "> 0",
384 IRNode.STORE_VECTOR, "> 0"},
385 applyIf = {"MaxVectorSize", ">=32"},
386 applyIfPlatform = {"64-bit", "true"},
387 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
388 // Adjacent Load and Store, but split by Add/Mul
389 static Object[] test1c(int[] a, int[] b, int mask) {
390 for (int i = 0; i < RANGE; i+=8) {
391 b[i+0] = a[i+0] + mask; // Add
392 b[i+1] = a[i+1] + mask;
393
394 b[i+2] = a[i+2] * mask; // Mul
395 b[i+3] = a[i+3] * mask;
396 b[i+4] = a[i+4] * mask;
397 b[i+5] = a[i+5] * mask;
398 }
399 return new Object[]{ a, b };
400 }
401
// test1d: for every stride of 8 ints, writes a * mask to b[i+0..1] and a + mask to b[i+2..5].
// Offsets 6 and 7 are untouched. Mirror image of test1c: two multiplies followed by four adds.
// The IR rule expects a 2-element MulVI and a 4-element AddVI.
// Returns { a, b } so the caller can compare them with a reference run.
402 @Test
403 @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
404 IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
405 IRNode.ADD_VI, IRNode.VECTOR_SIZE_4, "> 0",
406 IRNode.MUL_VI, IRNode.VECTOR_SIZE_2, "> 0",
407 IRNode.STORE_VECTOR, "> 0"},
408 applyIf = {"MaxVectorSize", ">=32"},
409 applyIfPlatform = {"64-bit", "true"},
410 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
411 // Adjacent Load and Store, but split by Add/Mul
412 static Object[] test1d(int[] a, int[] b, int mask) {
413 for (int i = 0; i < RANGE; i+=8) {
414 b[i+0] = a[i+0] * mask; // Mul
415 b[i+1] = a[i+1] * mask;
416
417 b[i+2] = a[i+2] + mask; // Add
418 b[i+3] = a[i+3] + mask;
419 b[i+4] = a[i+4] + mask;
420 b[i+5] = a[i+5] + mask;
421 }
422 return new Object[]{ a, b };
423 }
424
// test2a: for every stride of 8 ints, loads the six adjacent elements a[i+0..5], ANDs each
// with mask, and stores the first two results to b[i+0..1] and the other four to b[i+4..7].
// b[i+2] and b[i+3] are not written. The stores form a group of two and a group of four,
// while the loads are one run of six.
// Returns { a, b } so the caller can compare them with a reference run.
425 @Test
426 @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
427 IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
428 IRNode.AND_VI, IRNode.VECTOR_SIZE_2, "> 0",
429 IRNode.AND_VI, IRNode.VECTOR_SIZE_4, "> 0",
430 IRNode.STORE_VECTOR, "> 0"},
431 applyIf = {"MaxVectorSize", ">=32"},
432 applyIfPlatform = {"64-bit", "true"},
433 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
434 // Split the load
435 //
436 // 0 1 2 3 4 5 - -
437 // | | \ \ \ \
438 // | | \ \ \ \
439 // | | \ \ \ \
440 // 0 1 - - 4 5 6 7
441 //
442 static Object[] test2a(int[] a, int[] b, int mask) {
443 for (int i = 0; i < RANGE; i+=8) {
444 int b0 = a[i+0] & mask;
445 int b1 = a[i+1] & mask;
446 int b2 = a[i+2] & mask;
447 int b3 = a[i+3] & mask;
448 int b4 = a[i+4] & mask;
449 int b5 = a[i+5] & mask;
450
451 b[i+0] = b0;
452 b[i+1] = b1;
453
// Values loaded from offsets 2..5 are stored two positions further, at offsets 4..7.
454 b[i+4] = b2;
455 b[i+5] = b3;
456 b[i+6] = b4;
457 b[i+7] = b5;
458 }
459 return new Object[]{ a, b };
460 }
461
// test2b: for every stride of 8 ints, loads the six adjacent elements a[i+0..5], ANDs each
// with mask, and stores the first four results to b[i+0..3] and the last two to b[i+6..7].
// b[i+4] and b[i+5] are not written. The stores form a group of four and a group of two,
// while the loads are one run of six.
// Returns { a, b } so the caller can compare them with a reference run.
462 @Test
463 @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
464 IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
465 IRNode.AND_VI, IRNode.VECTOR_SIZE_2, "> 0",
466 IRNode.AND_VI, IRNode.VECTOR_SIZE_4, "> 0",
467 IRNode.STORE_VECTOR, "> 0"},
468 applyIf = {"MaxVectorSize", ">=32"},
469 applyIfPlatform = {"64-bit", "true"},
470 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
471 // Split the load
472 //
473 // 0 1 2 3 4 5 - -
474 // | | | | \ \
475 // | | | | \ \
476 // | | | | \ \
477 // 0 1 2 3 -- 6 7
478 //
479 static Object[] test2b(int[] a, int[] b, int mask) {
480 for (int i = 0; i < RANGE; i+=8) {
481 int b0 = a[i+0] & mask;
482 int b1 = a[i+1] & mask;
483 int b2 = a[i+2] & mask;
484 int b3 = a[i+3] & mask;
485 int b4 = a[i+4] & mask;
486 int b5 = a[i+5] & mask;
487
488 b[i+0] = b0;
489 b[i+1] = b1;
490 b[i+2] = b2;
491 b[i+3] = b3;
492
// Values loaded from offsets 4..5 are stored two positions further, at offsets 6..7.
493 b[i+6] = b4;
494 b[i+7] = b5;
495 }
496 return new Object[]{ a, b };
497 }
498
// test2c: for every stride of 8 ints, loads a[i+0..1] and a[i+4..7], ANDs each with mask,
// and stores the six results to the adjacent elements b[i+0..5].
// a[i+2] and a[i+3] are not read. The loads form a group of two and a group of four,
// while the stores are one run of six.
// Returns { a, b } so the caller can compare them with a reference run.
499 @Test
500 @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
501 IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
502 IRNode.AND_VI, IRNode.VECTOR_SIZE_2, "> 0",
503 IRNode.AND_VI, IRNode.VECTOR_SIZE_4, "> 0",
504 IRNode.STORE_VECTOR, "> 0"},
505 applyIf = {"MaxVectorSize", ">=32"},
506 applyIfPlatform = {"64-bit", "true"},
507 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
508 // Split the store
509 //
510 // 0 1 - - 4 5 6 7
511 // | | / / / /
512 // | | / / / /
513 // | | / / / /
514 // 0 1 2 3 4 5 - -
515 //
516 static Object[] test2c(int[] a, int[] b, int mask) {
517 for (int i = 0; i < RANGE; i+=8) {
518 int b0 = a[i+0] & mask;
519 int b1 = a[i+1] & mask;
520
521 int b4 = a[i+4] & mask;
522 int b5 = a[i+5] & mask;
523 int b6 = a[i+6] & mask;
524 int b7 = a[i+7] & mask;
525
526 b[i+0] = b0;
527 b[i+1] = b1;
// Values loaded from offsets 4..7 are stored two positions earlier, at offsets 2..5.
528 b[i+2] = b4;
529 b[i+3] = b5;
530 b[i+4] = b6;
531 b[i+5] = b7;
532 }
533 return new Object[]{ a, b };
534 }
535
// test2d: for every stride of 8 ints, loads a[i+0..3] and a[i+6..7], ANDs each with mask,
// and stores the six results to the adjacent elements b[i+0..5].
// a[i+4] and a[i+5] are not read. The loads form a group of four and a group of two,
// while the stores are one run of six.
// Returns { a, b } so the caller can compare them with a reference run.
536 @Test
537 @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
538 IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
539 IRNode.AND_VI, IRNode.VECTOR_SIZE_2, "> 0",
540 IRNode.AND_VI, IRNode.VECTOR_SIZE_4, "> 0",
541 IRNode.STORE_VECTOR, "> 0"},
542 applyIf = {"MaxVectorSize", ">=32"},
543 applyIfPlatform = {"64-bit", "true"},
544 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
545 // Split the store
546 //
547 // 0 1 2 3 - - 6 7
548 // | | | | / /
549 // | | | | / /
550 // | | | | / /
551 // 0 1 2 3 4 5 - -
552 //
553 static Object[] test2d(int[] a, int[] b, int mask) {
554 for (int i = 0; i < RANGE; i+=8) {
555 int b0 = a[i+0] & mask;
556 int b1 = a[i+1] & mask;
557 int b2 = a[i+2] & mask;
558 int b3 = a[i+3] & mask;
559
560 int b6 = a[i+6] & mask;
561 int b7 = a[i+7] & mask;
562
563 b[i+0] = b0;
564 b[i+1] = b1;
565 b[i+2] = b2;
566 b[i+3] = b3;
// Values loaded from offsets 6..7 are stored two positions earlier, at offsets 4..5.
567 b[i+4] = b6;
568 b[i+5] = b7;
569 }
570 return new Object[]{ a, b };
571 }
572
// test3a: for every stride of 16 shorts, copies a[i+0] and a[i+4..7] to the same offsets
// of b, writes val to b[i+3] and b[i+8], and adds a[i+1] + a[i+2] + a[i+3] to a running sum.
// The IR rule expects a 4-element short vector load and at least one vector store.
// Returns { a, b, { sum } } so that the sum is verified along with the two arrays.
573 @Test
574 @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
575 IRNode.STORE_VECTOR, "> 0"},
576 applyIf = {"MaxVectorSize", ">=32"},
577 applyIfPlatform = {"64-bit", "true"},
578 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
579 // 0 1 2 3 4 5 6 7 -
580 // | | | | | | | |
581 // | + + + | | | |
582 // | | | | |
583 // | v | | | | v
584 // | | | | | | |
585 // 0 - - 3 4 5 6 7 8
586 static Object[] test3a(short[] a, short[] b, short val) {
587 int sum = 0;
588 for (int i = 0; i < RANGE; i+=16) {
589 short a0 = a[i+0]; // required for alignment / offsets, technical limitation.
590
591 short a1 = a[i+1]; // adjacent to 4-pack, but need to be split off
592 short a2 = a[i+2];
593 short a3 = a[i+3];
594
595 short a4 = a[i+4]; // 4-pack
596 short a5 = a[i+5];
597 short a6 = a[i+6];
598 short a7 = a[i+7];
599
600
601 b[i+0] = a0; // required for alignment / offsets, technical limitation.
602
603 sum += a1 + a2 + a3; // not packed
604
605 b[i+3] = val; // adjacent to 4-pack but needs to be split off
606
607 b[i+4] = a4; // 4-pack
608 b[i+5] = a5;
609 b[i+6] = a6;
610 b[i+7] = a7;
611
612 b[i+8] = val; // adjacent to 4-pack but needs to be split off
613 }
614 return new Object[]{ a, b, new int[]{ sum } };
615 }
616
// test4a: copies a[i] to b[i+2] for every i in [0, RANGE-64).
// The constructor of this class registers it with two distinct arrays.
// First IR rule (speculative aliasing checks off): expects 2-element short vector loads.
// Second IR rule (checks on, AlignVector off): expects short vector loads of any size.
// Both rules expect no "multiversion" match in the PRINT_IDEAL output.
// Returns { a, b } so the caller can compare them with a reference run.
617 @Test
618 @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_2, "> 0",
619 IRNode.STORE_VECTOR, "> 0",
620 ".*multiversion.*", "= 0"},
621 phase = CompilePhase.PRINT_IDEAL,
622 applyIf = {"UseAutoVectorizationSpeculativeAliasingChecks", "false"},
623 applyIfPlatform = {"64-bit", "true"},
624 applyIfCPUFeatureOr = {"sse4.1", "true"})
625 // Cyclic dependency with distance 2 -> split into 2-packs
626 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
627 IRNode.STORE_VECTOR, "> 0",
628 ".*multiversion.*", "= 0"},
629 phase = CompilePhase.PRINT_IDEAL,
630 applyIfAnd = {"UseAutoVectorizationSpeculativeAliasingChecks", "true", "AlignVector", "false"},
631 applyIfPlatform = {"64-bit", "true"},
632 applyIfCPUFeatureOr = {"sse4.1", "true"})
633 // Speculative aliasing check -> full vectorization.
634 static Object[] test4a(short[] a, short[] b) {
635 for (int i = 0; i < RANGE-64; i++) {
636 b[i+2] = a[i+0];
637 }
638 return new Object[]{ a, b };
639 }
640
// test4b: copies a[i] to b[i+3] for every i in [0, RANGE-64).
// The constructor of this class registers it with two distinct arrays.
// First IR rule (speculative aliasing checks off): expects 2-element short vector loads.
// Second IR rule (checks on): expects short vector loads of any size.
// Both rules apply only with AlignVector off and expect no "multiversion" match.
// Returns { a, b } so the caller can compare them with a reference run.
641 @Test
642 @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_2, "> 0",
643 IRNode.STORE_VECTOR, "> 0",
644 ".*multiversion.*", "= 0"},
645 phase = CompilePhase.PRINT_IDEAL,
646 applyIfAnd = {"AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "false"},
647 applyIfPlatform = {"64-bit", "true"},
648 applyIfCPUFeatureOr = {"sse4.1", "true"})
649 // Cyclic dependency with distance 3 -> split into 2-packs
650 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
651 IRNode.STORE_VECTOR, "> 0",
652 ".*multiversion.*", "= 0"},
653 phase = CompilePhase.PRINT_IDEAL,
654 applyIfAnd = {"AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "true"},
655 applyIfPlatform = {"64-bit", "true"},
656 applyIfCPUFeatureOr = {"sse4.1", "true"})
657 // Speculative aliasing check -> full vectorization.
658 static Object[] test4b(short[] a, short[] b) {
659 for (int i = 0; i < RANGE-64; i++) {
660 b[i+3] = a[i+0];
661 }
662 return new Object[]{ a, b };
663 }
664
// test4c: copies a[i] to b[i+4] for every i in [0, RANGE-64).
// The constructor of this class registers it with two distinct arrays.
// First IR rule (speculative aliasing checks off): expects 4-element short vector loads.
// Second IR rule (checks on): expects short vector loads of any size.
// Both rules require MaxVectorSize >= 8 and expect no "multiversion" match.
// Returns { a, b } so the caller can compare them with a reference run.
665 @Test
666 @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
667 IRNode.STORE_VECTOR, "> 0",
668 ".*multiversion.*", "= 0"},
669 phase = CompilePhase.PRINT_IDEAL,
670 applyIfAnd = {"MaxVectorSize", ">=8", "UseAutoVectorizationSpeculativeAliasingChecks", "false"},
671 applyIfPlatform = {"64-bit", "true"},
672 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
673 // Cyclic dependency with distance 4 -> split into 4-packs
674 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
675 IRNode.STORE_VECTOR, "> 0",
676 ".*multiversion.*", "= 0"},
677 phase = CompilePhase.PRINT_IDEAL,
678 applyIfAnd = {"MaxVectorSize", ">=8", "UseAutoVectorizationSpeculativeAliasingChecks", "true"},
679 applyIfPlatform = {"64-bit", "true"},
680 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
681 // Speculative aliasing check -> full vectorization.
682 static Object[] test4c(short[] a, short[] b) {
683 for (int i = 0; i < RANGE-64; i++) {
684 b[i+4] = a[i+0];
685 }
686 return new Object[]{ a, b };
687 }
688
// test4d: copies a[i] to b[i+5] for every i in [0, RANGE-64).
// The constructor of this class registers it with two distinct arrays.
// First IR rule (speculative aliasing checks off): expects 4-element short vector loads.
// Second IR rule (checks on): expects short vector loads of any size.
// Both rules require MaxVectorSize >= 8 and AlignVector off, and expect no "multiversion" match.
// Returns { a, b } so the caller can compare them with a reference run.
689 @Test
690 @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
691 IRNode.STORE_VECTOR, "> 0",
692 ".*multiversion.*", "= 0"},
693 phase = CompilePhase.PRINT_IDEAL,
694 applyIfAnd = {"MaxVectorSize", ">=8", "AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "false"},
695 applyIfPlatform = {"64-bit", "true"},
696 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
697 // Cyclic dependency with distance 5 -> split into 4-packs
698 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
699 IRNode.STORE_VECTOR, "> 0",
700 ".*multiversion.*", "= 0"},
701 phase = CompilePhase.PRINT_IDEAL,
702 applyIfAnd = {"MaxVectorSize", ">=8", "AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "true"},
703 applyIfPlatform = {"64-bit", "true"},
704 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
705 // Speculative aliasing check -> full vectorization.
706 static Object[] test4d(short[] a, short[] b) {
707 for (int i = 0; i < RANGE-64; i++) {
708 b[i+5] = a[i+0];
709 }
710 return new Object[]{ a, b };
711 }
712
// test4e: copies a[i] to b[i+6] for every i in [0, RANGE-64).
// The constructor of this class registers it with two distinct arrays.
// First IR rule (speculative aliasing checks off): expects 4-element short vector loads.
// Second IR rule (checks on): expects short vector loads of any size.
// Both rules require MaxVectorSize >= 8 and AlignVector off, and expect no "multiversion" match.
// Returns { a, b } so the caller can compare them with a reference run.
713 @Test
714 @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
715 IRNode.STORE_VECTOR, "> 0",
716 ".*multiversion.*", "= 0"},
717 phase = CompilePhase.PRINT_IDEAL,
718 applyIfAnd = {"MaxVectorSize", ">=8", "AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "false"},
719 applyIfPlatform = {"64-bit", "true"},
720 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
721 // Cyclic dependency with distance 6 -> split into 4-packs
722 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
723 IRNode.STORE_VECTOR, "> 0",
724 ".*multiversion.*", "= 0"},
725 phase = CompilePhase.PRINT_IDEAL,
726 applyIfAnd = {"MaxVectorSize", ">=8", "AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "true"},
727 applyIfPlatform = {"64-bit", "true"},
728 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
729 // Speculative aliasing check -> full vectorization.
730 static Object[] test4e(short[] a, short[] b) {
731 for (int i = 0; i < RANGE-64; i++) {
732 b[i+6] = a[i+0];
733 }
734 return new Object[]{ a, b };
735 }
736
// test4f: copies a[i] to b[i+7] for every i in [0, RANGE-64).
// The constructor of this class registers it with two distinct arrays.
// First IR rule (speculative aliasing checks off): expects 4-element short vector loads.
// Second IR rule (checks on): expects short vector loads of any size.
// Both rules require MaxVectorSize >= 8 and AlignVector off, and expect no "multiversion" match.
// Returns { a, b } so the caller can compare them with a reference run.
737 @Test
738 @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
739 IRNode.STORE_VECTOR, "> 0",
740 ".*multiversion.*", "= 0"},
741 phase = CompilePhase.PRINT_IDEAL,
742 applyIfAnd = {"MaxVectorSize", ">=8", "AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "false"},
743 applyIfPlatform = {"64-bit", "true"},
744 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
745 // Cyclic dependency with distance 7 -> split into 4-packs
746 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
747 IRNode.STORE_VECTOR, "> 0",
748 ".*multiversion.*", "= 0"},
749 phase = CompilePhase.PRINT_IDEAL,
750 applyIfAnd = {"MaxVectorSize", ">=8", "AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "true"},
751 applyIfPlatform = {"64-bit", "true"},
752 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
753 // Speculative aliasing check -> full vectorization.
754 static Object[] test4f(short[] a, short[] b) {
755 for (int i = 0; i < RANGE-64; i++) {
756 b[i+7] = a[i+0];
757 }
758 return new Object[]{ a, b };
759 }
760
// test4g: copies a[i] to b[i+8] for every i in [0, RANGE-64).
// The constructor of this class registers it with two distinct arrays.
// First IR rule (speculative aliasing checks off): expects 8-element short vector loads.
// Second IR rule (checks on): expects short vector loads of any size.
// Both rules require MaxVectorSize >= 32 and expect no "multiversion" match.
// Returns { a, b } so the caller can compare them with a reference run.
761 @Test
762 @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_8, "> 0",
763 IRNode.STORE_VECTOR, "> 0",
764 ".*multiversion.*", "= 0"},
765 phase = CompilePhase.PRINT_IDEAL,
766 applyIfAnd = {"MaxVectorSize", ">=32", "UseAutoVectorizationSpeculativeAliasingChecks", "false"},
767 applyIfPlatform = {"64-bit", "true"},
768 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
769 // Cyclic dependency with distance 8 -> split into 8-packs
770 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
771 IRNode.STORE_VECTOR, "> 0",
772 ".*multiversion.*", "= 0"},
773 phase = CompilePhase.PRINT_IDEAL,
774 applyIfAnd = {"MaxVectorSize", ">=32", "UseAutoVectorizationSpeculativeAliasingChecks", "true"},
775 applyIfPlatform = {"64-bit", "true"},
776 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
777 // Speculative aliasing check -> full vectorization.
778 static Object[] test4g(short[] a, short[] b) {
779 for (int i = 0; i < RANGE-64; i++) {
780 b[i+8] = a[i+0];
781 }
782 return new Object[]{ a, b };
783 }
784
// test4a_alias: same loop as test4a (copies a[i] to b[i+2] for i in [0, RANGE-64)),
// but the constructor of this class registers it with the SAME array passed as both a and b.
// First IR rule (speculative aliasing checks off): 2-element short vector loads, no "multiversion" match.
// Second IR rule (checks on, AlignVector off): short vector loads of any size AND of size 2,
// and at least one "multiversion" match in the PRINT_IDEAL output.
// Returns { a, b }; with the aliased call both entries are the same array.
785 @Test
786 @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_2, "> 0",
787 IRNode.STORE_VECTOR, "> 0",
788 ".*multiversion.*", "= 0"},
789 phase = CompilePhase.PRINT_IDEAL,
790 applyIf = {"UseAutoVectorizationSpeculativeAliasingChecks", "false"},
791 applyIfPlatform = {"64-bit", "true"},
792 applyIfCPUFeatureOr = {"sse4.1", "true"})
793 // Cyclic dependency with distance 2 -> split into 2-packs
794 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
795 IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_2, "> 0",
796 IRNode.STORE_VECTOR, "> 0",
797 ".*multiversion.*", "> 0"},
798 phase = CompilePhase.PRINT_IDEAL,
799 applyIfAnd = {"UseAutoVectorizationSpeculativeAliasingChecks", "true", "AlignVector", "false"},
800 applyIfPlatform = {"64-bit", "true"},
801 applyIfCPUFeatureOr = {"sse4.1", "true"})
802 // Speculative aliasing check with multiversioning -> full vectorization & split packs.
803 static Object[] test4a_alias(short[] a, short[] b) {
804 for (int i = 0; i < RANGE-64; i++) {
805 b[i+2] = a[i+0];
806 }
807 return new Object[]{ a, b };
808 }
809
// test4b_alias: same loop as test4b (copies a[i] to b[i+3] for i in [0, RANGE-64)),
// but the constructor of this class registers it with the SAME array passed as both a and b.
// First IR rule (speculative aliasing checks off): 2-element short vector loads, no "multiversion" match.
// Second IR rule (checks on): short vector loads of any size AND of size 2,
// and at least one "multiversion" match in the PRINT_IDEAL output.
// Both rules apply only with AlignVector off.
// Returns { a, b }; with the aliased call both entries are the same array.
810 @Test
811 @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_2, "> 0",
812 IRNode.STORE_VECTOR, "> 0",
813 ".*multiversion.*", "= 0"},
814 phase = CompilePhase.PRINT_IDEAL,
815 applyIfAnd = {"AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "false"},
816 applyIfPlatform = {"64-bit", "true"},
817 applyIfCPUFeatureOr = {"sse4.1", "true"})
818 // Cyclic dependency with distance 3 -> split into 2-packs
819 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
820 IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_2, "> 0",
821 IRNode.STORE_VECTOR, "> 0",
822 ".*multiversion.*", "> 0"},
823 phase = CompilePhase.PRINT_IDEAL,
824 applyIfAnd = {"AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "true"},
825 applyIfPlatform = {"64-bit", "true"},
826 applyIfCPUFeatureOr = {"sse4.1", "true"})
827 // Speculative aliasing check with multiversioning -> full vectorization & split packs.
828 static Object[] test4b_alias(short[] a, short[] b) {
829 for (int i = 0; i < RANGE-64; i++) {
830 b[i+3] = a[i+0];
831 }
832 return new Object[]{ a, b };
833 }
834
// test4c_alias: same loop as test4c (copies a[i] to b[i+4] for i in [0, RANGE-64)),
// but the constructor of this class registers it with the SAME array passed as both a and b.
// First IR rule (speculative aliasing checks off): 4-element short vector loads, no "multiversion" match.
// Second IR rule (checks on): short vector loads of any size AND of size 4,
// and at least one "multiversion" match in the PRINT_IDEAL output.
// Both rules require MaxVectorSize >= 8.
// Returns { a, b }; with the aliased call both entries are the same array.
835 @Test
836 @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
837 IRNode.STORE_VECTOR, "> 0",
838 ".*multiversion.*", "= 0"},
839 phase = CompilePhase.PRINT_IDEAL,
840 applyIfAnd = {"MaxVectorSize", ">=8", "UseAutoVectorizationSpeculativeAliasingChecks", "false"},
841 applyIfPlatform = {"64-bit", "true"},
842 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
843 // Cyclic dependency with distance 4 -> split into 4-packs
844 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
845 IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
846 IRNode.STORE_VECTOR, "> 0",
847 ".*multiversion.*", "> 0"},
848 phase = CompilePhase.PRINT_IDEAL,
849 applyIfAnd = {"MaxVectorSize", ">=8", "UseAutoVectorizationSpeculativeAliasingChecks", "true"},
850 applyIfPlatform = {"64-bit", "true"},
851 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
852 // Speculative aliasing check with multiversioning -> full vectorization & split packs.
853 static Object[] test4c_alias(short[] a, short[] b) {
854 for (int i = 0; i < RANGE-64; i++) {
855 b[i+4] = a[i+0];
856 }
857 return new Object[]{ a, b };
858 }
859
860 @Test // Shifted copy with offset 5; the two IR rules below cover speculative aliasing checks off and on.
861 @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0", // rule 1: 4-element short vector loads must appear
862 IRNode.STORE_VECTOR, "> 0",
863 ".*multiversion.*", "= 0"}, // rule 1: the multiversion pattern must not match at all
864 phase = CompilePhase.PRINT_IDEAL,
865 applyIfAnd = {"MaxVectorSize", ">=8", "AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "false"}, // both rules additionally require AlignVector to be off
866 applyIfPlatform = {"64-bit", "true"},
867 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
868 // Cyclic dependency with distance 5 -> split into 4-packs
869 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", // rule 2: loads with no explicit size - presumably the fully vectorized loop version; confirm against the IR framework's default-size rule
870 IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0", // rule 2: 4-element loads must appear as well
871 IRNode.STORE_VECTOR, "> 0",
872 ".*multiversion.*", "> 0"}, // rule 2: the multiversion pattern must match at least once
873 phase = CompilePhase.PRINT_IDEAL,
874 applyIfAnd = {"MaxVectorSize", ">=8", "AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "true"},
875 applyIfPlatform = {"64-bit", "true"},
876 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
877 // Speculative aliasing check with multiversioning -> full vectorization & split packs.
878 static Object[] test4d_alias(short[] a, short[] b) {
879 for (int i = 0; i < RANGE-64; i++) { // RANGE is declared outside this excerpt; i+5 stays below RANGE
880 b[i+5] = a[i+0]; // if a and b are the same array, this value is read again 5 iterations later
881 }
882 return new Object[]{ a, b }; // hands back both arrays (b now modified) in one Object[]
883 }
884
885 @Test // Shifted copy with offset 6; the two IR rules below cover speculative aliasing checks off and on.
886 @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0", // rule 1: 4-element short vector loads must appear
887 IRNode.STORE_VECTOR, "> 0",
888 ".*multiversion.*", "= 0"}, // rule 1: the multiversion pattern must not match at all
889 phase = CompilePhase.PRINT_IDEAL,
890 applyIfAnd = {"MaxVectorSize", ">=8", "AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "false"}, // both rules additionally require AlignVector to be off
891 applyIfPlatform = {"64-bit", "true"},
892 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
893 // Cyclic dependency with distance 6 -> split into 4-packs
894 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", // rule 2: loads with no explicit size - presumably the fully vectorized loop version; confirm against the IR framework's default-size rule
895 IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0", // rule 2: 4-element loads must appear as well
896 IRNode.STORE_VECTOR, "> 0",
897 ".*multiversion.*", "> 0"}, // rule 2: the multiversion pattern must match at least once
898 phase = CompilePhase.PRINT_IDEAL,
899 applyIfAnd = {"MaxVectorSize", ">=8", "AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "true"},
900 applyIfPlatform = {"64-bit", "true"},
901 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
902 // Speculative aliasing check with multiversioning -> full vectorization & split packs.
903 static Object[] test4e_alias(short[] a, short[] b) {
904 for (int i = 0; i < RANGE-64; i++) { // RANGE is declared outside this excerpt; i+6 stays below RANGE
905 b[i+6] = a[i+0]; // if a and b are the same array, this value is read again 6 iterations later
906 }
907 return new Object[]{ a, b }; // hands back both arrays (b now modified) in one Object[]
908 }
909
910 @Test // Shifted copy with offset 7; the two IR rules below cover speculative aliasing checks off and on.
911 @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0", // rule 1: 4-element short vector loads must appear
912 IRNode.STORE_VECTOR, "> 0",
913 ".*multiversion.*", "= 0"}, // rule 1: the multiversion pattern must not match at all
914 phase = CompilePhase.PRINT_IDEAL,
915 applyIfAnd = {"MaxVectorSize", ">=8", "AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "false"}, // both rules additionally require AlignVector to be off
916 applyIfPlatform = {"64-bit", "true"},
917 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
918 // Cyclic dependency with distance 7 -> split into 4-packs
919 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", // rule 2: loads with no explicit size - presumably the fully vectorized loop version; confirm against the IR framework's default-size rule
920 IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0", // rule 2: 4-element loads must appear as well
921 IRNode.STORE_VECTOR, "> 0",
922 ".*multiversion.*", "> 0"}, // rule 2: the multiversion pattern must match at least once
923 phase = CompilePhase.PRINT_IDEAL,
924 applyIfAnd = {"MaxVectorSize", ">=8", "AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "true"},
925 applyIfPlatform = {"64-bit", "true"},
926 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
927 // Speculative aliasing check with multiversioning -> full vectorization & split packs.
928 static Object[] test4f_alias(short[] a, short[] b) {
929 for (int i = 0; i < RANGE-64; i++) { // RANGE is declared outside this excerpt; i+7 stays below RANGE
930 b[i+7] = a[i+0]; // if a and b are the same array, this value is read again 7 iterations later
931 }
932 return new Object[]{ a, b }; // hands back both arrays (b now modified) in one Object[]
933 }
934
935 @Test // Shifted copy with offset 8; the two IR rules below cover speculative aliasing checks off and on.
936 @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_8, "> 0", // rule 1: 8-element short vector loads must appear
937 IRNode.STORE_VECTOR, "> 0",
938 ".*multiversion.*", "= 0"}, // rule 1: the multiversion pattern must not match at all
939 phase = CompilePhase.PRINT_IDEAL,
940 applyIfAnd = {"MaxVectorSize", ">=32", "UseAutoVectorizationSpeculativeAliasingChecks", "false"},
941 applyIfPlatform = {"64-bit", "true"},
942 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"}) // NOTE(review): rule 1 lists sse4.1 while rule 2 lists avx2 - confirm the difference is intended
943 // Cyclic dependency with distance 8 -> split into 8-packs
944 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", // rule 2: loads with no explicit size - presumably the fully vectorized loop version; confirm against the IR framework's default-size rule
945 IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_8, "> 0", // rule 2: 8-element loads must appear as well
946 IRNode.STORE_VECTOR, "> 0",
947 ".*multiversion.*", "> 0"}, // rule 2: the multiversion pattern must match at least once
948 phase = CompilePhase.PRINT_IDEAL,
949 applyIfAnd = {"MaxVectorSize", ">=32", "UseAutoVectorizationSpeculativeAliasingChecks", "true"},
950 applyIfPlatform = {"64-bit", "true"},
951 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
952 // Speculative aliasing check with multiversioning -> full vectorization & split packs.
953 static Object[] test4g_alias(short[] a, short[] b) {
954 for (int i = 0; i < RANGE-64; i++) { // RANGE is declared outside this excerpt; i+8 stays below RANGE
955 b[i+8] = a[i+0]; // if a and b are the same array, this value is read again 8 iterations later
956 }
957 return new Object[]{ a, b }; // hands back both arrays (b now modified) in one Object[]
958 }
959
960 @Test // Adds val to 15 consecutive shorts per 16-element stride; the IR rules check which vector sizes result.
961 @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_2, "> 0", // rule 1 (sse4.1): 2-, 4- and 8-element loads and adds all expected
962 IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
963 IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_8, "> 0",
964 IRNode.ADD_VS, IRNode.VECTOR_SIZE_2, "> 0",
965 IRNode.ADD_VS, IRNode.VECTOR_SIZE_8, "> 0",
966 IRNode.ADD_VS, IRNode.VECTOR_SIZE_4, "> 0",
967 IRNode.STORE_VECTOR, "> 0"},
968 applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"},
969 applyIfPlatform = {"64-bit", "true"},
970 applyIfCPUFeature = {"sse4.1", "true"})
971 // aarch64 limits minimum vector size to 8B, thus a vector size of
972 // length 2 for type "short" will not be generated
973 @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0", // rule 2 (sve): same checks as rule 1 minus the 2-element ones
974 IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_8, "> 0",
975 IRNode.ADD_VS, IRNode.VECTOR_SIZE_8, "> 0",
976 IRNode.ADD_VS, IRNode.VECTOR_SIZE_4, "> 0",
977 IRNode.STORE_VECTOR, "> 0"},
978 applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"},
979 applyIfPlatform = {"64-bit", "true"},
980 applyIfCPUFeature = {"sve", "true"})
981 // Split pack into power-of-2 sizes
982 static Object[] test5a(short[] a, short[] b, short val) {
983 for (int i = 0; i < RANGE; i+=16) { // RANGE is declared outside this excerpt; each iteration handles one 16-element stride
984 b[i+ 0] = (short)(a[i+ 0] + val); // 8-pack
985 b[i+ 1] = (short)(a[i+ 1] + val);
986 b[i+ 2] = (short)(a[i+ 2] + val);
987 b[i+ 3] = (short)(a[i+ 3] + val);
988 b[i+ 4] = (short)(a[i+ 4] + val);
989 b[i+ 5] = (short)(a[i+ 5] + val);
990 b[i+ 6] = (short)(a[i+ 6] + val);
991 b[i+ 7] = (short)(a[i+ 7] + val);
992
993 b[i+ 8] = (short)(a[i+ 8] + val); // 4-pack
994 b[i+ 9] = (short)(a[i+ 9] + val);
995 b[i+10] = (short)(a[i+10] + val);
996 b[i+11] = (short)(a[i+11] + val);
997
998 b[i+12] = (short)(a[i+12] + val); // 2-pack
999 b[i+13] = (short)(a[i+13] + val);
1000
1001 b[i+14] = (short)(a[i+14] + val); // single leftover element; index i+15 is never touched
1002 }
1003 return new Object[]{ a, b }; // hands back both arrays (b now modified) in one Object[]
1004 }
1005
1006 @Test // Accumulates four products and four bitwise ANDs per 8-element stride into one int sum.
1007 @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0", // every sized check below asks for 4-element int vectors
1008 IRNode.MUL_VI, IRNode.VECTOR_SIZE_4, "> 0",
1009 IRNode.AND_VI, IRNode.VECTOR_SIZE_4, "> 0",
1010 IRNode.ADD_VI, IRNode.VECTOR_SIZE_4, "> 0", // reduction moved out of loop
1011 IRNode.ADD_REDUCTION_V, "> 0"},
1012 applyIf = {"MaxVectorSize", ">=32"},
1013 applyIfPlatform = {"64-bit", "true"},
1014 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
1015 // Split packs including reductions
1016 static Object[] test6a(int[] a, int[] b) {
1017 int s = 0; // single running sum updated by all eight statements of the loop body
1018 for (int i = 0; i < RANGE; i+=8) { // RANGE is declared outside this excerpt; each iteration handles one 8-element stride
1019 s += a[i+0] * b[i+0]; // elements 0..3 of the stride: add the product
1020 s += a[i+1] * b[i+1];
1021 s += a[i+2] * b[i+2];
1022 s += a[i+3] * b[i+3];
1023
1024 s += a[i+4] & b[i+4]; // elements 4..7 of the stride: add the bitwise AND
1025 s += a[i+5] & b[i+5];
1026 s += a[i+6] & b[i+6];
1027 s += a[i+7] & b[i+7];
1028 }
1029 return new Object[]{ a, b, new int[]{ s } }; // the sum is wrapped in a one-element int[] next to the two (unmodified) inputs
1030 }
1031
1032 @Test // Multiplies each b[i] by its own index i and stores the product in a[i].
1033 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", // expects vector int loads, vector int multiplies and a POPULATE_INDEX node
1034 IRNode.MUL_VI, "> 0",
1035 IRNode.POPULATE_INDEX, "> 0"},
1036 applyIfPlatform = {"64-bit", "true"},
1037 applyIfCPUFeatureOr = {"avx2", "true", "sve", "true", "rvv", "true"})
1038 // Index Populate:
1039 // There can be an issue when all the (iv + 1), (iv + 2), ...
1040 // get packed, but not (iv). Then we have a pack that is one element
1041 // too short, and we start splitting everything in a bad way.
1042 static Object[] test7a(int[] a, int[] b) {
1043 for (int i = 0; i < RANGE; i++) { // RANGE is declared outside this excerpt
1044 a[i] = b[i] * i; // the loop index is itself an operand - presumably what the POPULATE_INDEX check targets
1045 }
1046 return new Object[]{ a, b }; // hands back both arrays (a now modified) in one Object[]
1047 }
1048 }