1 /*
2 * Copyright (c) 2024, 2025, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 */
23
24 package compiler.loopopts.superword;
25
26 import compiler.lib.ir_framework.*;
27 import jdk.test.lib.Utils;
28 import jdk.test.whitebox.WhiteBox;
29 import java.lang.reflect.Array;
30 import java.util.Map;
31 import java.util.HashMap;
32 import java.util.Random;
33 import java.nio.ByteOrder;
34
35 /*
36 * @test
37 * @bug 8326139 8348659
38 * @summary Test splitting packs in SuperWord
39 * @library /test/lib /
40 * @run driver compiler.loopopts.superword.TestSplitPacks nCOH_nAV_ySAC
41 * @run driver compiler.loopopts.superword.TestSplitPacks nCOH_yAV_ySAC
42 * @run driver compiler.loopopts.superword.TestSplitPacks yCOH_nAV_ySAC
43 * @run driver compiler.loopopts.superword.TestSplitPacks yCOH_yAV_ySAC
44 * @run driver compiler.loopopts.superword.TestSplitPacks nCOH_nAV_nSAC
45 * @run driver compiler.loopopts.superword.TestSplitPacks nCOH_yAV_nSAC
46 * @run driver compiler.loopopts.superword.TestSplitPacks yCOH_nAV_nSAC
47 * @run driver compiler.loopopts.superword.TestSplitPacks yCOH_yAV_nSAC
48 */
49
50 public class TestSplitPacks {
// Length of every generated input array; also the loop bound used by the kernels below.
static int RANGE = 1024*8;
// NOTE(review): same value as RANGE and, despite the name, not declared final.
// No use is visible in this part of the file - confirm before changing.
static int RANGE_FINAL = 1024*8;
// Random source for the input data, obtained from jdk.test.lib.Utils.
private static final Random RANDOM = Utils.getRandomInstance();

// Inputs
// For each element type: two arrays (a*, b*) filled once in the constructor,
// plus a scalar (m*) that the registered tests pass as their mask/val argument.
byte[] aB;
byte[] bB;
byte mB = (byte)31;
short[] aS;
short[] bS;
short mS = (short)0xF0F0;
int[] aI;
int[] bI;
int mI = 0xF0F0F0F0;
long[] aL;
long[] bL;
long mL = 0xF0F0F0F0F0F0F0F0L;

// List of tests
// Maps a test name to a closure that runs that test on fresh copies of the inputs.
Map<String,TestFunction> tests = new HashMap<String,TestFunction>();

// List of gold, the results from the first run before compilation
// Filled in the constructor, keyed by the same names as "tests".
Map<String,Object[]> golds = new HashMap<String,Object[]>();
74
75 interface TestFunction {
76 Object[] run();
77 }
78
79 public static void main(String[] args) {
80 TestFramework framework = new TestFramework(TestSplitPacks.class);
81 framework.addFlags("-XX:+IgnoreUnrecognizedVMOptions", "-XX:LoopUnrollLimit=1000");
82 switch (args[0]) {
83 case "nCOH_nAV_ySAC" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:-UseCompactObjectHeaders", "-XX:-AlignVector", "-XX:+UseAutoVectorizationSpeculativeAliasingChecks"); }
84 case "nCOH_yAV_ySAC" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:-UseCompactObjectHeaders", "-XX:+AlignVector", "-XX:+UseAutoVectorizationSpeculativeAliasingChecks"); }
85 case "yCOH_nAV_ySAC" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:+UseCompactObjectHeaders", "-XX:-AlignVector", "-XX:+UseAutoVectorizationSpeculativeAliasingChecks"); }
86 case "yCOH_yAV_ySAC" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:+UseCompactObjectHeaders", "-XX:+AlignVector", "-XX:+UseAutoVectorizationSpeculativeAliasingChecks"); }
87 case "nCOH_nAV_nSAC" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:-UseCompactObjectHeaders", "-XX:-AlignVector", "-XX:-UseAutoVectorizationSpeculativeAliasingChecks"); }
88 case "nCOH_yAV_nSAC" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:-UseCompactObjectHeaders", "-XX:+AlignVector", "-XX:-UseAutoVectorizationSpeculativeAliasingChecks"); }
89 case "yCOH_nAV_nSAC" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:+UseCompactObjectHeaders", "-XX:-AlignVector", "-XX:-UseAutoVectorizationSpeculativeAliasingChecks"); }
90 case "yCOH_yAV_nSAC" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:+UseCompactObjectHeaders", "-XX:+AlignVector", "-XX:-UseAutoVectorizationSpeculativeAliasingChecks"); }
91 default -> { throw new RuntimeException("Test argument not recognized: " + args[0]); }
92 };
93 framework.start();
94 }
95
96 public TestSplitPacks() {
97 // Generate input once
98 aB = generateB();
99 bB = generateB();
100 aS = generateS();
101 bS = generateS();
102 aI = generateI();
103 bI = generateI();
104 aL = generateL();
105 bL = generateL();
106
107 // Add all tests to list
108 tests.put("test0", () -> { return test0(aI.clone(), bI.clone(), mI); });
109 tests.put("test1a", () -> { return test1a(aI.clone(), bI.clone(), mI); });
110 tests.put("test1b", () -> { return test1b(aI.clone(), bI.clone(), mI); });
111 tests.put("test1c", () -> { return test1c(aI.clone(), bI.clone(), mI); });
112 tests.put("test1d", () -> { return test1d(aI.clone(), bI.clone(), mI); });
113 tests.put("test2a", () -> { return test2a(aI.clone(), bI.clone(), mI); });
114 tests.put("test2b", () -> { return test2b(aI.clone(), bI.clone(), mI); });
115 tests.put("test2c", () -> { return test2c(aI.clone(), bI.clone(), mI); });
116 tests.put("test2d", () -> { return test2d(aI.clone(), bI.clone(), mI); });
117 tests.put("test3a", () -> { return test3a(aS.clone(), bS.clone(), mS); });
118 tests.put("test4a", () -> { return test4a(aS.clone(), bS.clone()); });
119 tests.put("test4b", () -> { return test4b(aS.clone(), bS.clone()); });
120 tests.put("test4c", () -> { return test4c(aS.clone(), bS.clone()); });
121 tests.put("test4d", () -> { return test4d(aS.clone(), bS.clone()); });
122 tests.put("test4e", () -> { return test4e(aS.clone(), bS.clone()); });
123 tests.put("test4f", () -> { return test4f(aS.clone(), bS.clone()); });
124 tests.put("test4g", () -> { return test4g(aS.clone(), bS.clone()); });
125 tests.put("test4a_alias",() -> { short[] x = aS.clone(); return test4a_alias(x, x); });
126 tests.put("test4b_alias",() -> { short[] x = aS.clone(); return test4b_alias(x, x); });
127 tests.put("test4c_alias",() -> { short[] x = aS.clone(); return test4c_alias(x, x); });
128 tests.put("test4d_alias",() -> { short[] x = aS.clone(); return test4d_alias(x, x); });
129 tests.put("test4e_alias",() -> { short[] x = aS.clone(); return test4e_alias(x, x); });
130 tests.put("test4f_alias",() -> { short[] x = aS.clone(); return test4f_alias(x, x); });
131 tests.put("test4g_alias",() -> { short[] x = aS.clone(); return test4g_alias(x, x); });
132 tests.put("test5a", () -> { return test5a(aS.clone(), bS.clone(), mS); });
133 tests.put("test6a", () -> { return test6a(aI.clone(), bI.clone()); });
134 tests.put("test7a", () -> { return test7a(aI.clone(), bI.clone()); });
135
136 // Compute gold value for all test methods before compilation
137 for (Map.Entry<String,TestFunction> entry : tests.entrySet()) {
138 String name = entry.getKey();
139 TestFunction test = entry.getValue();
140 Object[] gold = test.run();
141 golds.put(name, gold);
142 }
143 }
144
145 @Warmup(100)
146 @Run(test = {"test0",
147 "test1a",
148 "test1b",
149 "test1c",
150 "test1d",
151 "test2a",
152 "test2b",
153 "test2c",
154 "test2d",
155 "test3a",
156 "test4a",
157 "test4b",
158 "test4c",
159 "test4d",
160 "test4e",
161 "test4f",
162 "test4g",
163 "test4a_alias",
164 "test4b_alias",
165 "test4c_alias",
166 "test4d_alias",
167 "test4e_alias",
168 "test4f_alias",
169 "test4g_alias",
170 "test5a",
171 "test6a",
172 "test7a"})
173 public void runTests() {
174 for (Map.Entry<String,TestFunction> entry : tests.entrySet()) {
175 String name = entry.getKey();
176 TestFunction test = entry.getValue();
177 // Recall gold value from before compilation
178 Object[] gold = golds.get(name);
179 // Compute new result
180 Object[] result = test.run();
181 // Compare gold and new result
182 verify(name, gold, result);
183 }
184 }
185
186 static byte[] generateB() {
187 byte[] a = new byte[RANGE];
188 for (int i = 0; i < a.length; i++) {
189 a[i] = (byte)RANDOM.nextInt();
190 }
191 return a;
192 }
193
194 static short[] generateS() {
195 short[] a = new short[RANGE];
196 for (int i = 0; i < a.length; i++) {
197 a[i] = (short)RANDOM.nextInt();
198 }
199 return a;
200 }
201
202 static int[] generateI() {
203 int[] a = new int[RANGE];
204 for (int i = 0; i < a.length; i++) {
205 a[i] = RANDOM.nextInt();
206 }
207 return a;
208 }
209
210 static long[] generateL() {
211 long[] a = new long[RANGE];
212 for (int i = 0; i < a.length; i++) {
213 a[i] = RANDOM.nextLong();
214 }
215 return a;
216 }
217
218 static void verify(String name, Object[] gold, Object[] result) {
219 if (gold.length != result.length) {
220 throw new RuntimeException("verify " + name + ": not the same number of outputs: gold.length = " +
221 gold.length + ", result.length = " + result.length);
222 }
223 for (int i = 0; i < gold.length; i++) {
224 Object g = gold[i];
225 Object r = result[i];
226 if (g.getClass() != r.getClass() || !g.getClass().isArray() || !r.getClass().isArray()) {
227 throw new RuntimeException("verify " + name + ": must both be array of same type:" +
228 " gold[" + i + "].getClass() = " + g.getClass().getSimpleName() +
229 " result[" + i + "].getClass() = " + r.getClass().getSimpleName());
230 }
231 if (g == r) {
232 throw new RuntimeException("verify " + name + ": should be two separate arrays (with identical content):" +
233 " gold[" + i + "] == result[" + i + "]");
234 }
235 if (Array.getLength(g) != Array.getLength(r)) {
236 throw new RuntimeException("verify " + name + ": arrays must have same length:" +
237 " gold[" + i + "].length = " + Array.getLength(g) +
238 " result[" + i + "].length = " + Array.getLength(r));
239 }
240 Class c = g.getClass().getComponentType();
241 if (c == byte.class) {
242 verifyB(name, i, (byte[])g, (byte[])r);
243 } else if (c == short.class) {
244 verifyS(name, i, (short[])g, (short[])r);
245 } else if (c == int.class) {
246 verifyI(name, i, (int[])g, (int[])r);
247 } else if (c == long.class) {
248 verifyL(name, i, (long[])g, (long[])r);
249 } else {
250 throw new RuntimeException("verify " + name + ": array type not supported for verify:" +
251 " gold[" + i + "].getClass() = " + g.getClass().getSimpleName() +
252 " result[" + i + "].getClass() = " + r.getClass().getSimpleName());
253 }
254 }
255 }
256
257 static void verifyB(String name, int i, byte[] g, byte[] r) {
258 for (int j = 0; j < g.length; j++) {
259 if (g[j] != r[j]) {
260 throw new RuntimeException("verify " + name + ": arrays must have same content:" +
261 " gold[" + i + "][" + j + "] = " + g[j] +
262 " result[" + i + "][" + j + "] = " + r[j]);
263 }
264 }
265 }
266
267 static void verifyS(String name, int i, short[] g, short[] r) {
268 for (int j = 0; j < g.length; j++) {
269 if (g[j] != r[j]) {
270 throw new RuntimeException("verify " + name + ": arrays must have same content:" +
271 " gold[" + i + "][" + j + "] = " + g[j] +
272 " result[" + i + "][" + j + "] = " + r[j]);
273 }
274 }
275 }
276
277 static void verifyI(String name, int i, int[] g, int[] r) {
278 for (int j = 0; j < g.length; j++) {
279 if (g[j] != r[j]) {
280 throw new RuntimeException("verify " + name + ": arrays must have same content:" +
281 " gold[" + i + "][" + j + "] = " + g[j] +
282 " result[" + i + "][" + j + "] = " + r[j]);
283 }
284 }
285 }
286
287 static void verifyL(String name, int i, long[] g, long[] r) {
288 for (int j = 0; j < g.length; j++) {
289 if (g[j] != r[j]) {
290 throw new RuntimeException("verify " + name + ": arrays must have same content:" +
291 " gold[" + i + "][" + j + "] = " + g[j] +
292 " result[" + i + "][" + j + "] = " + r[j]);
293 }
294 }
295 }
296
@Test
@IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.AND_VI,        IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.AND_VI,        IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIf = {"MaxVectorSize", ">=32"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
// Load and store are already split
//
//  0 1 - - 4 5 6 7
//  | |     | | | |
//  0 1 - - 4 5 6 7
//
// Per group of 8 ints, lanes 0-1 and 4-7 of a are masked and written to the
// same lanes of b; lanes 2-3 are neither read nor written.
// Returns { a, b } for comparison against the gold run.
static Object[] test0(int[] a, int[] b, int mask) {
    for (int i = 0; i < RANGE; i+=8) {
        // All loads happen before the first store of the iteration.
        int b0 = a[i+0] & mask;
        int b1 = a[i+1] & mask;

        int b4 = a[i+4] & mask;
        int b5 = a[i+5] & mask;
        int b6 = a[i+6] & mask;
        int b7 = a[i+7] & mask;

        b[i+0] = b0;
        b[i+1] = b1;

        b[i+4] = b4;
        b[i+5] = b5;
        b[i+6] = b6;
        b[i+7] = b7;
    }
    return new Object[]{ a, b };
}
331
@Test
@IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.ADD_VI,        IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.MUL_VI,        IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIf = {"MaxVectorSize", ">=32"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
// Adjacent Load and Store, but split by Add/Mul
//
// Per group of 8 ints, lanes 0-3 use Add and lanes 4-5 use Mul; lanes 6-7 are
// untouched. The six accesses are contiguous, only the operation differs.
// Returns { a, b } for comparison against the gold run.
static Object[] test1a(int[] a, int[] b, int mask) {
    for (int i = 0; i < RANGE; i+=8) {
        b[i+0] = a[i+0] + mask; // Add
        b[i+1] = a[i+1] + mask;
        b[i+2] = a[i+2] + mask;
        b[i+3] = a[i+3] + mask;

        b[i+4] = a[i+4] * mask; // Mul
        b[i+5] = a[i+5] * mask;
    }
    return new Object[]{ a, b };
}
354
@Test
@IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.ADD_VI,        IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.MUL_VI,        IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIf = {"MaxVectorSize", ">=32"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
// Adjacent Load and Store, but split by Add/Mul
//
// Per group of 8 ints, lanes 0-3 use Mul and lanes 4-5 use Add; lanes 6-7 are
// untouched. Same shape as test1a with the two operations swapped.
// Returns { a, b } for comparison against the gold run.
static Object[] test1b(int[] a, int[] b, int mask) {
    for (int i = 0; i < RANGE; i+=8) {
        b[i+0] = a[i+0] * mask; // Mul
        b[i+1] = a[i+1] * mask;
        b[i+2] = a[i+2] * mask;
        b[i+3] = a[i+3] * mask;

        b[i+4] = a[i+4] + mask; // Add
        b[i+5] = a[i+5] + mask;
    }
    return new Object[]{ a, b };
}
377
@Test
@IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.ADD_VI,        IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.MUL_VI,        IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIf = {"MaxVectorSize", ">=32"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
// Adjacent Load and Store, but split by Add/Mul
//
// Per group of 8 ints, lanes 0-1 use Add and lanes 2-5 use Mul; lanes 6-7 are
// untouched.
// NOTE(review): unlike test1a/test1b, this rule is gated on avx2 rather than
// sse4.1 - confirm this is intended.
// Returns { a, b } for comparison against the gold run.
static Object[] test1c(int[] a, int[] b, int mask) {
    for (int i = 0; i < RANGE; i+=8) {
        b[i+0] = a[i+0] + mask; // Add
        b[i+1] = a[i+1] + mask;

        b[i+2] = a[i+2] * mask; // Mul
        b[i+3] = a[i+3] * mask;
        b[i+4] = a[i+4] * mask;
        b[i+5] = a[i+5] * mask;
    }
    return new Object[]{ a, b };
}
400
@Test
@IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.ADD_VI,        IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.MUL_VI,        IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIf = {"MaxVectorSize", ">=32"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
// Adjacent Load and Store, but split by Add/Mul
//
// Per group of 8 ints, lanes 0-1 use Mul and lanes 2-5 use Add; lanes 6-7 are
// untouched. Same shape as test1c with the two operations swapped.
// NOTE(review): unlike test1a/test1b, this rule is gated on avx2 rather than
// sse4.1 - confirm this is intended.
// Returns { a, b } for comparison against the gold run.
static Object[] test1d(int[] a, int[] b, int mask) {
    for (int i = 0; i < RANGE; i+=8) {
        b[i+0] = a[i+0] * mask; // Mul
        b[i+1] = a[i+1] * mask;

        b[i+2] = a[i+2] + mask; // Add
        b[i+3] = a[i+3] + mask;
        b[i+4] = a[i+4] + mask;
        b[i+5] = a[i+5] + mask;
    }
    return new Object[]{ a, b };
}
423
@Test
@IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.AND_VI,        IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.AND_VI,        IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIf = {"MaxVectorSize", ">=32"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
// Split the load
//
//  0 1 2 3 4 5 - -
//  | |  \ \ \ \
//  | |   \ \ \ \
//  | |    \ \ \ \
//  0 1 - - 4 5 6 7
//
// Six contiguous loads (lanes 0-5) feed a 2-lane store group (0-1) and a
// 4-lane store group (4-7); load lanes 2-5 are shifted by two on the store side.
// Returns { a, b } for comparison against the gold run.
static Object[] test2a(int[] a, int[] b, int mask) {
    for (int i = 0; i < RANGE; i+=8) {
        int b0 = a[i+0] & mask;
        int b1 = a[i+1] & mask;
        int b2 = a[i+2] & mask;
        int b3 = a[i+3] & mask;
        int b4 = a[i+4] & mask;
        int b5 = a[i+5] & mask;

        b[i+0] = b0;
        b[i+1] = b1;

        // Values loaded from lanes 2-5 land in lanes 4-7.
        b[i+4] = b2;
        b[i+5] = b3;
        b[i+6] = b4;
        b[i+7] = b5;
    }
    return new Object[]{ a, b };
}
460
@Test
@IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.AND_VI,        IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.AND_VI,        IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIf = {"MaxVectorSize", ">=32"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
// Split the load
//
//  0 1 2 3 4 5 - -
//  | | | |  \ \
//  | | | |   \ \
//  | | | |    \ \
//  0 1 2 3 - - 6 7
//
// Six contiguous loads (lanes 0-5) feed a 4-lane store group (0-3) and a
// 2-lane store group (6-7); load lanes 4-5 are shifted by two on the store side.
// Returns { a, b } for comparison against the gold run.
static Object[] test2b(int[] a, int[] b, int mask) {
    for (int i = 0; i < RANGE; i+=8) {
        int b0 = a[i+0] & mask;
        int b1 = a[i+1] & mask;
        int b2 = a[i+2] & mask;
        int b3 = a[i+3] & mask;
        int b4 = a[i+4] & mask;
        int b5 = a[i+5] & mask;

        b[i+0] = b0;
        b[i+1] = b1;
        b[i+2] = b2;
        b[i+3] = b3;

        // Values loaded from lanes 4-5 land in lanes 6-7.
        b[i+6] = b4;
        b[i+7] = b5;
    }
    return new Object[]{ a, b };
}
497
@Test
@IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.AND_VI,        IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.AND_VI,        IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIf = {"MaxVectorSize", ">=32"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
// Split the store
//
//  0 1 - - 4 5 6 7
//  | |    / / / /
//  | |   / / / /
//  | |  / / / /
//  0 1 2 3 4 5 - -
//
// The loads already come in two groups (lanes 0-1 and 4-7); the six stores
// (lanes 0-5) are contiguous, so here it is the store side that has to be split.
// Returns { a, b } for comparison against the gold run.
static Object[] test2c(int[] a, int[] b, int mask) {
    for (int i = 0; i < RANGE; i+=8) {
        int b0 = a[i+0] & mask;
        int b1 = a[i+1] & mask;

        int b4 = a[i+4] & mask;
        int b5 = a[i+5] & mask;
        int b6 = a[i+6] & mask;
        int b7 = a[i+7] & mask;

        b[i+0] = b0;
        b[i+1] = b1;
        // Values loaded from lanes 4-7 land in lanes 2-5.
        b[i+2] = b4;
        b[i+3] = b5;
        b[i+4] = b6;
        b[i+5] = b7;
    }
    return new Object[]{ a, b };
}
534
@Test
@IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.AND_VI,        IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.AND_VI,        IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIf = {"MaxVectorSize", ">=32"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
// Split the store
//
//  0 1 2 3 - - 6 7
//  | | | |    / /
//  | | | |   / /
//  | | | |  / /
//  0 1 2 3 4 5 - -
//
// The loads already come in two groups (lanes 0-3 and 6-7); the six stores
// (lanes 0-5) are contiguous, so here it is the store side that has to be split.
// Returns { a, b } for comparison against the gold run.
static Object[] test2d(int[] a, int[] b, int mask) {
    for (int i = 0; i < RANGE; i+=8) {
        int b0 = a[i+0] & mask;
        int b1 = a[i+1] & mask;
        int b2 = a[i+2] & mask;
        int b3 = a[i+3] & mask;

        int b6 = a[i+6] & mask;
        int b7 = a[i+7] & mask;

        b[i+0] = b0;
        b[i+1] = b1;
        b[i+2] = b2;
        b[i+3] = b3;
        // Values loaded from lanes 6-7 land in lanes 4-5.
        b[i+4] = b6;
        b[i+5] = b7;
    }
    return new Object[]{ a, b };
}
571
@Test
@IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIf = {"MaxVectorSize", ">=32"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
//  0 1 2 3 4 5 6 7 -
//  | | | | | | | |
//  | + + + | | | |
//  |       | | | |
//  |     v | | | | v
//  |     | | | | | |
//  0 - - 3 4 5 6 7 8
//
// Top row: lanes loaded from a. Bottom row: lanes stored to b.
// "+" marks loads that are only added into sum, "v" marks stores of val.
// Returns { a, b, { sum } } for comparison against the gold run.
static Object[] test3a(short[] a, short[] b, short val) {
    int sum = 0;
    // Stride is 16 shorts; only offsets 0-8 of each stride are accessed.
    for (int i = 0; i < RANGE; i+=16) {
        short a0 = a[i+0]; // required for alignment / offsets, technical limitation.

        short a1 = a[i+1]; // adjacent to 4-pack, but need to be split off
        short a2 = a[i+2];
        short a3 = a[i+3];

        short a4 = a[i+4]; // 4-pack
        short a5 = a[i+5];
        short a6 = a[i+6];
        short a7 = a[i+7];


        b[i+0] = a0; // required for alignment / offsets, technical limitation.

        sum += a1 + a2 + a3; // not packed

        b[i+3] = val; // adjacent to 4-pack but needs to be split off

        b[i+4] = a4; // 4-pack
        b[i+5] = a5;
        b[i+6] = a6;
        b[i+7] = a7;

        b[i+8] = val; // adjacent to 4-pack but needs to be split off
    }
    return new Object[]{ a, b, new int[]{ sum } };
}
615
@Test
@IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.STORE_VECTOR, "> 0",
              ".*multiversion.*", "= 0"},
    phase = CompilePhase.PRINT_IDEAL,
    applyIf = {"UseAutoVectorizationSpeculativeAliasingChecks", "false"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true"})
// Cyclic dependency with distance 2 -> split into 2-packs
@IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
              IRNode.STORE_VECTOR, "> 0",
              ".*multiversion.*", "= 0"},
    phase = CompilePhase.PRINT_IDEAL,
    applyIfAnd = {"UseAutoVectorizationSpeculativeAliasingChecks", "true", "AlignVector", "false"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true"})
// Speculative aliasing check -> full vectorization.
//
// Copies a[i] to b[i+2]. The harness passes two separate arrays.
// Returns { a, b } for comparison against the gold run.
static Object[] test4a(short[] a, short[] b) {
    // The bound RANGE-64 keeps the shifted store index i+2 inside the array.
    for (int i = 0; i < RANGE-64; i++) {
        b[i+2] = a[i+0];
    }
    return new Object[]{ a, b };
}
639
@Test
@IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.STORE_VECTOR, "> 0",
              ".*multiversion.*", "= 0"},
    phase = CompilePhase.PRINT_IDEAL,
    applyIfAnd = {"AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "false"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true"})
// Cyclic dependency with distance 3 -> split into 2-packs
@IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
              IRNode.STORE_VECTOR, "> 0",
              ".*multiversion.*", "= 0"},
    phase = CompilePhase.PRINT_IDEAL,
    applyIfAnd = {"AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "true"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true"})
// Speculative aliasing check -> full vectorization.
//
// Copies a[i] to b[i+3]. The harness passes two separate arrays.
// Returns { a, b } for comparison against the gold run.
static Object[] test4b(short[] a, short[] b) {
    // The bound RANGE-64 keeps the shifted store index i+3 inside the array.
    for (int i = 0; i < RANGE-64; i++) {
        b[i+3] = a[i+0];
    }
    return new Object[]{ a, b };
}
663
@Test
@IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.STORE_VECTOR, "> 0",
              ".*multiversion.*", "= 0"},
    phase = CompilePhase.PRINT_IDEAL,
    applyIfAnd = {"MaxVectorSize", ">=8", "UseAutoVectorizationSpeculativeAliasingChecks", "false"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
// Cyclic dependency with distance 4 -> split into 4-packs
@IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
              IRNode.STORE_VECTOR, "> 0",
              ".*multiversion.*", "= 0"},
    phase = CompilePhase.PRINT_IDEAL,
    applyIfAnd = {"MaxVectorSize", ">=8", "UseAutoVectorizationSpeculativeAliasingChecks", "true"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
// Speculative aliasing check -> full vectorization.
//
// Copies a[i] to b[i+4]. The harness passes two separate arrays.
// Returns { a, b } for comparison against the gold run.
static Object[] test4c(short[] a, short[] b) {
    // The bound RANGE-64 keeps the shifted store index i+4 inside the array.
    for (int i = 0; i < RANGE-64; i++) {
        b[i+4] = a[i+0];
    }
    return new Object[]{ a, b };
}
687
@Test
@IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.STORE_VECTOR, "> 0",
              ".*multiversion.*", "= 0"},
    phase = CompilePhase.PRINT_IDEAL,
    applyIfAnd = {"MaxVectorSize", ">=8", "AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "false"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
// Cyclic dependency with distance 5 -> split into 4-packs
@IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
              IRNode.STORE_VECTOR, "> 0",
              ".*multiversion.*", "= 0"},
    phase = CompilePhase.PRINT_IDEAL,
    applyIfAnd = {"MaxVectorSize", ">=8", "AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "true"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
// Speculative aliasing check -> full vectorization.
//
// Copies a[i] to b[i+5]. The harness passes two separate arrays.
// Returns { a, b } for comparison against the gold run.
static Object[] test4d(short[] a, short[] b) {
    // The bound RANGE-64 keeps the shifted store index i+5 inside the array.
    for (int i = 0; i < RANGE-64; i++) {
        b[i+5] = a[i+0];
    }
    return new Object[]{ a, b };
}
711
@Test
@IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.STORE_VECTOR, "> 0",
              ".*multiversion.*", "= 0"},
    phase = CompilePhase.PRINT_IDEAL,
    applyIfAnd = {"MaxVectorSize", ">=8", "AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "false"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
// Cyclic dependency with distance 6 -> split into 4-packs
@IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
              IRNode.STORE_VECTOR, "> 0",
              ".*multiversion.*", "= 0"},
    phase = CompilePhase.PRINT_IDEAL,
    applyIfAnd = {"MaxVectorSize", ">=8", "AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "true"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
// Speculative aliasing check -> full vectorization.
//
// Copies a[i] to b[i+6]. The harness passes two separate arrays.
// Returns { a, b } for comparison against the gold run.
static Object[] test4e(short[] a, short[] b) {
    // The bound RANGE-64 keeps the shifted store index i+6 inside the array.
    for (int i = 0; i < RANGE-64; i++) {
        b[i+6] = a[i+0];
    }
    return new Object[]{ a, b };
}
735
@Test
@IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.STORE_VECTOR, "> 0",
              ".*multiversion.*", "= 0"},
    phase = CompilePhase.PRINT_IDEAL,
    applyIfAnd = {"MaxVectorSize", ">=8", "AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "false"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
// Cyclic dependency with distance 7 -> split into 4-packs
@IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
              IRNode.STORE_VECTOR, "> 0",
              ".*multiversion.*", "= 0"},
    phase = CompilePhase.PRINT_IDEAL,
    applyIfAnd = {"MaxVectorSize", ">=8", "AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "true"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
// Speculative aliasing check -> full vectorization.
//
// Copies a[i] to b[i+7]. The harness passes two separate arrays.
// Returns { a, b } for comparison against the gold run.
static Object[] test4f(short[] a, short[] b) {
    // The bound RANGE-64 keeps the shifted store index i+7 inside the array.
    for (int i = 0; i < RANGE-64; i++) {
        b[i+7] = a[i+0];
    }
    return new Object[]{ a, b };
}
759
@Test
@IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_8, "> 0",
              IRNode.STORE_VECTOR, "> 0",
              ".*multiversion.*", "= 0"},
    phase = CompilePhase.PRINT_IDEAL,
    applyIfAnd = {"MaxVectorSize", ">=32", "UseAutoVectorizationSpeculativeAliasingChecks", "false"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
// Cyclic dependency with distance 8 -> split into 8-packs
@IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
              IRNode.STORE_VECTOR, "> 0",
              ".*multiversion.*", "= 0"},
    phase = CompilePhase.PRINT_IDEAL,
    applyIfAnd = {"MaxVectorSize", ">=32", "UseAutoVectorizationSpeculativeAliasingChecks", "true"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
// Speculative aliasing check -> full vectorization.
//
// Copies a[i] to b[i+8]. The harness passes two separate arrays.
// Returns { a, b } for comparison against the gold run.
static Object[] test4g(short[] a, short[] b) {
    // The bound RANGE-64 keeps the shifted store index i+8 inside the array.
    for (int i = 0; i < RANGE-64; i++) {
        b[i+8] = a[i+0];
    }
    return new Object[]{ a, b };
}
783
@Test
@IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.STORE_VECTOR, "> 0",
              ".*multiversion.*", "= 0"},
    phase = CompilePhase.PRINT_IDEAL,
    applyIf = {"UseAutoVectorizationSpeculativeAliasingChecks", "false"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true"})
// Cyclic dependency with distance 2 -> split into 2-packs
@IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
              IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.STORE_VECTOR, "> 0",
              ".*multiversion.*", "> 0"},
    phase = CompilePhase.PRINT_IDEAL,
    applyIfAnd = {"UseAutoVectorizationSpeculativeAliasingChecks", "true", "AlignVector", "false"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true"})
// Speculative aliasing check with multiversioning -> full vectorization & split packs.
//
// Same loop as test4a, but the harness passes the same array for a and b,
// so the store to b[i+2] feeds the load of a two iterations later.
// Returns { a, b } for comparison against the gold run.
static Object[] test4a_alias(short[] a, short[] b) {
    // The bound RANGE-64 keeps the shifted store index i+2 inside the array.
    for (int i = 0; i < RANGE-64; i++) {
        b[i+2] = a[i+0];
    }
    return new Object[]{ a, b };
}
808
@Test
@IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.STORE_VECTOR, "> 0",
              ".*multiversion.*", "= 0"},
    phase = CompilePhase.PRINT_IDEAL,
    applyIfAnd = {"AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "false"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true"})
// Cyclic dependency with distance 3 -> split into 2-packs
@IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
              IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.STORE_VECTOR, "> 0",
              ".*multiversion.*", "> 0"},
    phase = CompilePhase.PRINT_IDEAL,
    applyIfAnd = {"AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "true"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true"})
// Speculative aliasing check with multiversioning -> full vectorization & split packs.
//
// Same loop as test4b, but the harness passes the same array for a and b,
// so the store to b[i+3] feeds the load of a three iterations later.
// Returns { a, b } for comparison against the gold run.
static Object[] test4b_alias(short[] a, short[] b) {
    // The bound RANGE-64 keeps the shifted store index i+3 inside the array.
    for (int i = 0; i < RANGE-64; i++) {
        b[i+3] = a[i+0];
    }
    return new Object[]{ a, b };
}
833
@Test
@IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.STORE_VECTOR, "> 0",
              ".*multiversion.*", "= 0"},
    phase = CompilePhase.PRINT_IDEAL,
    applyIfAnd = {"MaxVectorSize", ">=8", "UseAutoVectorizationSpeculativeAliasingChecks", "false"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
// Cyclic dependency with distance 4 -> split into 4-packs
@IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
              IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.STORE_VECTOR, "> 0",
              ".*multiversion.*", "> 0"},
    phase = CompilePhase.PRINT_IDEAL,
    applyIfAnd = {"MaxVectorSize", ">=8", "UseAutoVectorizationSpeculativeAliasingChecks", "true"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
// Speculative aliasing check with multiversioning -> full vectorization & split packs.
//
// Same loop as test4c, but the harness passes the same array for a and b,
// so the store to b[i+4] feeds the load of a four iterations later.
// Returns { a, b } for comparison against the gold run.
static Object[] test4c_alias(short[] a, short[] b) {
    // The bound RANGE-64 keeps the shifted store index i+4 inside the array.
    for (int i = 0; i < RANGE-64; i++) {
        b[i+4] = a[i+0];
    }
    return new Object[]{ a, b };
}
858
// Shifted copy of shorts: b[i+5] = a[i] for every i in [0, RANGE-64); returns { a, b }.
// NOTE(review): the "_alias" suffix and the "cyclic dependency" remarks below suggest the
// caller may pass overlapping arrays for a and b -- confirm against the test setup.
// Both @IR rules require MaxVectorSize >= 8 and AlignVector off; they differ only in
// UseAutoVectorizationSpeculativeAliasingChecks: with the flag off, no ".*multiversion.*"
// match may appear ("= 0"); with it on, such a match must appear ("> 0").
859 @Test
860 @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
861 IRNode.STORE_VECTOR, "> 0",
862 ".*multiversion.*", "= 0"},
863 phase = CompilePhase.PRINT_IDEAL,
864 applyIfAnd = {"MaxVectorSize", ">=8", "AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "false"},
865 applyIfPlatform = {"64-bit", "true"},
866 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
867 // Cyclic dependency with distance 5 -> split into 4-packs
868 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
869 IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
870 IRNode.STORE_VECTOR, "> 0",
871 ".*multiversion.*", "> 0"},
872 phase = CompilePhase.PRINT_IDEAL,
873 applyIfAnd = {"MaxVectorSize", ">=8", "AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "true"},
874 applyIfPlatform = {"64-bit", "true"},
875 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
876 // Speculative aliasing check with multiversioning -> full vectorization & split packs.
877 static Object[] test4d_alias(short[] a, short[] b) {
// The bound is RANGE-64 rather than RANGE; presumably this keeps b[i+5] in range -- TODO confirm array lengths.
878 for (int i = 0; i < RANGE-64; i++) {
879 b[i+5] = a[i+0]; // the store runs 5 elements ahead of the load
880 }
881 return new Object[]{ a, b };
882 }
883
// Shifted copy of shorts: b[i+6] = a[i] for every i in [0, RANGE-64); returns { a, b }.
// NOTE(review): the "_alias" suffix and the "cyclic dependency" remarks below suggest the
// caller may pass overlapping arrays for a and b -- confirm against the test setup.
// Both @IR rules require MaxVectorSize >= 8 and AlignVector off; they differ only in
// UseAutoVectorizationSpeculativeAliasingChecks: with the flag off, no ".*multiversion.*"
// match may appear ("= 0"); with it on, such a match must appear ("> 0").
884 @Test
885 @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
886 IRNode.STORE_VECTOR, "> 0",
887 ".*multiversion.*", "= 0"},
888 phase = CompilePhase.PRINT_IDEAL,
889 applyIfAnd = {"MaxVectorSize", ">=8", "AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "false"},
890 applyIfPlatform = {"64-bit", "true"},
891 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
892 // Cyclic dependency with distance 6 -> split into 4-packs
893 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
894 IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
895 IRNode.STORE_VECTOR, "> 0",
896 ".*multiversion.*", "> 0"},
897 phase = CompilePhase.PRINT_IDEAL,
898 applyIfAnd = {"MaxVectorSize", ">=8", "AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "true"},
899 applyIfPlatform = {"64-bit", "true"},
900 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
901 // Speculative aliasing check with multiversioning -> full vectorization & split packs.
902 static Object[] test4e_alias(short[] a, short[] b) {
// The bound is RANGE-64 rather than RANGE; presumably this keeps b[i+6] in range -- TODO confirm array lengths.
903 for (int i = 0; i < RANGE-64; i++) {
904 b[i+6] = a[i+0]; // the store runs 6 elements ahead of the load
905 }
906 return new Object[]{ a, b };
907 }
908
// Shifted copy of shorts: b[i+7] = a[i] for every i in [0, RANGE-64); returns { a, b }.
// NOTE(review): the "_alias" suffix and the "cyclic dependency" remarks below suggest the
// caller may pass overlapping arrays for a and b -- confirm against the test setup.
// Both @IR rules require MaxVectorSize >= 8 and AlignVector off; they differ only in
// UseAutoVectorizationSpeculativeAliasingChecks: with the flag off, no ".*multiversion.*"
// match may appear ("= 0"); with it on, such a match must appear ("> 0").
909 @Test
910 @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
911 IRNode.STORE_VECTOR, "> 0",
912 ".*multiversion.*", "= 0"},
913 phase = CompilePhase.PRINT_IDEAL,
914 applyIfAnd = {"MaxVectorSize", ">=8", "AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "false"},
915 applyIfPlatform = {"64-bit", "true"},
916 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
917 // Cyclic dependency with distance 7 -> split into 4-packs
918 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
919 IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
920 IRNode.STORE_VECTOR, "> 0",
921 ".*multiversion.*", "> 0"},
922 phase = CompilePhase.PRINT_IDEAL,
923 applyIfAnd = {"MaxVectorSize", ">=8", "AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "true"},
924 applyIfPlatform = {"64-bit", "true"},
925 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
926 // Speculative aliasing check with multiversioning -> full vectorization & split packs.
927 static Object[] test4f_alias(short[] a, short[] b) {
// The bound is RANGE-64 rather than RANGE; presumably this keeps b[i+7] in range -- TODO confirm array lengths.
928 for (int i = 0; i < RANGE-64; i++) {
929 b[i+7] = a[i+0]; // the store runs 7 elements ahead of the load
930 }
931 return new Object[]{ a, b };
932 }
933
// Shifted copy of shorts: b[i+8] = a[i] for every i in [0, RANGE-64); returns { a, b }.
// NOTE(review): the "_alias" suffix and the "cyclic dependency" remarks below suggest the
// caller may pass overlapping arrays for a and b -- confirm against the test setup.
// Both @IR rules require MaxVectorSize >= 32 and look for 8-element short load vectors; they
// differ in UseAutoVectorizationSpeculativeAliasingChecks: with the flag off, no
// ".*multiversion.*" match may appear ("= 0"); with it on, such a match must appear ("> 0").
934 @Test
935 @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_8, "> 0",
936 IRNode.STORE_VECTOR, "> 0",
937 ".*multiversion.*", "= 0"},
938 phase = CompilePhase.PRINT_IDEAL,
939 applyIfAnd = {"MaxVectorSize", ">=32", "UseAutoVectorizationSpeculativeAliasingChecks", "false"},
940 applyIfPlatform = {"64-bit", "true"},
941 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
942 // Cyclic dependency with distance 8 -> split into 8-packs
943 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
944 IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_8, "> 0",
945 IRNode.STORE_VECTOR, "> 0",
946 ".*multiversion.*", "> 0"},
947 phase = CompilePhase.PRINT_IDEAL,
948 applyIfAnd = {"MaxVectorSize", ">=32", "UseAutoVectorizationSpeculativeAliasingChecks", "true"},
949 applyIfPlatform = {"64-bit", "true"},
950 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
951 // Speculative aliasing check with multiversioning -> full vectorization & split packs.
// NOTE(review): this rule is gated on "avx2" while the rule above is gated on "sse4.1";
// confirm whether the stricter x86 requirement is intentional or should match the first rule.
952 static Object[] test4g_alias(short[] a, short[] b) {
// The bound is RANGE-64 rather than RANGE; presumably this keeps b[i+8] in range -- TODO confirm array lengths.
953 for (int i = 0; i < RANGE-64; i++) {
954 b[i+8] = a[i+0]; // the store runs 8 elements ahead of the load
955 }
956 return new Object[]{ a, b };
957 }
958
// Adds val to 15 consecutive shorts per iteration: b[i+k] = (short)(a[i+k] + val) for k = 0..14,
// with i advancing in steps of 16 over [0, RANGE). Index i+15 is neither read nor written.
// Returns { a, b }.
// The first @IR rule (sse4.1) expects short load/add vectors of 2, 4 and 8 elements; the second
// (sve) expects only the 4- and 8-element ones. Both require MaxVectorSize >= 32 and AlignVector off.
959 @Test
960 @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_2, "> 0",
961 IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
962 IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_8, "> 0",
963 IRNode.ADD_VS, IRNode.VECTOR_SIZE_2, "> 0",
964 IRNode.ADD_VS, IRNode.VECTOR_SIZE_8, "> 0",
965 IRNode.ADD_VS, IRNode.VECTOR_SIZE_4, "> 0",
966 IRNode.STORE_VECTOR, "> 0"},
967 applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"},
968 applyIfPlatform = {"64-bit", "true"},
969 applyIfCPUFeature = {"sse4.1", "true"})
970 // aarch64 limits minimum vector size to 8B, thus a vector size of
971 // length 2 for type "short" will not be generated
972 @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
973 IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_8, "> 0",
974 IRNode.ADD_VS, IRNode.VECTOR_SIZE_8, "> 0",
975 IRNode.ADD_VS, IRNode.VECTOR_SIZE_4, "> 0",
976 IRNode.STORE_VECTOR, "> 0"},
977 applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"},
978 applyIfPlatform = {"64-bit", "true"},
979 applyIfCPUFeature = {"sve", "true"})
980 // Split pack into power-of-2 sizes
981 static Object[] test5a(short[] a, short[] b, short val) {
982 for (int i = 0; i < RANGE; i+=16) {
983 b[i+ 0] = (short)(a[i+ 0] + val); // 8-pack
984 b[i+ 1] = (short)(a[i+ 1] + val);
985 b[i+ 2] = (short)(a[i+ 2] + val);
986 b[i+ 3] = (short)(a[i+ 3] + val);
987 b[i+ 4] = (short)(a[i+ 4] + val);
988 b[i+ 5] = (short)(a[i+ 5] + val);
989 b[i+ 6] = (short)(a[i+ 6] + val);
990 b[i+ 7] = (short)(a[i+ 7] + val);
991
992 b[i+ 8] = (short)(a[i+ 8] + val); // 4-pack
993 b[i+ 9] = (short)(a[i+ 9] + val);
994 b[i+10] = (short)(a[i+10] + val);
995 b[i+11] = (short)(a[i+11] + val);
996
997 b[i+12] = (short)(a[i+12] + val); // 2-pack
998 b[i+13] = (short)(a[i+13] + val);
999
1000 b[i+14] = (short)(a[i+14] + val); // leftover single element; index i+15 is not touched
1001 }
1002 return new Object[]{ a, b };
1003 }
1004
// Accumulates into a single int s, 8 elements per iteration (i advances by 8 over [0, RANGE)):
// the first four lanes contribute a[i+k] * b[i+k], the last four contribute a[i+k] & b[i+k].
// Returns { a, b, { s } }; the sum is wrapped in a one-element int[] so it is part of the result.
// The @IR rule expects 4-element int load, multiply, and, and add vectors plus an add reduction.
1005 @Test
1006 @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
1007 IRNode.MUL_VI, IRNode.VECTOR_SIZE_4, "> 0",
1008 IRNode.AND_VI, IRNode.VECTOR_SIZE_4, "> 0",
1009 IRNode.ADD_VI, IRNode.VECTOR_SIZE_4, "> 0", // reduction moved out of loop
1010 IRNode.ADD_REDUCTION_V, "> 0"},
1011 applyIf = {"MaxVectorSize", ">=32"},
1012 applyIfPlatform = {"64-bit", "true"},
1013 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
1014 // Split packs including reductions
1015 static Object[] test6a(int[] a, int[] b) {
1016 int s = 0;
1017 for (int i = 0; i < RANGE; i+=8) {
// Lanes 0..3: multiply, then add into s.
1018 s += a[i+0] * b[i+0];
1019 s += a[i+1] * b[i+1];
1020 s += a[i+2] * b[i+2];
1021 s += a[i+3] * b[i+3];
1022
// Lanes 4..7: bitwise AND, then add into s.
1023 s += a[i+4] & b[i+4];
1024 s += a[i+5] & b[i+5];
1025 s += a[i+6] & b[i+6];
1026 s += a[i+7] & b[i+7];
1027 }
1028 return new Object[]{ a, b, new int[]{ s } };
1029 }
1030
// Stores a[i] = b[i] * i for every i in [0, RANGE), i.e. each element is scaled by its own
// index; returns { a, b }.
// The @IR rule (avx2, sve or rvv) expects int load vectors, int multiply vectors and a
// POPULATE_INDEX node.
1031 @Test
1032 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
1033 IRNode.MUL_VI, "> 0",
1034 IRNode.POPULATE_INDEX, "> 0"},
1035 applyIfPlatform = {"64-bit", "true"},
1036 applyIfCPUFeatureOr = {"avx2", "true", "sve", "true", "rvv", "true"})
1037 // Index Populate:
1038 // There can be an issue when all the (iv + 1), (iv + 2), ...
1039 // get packed, but not (iv). Then we have a pack that is one element
1040 // too short, and we start splitting everything in a bad way.
1041 static Object[] test7a(int[] a, int[] b) {
1042 for (int i = 0; i < RANGE; i++) {
1043 a[i] = b[i] * i; // the loop index itself is an operand of the multiply
1044 }
1045 return new Object[]{ a, b };
1046 }
1047 }