1 /*
2 * Copyright (c) 2024, 2025, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 */
23
24 package compiler.loopopts.superword;
25
26 import compiler.lib.ir_framework.*;
27 import jdk.test.lib.Utils;
28 import jdk.test.whitebox.WhiteBox;
29 import java.lang.reflect.Array;
30 import java.util.Map;
31 import java.util.HashMap;
32 import java.util.Random;
33 import java.nio.ByteOrder;
34
35 /*
36 * @test
37 * @bug 8326139 8348659
38 * @key randomness
39 * @summary Test splitting packs in SuperWord
40 * @library /test/lib /
41 * @run driver compiler.loopopts.superword.TestSplitPacks nCOH_nAV_ySAC
42 * @run driver compiler.loopopts.superword.TestSplitPacks nCOH_yAV_ySAC
43 * @run driver compiler.loopopts.superword.TestSplitPacks yCOH_nAV_ySAC
44 * @run driver compiler.loopopts.superword.TestSplitPacks yCOH_yAV_ySAC
45 * @run driver compiler.loopopts.superword.TestSplitPacks nCOH_nAV_nSAC
46 * @run driver compiler.loopopts.superword.TestSplitPacks nCOH_yAV_nSAC
47 * @run driver compiler.loopopts.superword.TestSplitPacks yCOH_nAV_nSAC
48 * @run driver compiler.loopopts.superword.TestSplitPacks yCOH_yAV_nSAC
49 */
50
51 public class TestSplitPacks {
// Length of every generated input array and upper bound of the test loops.
static int RANGE = 8 * 1024;
static int RANGE_FINAL = 8 * 1024;
private static final Random RANDOM = Utils.getRandomInstance();

// Inputs: two arrays per element type plus one scalar mask/value per type.
byte[] aB, bB;
byte mB = (byte) 31;
short[] aS, bS;
short mS = (short) 0xF0F0;
int[] aI, bI;
int mI = 0xF0F0F0F0;
long[] aL, bL;
long mL = 0xF0F0F0F0F0F0F0F0L;

// Test name -> closure that invokes the test method
Map<String, TestFunction> tests = new HashMap<>();

// Test name -> gold result, i.e. the output of the first run before compilation
Map<String, Object[]> golds = new HashMap<>();
75
76 interface TestFunction {
77 Object[] run();
78 }
79
80 public static void main(String[] args) {
81 TestFramework framework = new TestFramework(TestSplitPacks.class);
82 framework.addFlags("-XX:+IgnoreUnrecognizedVMOptions", "-XX:LoopUnrollLimit=1000");
83 switch (args[0]) {
84 case "nCOH_nAV_ySAC" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:-UseCompactObjectHeaders", "-XX:-AlignVector", "-XX:+UseAutoVectorizationSpeculativeAliasingChecks"); }
85 case "nCOH_yAV_ySAC" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:-UseCompactObjectHeaders", "-XX:+AlignVector", "-XX:+UseAutoVectorizationSpeculativeAliasingChecks"); }
86 case "yCOH_nAV_ySAC" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:+UseCompactObjectHeaders", "-XX:-AlignVector", "-XX:+UseAutoVectorizationSpeculativeAliasingChecks"); }
87 case "yCOH_yAV_ySAC" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:+UseCompactObjectHeaders", "-XX:+AlignVector", "-XX:+UseAutoVectorizationSpeculativeAliasingChecks"); }
88 case "nCOH_nAV_nSAC" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:-UseCompactObjectHeaders", "-XX:-AlignVector", "-XX:-UseAutoVectorizationSpeculativeAliasingChecks"); }
89 case "nCOH_yAV_nSAC" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:-UseCompactObjectHeaders", "-XX:+AlignVector", "-XX:-UseAutoVectorizationSpeculativeAliasingChecks"); }
90 case "yCOH_nAV_nSAC" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:+UseCompactObjectHeaders", "-XX:-AlignVector", "-XX:-UseAutoVectorizationSpeculativeAliasingChecks"); }
91 case "yCOH_yAV_nSAC" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:+UseCompactObjectHeaders", "-XX:+AlignVector", "-XX:-UseAutoVectorizationSpeculativeAliasingChecks"); }
92 default -> { throw new RuntimeException("Test argument not recognized: " + args[0]); }
93 };
94 framework.start();
95 }
96
// Builds the random inputs, registers every test under its method name, and records the
// gold result of each test by running it once right away.
public TestSplitPacks() {
    // Inputs are generated a single time; the order of these calls fixes the random sequence.
    aB = generateB();
    bB = generateB();
    aS = generateS();
    bS = generateS();
    aI = generateI();
    bI = generateI();
    aL = generateL();
    bL = generateL();

    // Each closure works on fresh clones so that no run can modify the shared inputs.
    tests.put("test0", () -> test0(aI.clone(), bI.clone(), mI));
    tests.put("test1a", () -> test1a(aI.clone(), bI.clone(), mI));
    tests.put("test1b", () -> test1b(aI.clone(), bI.clone(), mI));
    tests.put("test1c", () -> test1c(aI.clone(), bI.clone(), mI));
    tests.put("test1d", () -> test1d(aI.clone(), bI.clone(), mI));
    tests.put("test2a", () -> test2a(aI.clone(), bI.clone(), mI));
    tests.put("test2b", () -> test2b(aI.clone(), bI.clone(), mI));
    tests.put("test2c", () -> test2c(aI.clone(), bI.clone(), mI));
    tests.put("test2d", () -> test2d(aI.clone(), bI.clone(), mI));
    tests.put("test3a", () -> test3a(aS.clone(), bS.clone(), mS));
    tests.put("test4a", () -> test4a(aS.clone(), bS.clone()));
    tests.put("test4b", () -> test4b(aS.clone(), bS.clone()));
    tests.put("test4c", () -> test4c(aS.clone(), bS.clone()));
    tests.put("test4d", () -> test4d(aS.clone(), bS.clone()));
    tests.put("test4e", () -> test4e(aS.clone(), bS.clone()));
    tests.put("test4f", () -> test4f(aS.clone(), bS.clone()));
    tests.put("test4g", () -> test4g(aS.clone(), bS.clone()));
    // The *_alias variants receive the very same array for both parameters.
    tests.put("test4a_alias", () -> { short[] shared = aS.clone(); return test4a_alias(shared, shared); });
    tests.put("test4b_alias", () -> { short[] shared = aS.clone(); return test4b_alias(shared, shared); });
    tests.put("test4c_alias", () -> { short[] shared = aS.clone(); return test4c_alias(shared, shared); });
    tests.put("test4d_alias", () -> { short[] shared = aS.clone(); return test4d_alias(shared, shared); });
    tests.put("test4e_alias", () -> { short[] shared = aS.clone(); return test4e_alias(shared, shared); });
    tests.put("test4f_alias", () -> { short[] shared = aS.clone(); return test4f_alias(shared, shared); });
    tests.put("test4g_alias", () -> { short[] shared = aS.clone(); return test4g_alias(shared, shared); });
    tests.put("test5a", () -> test5a(aS.clone(), bS.clone(), mS));
    tests.put("test6a", () -> test6a(aI.clone(), bI.clone()));
    tests.put("test7a", () -> test7a(aI.clone(), bI.clone()));

    // Compute gold value for all test methods before compilation
    tests.forEach((name, test) -> golds.put(name, test.run()));
}
145
146 @Warmup(100)
147 @Run(test = {"test0",
148 "test1a",
149 "test1b",
150 "test1c",
151 "test1d",
152 "test2a",
153 "test2b",
154 "test2c",
155 "test2d",
156 "test3a",
157 "test4a",
158 "test4b",
159 "test4c",
160 "test4d",
161 "test4e",
162 "test4f",
163 "test4g",
164 "test4a_alias",
165 "test4b_alias",
166 "test4c_alias",
167 "test4d_alias",
168 "test4e_alias",
169 "test4f_alias",
170 "test4g_alias",
171 "test5a",
172 "test6a",
173 "test7a"})
174 public void runTests() {
175 for (Map.Entry<String,TestFunction> entry : tests.entrySet()) {
176 String name = entry.getKey();
177 TestFunction test = entry.getValue();
178 // Recall gold value from before compilation
179 Object[] gold = golds.get(name);
180 // Compute new result
181 Object[] result = test.run();
182 // Compare gold and new result
183 verify(name, gold, result);
184 }
185 }
186
187 static byte[] generateB() {
188 byte[] a = new byte[RANGE];
189 for (int i = 0; i < a.length; i++) {
190 a[i] = (byte)RANDOM.nextInt();
191 }
192 return a;
193 }
194
195 static short[] generateS() {
196 short[] a = new short[RANGE];
197 for (int i = 0; i < a.length; i++) {
198 a[i] = (short)RANDOM.nextInt();
199 }
200 return a;
201 }
202
203 static int[] generateI() {
204 int[] a = new int[RANGE];
205 for (int i = 0; i < a.length; i++) {
206 a[i] = RANDOM.nextInt();
207 }
208 return a;
209 }
210
211 static long[] generateL() {
212 long[] a = new long[RANGE];
213 for (int i = 0; i < a.length; i++) {
214 a[i] = RANDOM.nextLong();
215 }
216 return a;
217 }
218
219 static void verify(String name, Object[] gold, Object[] result) {
220 if (gold.length != result.length) {
221 throw new RuntimeException("verify " + name + ": not the same number of outputs: gold.length = " +
222 gold.length + ", result.length = " + result.length);
223 }
224 for (int i = 0; i < gold.length; i++) {
225 Object g = gold[i];
226 Object r = result[i];
227 if (g.getClass() != r.getClass() || !g.getClass().isArray() || !r.getClass().isArray()) {
228 throw new RuntimeException("verify " + name + ": must both be array of same type:" +
229 " gold[" + i + "].getClass() = " + g.getClass().getSimpleName() +
230 " result[" + i + "].getClass() = " + r.getClass().getSimpleName());
231 }
232 if (g == r) {
233 throw new RuntimeException("verify " + name + ": should be two separate arrays (with identical content):" +
234 " gold[" + i + "] == result[" + i + "]");
235 }
236 if (Array.getLength(g) != Array.getLength(r)) {
237 throw new RuntimeException("verify " + name + ": arrays must have same length:" +
238 " gold[" + i + "].length = " + Array.getLength(g) +
239 " result[" + i + "].length = " + Array.getLength(r));
240 }
241 Class c = g.getClass().getComponentType();
242 if (c == byte.class) {
243 verifyB(name, i, (byte[])g, (byte[])r);
244 } else if (c == short.class) {
245 verifyS(name, i, (short[])g, (short[])r);
246 } else if (c == int.class) {
247 verifyI(name, i, (int[])g, (int[])r);
248 } else if (c == long.class) {
249 verifyL(name, i, (long[])g, (long[])r);
250 } else {
251 throw new RuntimeException("verify " + name + ": array type not supported for verify:" +
252 " gold[" + i + "].getClass() = " + g.getClass().getSimpleName() +
253 " result[" + i + "].getClass() = " + r.getClass().getSimpleName());
254 }
255 }
256 }
257
258 static void verifyB(String name, int i, byte[] g, byte[] r) {
259 for (int j = 0; j < g.length; j++) {
260 if (g[j] != r[j]) {
261 throw new RuntimeException("verify " + name + ": arrays must have same content:" +
262 " gold[" + i + "][" + j + "] = " + g[j] +
263 " result[" + i + "][" + j + "] = " + r[j]);
264 }
265 }
266 }
267
268 static void verifyS(String name, int i, short[] g, short[] r) {
269 for (int j = 0; j < g.length; j++) {
270 if (g[j] != r[j]) {
271 throw new RuntimeException("verify " + name + ": arrays must have same content:" +
272 " gold[" + i + "][" + j + "] = " + g[j] +
273 " result[" + i + "][" + j + "] = " + r[j]);
274 }
275 }
276 }
277
278 static void verifyI(String name, int i, int[] g, int[] r) {
279 for (int j = 0; j < g.length; j++) {
280 if (g[j] != r[j]) {
281 throw new RuntimeException("verify " + name + ": arrays must have same content:" +
282 " gold[" + i + "][" + j + "] = " + g[j] +
283 " result[" + i + "][" + j + "] = " + r[j]);
284 }
285 }
286 }
287
288 static void verifyL(String name, int i, long[] g, long[] r) {
289 for (int j = 0; j < g.length; j++) {
290 if (g[j] != r[j]) {
291 throw new RuntimeException("verify " + name + ": arrays must have same content:" +
292 " gold[" + i + "][" + j + "] = " + g[j] +
293 " result[" + i + "][" + j + "] = " + r[j]);
294 }
295 }
296 }
297
@Test
@IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.AND_VI, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.AND_VI, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
@IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.AND_VI, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.AND_VI, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIfAnd = {"MaxVectorSize", ">=32", "UseCompactObjectHeaders", "false"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
// The two IR rules are identical except for their guard (AlignVector=false vs.
// UseCompactObjectHeaders=false). Together they skip only AlignVector=true combined with
// UseCompactObjectHeaders=true, the case the note at the end of the loop body explains.
//
// Load and store are already split
//
//  0 1 - - 4 5 6 7
//  | |     | | | |
//  0 1 - - 4 5 6 7
//
// Per stride of 8 ints: a[i+0..1] and a[i+4..7] are ANDed with mask and written to the same
// offsets of b; offsets 2 and 3 are neither read nor written. Returns { a, b }.
// NOTE(review): all loads precede all stores; presumably this order matters for the
// packs the IR rules expect -- confirm before reordering.
static Object[] test0(int[] a, int[] b, int mask) {
    for (int i = 0; i < RANGE; i+=8) {
        int b0 = a[i+0] & mask;
        int b1 = a[i+1] & mask;

        int b4 = a[i+4] & mask;
        int b5 = a[i+5] & mask;
        int b6 = a[i+6] & mask;
        int b7 = a[i+7] & mask;

        b[i+0] = b0;
        b[i+1] = b1;

        b[i+4] = b4;
        b[i+5] = b5;
        b[i+6] = b6;
        b[i+7] = b7;
        // With AlignVector, we need 8-byte alignment of vector loads/stores.
        // UseCompactObjectHeaders=false                 UseCompactObjectHeaders=true
        // adr = base + 16 + 32*i  ->  always            adr = base + 12 + 32*i  ->  never
        // -> vectorize                                  -> no vectorization
    }
    return new Object[]{ a, b };
}
344
@Test
@IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.ADD_VI, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.MUL_VI, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
@IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.ADD_VI, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.MUL_VI, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIfAnd = {"MaxVectorSize", ">=32", "UseCompactObjectHeaders", "false"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
// The two IR rules differ only in their guard (AlignVector=false vs.
// UseCompactObjectHeaders=false); see the alignment note at the end of the loop body.
//
// Adjacent Load and Store, but split by Add/Mul
//
// Per stride of 8 ints: offsets 0..3 get a[..] + mask, offsets 4..5 get a[..] * mask;
// offsets 6 and 7 are untouched. The rules expect a 4-element add and a 2-element multiply.
// Returns { a, b }.
static Object[] test1a(int[] a, int[] b, int mask) {
    for (int i = 0; i < RANGE; i+=8) {
        b[i+0] = a[i+0] + mask; // Add
        b[i+1] = a[i+1] + mask;
        b[i+2] = a[i+2] + mask;
        b[i+3] = a[i+3] + mask;

        b[i+4] = a[i+4] * mask; // Mul
        b[i+5] = a[i+5] * mask;
        // With AlignVector, we need 8-byte alignment of vector loads/stores.
        // UseCompactObjectHeaders=false                 UseCompactObjectHeaders=true
        // adr = base + 16 + 32*i  ->  always            adr = base + 12 + 32*i  ->  never
        // -> vectorize                                  -> no vectorization
    }
    return new Object[]{ a, b };
}
379
@Test
@IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.ADD_VI, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.MUL_VI, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
@IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.ADD_VI, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.MUL_VI, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIfAnd = {"MaxVectorSize", ">=32", "UseCompactObjectHeaders", "false"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
// The two IR rules differ only in their guard (AlignVector=false vs.
// UseCompactObjectHeaders=false); see the alignment note at the end of the loop body.
//
// Adjacent Load and Store, but split by Add/Mul
//
// Per stride of 8 ints: offsets 0..3 get a[..] * mask, offsets 4..5 get a[..] + mask;
// offsets 6 and 7 are untouched. The rules expect a 4-element multiply and a 2-element add.
// Returns { a, b }.
static Object[] test1b(int[] a, int[] b, int mask) {
    for (int i = 0; i < RANGE; i+=8) {
        b[i+0] = a[i+0] * mask; // Mul
        b[i+1] = a[i+1] * mask;
        b[i+2] = a[i+2] * mask;
        b[i+3] = a[i+3] * mask;

        b[i+4] = a[i+4] + mask; // Add
        b[i+5] = a[i+5] + mask;
        // With AlignVector, we need 8-byte alignment of vector loads/stores.
        // UseCompactObjectHeaders=false                 UseCompactObjectHeaders=true
        // adr = base + 16 + 32*i  ->  always            adr = base + 12 + 32*i  ->  never
        // -> vectorize                                  -> no vectorization
    }
    return new Object[]{ a, b };
}
414
@Test
@IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.ADD_VI, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.MUL_VI, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
@IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.ADD_VI, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.MUL_VI, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIfAnd = {"MaxVectorSize", ">=32", "UseCompactObjectHeaders", "false"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
// The two IR rules differ only in their guard (AlignVector=false vs.
// UseCompactObjectHeaders=false); see the alignment note at the end of the loop body.
// Unlike test1a/test1b, the x86 CPU feature required here is avx2 rather than sse4.1.
//
// Adjacent Load and Store, but split by Add/Mul
//
// Per stride of 8 ints: offsets 0..1 get a[..] + mask, offsets 2..5 get a[..] * mask;
// offsets 6 and 7 are untouched. The rules expect a 2-element add and a 4-element multiply.
// Returns { a, b }.
static Object[] test1c(int[] a, int[] b, int mask) {
    for (int i = 0; i < RANGE; i+=8) {
        b[i+0] = a[i+0] + mask; // Add
        b[i+1] = a[i+1] + mask;

        b[i+2] = a[i+2] * mask; // Mul
        b[i+3] = a[i+3] * mask;
        b[i+4] = a[i+4] * mask;
        b[i+5] = a[i+5] * mask;
        // With AlignVector, we need 8-byte alignment of vector loads/stores.
        // UseCompactObjectHeaders=false                 UseCompactObjectHeaders=true
        // adr = base + 16 + 32*i  ->  always            adr = base + 12 + 32*i  ->  never
        // -> vectorize                                  -> no vectorization
    }
    return new Object[]{ a, b };
}
449
@Test
@IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.ADD_VI, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.MUL_VI, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
@IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.ADD_VI, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.MUL_VI, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIfAnd = {"MaxVectorSize", ">=32", "UseCompactObjectHeaders", "false"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
// The two IR rules differ only in their guard (AlignVector=false vs.
// UseCompactObjectHeaders=false); see the alignment note at the end of the loop body.
// Unlike test1a/test1b, the x86 CPU feature required here is avx2 rather than sse4.1.
//
// Adjacent Load and Store, but split by Add/Mul
//
// Per stride of 8 ints: offsets 0..1 get a[..] * mask, offsets 2..5 get a[..] + mask;
// offsets 6 and 7 are untouched. The rules expect a 2-element multiply and a 4-element add.
// Returns { a, b }.
static Object[] test1d(int[] a, int[] b, int mask) {
    for (int i = 0; i < RANGE; i+=8) {
        b[i+0] = a[i+0] * mask; // Mul
        b[i+1] = a[i+1] * mask;

        b[i+2] = a[i+2] + mask; // Add
        b[i+3] = a[i+3] + mask;
        b[i+4] = a[i+4] + mask;
        b[i+5] = a[i+5] + mask;
        // With AlignVector, we need 8-byte alignment of vector loads/stores.
        // UseCompactObjectHeaders=false                 UseCompactObjectHeaders=true
        // adr = base + 16 + 32*i  ->  always            adr = base + 12 + 32*i  ->  never
        // -> vectorize                                  -> no vectorization
    }
    return new Object[]{ a, b };
}
484
@Test
@IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.AND_VI, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.AND_VI, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
@IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.AND_VI, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.AND_VI, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIfAnd = {"MaxVectorSize", ">=32", "UseCompactObjectHeaders", "false"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
// The two IR rules differ only in their guard (AlignVector=false vs.
// UseCompactObjectHeaders=false); see the alignment note at the end of the loop body.
//
// Split the load
//
//  0 1 2 3 4 5 - -     (load offsets in a)
//  | |  \ \ \ \
//  | |   \ \ \ \
//  | |    \ \ \ \
//  0 1 - - 4 5 6 7     (store offsets in b)
//
// Per stride of 8 ints: the six adjacent values a[i+0..5] are ANDed with mask; the first two
// go to b[i+0..1], the remaining four to b[i+4..7]. Returns { a, b }.
static Object[] test2a(int[] a, int[] b, int mask) {
    for (int i = 0; i < RANGE; i+=8) {
        int b0 = a[i+0] & mask;
        int b1 = a[i+1] & mask;
        int b2 = a[i+2] & mask;
        int b3 = a[i+3] & mask;
        int b4 = a[i+4] & mask;
        int b5 = a[i+5] & mask;

        b[i+0] = b0;
        b[i+1] = b1;

        b[i+4] = b2;
        b[i+5] = b3;
        b[i+6] = b4;
        b[i+7] = b5;
        // With AlignVector, we need 8-byte alignment of vector loads/stores.
        // UseCompactObjectHeaders=false                 UseCompactObjectHeaders=true
        // adr = base + 16 + 32*i  ->  always            adr = base + 12 + 32*i  ->  never
        // -> vectorize                                  -> no vectorization
    }
    return new Object[]{ a, b };
}
533
@Test
@IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.AND_VI, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.AND_VI, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
@IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.AND_VI, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.AND_VI, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIfAnd = {"MaxVectorSize", ">=32", "UseCompactObjectHeaders", "false"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
// The two IR rules differ only in their guard (AlignVector=false vs.
// UseCompactObjectHeaders=false); see the alignment note at the end of the loop body.
//
// Split the load
//
//  0 1 2 3 4 5 - -     (load offsets in a)
//  | | | |  \ \
//  | | | |   \ \
//  | | | |    \ \
//  0 1 2 3 - - 6 7     (store offsets in b)
//
// Per stride of 8 ints: the six adjacent values a[i+0..5] are ANDed with mask; the first four
// go to b[i+0..3], the remaining two to b[i+6..7]. Returns { a, b }.
static Object[] test2b(int[] a, int[] b, int mask) {
    for (int i = 0; i < RANGE; i+=8) {
        int b0 = a[i+0] & mask;
        int b1 = a[i+1] & mask;
        int b2 = a[i+2] & mask;
        int b3 = a[i+3] & mask;
        int b4 = a[i+4] & mask;
        int b5 = a[i+5] & mask;

        b[i+0] = b0;
        b[i+1] = b1;
        b[i+2] = b2;
        b[i+3] = b3;

        b[i+6] = b4;
        b[i+7] = b5;
        // With AlignVector, we need 8-byte alignment of vector loads/stores.
        // UseCompactObjectHeaders=false                 UseCompactObjectHeaders=true
        // adr = base + 16 + 32*i  ->  always            adr = base + 12 + 32*i  ->  never
        // -> vectorize                                  -> no vectorization
    }
    return new Object[]{ a, b };
}
582
@Test
@IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.AND_VI, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.AND_VI, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
@IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.AND_VI, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.AND_VI, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIfAnd = {"MaxVectorSize", ">=32", "UseCompactObjectHeaders", "false"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
// The two IR rules differ only in their guard (AlignVector=false vs.
// UseCompactObjectHeaders=false); see the alignment note at the end of the loop body.
//
// Split the store (the loads already come in a group of 2 and a group of 4; it is the six
// adjacent stores that have to be divided)
//
//  0 1 - - 4 5 6 7     (load offsets in a)
//  | |    / / / /
//  | |   / / / /
//  | |  / / / /
//  0 1 2 3 4 5 - -     (store offsets in b)
//
// Per stride of 8 ints: a[i+0..1] and a[i+4..7] are ANDed with mask and written to the six
// adjacent slots b[i+0..5]. Returns { a, b }.
static Object[] test2c(int[] a, int[] b, int mask) {
    for (int i = 0; i < RANGE; i+=8) {
        int b0 = a[i+0] & mask;
        int b1 = a[i+1] & mask;

        int b4 = a[i+4] & mask;
        int b5 = a[i+5] & mask;
        int b6 = a[i+6] & mask;
        int b7 = a[i+7] & mask;

        b[i+0] = b0;
        b[i+1] = b1;
        b[i+2] = b4;
        b[i+3] = b5;
        b[i+4] = b6;
        b[i+5] = b7;
        // With AlignVector, we need 8-byte alignment of vector loads/stores.
        // UseCompactObjectHeaders=false                 UseCompactObjectHeaders=true
        // adr = base + 16 + 32*i  ->  always            adr = base + 12 + 32*i  ->  never
        // -> vectorize                                  -> no vectorization
    }
    return new Object[]{ a, b };
}
631
@Test
@IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.AND_VI, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.AND_VI, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
@IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.AND_VI, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.AND_VI, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIfAnd = {"MaxVectorSize", ">=32", "UseCompactObjectHeaders", "false"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
// The two IR rules differ only in their guard (AlignVector=false vs.
// UseCompactObjectHeaders=false); see the alignment note at the end of the loop body.
//
// Split the store (the loads already come in a group of 4 and a group of 2; it is the six
// adjacent stores that have to be divided)
//
//  0 1 2 3 - - 6 7     (load offsets in a)
//  | | | |    / /
//  | | | |   / /
//  | | | |  / /
//  0 1 2 3 4 5 - -     (store offsets in b)
//
// Per stride of 8 ints: a[i+0..3] and a[i+6..7] are ANDed with mask and written to the six
// adjacent slots b[i+0..5]. Returns { a, b }.
static Object[] test2d(int[] a, int[] b, int mask) {
    for (int i = 0; i < RANGE; i+=8) {
        int b0 = a[i+0] & mask;
        int b1 = a[i+1] & mask;
        int b2 = a[i+2] & mask;
        int b3 = a[i+3] & mask;

        int b6 = a[i+6] & mask;
        int b7 = a[i+7] & mask;

        b[i+0] = b0;
        b[i+1] = b1;
        b[i+2] = b2;
        b[i+3] = b3;
        b[i+4] = b6;
        b[i+5] = b7;
        // With AlignVector, we need 8-byte alignment of vector loads/stores.
        // UseCompactObjectHeaders=false                 UseCompactObjectHeaders=true
        // adr = base + 16 + 32*i  ->  always            adr = base + 12 + 32*i  ->  never
        // -> vectorize                                  -> no vectorization
    }
    return new Object[]{ a, b };
}
680
@Test
@IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
@IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIfAnd = {"MaxVectorSize", ">=32", "UseCompactObjectHeaders", "false"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
// The two IR rules differ only in their guard (AlignVector=false vs.
// UseCompactObjectHeaders=false); see the alignment note at the end of the loop body.
//
//  0 1 2 3 4 5 6 7 -     (load offsets in a)
//  | | | | | | | |
//  | + + + | | | |       (a1 + a2 + a3 is added to sum)
//  |       | | | |
//  |     v | | | | v     (v: val is stored)
//  |     | | | | | |
//  0 - - 3 4 5 6 7 8     (store offsets in b)
//
// Per stride of 16 shorts: a[i+0] and a[i+4..7] are copied to the same offsets of b,
// b[i+3] and b[i+8] are set to val, and a[i+1..3] are only accumulated into the int sum.
// Returns { a, b, { sum } }.
static Object[] test3a(short[] a, short[] b, short val) {
    int sum = 0;
    for (int i = 0; i < RANGE; i+=16) {
        short a0 = a[i+0]; // required for alignment / offsets, technical limitation.

        short a1 = a[i+1]; // adjacent to 4-pack, but need to be split off
        short a2 = a[i+2];
        short a3 = a[i+3];

        short a4 = a[i+4]; // 4-pack
        short a5 = a[i+5];
        short a6 = a[i+6];
        short a7 = a[i+7];


        b[i+0] = a0; // required for alignment / offsets, technical limitation.

        sum += a1 + a2 + a3; // not packed

        b[i+3] = val; // adjacent to 4-pack but needs to be split off

        b[i+4] = a4; // 4-pack
        b[i+5] = a5;
        b[i+6] = a6;
        b[i+7] = a7;

        b[i+8] = val; // adjacent to 4-pack but needs to be split off

        // With AlignVector, we need 8-byte alignment of vector loads/stores.
        // UseCompactObjectHeaders=false                        UseCompactObjectHeaders=true
        // adr = base + 16 + 8 + 32*i  ->  always               adr = base + 12 + 8 + 32*i  ->  never
        // -> vectorize                                         -> no vectorization
    }
    return new Object[]{ a, b, new int[]{ sum } };
}
734
@Test
@IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.STORE_VECTOR, "> 0",
              ".*multiversion.*", "= 0"},
    phase = CompilePhase.PRINT_IDEAL,
    applyIf = {"UseAutoVectorizationSpeculativeAliasingChecks", "false"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true"})
// Cyclic dependency with distance 2 -> split into 2-packs
@IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
              IRNode.STORE_VECTOR, "> 0",
              ".*multiversion.*", "= 0"},
    phase = CompilePhase.PRINT_IDEAL,
    applyIfAnd = {"UseAutoVectorizationSpeculativeAliasingChecks", "true", "AlignVector", "false"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true"})
// Speculative aliasing check -> full vectorization.
//
// Copies a[i] to b[i+2] for every i in [0, RANGE-64), so each store lands 2 elements ahead
// of the load of the same iteration. The first rule (aliasing checks off) requires 2-element
// short vector loads; the second (aliasing checks on, AlignVector off) puts no constraint on
// the vector size. Both require zero matches of ".*multiversion.*". Returns { a, b }.
static Object[] test4a(short[] a, short[] b) {
    for (int i = 0; i < RANGE-64; i++) {
      b[i+2] = a[i+0];
    }
    return new Object[]{ a, b };
}
758
@Test
@IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.STORE_VECTOR, "> 0",
              ".*multiversion.*", "= 0"},
    phase = CompilePhase.PRINT_IDEAL,
    applyIfAnd = {"AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "false"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true"})
// Cyclic dependency with distance 3 -> split into 2-packs
@IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
              IRNode.STORE_VECTOR, "> 0",
              ".*multiversion.*", "= 0"},
    phase = CompilePhase.PRINT_IDEAL,
    applyIfAnd = {"AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "true"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true"})
// Speculative aliasing check -> full vectorization.
//
// Copies a[i] to b[i+3] for every i in [0, RANGE-64), so each store lands 3 elements ahead
// of the load of the same iteration. Both rules apply only with AlignVector off. The first
// (aliasing checks off) requires 2-element short vector loads; the second (aliasing checks on)
// puts no constraint on the vector size. Both require zero matches of ".*multiversion.*".
// Returns { a, b }.
static Object[] test4b(short[] a, short[] b) {
    for (int i = 0; i < RANGE-64; i++) {
      b[i+3] = a[i+0];
    }
    return new Object[]{ a, b };
}
782
@Test
@IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.STORE_VECTOR, "> 0",
              ".*multiversion.*", "= 0"},
    phase = CompilePhase.PRINT_IDEAL,
    applyIfAnd = {"MaxVectorSize", ">=8", "UseAutoVectorizationSpeculativeAliasingChecks", "false"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
// Cyclic dependency with distance 4 -> split into 4-packs
@IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
              IRNode.STORE_VECTOR, "> 0",
              ".*multiversion.*", "= 0"},
    phase = CompilePhase.PRINT_IDEAL,
    applyIfAnd = {"MaxVectorSize", ">=8", "UseAutoVectorizationSpeculativeAliasingChecks", "true"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
// Speculative aliasing check -> full vectorization.
//
// Copies a[i] to b[i+4] for every i in [0, RANGE-64), so each store lands 4 elements ahead
// of the load of the same iteration. Neither rule is guarded by AlignVector. The first
// (aliasing checks off) requires 4-element short vector loads; the second (aliasing checks on)
// puts no constraint on the vector size. Both require zero matches of ".*multiversion.*".
// Returns { a, b }.
static Object[] test4c(short[] a, short[] b) {
    for (int i = 0; i < RANGE-64; i++) {
      b[i+4] = a[i+0];
    }
    return new Object[]{ a, b };
}
806
@Test
@IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.STORE_VECTOR, "> 0",
              ".*multiversion.*", "= 0"},
    phase = CompilePhase.PRINT_IDEAL,
    applyIfAnd = {"MaxVectorSize", ">=8", "AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "false"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
// Cyclic dependency with distance 5 -> split into 4-packs
@IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
              IRNode.STORE_VECTOR, "> 0",
              ".*multiversion.*", "= 0"},
    phase = CompilePhase.PRINT_IDEAL,
    applyIfAnd = {"MaxVectorSize", ">=8", "AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "true"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
// Speculative aliasing check -> full vectorization.
//
// Copies a[i] to b[i+5] for every i in [0, RANGE-64), so each store lands 5 elements ahead
// of the load of the same iteration. Both rules apply only with AlignVector off. The first
// (aliasing checks off) requires 4-element short vector loads; the second (aliasing checks on)
// puts no constraint on the vector size. Both require zero matches of ".*multiversion.*".
// Returns { a, b }.
static Object[] test4d(short[] a, short[] b) {
    for (int i = 0; i < RANGE-64; i++) {
      b[i+5] = a[i+0];
    }
    return new Object[]{ a, b };
}
830
// test4e: for each i in [0, RANGE-64), stores a[i] into b[i+6], then returns { a, b }.
// Two @IR rules are attached, one per setting of UseAutoVectorizationSpeculativeAliasingChecks.
// Both apply only with AlignVector=false and MaxVectorSize>=8, are checked at
// CompilePhase.PRINT_IDEAL, and expect zero matches of the ".*multiversion.*" pattern.
// RANGE is declared outside this chunk.
831 @Test
832 @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
833 IRNode.STORE_VECTOR, "> 0",
834 ".*multiversion.*", "= 0"},
835 phase = CompilePhase.PRINT_IDEAL,
836 applyIfAnd = {"MaxVectorSize", ">=8", "AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "false"},
837 applyIfPlatform = {"64-bit", "true"},
838 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
839 // Rule above (aliasing checks off): cyclic dependency with distance 6 -> split into 4-packs
840 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
841 IRNode.STORE_VECTOR, "> 0",
842 ".*multiversion.*", "= 0"},
843 phase = CompilePhase.PRINT_IDEAL,
844 applyIfAnd = {"MaxVectorSize", ">=8", "AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "true"},
845 applyIfPlatform = {"64-bit", "true"},
846 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
847 // Rule above (aliasing checks on): speculative aliasing check -> full vectorization,
// so no particular vector size is required.
848 static Object[] test4e(short[] a, short[] b) {
849 for (int i = 0; i < RANGE-64; i++) {
850 b[i+6] = a[i+0]; // store index runs 6 elements ahead of the load index
851 }
852 return new Object[]{ a, b };
853 }
854
// test4f: for each i in [0, RANGE-64), stores a[i] into b[i+7], then returns { a, b }.
// Two @IR rules are attached, one per setting of UseAutoVectorizationSpeculativeAliasingChecks.
// Both apply only with AlignVector=false and MaxVectorSize>=8, are checked at
// CompilePhase.PRINT_IDEAL, and expect zero matches of the ".*multiversion.*" pattern.
// RANGE is declared outside this chunk.
855 @Test
856 @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
857 IRNode.STORE_VECTOR, "> 0",
858 ".*multiversion.*", "= 0"},
859 phase = CompilePhase.PRINT_IDEAL,
860 applyIfAnd = {"MaxVectorSize", ">=8", "AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "false"},
861 applyIfPlatform = {"64-bit", "true"},
862 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
863 // Rule above (aliasing checks off): cyclic dependency with distance 7 -> split into 4-packs
864 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
865 IRNode.STORE_VECTOR, "> 0",
866 ".*multiversion.*", "= 0"},
867 phase = CompilePhase.PRINT_IDEAL,
868 applyIfAnd = {"MaxVectorSize", ">=8", "AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "true"},
869 applyIfPlatform = {"64-bit", "true"},
870 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
871 // Rule above (aliasing checks on): speculative aliasing check -> full vectorization,
// so no particular vector size is required.
872 static Object[] test4f(short[] a, short[] b) {
873 for (int i = 0; i < RANGE-64; i++) {
874 b[i+7] = a[i+0]; // store index runs 7 elements ahead of the load index
875 }
876 return new Object[]{ a, b };
877 }
878
// test4g: for each i in [0, RANGE-64), stores a[i] into b[i+8], then returns { a, b }.
// Two @IR rules are attached, one per setting of UseAutoVectorizationSpeculativeAliasingChecks.
// Both apply only with MaxVectorSize>=32, are checked at CompilePhase.PRINT_IDEAL, and expect
// zero matches of the ".*multiversion.*" pattern.
// RANGE is declared outside this chunk.
879 @Test
880 @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_8, "> 0",
881 IRNode.STORE_VECTOR, "> 0",
882 ".*multiversion.*", "= 0"},
883 phase = CompilePhase.PRINT_IDEAL,
884 applyIfAnd = {"MaxVectorSize", ">=32", "UseAutoVectorizationSpeculativeAliasingChecks", "false"},
885 applyIfPlatform = {"64-bit", "true"},
886 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
887 // Rule above (aliasing checks off): cyclic dependency with distance 8 -> split into 8-packs
888 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
889 IRNode.STORE_VECTOR, "> 0",
890 ".*multiversion.*", "= 0"},
891 phase = CompilePhase.PRINT_IDEAL,
892 applyIfAnd = {"MaxVectorSize", ">=32", "UseAutoVectorizationSpeculativeAliasingChecks", "true"},
893 applyIfPlatform = {"64-bit", "true"},
894 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
895 // Rule above (aliasing checks on): speculative aliasing check -> full vectorization,
// so no particular vector size is required.
896 static Object[] test4g(short[] a, short[] b) {
897 for (int i = 0; i < RANGE-64; i++) {
898 b[i+8] = a[i+0]; // store index runs 8 elements ahead of the load index
899 }
900 return new Object[]{ a, b };
901 }
902
// test4a_alias: for each i in [0, RANGE-64), stores a[i] into b[i+2], then returns { a, b }.
// Two @IR rules are attached, one per setting of UseAutoVectorizationSpeculativeAliasingChecks,
// both checked at CompilePhase.PRINT_IDEAL and both restricted to sse4.1.
// With checks off, zero matches of ".*multiversion.*" are expected; with checks on, at least one.
// NOTE(review): presumably invoked with arrays that alias each other - the caller is outside
// this chunk; confirm. RANGE is declared outside this chunk.
903 @Test
904 @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_2, "> 0",
905 IRNode.STORE_VECTOR, "> 0",
906 ".*multiversion.*", "= 0"},
907 phase = CompilePhase.PRINT_IDEAL,
908 applyIf = {"UseAutoVectorizationSpeculativeAliasingChecks", "false"},
909 applyIfPlatform = {"64-bit", "true"},
910 applyIfCPUFeatureOr = {"sse4.1", "true"})
911 // Rule above (aliasing checks off): cyclic dependency with distance 2 -> split into 2-packs
912 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
913 IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_2, "> 0",
914 IRNode.STORE_VECTOR, "> 0",
915 ".*multiversion.*", "> 0"},
916 phase = CompilePhase.PRINT_IDEAL,
917 applyIfAnd = {"UseAutoVectorizationSpeculativeAliasingChecks", "true", "AlignVector", "false"},
918 applyIfPlatform = {"64-bit", "true"},
919 applyIfCPUFeatureOr = {"sse4.1", "true"})
920 // Rule above (aliasing checks on): speculative aliasing check with multiversioning
// -> full vectorization & split packs (both an unsized and a size-2 load are required).
921 static Object[] test4a_alias(short[] a, short[] b) {
922 for (int i = 0; i < RANGE-64; i++) {
923 b[i+2] = a[i+0]; // store index runs 2 elements ahead of the load index
924 }
925 return new Object[]{ a, b };
926 }
927
// test4b_alias: for each i in [0, RANGE-64), stores a[i] into b[i+3], then returns { a, b }.
// Two @IR rules are attached, one per setting of UseAutoVectorizationSpeculativeAliasingChecks,
// both checked at CompilePhase.PRINT_IDEAL, both requiring AlignVector=false and sse4.1.
// With checks off, zero matches of ".*multiversion.*" are expected; with checks on, at least one.
// NOTE(review): presumably invoked with arrays that alias each other - the caller is outside
// this chunk; confirm. RANGE is declared outside this chunk.
928 @Test
929 @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_2, "> 0",
930 IRNode.STORE_VECTOR, "> 0",
931 ".*multiversion.*", "= 0"},
932 phase = CompilePhase.PRINT_IDEAL,
933 applyIfAnd = {"AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "false"},
934 applyIfPlatform = {"64-bit", "true"},
935 applyIfCPUFeatureOr = {"sse4.1", "true"})
936 // Rule above (aliasing checks off): cyclic dependency with distance 3 -> split into 2-packs
937 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
938 IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_2, "> 0",
939 IRNode.STORE_VECTOR, "> 0",
940 ".*multiversion.*", "> 0"},
941 phase = CompilePhase.PRINT_IDEAL,
942 applyIfAnd = {"AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "true"},
943 applyIfPlatform = {"64-bit", "true"},
944 applyIfCPUFeatureOr = {"sse4.1", "true"})
945 // Rule above (aliasing checks on): speculative aliasing check with multiversioning
// -> full vectorization & split packs (both an unsized and a size-2 load are required).
946 static Object[] test4b_alias(short[] a, short[] b) {
947 for (int i = 0; i < RANGE-64; i++) {
948 b[i+3] = a[i+0]; // store index runs 3 elements ahead of the load index
949 }
950 return new Object[]{ a, b };
951 }
952
// test4c_alias: for each i in [0, RANGE-64), stores a[i] into b[i+4], then returns { a, b }.
// Two @IR rules are attached, one per setting of UseAutoVectorizationSpeculativeAliasingChecks,
// both checked at CompilePhase.PRINT_IDEAL and both requiring MaxVectorSize>=8.
// With checks off, zero matches of ".*multiversion.*" are expected; with checks on, at least one.
// NOTE(review): presumably invoked with arrays that alias each other - the caller is outside
// this chunk; confirm. RANGE is declared outside this chunk.
953 @Test
954 @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
955 IRNode.STORE_VECTOR, "> 0",
956 ".*multiversion.*", "= 0"},
957 phase = CompilePhase.PRINT_IDEAL,
958 applyIfAnd = {"MaxVectorSize", ">=8", "UseAutoVectorizationSpeculativeAliasingChecks", "false"},
959 applyIfPlatform = {"64-bit", "true"},
960 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
961 // Rule above (aliasing checks off): cyclic dependency with distance 4 -> split into 4-packs
962 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
963 IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
964 IRNode.STORE_VECTOR, "> 0",
965 ".*multiversion.*", "> 0"},
966 phase = CompilePhase.PRINT_IDEAL,
967 applyIfAnd = {"MaxVectorSize", ">=8", "UseAutoVectorizationSpeculativeAliasingChecks", "true"},
968 applyIfPlatform = {"64-bit", "true"},
969 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
970 // Rule above (aliasing checks on): speculative aliasing check with multiversioning
// -> full vectorization & split packs (both an unsized and a size-4 load are required).
971 static Object[] test4c_alias(short[] a, short[] b) {
972 for (int i = 0; i < RANGE-64; i++) {
973 b[i+4] = a[i+0]; // store index runs 4 elements ahead of the load index
974 }
975 return new Object[]{ a, b };
976 }
977
// test4d_alias: for each i in [0, RANGE-64), stores a[i] into b[i+5], then returns { a, b }.
// Two @IR rules are attached, one per setting of UseAutoVectorizationSpeculativeAliasingChecks,
// both checked at CompilePhase.PRINT_IDEAL, both requiring MaxVectorSize>=8 and AlignVector=false.
// With checks off, zero matches of ".*multiversion.*" are expected; with checks on, at least one.
// NOTE(review): presumably invoked with arrays that alias each other - the caller is outside
// this chunk; confirm. RANGE is declared outside this chunk.
978 @Test
979 @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
980 IRNode.STORE_VECTOR, "> 0",
981 ".*multiversion.*", "= 0"},
982 phase = CompilePhase.PRINT_IDEAL,
983 applyIfAnd = {"MaxVectorSize", ">=8", "AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "false"},
984 applyIfPlatform = {"64-bit", "true"},
985 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
986 // Rule above (aliasing checks off): cyclic dependency with distance 5 -> split into 4-packs
987 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
988 IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
989 IRNode.STORE_VECTOR, "> 0",
990 ".*multiversion.*", "> 0"},
991 phase = CompilePhase.PRINT_IDEAL,
992 applyIfAnd = {"MaxVectorSize", ">=8", "AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "true"},
993 applyIfPlatform = {"64-bit", "true"},
994 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
995 // Rule above (aliasing checks on): speculative aliasing check with multiversioning
// -> full vectorization & split packs (both an unsized and a size-4 load are required).
996 static Object[] test4d_alias(short[] a, short[] b) {
997 for (int i = 0; i < RANGE-64; i++) {
998 b[i+5] = a[i+0]; // store index runs 5 elements ahead of the load index
999 }
1000 return new Object[]{ a, b };
1001 }
1002
// test4e_alias: for each i in [0, RANGE-64), stores a[i] into b[i+6], then returns { a, b }.
// Two @IR rules are attached, one per setting of UseAutoVectorizationSpeculativeAliasingChecks,
// both checked at CompilePhase.PRINT_IDEAL, both requiring MaxVectorSize>=8 and AlignVector=false.
// With checks off, zero matches of ".*multiversion.*" are expected; with checks on, at least one.
// NOTE(review): presumably invoked with arrays that alias each other - the caller is outside
// this chunk; confirm. RANGE is declared outside this chunk.
1003 @Test
1004 @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
1005 IRNode.STORE_VECTOR, "> 0",
1006 ".*multiversion.*", "= 0"},
1007 phase = CompilePhase.PRINT_IDEAL,
1008 applyIfAnd = {"MaxVectorSize", ">=8", "AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "false"},
1009 applyIfPlatform = {"64-bit", "true"},
1010 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
1011 // Rule above (aliasing checks off): cyclic dependency with distance 6 -> split into 4-packs
1012 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
1013 IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
1014 IRNode.STORE_VECTOR, "> 0",
1015 ".*multiversion.*", "> 0"},
1016 phase = CompilePhase.PRINT_IDEAL,
1017 applyIfAnd = {"MaxVectorSize", ">=8", "AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "true"},
1018 applyIfPlatform = {"64-bit", "true"},
1019 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
1020 // Rule above (aliasing checks on): speculative aliasing check with multiversioning
// -> full vectorization & split packs (both an unsized and a size-4 load are required).
1021 static Object[] test4e_alias(short[] a, short[] b) {
1022 for (int i = 0; i < RANGE-64; i++) {
1023 b[i+6] = a[i+0]; // store index runs 6 elements ahead of the load index
1024 }
1025 return new Object[]{ a, b };
1026 }
1027
// test4f_alias: for each i in [0, RANGE-64), stores a[i] into b[i+7], then returns { a, b }.
// Two @IR rules are attached, one per setting of UseAutoVectorizationSpeculativeAliasingChecks,
// both checked at CompilePhase.PRINT_IDEAL, both requiring MaxVectorSize>=8 and AlignVector=false.
// With checks off, zero matches of ".*multiversion.*" are expected; with checks on, at least one.
// NOTE(review): presumably invoked with arrays that alias each other - the caller is outside
// this chunk; confirm. RANGE is declared outside this chunk.
1028 @Test
1029 @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
1030 IRNode.STORE_VECTOR, "> 0",
1031 ".*multiversion.*", "= 0"},
1032 phase = CompilePhase.PRINT_IDEAL,
1033 applyIfAnd = {"MaxVectorSize", ">=8", "AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "false"},
1034 applyIfPlatform = {"64-bit", "true"},
1035 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
1036 // Rule above (aliasing checks off): cyclic dependency with distance 7 -> split into 4-packs
1037 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
1038 IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
1039 IRNode.STORE_VECTOR, "> 0",
1040 ".*multiversion.*", "> 0"},
1041 phase = CompilePhase.PRINT_IDEAL,
1042 applyIfAnd = {"MaxVectorSize", ">=8", "AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "true"},
1043 applyIfPlatform = {"64-bit", "true"},
1044 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
1045 // Rule above (aliasing checks on): speculative aliasing check with multiversioning
// -> full vectorization & split packs (both an unsized and a size-4 load are required).
1046 static Object[] test4f_alias(short[] a, short[] b) {
1047 for (int i = 0; i < RANGE-64; i++) {
1048 b[i+7] = a[i+0]; // store index runs 7 elements ahead of the load index
1049 }
1050 return new Object[]{ a, b };
1051 }
1052
// test4g_alias: for each i in [0, RANGE-64), stores a[i] into b[i+8], then returns { a, b }.
// Two @IR rules are attached, one per setting of UseAutoVectorizationSpeculativeAliasingChecks,
// both checked at CompilePhase.PRINT_IDEAL and both requiring MaxVectorSize>=32.
// With checks off, zero matches of ".*multiversion.*" are expected; with checks on, at least one.
// NOTE(review): presumably invoked with arrays that alias each other - the caller is outside
// this chunk; confirm. RANGE is declared outside this chunk.
1053 @Test
1054 @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_8, "> 0",
1055 IRNode.STORE_VECTOR, "> 0",
1056 ".*multiversion.*", "= 0"},
1057 phase = CompilePhase.PRINT_IDEAL,
1058 applyIfAnd = {"MaxVectorSize", ">=32", "UseAutoVectorizationSpeculativeAliasingChecks", "false"},
1059 applyIfPlatform = {"64-bit", "true"},
1060 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
1061 // Rule above (aliasing checks off): cyclic dependency with distance 8 -> split into 8-packs
1062 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
1063 IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_8, "> 0",
1064 IRNode.STORE_VECTOR, "> 0",
1065 ".*multiversion.*", "> 0"},
1066 phase = CompilePhase.PRINT_IDEAL,
1067 applyIfAnd = {"MaxVectorSize", ">=32", "UseAutoVectorizationSpeculativeAliasingChecks", "true"},
1068 applyIfPlatform = {"64-bit", "true"},
// NOTE(review): this rule lists "avx2" where the rule above and the sibling tests list
// "sse4.1" - confirm the difference is intentional.
1069 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
1070 // Rule above (aliasing checks on): speculative aliasing check with multiversioning
// -> full vectorization & split packs (both an unsized and a size-8 load are required).
1071 static Object[] test4g_alias(short[] a, short[] b) {
1072 for (int i = 0; i < RANGE-64; i++) {
1073 b[i+8] = a[i+0]; // store index runs 8 elements ahead of the load index
1074 }
1075 return new Object[]{ a, b };
1076 }
1077
// test5a: steps i over [0, RANGE) in strides of 16 and, per iteration, writes
// b[i+k] = (short)(a[i+k] + val) for k = 0..14 (15 stores; index i+15 is left untouched).
// Returns { a, b }. The stores are grouped in the source as 8 + 4 + 2 + 1 elements.
// Two @IR rules are attached, both requiring MaxVectorSize>=32 and AlignVector=false:
// the sse4.1 rule expects vector sizes 2, 4 and 8; the sve rule expects only sizes 4 and 8.
// RANGE is declared outside this chunk.
1078 @Test
1079 @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_2, "> 0",
1080 IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
1081 IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_8, "> 0",
1082 IRNode.ADD_VS, IRNode.VECTOR_SIZE_2, "> 0",
1083 IRNode.ADD_VS, IRNode.VECTOR_SIZE_8, "> 0",
1084 IRNode.ADD_VS, IRNode.VECTOR_SIZE_4, "> 0",
1085 IRNode.STORE_VECTOR, "> 0"},
1086 applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"},
1087 applyIfPlatform = {"64-bit", "true"},
1088 applyIfCPUFeature = {"sse4.1", "true"})
1089 // aarch64 limits minimum vector size to 8B, thus a vector size of
1090 // length 2 for type "short" will not be generated
1091 @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
1092 IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_8, "> 0",
1093 IRNode.ADD_VS, IRNode.VECTOR_SIZE_8, "> 0",
1094 IRNode.ADD_VS, IRNode.VECTOR_SIZE_4, "> 0",
1095 IRNode.STORE_VECTOR, "> 0"},
1096 applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"},
1097 applyIfPlatform = {"64-bit", "true"},
1098 applyIfCPUFeature = {"sve", "true"})
1099 // Split pack into power-of-2 sizes
1100 static Object[] test5a(short[] a, short[] b, short val) {
1101 for (int i = 0; i < RANGE; i+=16) {
1102 b[i+ 0] = (short)(a[i+ 0] + val); // 8-pack
1103 b[i+ 1] = (short)(a[i+ 1] + val);
1104 b[i+ 2] = (short)(a[i+ 2] + val);
1105 b[i+ 3] = (short)(a[i+ 3] + val);
1106 b[i+ 4] = (short)(a[i+ 4] + val);
1107 b[i+ 5] = (short)(a[i+ 5] + val);
1108 b[i+ 6] = (short)(a[i+ 6] + val);
1109 b[i+ 7] = (short)(a[i+ 7] + val);
1110
1111 b[i+ 8] = (short)(a[i+ 8] + val); // 4-pack
1112 b[i+ 9] = (short)(a[i+ 9] + val);
1113 b[i+10] = (short)(a[i+10] + val);
1114 b[i+11] = (short)(a[i+11] + val);
1115
1116 b[i+12] = (short)(a[i+12] + val); // 2-pack
1117 b[i+13] = (short)(a[i+13] + val);
1118
1119 b[i+14] = (short)(a[i+14] + val); // single leftover element; i+15 is never written
1120 }
1121 return new Object[]{ a, b };
1122 }
1123
// test6a: steps i over [0, RANGE) in strides of 8 and accumulates into the int s:
// a[i+k] * b[i+k] for k = 0..3 and a[i+k] & b[i+k] for k = 4..7.
// Returns { a, b, new int[]{ s } } so the reduction result is part of the returned data.
// Two @IR rules with identical counts are attached; both require MaxVectorSize>=32.
// The first additionally requires AlignVector=false, the second UseCompactObjectHeaders=false
// (see the alignment comment inside the loop). RANGE is declared outside this chunk.
1124 @Test
1125 @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
1126 IRNode.MUL_VI, IRNode.VECTOR_SIZE_4, "> 0",
1127 IRNode.AND_VI, IRNode.VECTOR_SIZE_4, "> 0",
1128 IRNode.ADD_VI, IRNode.VECTOR_SIZE_4, "> 0", // reduction moved out of loop
1129 IRNode.ADD_REDUCTION_V, "> 0"},
1130 applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"},
1131 applyIfPlatform = {"64-bit", "true"},
1132 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
1133 @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
1134 IRNode.MUL_VI, IRNode.VECTOR_SIZE_4, "> 0",
1135 IRNode.AND_VI, IRNode.VECTOR_SIZE_4, "> 0",
1136 IRNode.ADD_VI, IRNode.VECTOR_SIZE_4, "> 0", // reduction moved out of loop
1137 IRNode.ADD_REDUCTION_V, "> 0"},
1138 applyIfAnd = {"MaxVectorSize", ">=32", "UseCompactObjectHeaders", "false"},
1139 applyIfPlatform = {"64-bit", "true"},
1140 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
1141 // Split packs including reductions
1142 static Object[] test6a(int[] a, int[] b) {
1143 int s = 0;
1144 for (int i = 0; i < RANGE; i+=8) {
// First half of the iteration: multiply-and-accumulate.
1145 s += a[i+0] * b[i+0];
1146 s += a[i+1] * b[i+1];
1147 s += a[i+2] * b[i+2];
1148 s += a[i+3] * b[i+3];
1149
// Second half of the iteration: and-and-accumulate.
1150 s += a[i+4] & b[i+4];
1151 s += a[i+5] & b[i+5];
1152 s += a[i+6] & b[i+6];
1153 s += a[i+7] & b[i+7];
1154 // With AlignVector, we need 8-byte alignment of vector loads/stores.
1155 // UseCompactObjectHeaders=false: adr = base + 16 + 32*i -> always aligned -> vectorize
1156 // UseCompactObjectHeaders=true: adr = base + 12 + 32*i -> never aligned -> no vectorization
1157 // Hence vectorization is only asserted when AlignVector=false or UseCompactObjectHeaders=false.
1158 }
1159 return new Object[]{ a, b, new int[]{ s } };
1160 }
1161
// test7a: for each i in [0, RANGE), stores b[i] * i into a[i], then returns { a, b }.
// The loop index itself is an operand of the multiplication. The single @IR rule expects
// LOAD_VECTOR_I, MUL_VI and POPULATE_INDEX nodes on 64-bit platforms with avx2, sve or rvv.
// RANGE is declared outside this chunk.
1162 @Test
1163 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
1164 IRNode.MUL_VI, "> 0",
1165 IRNode.POPULATE_INDEX, "> 0"},
1166 applyIfPlatform = {"64-bit", "true"},
1167 applyIfCPUFeatureOr = {"avx2", "true", "sve", "true", "rvv", "true"})
1168 // Index Populate:
1169 // There can be an issue when all the (iv + 1), (iv + 2), ...
1170 // get packed, but not (iv). Then we have a pack that is one element
1171 // too short, and we start splitting everything in a bad way.
1172 static Object[] test7a(int[] a, int[] b) {
1173 for (int i = 0; i < RANGE; i++) {
1174 a[i] = b[i] * i; // uses the induction variable as a value, not only as an index
1175 }
1176 return new Object[]{ a, b };
1177 }
1178 }