/*
 * Copyright (c) 2024, 2025, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
  23 
  24 package compiler.loopopts.superword;
  25 
  26 import compiler.lib.ir_framework.*;
  27 import jdk.test.lib.Utils;
  28 import jdk.test.whitebox.WhiteBox;
  29 import java.lang.reflect.Array;
  30 import java.util.Map;
  31 import java.util.HashMap;
  32 import java.util.Random;
  33 import java.nio.ByteOrder;
  34 
/*
 * @test
 * @bug 8326139 8348659
 * @key randomness
 * @summary Test splitting packs in SuperWord
 * @library /test/lib /
 * @run driver compiler.loopopts.superword.TestSplitPacks nCOH_nAV_ySAC
 * @run driver compiler.loopopts.superword.TestSplitPacks nCOH_yAV_ySAC
 * @run driver compiler.loopopts.superword.TestSplitPacks yCOH_nAV_ySAC
 * @run driver compiler.loopopts.superword.TestSplitPacks yCOH_yAV_ySAC
 * @run driver compiler.loopopts.superword.TestSplitPacks nCOH_nAV_nSAC
 * @run driver compiler.loopopts.superword.TestSplitPacks nCOH_yAV_nSAC
 * @run driver compiler.loopopts.superword.TestSplitPacks yCOH_nAV_nSAC
 * @run driver compiler.loopopts.superword.TestSplitPacks yCOH_yAV_nSAC
 */
  50 
  51 public class TestSplitPacks {
  52     static int RANGE = 1024*8;
  53     static int RANGE_FINAL = 1024*8;
  54     private static final Random RANDOM = Utils.getRandomInstance();
  55 
  56     // Inputs
  57     byte[] aB;
  58     byte[] bB;
  59     byte mB = (byte)31;
  60     short[] aS;
  61     short[] bS;
  62     short mS = (short)0xF0F0;
  63     int[] aI;
  64     int[] bI;
  65     int mI = 0xF0F0F0F0;
  66     long[] aL;
  67     long[] bL;
  68     long mL = 0xF0F0F0F0F0F0F0F0L;
  69 
  70     // List of tests
  71     Map<String,TestFunction> tests = new HashMap<String,TestFunction>();
  72 
  73     // List of gold, the results from the first run before compilation
  74     Map<String,Object[]> golds = new HashMap<String,Object[]>();
  75 
  76     interface TestFunction {
  77         Object[] run();
  78     }
  79 
  80     public static void main(String[] args) {
  81         TestFramework framework = new TestFramework(TestSplitPacks.class);
  82         framework.addFlags("-XX:+IgnoreUnrecognizedVMOptions", "-XX:LoopUnrollLimit=1000");
  83         switch (args[0]) {
  84             case "nCOH_nAV_ySAC" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:-UseCompactObjectHeaders", "-XX:-AlignVector", "-XX:+UseAutoVectorizationSpeculativeAliasingChecks"); }
  85             case "nCOH_yAV_ySAC" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:-UseCompactObjectHeaders", "-XX:+AlignVector", "-XX:+UseAutoVectorizationSpeculativeAliasingChecks"); }
  86             case "yCOH_nAV_ySAC" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:+UseCompactObjectHeaders", "-XX:-AlignVector", "-XX:+UseAutoVectorizationSpeculativeAliasingChecks"); }
  87             case "yCOH_yAV_ySAC" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:+UseCompactObjectHeaders", "-XX:+AlignVector", "-XX:+UseAutoVectorizationSpeculativeAliasingChecks"); }
  88             case "nCOH_nAV_nSAC" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:-UseCompactObjectHeaders", "-XX:-AlignVector", "-XX:-UseAutoVectorizationSpeculativeAliasingChecks"); }
  89             case "nCOH_yAV_nSAC" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:-UseCompactObjectHeaders", "-XX:+AlignVector", "-XX:-UseAutoVectorizationSpeculativeAliasingChecks"); }
  90             case "yCOH_nAV_nSAC" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:+UseCompactObjectHeaders", "-XX:-AlignVector", "-XX:-UseAutoVectorizationSpeculativeAliasingChecks"); }
  91             case "yCOH_yAV_nSAC" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:+UseCompactObjectHeaders", "-XX:+AlignVector", "-XX:-UseAutoVectorizationSpeculativeAliasingChecks"); }
  92             default -> { throw new RuntimeException("Test argument not recognized: " + args[0]); }
  93         };
  94         framework.start();
  95     }
  96 
  97     public TestSplitPacks() {
  98         // Generate input once
  99         aB = generateB();
 100         bB = generateB();
 101         aS = generateS();
 102         bS = generateS();
 103         aI = generateI();
 104         bI = generateI();
 105         aL = generateL();
 106         bL = generateL();
 107 
 108         // Add all tests to list
 109         tests.put("test0",       () -> { return test0(aI.clone(), bI.clone(), mI); });
 110         tests.put("test1a",      () -> { return test1a(aI.clone(), bI.clone(), mI); });
 111         tests.put("test1b",      () -> { return test1b(aI.clone(), bI.clone(), mI); });
 112         tests.put("test1c",      () -> { return test1c(aI.clone(), bI.clone(), mI); });
 113         tests.put("test1d",      () -> { return test1d(aI.clone(), bI.clone(), mI); });
 114         tests.put("test2a",      () -> { return test2a(aI.clone(), bI.clone(), mI); });
 115         tests.put("test2b",      () -> { return test2b(aI.clone(), bI.clone(), mI); });
 116         tests.put("test2c",      () -> { return test2c(aI.clone(), bI.clone(), mI); });
 117         tests.put("test2d",      () -> { return test2d(aI.clone(), bI.clone(), mI); });
 118         tests.put("test3a",      () -> { return test3a(aS.clone(), bS.clone(), mS); });
 119         tests.put("test4a",      () -> { return test4a(aS.clone(), bS.clone()); });
 120         tests.put("test4b",      () -> { return test4b(aS.clone(), bS.clone()); });
 121         tests.put("test4c",      () -> { return test4c(aS.clone(), bS.clone()); });
 122         tests.put("test4d",      () -> { return test4d(aS.clone(), bS.clone()); });
 123         tests.put("test4e",      () -> { return test4e(aS.clone(), bS.clone()); });
 124         tests.put("test4f",      () -> { return test4f(aS.clone(), bS.clone()); });
 125         tests.put("test4g",      () -> { return test4g(aS.clone(), bS.clone()); });
 126         tests.put("test4a_alias",() -> { short[] x = aS.clone(); return test4a_alias(x, x); });
 127         tests.put("test4b_alias",() -> { short[] x = aS.clone(); return test4b_alias(x, x); });
 128         tests.put("test4c_alias",() -> { short[] x = aS.clone(); return test4c_alias(x, x); });
 129         tests.put("test4d_alias",() -> { short[] x = aS.clone(); return test4d_alias(x, x); });
 130         tests.put("test4e_alias",() -> { short[] x = aS.clone(); return test4e_alias(x, x); });
 131         tests.put("test4f_alias",() -> { short[] x = aS.clone(); return test4f_alias(x, x); });
 132         tests.put("test4g_alias",() -> { short[] x = aS.clone(); return test4g_alias(x, x); });
 133         tests.put("test5a",      () -> { return test5a(aS.clone(), bS.clone(), mS); });
 134         tests.put("test6a",      () -> { return test6a(aI.clone(), bI.clone()); });
 135         tests.put("test7a",      () -> { return test7a(aI.clone(), bI.clone()); });
 136 
 137         // Compute gold value for all test methods before compilation
 138         for (Map.Entry<String,TestFunction> entry : tests.entrySet()) {
 139             String name = entry.getKey();
 140             TestFunction test = entry.getValue();
 141             Object[] gold = test.run();
 142             golds.put(name, gold);
 143         }
 144     }
 145 
 146     @Warmup(100)
 147     @Run(test = {"test0",
 148                  "test1a",
 149                  "test1b",
 150                  "test1c",
 151                  "test1d",
 152                  "test2a",
 153                  "test2b",
 154                  "test2c",
 155                  "test2d",
 156                  "test3a",
 157                  "test4a",
 158                  "test4b",
 159                  "test4c",
 160                  "test4d",
 161                  "test4e",
 162                  "test4f",
 163                  "test4g",
 164                  "test4a_alias",
 165                  "test4b_alias",
 166                  "test4c_alias",
 167                  "test4d_alias",
 168                  "test4e_alias",
 169                  "test4f_alias",
 170                  "test4g_alias",
 171                  "test5a",
 172                  "test6a",
 173                  "test7a"})
 174     public void runTests() {
 175         for (Map.Entry<String,TestFunction> entry : tests.entrySet()) {
 176             String name = entry.getKey();
 177             TestFunction test = entry.getValue();
 178             // Recall gold value from before compilation
 179             Object[] gold = golds.get(name);
 180             // Compute new result
 181             Object[] result = test.run();
 182             // Compare gold and new result
 183             verify(name, gold, result);
 184         }
 185     }
 186 
 187     static byte[] generateB() {
 188         byte[] a = new byte[RANGE];
 189         for (int i = 0; i < a.length; i++) {
 190             a[i] = (byte)RANDOM.nextInt();
 191         }
 192         return a;
 193     }
 194 
 195     static short[] generateS() {
 196         short[] a = new short[RANGE];
 197         for (int i = 0; i < a.length; i++) {
 198             a[i] = (short)RANDOM.nextInt();
 199         }
 200         return a;
 201     }
 202 
 203     static int[] generateI() {
 204         int[] a = new int[RANGE];
 205         for (int i = 0; i < a.length; i++) {
 206             a[i] = RANDOM.nextInt();
 207         }
 208         return a;
 209     }
 210 
 211     static long[] generateL() {
 212         long[] a = new long[RANGE];
 213         for (int i = 0; i < a.length; i++) {
 214             a[i] = RANDOM.nextLong();
 215         }
 216         return a;
 217     }
 218 
 219     static void verify(String name, Object[] gold, Object[] result) {
 220         if (gold.length != result.length) {
 221             throw new RuntimeException("verify " + name + ": not the same number of outputs: gold.length = " +
 222                                        gold.length + ", result.length = " + result.length);
 223         }
 224         for (int i = 0; i < gold.length; i++) {
 225             Object g = gold[i];
 226             Object r = result[i];
 227             if (g.getClass() != r.getClass() || !g.getClass().isArray() || !r.getClass().isArray()) {
 228                 throw new RuntimeException("verify " + name + ": must both be array of same type:" +
 229                                            " gold[" + i + "].getClass() = " + g.getClass().getSimpleName() +
 230                                            " result[" + i + "].getClass() = " + r.getClass().getSimpleName());
 231             }
 232             if (g == r) {
 233                 throw new RuntimeException("verify " + name + ": should be two separate arrays (with identical content):" +
 234                                            " gold[" + i + "] == result[" + i + "]");
 235             }
 236             if (Array.getLength(g) != Array.getLength(r)) {
 237                     throw new RuntimeException("verify " + name + ": arrays must have same length:" +
 238                                            " gold[" + i + "].length = " + Array.getLength(g) +
 239                                            " result[" + i + "].length = " + Array.getLength(r));
 240             }
 241             Class c = g.getClass().getComponentType();
 242             if (c == byte.class) {
 243                 verifyB(name, i, (byte[])g, (byte[])r);
 244             } else if (c == short.class) {
 245                 verifyS(name, i, (short[])g, (short[])r);
 246             } else if (c == int.class) {
 247                 verifyI(name, i, (int[])g, (int[])r);
 248             } else if (c == long.class) {
 249                 verifyL(name, i, (long[])g, (long[])r);
 250             } else {
 251                 throw new RuntimeException("verify " + name + ": array type not supported for verify:" +
 252                                        " gold[" + i + "].getClass() = " + g.getClass().getSimpleName() +
 253                                        " result[" + i + "].getClass() = " + r.getClass().getSimpleName());
 254             }
 255         }
 256     }
 257 
 258     static void verifyB(String name, int i, byte[] g, byte[] r) {
 259         for (int j = 0; j < g.length; j++) {
 260             if (g[j] != r[j]) {
 261                 throw new RuntimeException("verify " + name + ": arrays must have same content:" +
 262                                            " gold[" + i + "][" + j + "] = " + g[j] +
 263                                            " result[" + i + "][" + j + "] = " + r[j]);
 264             }
 265         }
 266     }
 267 
 268     static void verifyS(String name, int i, short[] g, short[] r) {
 269         for (int j = 0; j < g.length; j++) {
 270             if (g[j] != r[j]) {
 271                 throw new RuntimeException("verify " + name + ": arrays must have same content:" +
 272                                            " gold[" + i + "][" + j + "] = " + g[j] +
 273                                            " result[" + i + "][" + j + "] = " + r[j]);
 274             }
 275         }
 276     }
 277 
 278     static void verifyI(String name, int i, int[] g, int[] r) {
 279         for (int j = 0; j < g.length; j++) {
 280             if (g[j] != r[j]) {
 281                 throw new RuntimeException("verify " + name + ": arrays must have same content:" +
 282                                            " gold[" + i + "][" + j + "] = " + g[j] +
 283                                            " result[" + i + "][" + j + "] = " + r[j]);
 284             }
 285         }
 286     }
 287 
 288     static void verifyL(String name, int i, long[] g, long[] r) {
 289         for (int j = 0; j < g.length; j++) {
 290             if (g[j] != r[j]) {
 291                 throw new RuntimeException("verify " + name + ": arrays must have same content:" +
 292                                            " gold[" + i + "][" + j + "] = " + g[j] +
 293                                            " result[" + i + "][" + j + "] = " + r[j]);
 294             }
 295         }
 296     }
 297 
 298     @Test
 299     @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
 300                   IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
 301                   IRNode.AND_VI,        IRNode.VECTOR_SIZE_2, "> 0",
 302                   IRNode.AND_VI,        IRNode.VECTOR_SIZE_4, "> 0",
 303                   IRNode.STORE_VECTOR, "> 0"},
 304         applyIf = {"MaxVectorSize", ">=32"},
 305         applyIfPlatform = {"64-bit", "true"},
 306         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 307     // Load and store are already split
 308     //
 309     //  0 1 - - 4 5 6 7
 310     //  | |     | | | |
 311     //  0 1 - - 4 5 6 7
 312     static Object[] test0(int[] a, int[] b, int mask) {
 313         for (int i = 0; i < RANGE; i+=8) {
 314             int b0 = a[i+0] & mask;
 315             int b1 = a[i+1] & mask;
 316 
 317             int b4 = a[i+4] & mask;
 318             int b5 = a[i+5] & mask;
 319             int b6 = a[i+6] & mask;
 320             int b7 = a[i+7] & mask;
 321 
 322             b[i+0] = b0;
 323             b[i+1] = b1;
 324 
 325             b[i+4] = b4;
 326             b[i+5] = b5;
 327             b[i+6] = b6;
 328             b[i+7] = b7;
 329         }
 330         return new Object[]{ a, b };
 331     }
 332 
 333     @Test
 334     @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
 335                   IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
 336                   IRNode.ADD_VI,        IRNode.VECTOR_SIZE_4, "> 0",
 337                   IRNode.MUL_VI,        IRNode.VECTOR_SIZE_2, "> 0",
 338                   IRNode.STORE_VECTOR, "> 0"},
 339         applyIf = {"MaxVectorSize", ">=32"},
 340         applyIfPlatform = {"64-bit", "true"},
 341         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 342     // Adjacent Load and Store, but split by Add/Mul
 343     static Object[] test1a(int[] a, int[] b, int mask) {
 344         for (int i = 0; i < RANGE; i+=8) {
 345             b[i+0] = a[i+0] + mask; // Add
 346             b[i+1] = a[i+1] + mask;
 347             b[i+2] = a[i+2] + mask;
 348             b[i+3] = a[i+3] + mask;
 349 
 350             b[i+4] = a[i+4] * mask; // Mul
 351             b[i+5] = a[i+5] * mask;
 352         }
 353         return new Object[]{ a, b };
 354     }
 355 
 356     @Test
 357     @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
 358                   IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
 359                   IRNode.ADD_VI,        IRNode.VECTOR_SIZE_2, "> 0",
 360                   IRNode.MUL_VI,        IRNode.VECTOR_SIZE_4, "> 0",
 361                   IRNode.STORE_VECTOR, "> 0"},
 362         applyIf = {"MaxVectorSize", ">=32"},
 363         applyIfPlatform = {"64-bit", "true"},
 364         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 365     // Adjacent Load and Store, but split by Add/Mul
 366     static Object[] test1b(int[] a, int[] b, int mask) {
 367         for (int i = 0; i < RANGE; i+=8) {
 368             b[i+0] = a[i+0] * mask; // Mul
 369             b[i+1] = a[i+1] * mask;
 370             b[i+2] = a[i+2] * mask;
 371             b[i+3] = a[i+3] * mask;
 372 
 373             b[i+4] = a[i+4] + mask; // Add
 374             b[i+5] = a[i+5] + mask;
 375         }
 376         return new Object[]{ a, b };
 377     }
 378 
 379     @Test
 380     @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
 381                   IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
 382                   IRNode.ADD_VI,        IRNode.VECTOR_SIZE_2, "> 0",
 383                   IRNode.MUL_VI,        IRNode.VECTOR_SIZE_4, "> 0",
 384                   IRNode.STORE_VECTOR, "> 0"},
 385         applyIf = {"MaxVectorSize", ">=32"},
 386         applyIfPlatform = {"64-bit", "true"},
 387         applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
 388     // Adjacent Load and Store, but split by Add/Mul
 389     static Object[] test1c(int[] a, int[] b, int mask) {
 390         for (int i = 0; i < RANGE; i+=8) {
 391             b[i+0] = a[i+0] + mask; // Add
 392             b[i+1] = a[i+1] + mask;
 393 
 394             b[i+2] = a[i+2] * mask; // Mul
 395             b[i+3] = a[i+3] * mask;
 396             b[i+4] = a[i+4] * mask;
 397             b[i+5] = a[i+5] * mask;
 398         }
 399         return new Object[]{ a, b };
 400     }
 401 
 402     @Test
 403     @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
 404                   IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
 405                   IRNode.ADD_VI,        IRNode.VECTOR_SIZE_4, "> 0",
 406                   IRNode.MUL_VI,        IRNode.VECTOR_SIZE_2, "> 0",
 407                   IRNode.STORE_VECTOR, "> 0"},
 408         applyIf = {"MaxVectorSize", ">=32"},
 409         applyIfPlatform = {"64-bit", "true"},
 410         applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
 411     // Adjacent Load and Store, but split by Add/Mul
 412     static Object[] test1d(int[] a, int[] b, int mask) {
 413         for (int i = 0; i < RANGE; i+=8) {
 414             b[i+0] = a[i+0] * mask; // Mul
 415             b[i+1] = a[i+1] * mask;
 416 
 417             b[i+2] = a[i+2] + mask; // Add
 418             b[i+3] = a[i+3] + mask;
 419             b[i+4] = a[i+4] + mask;
 420             b[i+5] = a[i+5] + mask;
 421         }
 422         return new Object[]{ a, b };
 423     }
 424 
 425     @Test
 426     @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
 427                   IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
 428                   IRNode.AND_VI,        IRNode.VECTOR_SIZE_2, "> 0",
 429                   IRNode.AND_VI,        IRNode.VECTOR_SIZE_4, "> 0",
 430                   IRNode.STORE_VECTOR, "> 0"},
 431         applyIf = {"MaxVectorSize", ">=32"},
 432         applyIfPlatform = {"64-bit", "true"},
 433         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 434     // Split the load
 435     //
 436     //  0 1 2 3 4 5 - -
 437     //  | |  \ \ \ \
 438     //  | |   \ \ \ \
 439     //  | |    \ \ \ \
 440     //  0 1 - - 4 5 6 7
 441     //
 442     static Object[] test2a(int[] a, int[] b, int mask) {
 443         for (int i = 0; i < RANGE; i+=8) {
 444             int b0 = a[i+0] & mask;
 445             int b1 = a[i+1] & mask;
 446             int b2 = a[i+2] & mask;
 447             int b3 = a[i+3] & mask;
 448             int b4 = a[i+4] & mask;
 449             int b5 = a[i+5] & mask;
 450 
 451             b[i+0] = b0;
 452             b[i+1] = b1;
 453 
 454             b[i+4] = b2;
 455             b[i+5] = b3;
 456             b[i+6] = b4;
 457             b[i+7] = b5;
 458         }
 459         return new Object[]{ a, b };
 460     }
 461 
 462     @Test
 463     @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
 464                   IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
 465                   IRNode.AND_VI,        IRNode.VECTOR_SIZE_2, "> 0",
 466                   IRNode.AND_VI,        IRNode.VECTOR_SIZE_4, "> 0",
 467                   IRNode.STORE_VECTOR, "> 0"},
 468         applyIf = {"MaxVectorSize", ">=32"},
 469         applyIfPlatform = {"64-bit", "true"},
 470         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 471     // Split the load
 472     //
 473     //  0 1 2 3 4 5 - -
 474     //  | | | |  \ \
 475     //  | | | |   \ \
 476     //  | | | |    \ \
 477     //  0 1 2 3 -- 6 7
 478     //
 479     static Object[] test2b(int[] a, int[] b, int mask) {
 480         for (int i = 0; i < RANGE; i+=8) {
 481             int b0 = a[i+0] & mask;
 482             int b1 = a[i+1] & mask;
 483             int b2 = a[i+2] & mask;
 484             int b3 = a[i+3] & mask;
 485             int b4 = a[i+4] & mask;
 486             int b5 = a[i+5] & mask;
 487 
 488             b[i+0] = b0;
 489             b[i+1] = b1;
 490             b[i+2] = b2;
 491             b[i+3] = b3;
 492 
 493             b[i+6] = b4;
 494             b[i+7] = b5;
 495         }
 496         return new Object[]{ a, b };
 497     }
 498 
 499     @Test
 500     @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
 501                   IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
 502                   IRNode.AND_VI,        IRNode.VECTOR_SIZE_2, "> 0",
 503                   IRNode.AND_VI,        IRNode.VECTOR_SIZE_4, "> 0",
 504                   IRNode.STORE_VECTOR, "> 0"},
 505         applyIf = {"MaxVectorSize", ">=32"},
 506         applyIfPlatform = {"64-bit", "true"},
 507         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 508     // Split the load
 509     //
 510     //  0 1 - - 4 5 6 7
 511     //  | |    / / / /
 512     //  | |   / / / /
 513     //  | |  / / / /
 514     //  0 1 2 3 4 5 - -
 515     //
 516     static Object[] test2c(int[] a, int[] b, int mask) {
 517         for (int i = 0; i < RANGE; i+=8) {
 518             int b0 = a[i+0] & mask;
 519             int b1 = a[i+1] & mask;
 520 
 521             int b4 = a[i+4] & mask;
 522             int b5 = a[i+5] & mask;
 523             int b6 = a[i+6] & mask;
 524             int b7 = a[i+7] & mask;
 525 
 526             b[i+0] = b0;
 527             b[i+1] = b1;
 528             b[i+2] = b4;
 529             b[i+3] = b5;
 530             b[i+4] = b6;
 531             b[i+5] = b7;
 532         }
 533         return new Object[]{ a, b };
 534     }
 535 
 536     @Test
 537     @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
 538                   IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
 539                   IRNode.AND_VI,        IRNode.VECTOR_SIZE_2, "> 0",
 540                   IRNode.AND_VI,        IRNode.VECTOR_SIZE_4, "> 0",
 541                   IRNode.STORE_VECTOR, "> 0"},
 542         applyIf = {"MaxVectorSize", ">=32"},
 543         applyIfPlatform = {"64-bit", "true"},
 544         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 545     // Split the load
 546     //
 547     //  0 1 2 3 - - 6 7
 548     //  | | | |    / /
 549     //  | | | |   / /
 550     //  | | | |  / /
 551     //  0 1 2 3 4 5 - -
 552     //
 553     static Object[] test2d(int[] a, int[] b, int mask) {
 554         for (int i = 0; i < RANGE; i+=8) {
 555             int b0 = a[i+0] & mask;
 556             int b1 = a[i+1] & mask;
 557             int b2 = a[i+2] & mask;
 558             int b3 = a[i+3] & mask;
 559 
 560             int b6 = a[i+6] & mask;
 561             int b7 = a[i+7] & mask;
 562 
 563             b[i+0] = b0;
 564             b[i+1] = b1;
 565             b[i+2] = b2;
 566             b[i+3] = b3;
 567             b[i+4] = b6;
 568             b[i+5] = b7;
 569         }
 570         return new Object[]{ a, b };
 571     }
 572 
 573     @Test
 574     @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
 575                   IRNode.STORE_VECTOR, "> 0"},
 576         applyIf = {"MaxVectorSize", ">=32"},
 577         applyIfPlatform = {"64-bit", "true"},
 578         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 579     // 0 1 2 3 4 5 6 7 -
 580     // | | | | | | | |
 581     // | + + + | | | |
 582     // |       | | | |
 583     // |     v | | | | v
 584     // |     | | | | | |
 585     // 1 - - 3 4 5 6 7 8
 586     static Object[] test3a(short[] a, short[] b, short val) {
 587         int sum = 0;
 588         for (int i = 0; i < RANGE; i+=16) {
 589             short a0 = a[i+0]; // required for alignment / offsets, technical limitation.
 590 
 591             short a1 = a[i+1]; // adjacent to 4-pack, but need to be split off
 592             short a2 = a[i+2];
 593             short a3 = a[i+3];
 594 
 595             short a4 = a[i+4]; // 4-pack
 596             short a5 = a[i+5];
 597             short a6 = a[i+6];
 598             short a7 = a[i+7];
 599 
 600 
 601             b[i+0] = a0; // required for alignment / offsets, technical limitation.
 602 
 603             sum += a1 + a2 + a3; // not packed
 604 
 605             b[i+3] = val; // adjacent to 4-pack but needs to be split off
 606 
 607             b[i+4] = a4; // 4-pack
 608             b[i+5] = a5;
 609             b[i+6] = a6;
 610             b[i+7] = a7;
 611 
 612             b[i+8] = val; // adjacent to 4-pack but needs to be split off
 613         }
 614         return new Object[]{ a, b, new int[]{ sum } };
 615     }
 616 
 617     @Test
 618     @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_2, "> 0",
 619                   IRNode.STORE_VECTOR, "> 0",
 620                   ".*multiversion.*", "= 0"},
 621         phase = CompilePhase.PRINT_IDEAL,
 622         applyIf = {"UseAutoVectorizationSpeculativeAliasingChecks", "false"},
 623         applyIfPlatform = {"64-bit", "true"},
 624         applyIfCPUFeatureOr = {"sse4.1", "true"})
 625     // Cyclic dependency with distance 2 -> split into 2-packs
 626     @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
 627                   IRNode.STORE_VECTOR, "> 0",
 628                   ".*multiversion.*", "= 0"},
 629         phase = CompilePhase.PRINT_IDEAL,
 630         applyIfAnd = {"UseAutoVectorizationSpeculativeAliasingChecks", "true", "AlignVector", "false"},
 631         applyIfPlatform = {"64-bit", "true"},
 632         applyIfCPUFeatureOr = {"sse4.1", "true"})
 633     // Speculative aliasing check -> full vectorization.
 634     static Object[] test4a(short[] a, short[] b) {
 635         for (int i = 0; i < RANGE-64; i++) {
 636           b[i+2] = a[i+0];
 637         }
 638         return new Object[]{ a, b };
 639     }
 640 
 641     @Test
 642     @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_2, "> 0",
 643                   IRNode.STORE_VECTOR, "> 0",
 644                   ".*multiversion.*", "= 0"},
 645         phase = CompilePhase.PRINT_IDEAL,
 646         applyIfAnd = {"AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "false"},
 647         applyIfPlatform = {"64-bit", "true"},
 648         applyIfCPUFeatureOr = {"sse4.1", "true"})
 649     // Cyclic dependency with distance 3 -> split into 2-packs
 650     @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
 651                   IRNode.STORE_VECTOR, "> 0",
 652                   ".*multiversion.*", "= 0"},
 653         phase = CompilePhase.PRINT_IDEAL,
 654         applyIfAnd = {"AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "true"},
 655         applyIfPlatform = {"64-bit", "true"},
 656         applyIfCPUFeatureOr = {"sse4.1", "true"})
 657     // Speculative aliasing check -> full vectorization.
 658     static Object[] test4b(short[] a, short[] b) {
 659         for (int i = 0; i < RANGE-64; i++) {
 660           b[i+3] = a[i+0];
 661         }
 662         return new Object[]{ a, b };
 663     }
 664 
 665     @Test
 666     @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
 667                   IRNode.STORE_VECTOR, "> 0",
 668                   ".*multiversion.*", "= 0"},
 669         phase = CompilePhase.PRINT_IDEAL,
 670         applyIfAnd = {"MaxVectorSize", ">=8", "UseAutoVectorizationSpeculativeAliasingChecks", "false"},
 671         applyIfPlatform = {"64-bit", "true"},
 672         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 673     // Cyclic dependency with distance 4 -> split into 4-packs
 674     @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
 675                   IRNode.STORE_VECTOR, "> 0",
 676                   ".*multiversion.*", "= 0"},
 677         phase = CompilePhase.PRINT_IDEAL,
 678         applyIfAnd = {"MaxVectorSize", ">=8", "UseAutoVectorizationSpeculativeAliasingChecks", "true"},
 679         applyIfPlatform = {"64-bit", "true"},
 680         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 681     // Speculative aliasing check -> full vectorization.
 682     static Object[] test4c(short[] a, short[] b) {
 683         for (int i = 0; i < RANGE-64; i++) {
 684           b[i+4] = a[i+0];
 685         }
 686         return new Object[]{ a, b };
 687     }
 688 
 689     @Test // Shifted copy b[i+5] = a[i] over short arrays; both @IR rules additionally require AlignVector to be false.
 690     @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
 691                   IRNode.STORE_VECTOR, "> 0",
 692                   ".*multiversion.*", "= 0"},
 693         phase = CompilePhase.PRINT_IDEAL,
 694         applyIfAnd = {"MaxVectorSize", ">=8", "AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "false"},
 695         applyIfPlatform = {"64-bit", "true"},
 696         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 697     // Rule above (speculative aliasing checks off): cyclic dependency with distance 5 -> split into 4-packs; no "multiversion" match expected.
 698     @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
 699                   IRNode.STORE_VECTOR, "> 0",
 700                   ".*multiversion.*", "= 0"},
 701         phase = CompilePhase.PRINT_IDEAL,
 702         applyIfAnd = {"MaxVectorSize", ">=8", "AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "true"},
 703         applyIfPlatform = {"64-bit", "true"},
 704         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 705     // Rule above (speculative aliasing checks on): speculative aliasing check -> full vectorization (no vector size pinned), still no "multiversion" match.
 706     static Object[] test4d(short[] a, short[] b) {
 707         for (int i = 0; i < RANGE-64; i++) { // RANGE is declared outside this chunk
 708           b[i+5] = a[i+0]; // store index is 5 elements ahead of the load index
 709         }
 710         return new Object[]{ a, b }; // return both arrays to the caller
 711     }
 712 
 713     @Test // Shifted copy b[i+6] = a[i] over short arrays; both @IR rules additionally require AlignVector to be false.
 714     @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
 715                   IRNode.STORE_VECTOR, "> 0",
 716                   ".*multiversion.*", "= 0"},
 717         phase = CompilePhase.PRINT_IDEAL,
 718         applyIfAnd = {"MaxVectorSize", ">=8", "AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "false"},
 719         applyIfPlatform = {"64-bit", "true"},
 720         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 721     // Rule above (speculative aliasing checks off): cyclic dependency with distance 6 -> split into 4-packs; no "multiversion" match expected.
 722     @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
 723                   IRNode.STORE_VECTOR, "> 0",
 724                   ".*multiversion.*", "= 0"},
 725         phase = CompilePhase.PRINT_IDEAL,
 726         applyIfAnd = {"MaxVectorSize", ">=8", "AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "true"},
 727         applyIfPlatform = {"64-bit", "true"},
 728         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 729     // Rule above (speculative aliasing checks on): speculative aliasing check -> full vectorization (no vector size pinned), still no "multiversion" match.
 730     static Object[] test4e(short[] a, short[] b) {
 731         for (int i = 0; i < RANGE-64; i++) { // RANGE is declared outside this chunk
 732           b[i+6] = a[i+0]; // store index is 6 elements ahead of the load index
 733         }
 734         return new Object[]{ a, b }; // return both arrays to the caller
 735     }
 736 
 737     @Test // Shifted copy b[i+7] = a[i] over short arrays; both @IR rules additionally require AlignVector to be false.
 738     @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
 739                   IRNode.STORE_VECTOR, "> 0",
 740                   ".*multiversion.*", "= 0"},
 741         phase = CompilePhase.PRINT_IDEAL,
 742         applyIfAnd = {"MaxVectorSize", ">=8", "AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "false"},
 743         applyIfPlatform = {"64-bit", "true"},
 744         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 745     // Rule above (speculative aliasing checks off): cyclic dependency with distance 7 -> split into 4-packs; no "multiversion" match expected.
 746     @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
 747                   IRNode.STORE_VECTOR, "> 0",
 748                   ".*multiversion.*", "= 0"},
 749         phase = CompilePhase.PRINT_IDEAL,
 750         applyIfAnd = {"MaxVectorSize", ">=8", "AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "true"},
 751         applyIfPlatform = {"64-bit", "true"},
 752         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 753     // Rule above (speculative aliasing checks on): speculative aliasing check -> full vectorization (no vector size pinned), still no "multiversion" match.
 754     static Object[] test4f(short[] a, short[] b) {
 755         for (int i = 0; i < RANGE-64; i++) { // RANGE is declared outside this chunk
 756           b[i+7] = a[i+0]; // store index is 7 elements ahead of the load index
 757         }
 758         return new Object[]{ a, b }; // return both arrays to the caller
 759     }
 760 
 761     @Test // Shifted copy b[i+8] = a[i] over short arrays; both @IR rules only apply when MaxVectorSize >= 32.
 762     @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_8, "> 0",
 763                   IRNode.STORE_VECTOR, "> 0",
 764                   ".*multiversion.*", "= 0"},
 765         phase = CompilePhase.PRINT_IDEAL,
 766         applyIfAnd = {"MaxVectorSize", ">=32", "UseAutoVectorizationSpeculativeAliasingChecks", "false"},
 767         applyIfPlatform = {"64-bit", "true"},
 768         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 769     // Rule above (speculative aliasing checks off): cyclic dependency with distance 8 -> split into 8-packs; no "multiversion" match expected.
 770     @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
 771                   IRNode.STORE_VECTOR, "> 0",
 772                   ".*multiversion.*", "= 0"},
 773         phase = CompilePhase.PRINT_IDEAL,
 774         applyIfAnd = {"MaxVectorSize", ">=32", "UseAutoVectorizationSpeculativeAliasingChecks", "true"},
 775         applyIfPlatform = {"64-bit", "true"},
 776         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 777     // Rule above (speculative aliasing checks on): speculative aliasing check -> full vectorization (no vector size pinned), still no "multiversion" match.
 778     static Object[] test4g(short[] a, short[] b) {
 779         for (int i = 0; i < RANGE-64; i++) { // RANGE is declared outside this chunk
 780           b[i+8] = a[i+0]; // store index is 8 elements ahead of the load index
 781         }
 782         return new Object[]{ a, b }; // return both arrays to the caller
 783     }
 784 
 785     @Test // Shifted copy b[i+2] = a[i] over short arrays. NOTE(review): the _alias suffix suggests a and b refer to the same array at the call site - confirm in the setup outside this chunk.
 786     @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_2, "> 0",
 787                   IRNode.STORE_VECTOR, "> 0",
 788                   ".*multiversion.*", "= 0"},
 789         phase = CompilePhase.PRINT_IDEAL,
 790         applyIf = {"UseAutoVectorizationSpeculativeAliasingChecks", "false"},
 791         applyIfPlatform = {"64-bit", "true"},
 792         applyIfCPUFeatureOr = {"sse4.1", "true"})
 793     // Rule above (speculative aliasing checks off): cyclic dependency with distance 2 -> split into 2-packs; no "multiversion" match expected.
 794     @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
 795                   IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_2, "> 0",
 796                   IRNode.STORE_VECTOR, "> 0",
 797                   ".*multiversion.*", "> 0"},
 798         phase = CompilePhase.PRINT_IDEAL,
 799         applyIfAnd = {"UseAutoVectorizationSpeculativeAliasingChecks", "true", "AlignVector", "false"},
 800         applyIfPlatform = {"64-bit", "true"},
 801         applyIfCPUFeatureOr = {"sse4.1", "true"})
 802     // Rule above (speculative aliasing checks on, AlignVector off): speculative aliasing check with multiversioning -> full vectorization & split packs (2-element loads must also appear).
 803     static Object[] test4a_alias(short[] a, short[] b) {
 804         for (int i = 0; i < RANGE-64; i++) { // RANGE is declared outside this chunk
 805           b[i+2] = a[i+0]; // store index is 2 elements ahead of the load index
 806         }
 807         return new Object[]{ a, b }; // return both arrays to the caller
 808     }
 809 
 810     @Test // Shifted copy b[i+3] = a[i] over short arrays. NOTE(review): the _alias suffix suggests a and b refer to the same array at the call site - confirm in the setup outside this chunk.
 811     @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_2, "> 0",
 812                   IRNode.STORE_VECTOR, "> 0",
 813                   ".*multiversion.*", "= 0"},
 814         phase = CompilePhase.PRINT_IDEAL,
 815         applyIfAnd = {"AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "false"},
 816         applyIfPlatform = {"64-bit", "true"},
 817         applyIfCPUFeatureOr = {"sse4.1", "true"})
 818     // Rule above (speculative aliasing checks off): cyclic dependency with distance 3 -> split into 2-packs; no "multiversion" match expected.
 819     @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
 820                   IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_2, "> 0",
 821                   IRNode.STORE_VECTOR, "> 0",
 822                   ".*multiversion.*", "> 0"},
 823         phase = CompilePhase.PRINT_IDEAL,
 824         applyIfAnd = {"AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "true"},
 825         applyIfPlatform = {"64-bit", "true"},
 826         applyIfCPUFeatureOr = {"sse4.1", "true"})
 827     // Rule above (speculative aliasing checks on): speculative aliasing check with multiversioning -> full vectorization & split packs (2-element loads must also appear).
 828     static Object[] test4b_alias(short[] a, short[] b) {
 829         for (int i = 0; i < RANGE-64; i++) { // RANGE is declared outside this chunk
 830           b[i+3] = a[i+0]; // store index is 3 elements ahead of the load index
 831         }
 832         return new Object[]{ a, b }; // return both arrays to the caller
 833     }
 834 
 835     @Test // Shifted copy b[i+4] = a[i] over short arrays. NOTE(review): the _alias suffix suggests a and b refer to the same array at the call site - confirm in the setup outside this chunk.
 836     @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
 837                   IRNode.STORE_VECTOR, "> 0",
 838                   ".*multiversion.*", "= 0"},
 839         phase = CompilePhase.PRINT_IDEAL,
 840         applyIfAnd = {"MaxVectorSize", ">=8", "UseAutoVectorizationSpeculativeAliasingChecks", "false"},
 841         applyIfPlatform = {"64-bit", "true"},
 842         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 843     // Rule above (speculative aliasing checks off): cyclic dependency with distance 4 -> split into 4-packs; no "multiversion" match expected.
 844     @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
 845                   IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
 846                   IRNode.STORE_VECTOR, "> 0",
 847                   ".*multiversion.*", "> 0"},
 848         phase = CompilePhase.PRINT_IDEAL,
 849         applyIfAnd = {"MaxVectorSize", ">=8", "UseAutoVectorizationSpeculativeAliasingChecks", "true"},
 850         applyIfPlatform = {"64-bit", "true"},
 851         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 852     // Rule above (speculative aliasing checks on): speculative aliasing check with multiversioning -> full vectorization & split packs (4-element loads must also appear).
 853     static Object[] test4c_alias(short[] a, short[] b) {
 854         for (int i = 0; i < RANGE-64; i++) { // RANGE is declared outside this chunk
 855           b[i+4] = a[i+0]; // store index is 4 elements ahead of the load index
 856         }
 857         return new Object[]{ a, b }; // return both arrays to the caller
 858     }
 859 
 860     @Test // Shifted copy b[i+5] = a[i] over short arrays. NOTE(review): the _alias suffix suggests a and b refer to the same array at the call site - confirm in the setup outside this chunk.
 861     @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
 862                   IRNode.STORE_VECTOR, "> 0",
 863                   ".*multiversion.*", "= 0"},
 864         phase = CompilePhase.PRINT_IDEAL,
 865         applyIfAnd = {"MaxVectorSize", ">=8", "AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "false"},
 866         applyIfPlatform = {"64-bit", "true"},
 867         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 868     // Rule above (speculative aliasing checks off): cyclic dependency with distance 5 -> split into 4-packs; no "multiversion" match expected.
 869     @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
 870                   IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
 871                   IRNode.STORE_VECTOR, "> 0",
 872                   ".*multiversion.*", "> 0"},
 873         phase = CompilePhase.PRINT_IDEAL,
 874         applyIfAnd = {"MaxVectorSize", ">=8", "AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "true"},
 875         applyIfPlatform = {"64-bit", "true"},
 876         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 877     // Rule above (speculative aliasing checks on): speculative aliasing check with multiversioning -> full vectorization & split packs (4-element loads must also appear).
 878     static Object[] test4d_alias(short[] a, short[] b) {
 879         for (int i = 0; i < RANGE-64; i++) { // RANGE is declared outside this chunk
 880           b[i+5] = a[i+0]; // store index is 5 elements ahead of the load index
 881         }
 882         return new Object[]{ a, b }; // return both arrays to the caller
 883     }
 884 
 885     @Test // Shifted copy b[i+6] = a[i] over short arrays. NOTE(review): the _alias suffix suggests a and b refer to the same array at the call site - confirm in the setup outside this chunk.
 886     @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
 887                   IRNode.STORE_VECTOR, "> 0",
 888                   ".*multiversion.*", "= 0"},
 889         phase = CompilePhase.PRINT_IDEAL,
 890         applyIfAnd = {"MaxVectorSize", ">=8", "AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "false"},
 891         applyIfPlatform = {"64-bit", "true"},
 892         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 893     // Rule above (speculative aliasing checks off): cyclic dependency with distance 6 -> split into 4-packs; no "multiversion" match expected.
 894     @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
 895                   IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
 896                   IRNode.STORE_VECTOR, "> 0",
 897                   ".*multiversion.*", "> 0"},
 898         phase = CompilePhase.PRINT_IDEAL,
 899         applyIfAnd = {"MaxVectorSize", ">=8", "AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "true"},
 900         applyIfPlatform = {"64-bit", "true"},
 901         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 902     // Rule above (speculative aliasing checks on): speculative aliasing check with multiversioning -> full vectorization & split packs (4-element loads must also appear).
 903     static Object[] test4e_alias(short[] a, short[] b) {
 904         for (int i = 0; i < RANGE-64; i++) { // RANGE is declared outside this chunk
 905           b[i+6] = a[i+0]; // store index is 6 elements ahead of the load index
 906         }
 907         return new Object[]{ a, b }; // return both arrays to the caller
 908     }
 909 
 910     @Test // Shifted copy b[i+7] = a[i] over short arrays. NOTE(review): the _alias suffix suggests a and b refer to the same array at the call site - confirm in the setup outside this chunk.
 911     @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
 912                   IRNode.STORE_VECTOR, "> 0",
 913                   ".*multiversion.*", "= 0"},
 914         phase = CompilePhase.PRINT_IDEAL,
 915         applyIfAnd = {"MaxVectorSize", ">=8", "AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "false"},
 916         applyIfPlatform = {"64-bit", "true"},
 917         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 918     // Rule above (speculative aliasing checks off): cyclic dependency with distance 7 -> split into 4-packs; no "multiversion" match expected.
 919     @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
 920                   IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
 921                   IRNode.STORE_VECTOR, "> 0",
 922                   ".*multiversion.*", "> 0"},
 923         phase = CompilePhase.PRINT_IDEAL,
 924         applyIfAnd = {"MaxVectorSize", ">=8", "AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "true"},
 925         applyIfPlatform = {"64-bit", "true"},
 926         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 927     // Rule above (speculative aliasing checks on): speculative aliasing check with multiversioning -> full vectorization & split packs (4-element loads must also appear).
 928     static Object[] test4f_alias(short[] a, short[] b) {
 929         for (int i = 0; i < RANGE-64; i++) { // RANGE is declared outside this chunk
 930           b[i+7] = a[i+0]; // store index is 7 elements ahead of the load index
 931         }
 932         return new Object[]{ a, b }; // return both arrays to the caller
 933     }
 934 
 935     @Test // Shifted copy b[i+8] = a[i] over short arrays. NOTE(review): the _alias suffix suggests a and b refer to the same array at the call site - confirm in the setup outside this chunk.
 936     @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_8, "> 0",
 937                   IRNode.STORE_VECTOR, "> 0",
 938                   ".*multiversion.*", "= 0"},
 939         phase = CompilePhase.PRINT_IDEAL,
 940         applyIfAnd = {"MaxVectorSize", ">=32", "UseAutoVectorizationSpeculativeAliasingChecks", "false"},
 941         applyIfPlatform = {"64-bit", "true"},
 942         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 943     // Rule above (speculative aliasing checks off): cyclic dependency with distance 8 -> split into 8-packs; no "multiversion" match expected.
 944     @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
 945                   IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_8, "> 0",
 946                   IRNode.STORE_VECTOR, "> 0",
 947                   ".*multiversion.*", "> 0"},
 948         phase = CompilePhase.PRINT_IDEAL,
 949         applyIfAnd = {"MaxVectorSize", ">=32", "UseAutoVectorizationSpeculativeAliasingChecks", "true"},
 950         applyIfPlatform = {"64-bit", "true"},
 951         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"}) // same CPU gate as the rule above, so the flag-on case is checked wherever the flag-off case is
 952     // Rule above (speculative aliasing checks on): speculative aliasing check with multiversioning -> full vectorization & split packs (8-element loads must also appear).
 953     static Object[] test4g_alias(short[] a, short[] b) {
 954         for (int i = 0; i < RANGE-64; i++) { // RANGE is declared outside this chunk
 955           b[i+8] = a[i+0]; // store index is 8 elements ahead of the load index
 956         }
 957         return new Object[]{ a, b }; // return both arrays to the caller
 958     }
 959 
 960     @Test // Adds val to 15 of every 16 consecutive shorts of a and stores into b; the @IR rules expect short vectors of several power-of-2 sizes.
 961     @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_2, "> 0",
 962                   IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
 963                   IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_8, "> 0",
 964                   IRNode.ADD_VS,        IRNode.VECTOR_SIZE_2, "> 0",
 965                   IRNode.ADD_VS,        IRNode.VECTOR_SIZE_8, "> 0",
 966                   IRNode.ADD_VS,        IRNode.VECTOR_SIZE_4, "> 0",
 967                   IRNode.STORE_VECTOR, "> 0"},
 968         applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"},
 969         applyIfPlatform = {"64-bit", "true"},
 970         applyIfCPUFeature = {"sse4.1", "true"})
 971     // Rule below (sve): aarch64 limits minimum vector size to 8B, thus a vector
 972     // of length 2 for type "short" will not be generated, so no size-2 counts are listed.
 973     @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
 974                   IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_8, "> 0",
 975                   IRNode.ADD_VS,        IRNode.VECTOR_SIZE_8, "> 0",
 976                   IRNode.ADD_VS,        IRNode.VECTOR_SIZE_4, "> 0",
 977                   IRNode.STORE_VECTOR, "> 0"},
 978         applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"},
 979         applyIfPlatform = {"64-bit", "true"},
 980         applyIfCPUFeature = {"sve", "true"})
 981     // Split pack into power-of-2 sizes: 15 adjacent elements -> 8 + 4 + 2 (+ 1 remaining element).
 982     static Object[] test5a(short[] a, short[] b, short val) {
 983         for (int i = 0; i < RANGE; i+=16) { // stride 16; RANGE is declared outside this chunk
 984             b[i+ 0] = (short)(a[i+ 0] + val); // 8-pack
 985             b[i+ 1] = (short)(a[i+ 1] + val);
 986             b[i+ 2] = (short)(a[i+ 2] + val);
 987             b[i+ 3] = (short)(a[i+ 3] + val);
 988             b[i+ 4] = (short)(a[i+ 4] + val);
 989             b[i+ 5] = (short)(a[i+ 5] + val);
 990             b[i+ 6] = (short)(a[i+ 6] + val);
 991             b[i+ 7] = (short)(a[i+ 7] + val);
 992
 993             b[i+ 8] = (short)(a[i+ 8] + val); // 4-pack
 994             b[i+ 9] = (short)(a[i+ 9] + val);
 995             b[i+10] = (short)(a[i+10] + val);
 996             b[i+11] = (short)(a[i+11] + val);
 997
 998             b[i+12] = (short)(a[i+12] + val); // 2-pack
 999             b[i+13] = (short)(a[i+13] + val);
1000
1001             b[i+14] = (short)(a[i+14] + val); // 15th element of the group; index i+15 is never accessed
1002         }
1003         return new Object[]{ a, b }; // return both arrays to the caller
1004     }
1005 
1006     @Test // Sums four products and four bitwise ANDs per 8-element step into one int accumulator.
1007     @IR(counts = {IRNode.LOAD_VECTOR_I,   IRNode.VECTOR_SIZE_4, "> 0",
1008                   IRNode.MUL_VI,          IRNode.VECTOR_SIZE_4, "> 0",
1009                   IRNode.AND_VI,          IRNode.VECTOR_SIZE_4, "> 0",
1010                   IRNode.ADD_VI,          IRNode.VECTOR_SIZE_4, "> 0", // reduction moved out of loop
1011                   IRNode.ADD_REDUCTION_V,                       "> 0"},
1012         applyIf = {"MaxVectorSize", ">=32"},
1013         applyIfPlatform = {"64-bit", "true"},
1014         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
1015     // Split packs including reductions: the rule above expects 4-element int vectors for the loads, the multiplies, the ANDs and the adds.
1016     static Object[] test6a(int[] a, int[] b) {
1017         int s = 0; // running sum that all eight statements below add into
1018         for (int i = 0; i < RANGE; i+=8) { // stride 8; RANGE is declared outside this chunk
1019             s += a[i+0] * b[i+0]; // elements 0..3 contribute products
1020             s += a[i+1] * b[i+1];
1021             s += a[i+2] * b[i+2];
1022             s += a[i+3] * b[i+3];
1023
1024             s += a[i+4] & b[i+4]; // elements 4..7 contribute bitwise ANDs
1025             s += a[i+5] & b[i+5];
1026             s += a[i+6] & b[i+6];
1027             s += a[i+7] & b[i+7];
1028         }
1029         return new Object[]{ a, b, new int[]{ s } }; // the scalar sum is wrapped in a one-element int array next to the inputs
1030     }
1031 
1032     @Test // Stores b[i] * i into a[i]; the @IR rule expects vector int loads, vector int multiplies and a populate-index node.
1033     @IR(counts = {IRNode.LOAD_VECTOR_I,  "> 0",
1034                   IRNode.MUL_VI,         "> 0",
1035                   IRNode.POPULATE_INDEX, "> 0"},
1036         applyIfPlatform = {"64-bit", "true"},
1037         applyIfCPUFeatureOr = {"avx2", "true", "sve", "true", "rvv", "true"})
1038     // Index Populate:
1039     // There can be an issue when all the (iv + 1), (iv + 2), ...
1040     // get packed, but not (iv). Then we have a pack that is one element
1041     // too short, and we start splitting everything in a bad way.
1042     static Object[] test7a(int[] a, int[] b) {
1043         for (int i = 0; i < RANGE; i++) { // RANGE is declared outside this chunk
1044             a[i] = b[i] * i; // the loop index itself is a multiplicand
1045         }
1046         return new Object[]{ a, b }; // return both arrays to the caller
1047     }
1048 }