1 /*
   2  * Copyright (c) 2024, 2025, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  */
  23 
  24 package compiler.loopopts.superword;
  25 
  26 import compiler.lib.ir_framework.*;
  27 import jdk.test.lib.Utils;
  28 import jdk.test.whitebox.WhiteBox;
  29 import java.lang.reflect.Array;
  30 import java.util.Map;
  31 import java.util.HashMap;
  32 import java.util.Random;
  33 import java.nio.ByteOrder;
  34 
  35 /*
  36  * @test
  37  * @bug 8326139 8348659
  38  * @summary Test splitting packs in SuperWord
  39  * @library /test/lib /
  40  * @run driver compiler.loopopts.superword.TestSplitPacks nCOH_nAV_ySAC
  41  * @run driver compiler.loopopts.superword.TestSplitPacks nCOH_yAV_ySAC
  42  * @run driver compiler.loopopts.superword.TestSplitPacks yCOH_nAV_ySAC
  43  * @run driver compiler.loopopts.superword.TestSplitPacks yCOH_yAV_ySAC
  44  * @run driver compiler.loopopts.superword.TestSplitPacks nCOH_nAV_nSAC
  45  * @run driver compiler.loopopts.superword.TestSplitPacks nCOH_yAV_nSAC
  46  * @run driver compiler.loopopts.superword.TestSplitPacks yCOH_nAV_nSAC
  47  * @run driver compiler.loopopts.superword.TestSplitPacks yCOH_yAV_nSAC
  48  */
  49 
  50 public class TestSplitPacks {
  51     static int RANGE = 1024*8;
  52     static int RANGE_FINAL = 1024*8;
  53     private static final Random RANDOM = Utils.getRandomInstance();
  54 
  55     // Inputs
  56     byte[] aB;
  57     byte[] bB;
  58     byte mB = (byte)31;
  59     short[] aS;
  60     short[] bS;
  61     short mS = (short)0xF0F0;
  62     int[] aI;
  63     int[] bI;
  64     int mI = 0xF0F0F0F0;
  65     long[] aL;
  66     long[] bL;
  67     long mL = 0xF0F0F0F0F0F0F0F0L;
  68 
  69     // List of tests
  70     Map<String,TestFunction> tests = new HashMap<String,TestFunction>();
  71 
  72     // List of gold, the results from the first run before compilation
  73     Map<String,Object[]> golds = new HashMap<String,Object[]>();
  74 
  75     interface TestFunction {
  76         Object[] run();
  77     }
  78 
  79     public static void main(String[] args) {
  80         TestFramework framework = new TestFramework(TestSplitPacks.class);
  81         framework.addFlags("-XX:+IgnoreUnrecognizedVMOptions", "-XX:LoopUnrollLimit=1000");
  82         switch (args[0]) {
  83             case "nCOH_nAV_ySAC" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:-UseCompactObjectHeaders", "-XX:-AlignVector", "-XX:+UseAutoVectorizationSpeculativeAliasingChecks"); }
  84             case "nCOH_yAV_ySAC" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:-UseCompactObjectHeaders", "-XX:+AlignVector", "-XX:+UseAutoVectorizationSpeculativeAliasingChecks"); }
  85             case "yCOH_nAV_ySAC" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:+UseCompactObjectHeaders", "-XX:-AlignVector", "-XX:+UseAutoVectorizationSpeculativeAliasingChecks"); }
  86             case "yCOH_yAV_ySAC" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:+UseCompactObjectHeaders", "-XX:+AlignVector", "-XX:+UseAutoVectorizationSpeculativeAliasingChecks"); }
  87             case "nCOH_nAV_nSAC" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:-UseCompactObjectHeaders", "-XX:-AlignVector", "-XX:-UseAutoVectorizationSpeculativeAliasingChecks"); }
  88             case "nCOH_yAV_nSAC" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:-UseCompactObjectHeaders", "-XX:+AlignVector", "-XX:-UseAutoVectorizationSpeculativeAliasingChecks"); }
  89             case "yCOH_nAV_nSAC" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:+UseCompactObjectHeaders", "-XX:-AlignVector", "-XX:-UseAutoVectorizationSpeculativeAliasingChecks"); }
  90             case "yCOH_yAV_nSAC" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:+UseCompactObjectHeaders", "-XX:+AlignVector", "-XX:-UseAutoVectorizationSpeculativeAliasingChecks"); }
  91             default -> { throw new RuntimeException("Test argument not recognized: " + args[0]); }
  92         };
  93         framework.start();
  94     }
  95 
  96     public TestSplitPacks() {
  97         // Generate input once
  98         aB = generateB();
  99         bB = generateB();
 100         aS = generateS();
 101         bS = generateS();
 102         aI = generateI();
 103         bI = generateI();
 104         aL = generateL();
 105         bL = generateL();
 106 
 107         // Add all tests to list
 108         tests.put("test0",       () -> { return test0(aI.clone(), bI.clone(), mI); });
 109         tests.put("test1a",      () -> { return test1a(aI.clone(), bI.clone(), mI); });
 110         tests.put("test1b",      () -> { return test1b(aI.clone(), bI.clone(), mI); });
 111         tests.put("test1c",      () -> { return test1c(aI.clone(), bI.clone(), mI); });
 112         tests.put("test1d",      () -> { return test1d(aI.clone(), bI.clone(), mI); });
 113         tests.put("test2a",      () -> { return test2a(aI.clone(), bI.clone(), mI); });
 114         tests.put("test2b",      () -> { return test2b(aI.clone(), bI.clone(), mI); });
 115         tests.put("test2c",      () -> { return test2c(aI.clone(), bI.clone(), mI); });
 116         tests.put("test2d",      () -> { return test2d(aI.clone(), bI.clone(), mI); });
 117         tests.put("test3a",      () -> { return test3a(aS.clone(), bS.clone(), mS); });
 118         tests.put("test4a",      () -> { return test4a(aS.clone(), bS.clone()); });
 119         tests.put("test4b",      () -> { return test4b(aS.clone(), bS.clone()); });
 120         tests.put("test4c",      () -> { return test4c(aS.clone(), bS.clone()); });
 121         tests.put("test4d",      () -> { return test4d(aS.clone(), bS.clone()); });
 122         tests.put("test4e",      () -> { return test4e(aS.clone(), bS.clone()); });
 123         tests.put("test4f",      () -> { return test4f(aS.clone(), bS.clone()); });
 124         tests.put("test4g",      () -> { return test4g(aS.clone(), bS.clone()); });
 125         tests.put("test4a_alias",() -> { short[] x = aS.clone(); return test4a_alias(x, x); });
 126         tests.put("test4b_alias",() -> { short[] x = aS.clone(); return test4b_alias(x, x); });
 127         tests.put("test4c_alias",() -> { short[] x = aS.clone(); return test4c_alias(x, x); });
 128         tests.put("test4d_alias",() -> { short[] x = aS.clone(); return test4d_alias(x, x); });
 129         tests.put("test4e_alias",() -> { short[] x = aS.clone(); return test4e_alias(x, x); });
 130         tests.put("test4f_alias",() -> { short[] x = aS.clone(); return test4f_alias(x, x); });
 131         tests.put("test4g_alias",() -> { short[] x = aS.clone(); return test4g_alias(x, x); });
 132         tests.put("test5a",      () -> { return test5a(aS.clone(), bS.clone(), mS); });
 133         tests.put("test6a",      () -> { return test6a(aI.clone(), bI.clone()); });
 134         tests.put("test7a",      () -> { return test7a(aI.clone(), bI.clone()); });
 135 
 136         // Compute gold value for all test methods before compilation
 137         for (Map.Entry<String,TestFunction> entry : tests.entrySet()) {
 138             String name = entry.getKey();
 139             TestFunction test = entry.getValue();
 140             Object[] gold = test.run();
 141             golds.put(name, gold);
 142         }
 143     }
 144 
 145     @Warmup(100)
 146     @Run(test = {"test0",
 147                  "test1a",
 148                  "test1b",
 149                  "test1c",
 150                  "test1d",
 151                  "test2a",
 152                  "test2b",
 153                  "test2c",
 154                  "test2d",
 155                  "test3a",
 156                  "test4a",
 157                  "test4b",
 158                  "test4c",
 159                  "test4d",
 160                  "test4e",
 161                  "test4f",
 162                  "test4g",
 163                  "test4a_alias",
 164                  "test4b_alias",
 165                  "test4c_alias",
 166                  "test4d_alias",
 167                  "test4e_alias",
 168                  "test4f_alias",
 169                  "test4g_alias",
 170                  "test5a",
 171                  "test6a",
 172                  "test7a"})
 173     public void runTests() {
 174         for (Map.Entry<String,TestFunction> entry : tests.entrySet()) {
 175             String name = entry.getKey();
 176             TestFunction test = entry.getValue();
 177             // Recall gold value from before compilation
 178             Object[] gold = golds.get(name);
 179             // Compute new result
 180             Object[] result = test.run();
 181             // Compare gold and new result
 182             verify(name, gold, result);
 183         }
 184     }
 185 
 186     static byte[] generateB() {
 187         byte[] a = new byte[RANGE];
 188         for (int i = 0; i < a.length; i++) {
 189             a[i] = (byte)RANDOM.nextInt();
 190         }
 191         return a;
 192     }
 193 
 194     static short[] generateS() {
 195         short[] a = new short[RANGE];
 196         for (int i = 0; i < a.length; i++) {
 197             a[i] = (short)RANDOM.nextInt();
 198         }
 199         return a;
 200     }
 201 
 202     static int[] generateI() {
 203         int[] a = new int[RANGE];
 204         for (int i = 0; i < a.length; i++) {
 205             a[i] = RANDOM.nextInt();
 206         }
 207         return a;
 208     }
 209 
 210     static long[] generateL() {
 211         long[] a = new long[RANGE];
 212         for (int i = 0; i < a.length; i++) {
 213             a[i] = RANDOM.nextLong();
 214         }
 215         return a;
 216     }
 217 
 218     static void verify(String name, Object[] gold, Object[] result) {
 219         if (gold.length != result.length) {
 220             throw new RuntimeException("verify " + name + ": not the same number of outputs: gold.length = " +
 221                                        gold.length + ", result.length = " + result.length);
 222         }
 223         for (int i = 0; i < gold.length; i++) {
 224             Object g = gold[i];
 225             Object r = result[i];
 226             if (g.getClass() != r.getClass() || !g.getClass().isArray() || !r.getClass().isArray()) {
 227                 throw new RuntimeException("verify " + name + ": must both be array of same type:" +
 228                                            " gold[" + i + "].getClass() = " + g.getClass().getSimpleName() +
 229                                            " result[" + i + "].getClass() = " + r.getClass().getSimpleName());
 230             }
 231             if (g == r) {
 232                 throw new RuntimeException("verify " + name + ": should be two separate arrays (with identical content):" +
 233                                            " gold[" + i + "] == result[" + i + "]");
 234             }
 235             if (Array.getLength(g) != Array.getLength(r)) {
 236                     throw new RuntimeException("verify " + name + ": arrays must have same length:" +
 237                                            " gold[" + i + "].length = " + Array.getLength(g) +
 238                                            " result[" + i + "].length = " + Array.getLength(r));
 239             }
 240             Class c = g.getClass().getComponentType();
 241             if (c == byte.class) {
 242                 verifyB(name, i, (byte[])g, (byte[])r);
 243             } else if (c == short.class) {
 244                 verifyS(name, i, (short[])g, (short[])r);
 245             } else if (c == int.class) {
 246                 verifyI(name, i, (int[])g, (int[])r);
 247             } else if (c == long.class) {
 248                 verifyL(name, i, (long[])g, (long[])r);
 249             } else {
 250                 throw new RuntimeException("verify " + name + ": array type not supported for verify:" +
 251                                        " gold[" + i + "].getClass() = " + g.getClass().getSimpleName() +
 252                                        " result[" + i + "].getClass() = " + r.getClass().getSimpleName());
 253             }
 254         }
 255     }
 256 
 257     static void verifyB(String name, int i, byte[] g, byte[] r) {
 258         for (int j = 0; j < g.length; j++) {
 259             if (g[j] != r[j]) {
 260                 throw new RuntimeException("verify " + name + ": arrays must have same content:" +
 261                                            " gold[" + i + "][" + j + "] = " + g[j] +
 262                                            " result[" + i + "][" + j + "] = " + r[j]);
 263             }
 264         }
 265     }
 266 
 267     static void verifyS(String name, int i, short[] g, short[] r) {
 268         for (int j = 0; j < g.length; j++) {
 269             if (g[j] != r[j]) {
 270                 throw new RuntimeException("verify " + name + ": arrays must have same content:" +
 271                                            " gold[" + i + "][" + j + "] = " + g[j] +
 272                                            " result[" + i + "][" + j + "] = " + r[j]);
 273             }
 274         }
 275     }
 276 
 277     static void verifyI(String name, int i, int[] g, int[] r) {
 278         for (int j = 0; j < g.length; j++) {
 279             if (g[j] != r[j]) {
 280                 throw new RuntimeException("verify " + name + ": arrays must have same content:" +
 281                                            " gold[" + i + "][" + j + "] = " + g[j] +
 282                                            " result[" + i + "][" + j + "] = " + r[j]);
 283             }
 284         }
 285     }
 286 
 287     static void verifyL(String name, int i, long[] g, long[] r) {
 288         for (int j = 0; j < g.length; j++) {
 289             if (g[j] != r[j]) {
 290                 throw new RuntimeException("verify " + name + ": arrays must have same content:" +
 291                                            " gold[" + i + "][" + j + "] = " + g[j] +
 292                                            " result[" + i + "][" + j + "] = " + r[j]);
 293             }
 294         }
 295     }
 296 
 297     @Test
 298     @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
 299                   IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
 300                   IRNode.AND_VI,        IRNode.VECTOR_SIZE_2, "> 0",
 301                   IRNode.AND_VI,        IRNode.VECTOR_SIZE_4, "> 0",
 302                   IRNode.STORE_VECTOR, "> 0"},
 303         applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"},
 304         applyIfPlatform = {"64-bit", "true"},
 305         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 306     @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
 307                   IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
 308                   IRNode.AND_VI,        IRNode.VECTOR_SIZE_2, "> 0",
 309                   IRNode.AND_VI,        IRNode.VECTOR_SIZE_4, "> 0",
 310                   IRNode.STORE_VECTOR, "> 0"},
 311         applyIfAnd = {"MaxVectorSize", ">=32", "UseCompactObjectHeaders", "false"},
 312         applyIfPlatform = {"64-bit", "true"},
 313         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 314     // Load and store are already split
 315     //
 316     //  0 1 - - 4 5 6 7
 317     //  | |     | | | |
 318     //  0 1 - - 4 5 6 7
         //
         // Per group of 8 ints: a[i+0..1] and a[i+4..7] are ANDed with mask and stored to the same
         // positions of b; positions 2 and 3 are neither read nor written. Returns { a, b }.
         //
         // Both IR rules carry the same counts (2- and 4-element int loads and ANDs, plus vector
         // stores). The first applies when AlignVector is false, the second when
         // UseCompactObjectHeaders is false; each also requires MaxVectorSize >= 32, a 64-bit
         // platform and one of the listed CPU features.
 319     static Object[] test0(int[] a, int[] b, int mask) {
 320         for (int i = 0; i < RANGE; i+=8) {
             // All loads are done first ...
 321             int b0 = a[i+0] & mask;
 322             int b1 = a[i+1] & mask;
 323 
 324             int b4 = a[i+4] & mask;
 325             int b5 = a[i+5] & mask;
 326             int b6 = a[i+6] & mask;
 327             int b7 = a[i+7] & mask;
 328 
             // ... followed by all stores, in the same 2 + 4 grouping.
 329             b[i+0] = b0;
 330             b[i+1] = b1;
 331 
 332             b[i+4] = b4;
 333             b[i+5] = b5;
 334             b[i+6] = b6;
 335             b[i+7] = b7;
 336             // With AlignVector, we need 8-byte alignment of vector loads/stores.
 337             // UseCompactObjectHeaders=false                 UseCompactObjectHeaders=true
 338             // adr = base + 16 + 32*i  ->  always            adr = base + 12 + 32*i  ->  never
 339             // -> vectorize                                  -> no vectorization
 340         }
 341         return new Object[]{ a, b };
 342     }
 343 
 344     @Test
 345     @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
 346                   IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
 347                   IRNode.ADD_VI,        IRNode.VECTOR_SIZE_4, "> 0",
 348                   IRNode.MUL_VI,        IRNode.VECTOR_SIZE_2, "> 0",
 349                   IRNode.STORE_VECTOR, "> 0"},
 350         applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"},
 351         applyIfPlatform = {"64-bit", "true"},
 352         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 353     @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
 354                   IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
 355                   IRNode.ADD_VI,        IRNode.VECTOR_SIZE_4, "> 0",
 356                   IRNode.MUL_VI,        IRNode.VECTOR_SIZE_2, "> 0",
 357                   IRNode.STORE_VECTOR, "> 0"},
 358         applyIfAnd = {"MaxVectorSize", ">=32", "UseCompactObjectHeaders", "false"},
 359         applyIfPlatform = {"64-bit", "true"},
 360         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 361     // Adjacent Load and Store, but split by Add/Mul
         //
         // Per group of 8 ints: b[i+0..3] = a[i+0..3] + mask and b[i+4..5] = a[i+4..5] * mask;
         // positions 6 and 7 are not touched. The loads and stores of positions 0..5 are adjacent,
         // but the arithmetic differs, so the IR rules expect a 4-element ADD_VI and a 2-element
         // MUL_VI together with 2- and 4-element int loads. Returns { a, b }.
         //
         // The first IR rule applies when AlignVector is false, the second when
         // UseCompactObjectHeaders is false.
 362     static Object[] test1a(int[] a, int[] b, int mask) {
 363         for (int i = 0; i < RANGE; i+=8) {
 364             b[i+0] = a[i+0] + mask; // Add
 365             b[i+1] = a[i+1] + mask;
 366             b[i+2] = a[i+2] + mask;
 367             b[i+3] = a[i+3] + mask;
 368 
 369             b[i+4] = a[i+4] * mask; // Mul
 370             b[i+5] = a[i+5] * mask;
 371             // With AlignVector, we need 8-byte alignment of vector loads/stores.
 372             // UseCompactObjectHeaders=false                 UseCompactObjectHeaders=true
 373             // adr = base + 16 + 32*i  ->  always            adr = base + 12 + 32*i  ->  never
 374             // -> vectorize                                  -> no vectorization
 375         }
 376         return new Object[]{ a, b };
 377     }
 378 
 379     @Test
 380     @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
 381                   IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
 382                   IRNode.ADD_VI,        IRNode.VECTOR_SIZE_2, "> 0",
 383                   IRNode.MUL_VI,        IRNode.VECTOR_SIZE_4, "> 0",
 384                   IRNode.STORE_VECTOR, "> 0"},
 385         applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"},
 386         applyIfPlatform = {"64-bit", "true"},
 387         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 388     @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
 389                   IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
 390                   IRNode.ADD_VI,        IRNode.VECTOR_SIZE_2, "> 0",
 391                   IRNode.MUL_VI,        IRNode.VECTOR_SIZE_4, "> 0",
 392                   IRNode.STORE_VECTOR, "> 0"},
 393         applyIfAnd = {"MaxVectorSize", ">=32", "UseCompactObjectHeaders", "false"},
 394         applyIfPlatform = {"64-bit", "true"},
 395         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 396     // Adjacent Load and Store, but split by Add/Mul
         //
         // Per group of 8 ints: b[i+0..3] = a[i+0..3] * mask and b[i+4..5] = a[i+4..5] + mask;
         // positions 6 and 7 are not touched. Here the multiplication covers the first four
         // elements and the addition the next two, so the IR rules expect a 4-element MUL_VI and a
         // 2-element ADD_VI together with 2- and 4-element int loads. Returns { a, b }.
         //
         // The first IR rule applies when AlignVector is false, the second when
         // UseCompactObjectHeaders is false.
 397     static Object[] test1b(int[] a, int[] b, int mask) {
 398         for (int i = 0; i < RANGE; i+=8) {
 399             b[i+0] = a[i+0] * mask; // Mul
 400             b[i+1] = a[i+1] * mask;
 401             b[i+2] = a[i+2] * mask;
 402             b[i+3] = a[i+3] * mask;
 403 
 404             b[i+4] = a[i+4] + mask; // Add
 405             b[i+5] = a[i+5] + mask;
 406             // With AlignVector, we need 8-byte alignment of vector loads/stores.
 407             // UseCompactObjectHeaders=false                 UseCompactObjectHeaders=true
 408             // adr = base + 16 + 32*i  ->  always            adr = base + 12 + 32*i  ->  never
 409             // -> vectorize                                  -> no vectorization
 410         }
 411         return new Object[]{ a, b };
 412     }
 413 
 414     @Test
 415     @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
 416                   IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
 417                   IRNode.ADD_VI,        IRNode.VECTOR_SIZE_2, "> 0",
 418                   IRNode.MUL_VI,        IRNode.VECTOR_SIZE_4, "> 0",
 419                   IRNode.STORE_VECTOR, "> 0"},
 420         applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"},
 421         applyIfPlatform = {"64-bit", "true"},
 422         applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
 423     @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
 424                   IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
 425                   IRNode.ADD_VI,        IRNode.VECTOR_SIZE_2, "> 0",
 426                   IRNode.MUL_VI,        IRNode.VECTOR_SIZE_4, "> 0",
 427                   IRNode.STORE_VECTOR, "> 0"},
 428         applyIfAnd = {"MaxVectorSize", ">=32", "UseCompactObjectHeaders", "false"},
 429         applyIfPlatform = {"64-bit", "true"},
 430         applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
 431     // Adjacent Load and Store, but split by Add/Mul
         //
         // Per group of 8 ints: b[i+0..1] = a[i+0..1] + mask and b[i+2..5] = a[i+2..5] * mask;
         // positions 6 and 7 are not touched. The short (2-element) operation comes first here,
         // so the 4-element MUL_VI group starts at offset 2. The IR rules expect a 2-element
         // ADD_VI and a 4-element MUL_VI together with 2- and 4-element int loads.
         // Returns { a, b }.
         //
         // The first IR rule applies when AlignVector is false, the second when
         // UseCompactObjectHeaders is false. Note that the CPU feature list of both rules names
         // avx2 (alongside asimd and rvv).
 432     static Object[] test1c(int[] a, int[] b, int mask) {
 433         for (int i = 0; i < RANGE; i+=8) {
 434             b[i+0] = a[i+0] + mask; // Add
 435             b[i+1] = a[i+1] + mask;
 436 
 437             b[i+2] = a[i+2] * mask; // Mul
 438             b[i+3] = a[i+3] * mask;
 439             b[i+4] = a[i+4] * mask;
 440             b[i+5] = a[i+5] * mask;
 441             // With AlignVector, we need 8-byte alignment of vector loads/stores.
 442             // UseCompactObjectHeaders=false                 UseCompactObjectHeaders=true
 443             // adr = base + 16 + 32*i  ->  always            adr = base + 12 + 32*i  ->  never
 444             // -> vectorize                                  -> no vectorization
 445         }
 446         return new Object[]{ a, b };
 447     }
 448 
 449     @Test
 450     @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
 451                   IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
 452                   IRNode.ADD_VI,        IRNode.VECTOR_SIZE_4, "> 0",
 453                   IRNode.MUL_VI,        IRNode.VECTOR_SIZE_2, "> 0",
 454                   IRNode.STORE_VECTOR, "> 0"},
 455         applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"},
 456         applyIfPlatform = {"64-bit", "true"},
 457         applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
 458     @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
 459                   IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
 460                   IRNode.ADD_VI,        IRNode.VECTOR_SIZE_4, "> 0",
 461                   IRNode.MUL_VI,        IRNode.VECTOR_SIZE_2, "> 0",
 462                   IRNode.STORE_VECTOR, "> 0"},
 463         applyIfAnd = {"MaxVectorSize", ">=32", "UseCompactObjectHeaders", "false"},
 464         applyIfPlatform = {"64-bit", "true"},
 465         applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
 466     // Adjacent Load and Store, but split by Add/Mul
         //
         // Per group of 8 ints: b[i+0..1] = a[i+0..1] * mask and b[i+2..5] = a[i+2..5] + mask;
         // positions 6 and 7 are not touched. The short (2-element) multiplication comes first,
         // so the 4-element ADD_VI group starts at offset 2. The IR rules expect a 2-element
         // MUL_VI and a 4-element ADD_VI together with 2- and 4-element int loads.
         // Returns { a, b }.
         //
         // The first IR rule applies when AlignVector is false, the second when
         // UseCompactObjectHeaders is false. Note that the CPU feature list of both rules names
         // avx2 (alongside asimd and rvv).
 467     static Object[] test1d(int[] a, int[] b, int mask) {
 468         for (int i = 0; i < RANGE; i+=8) {
 469             b[i+0] = a[i+0] * mask; // Mul
 470             b[i+1] = a[i+1] * mask;
 471 
 472             b[i+2] = a[i+2] + mask; // Add
 473             b[i+3] = a[i+3] + mask;
 474             b[i+4] = a[i+4] + mask;
 475             b[i+5] = a[i+5] + mask;
 476             // With AlignVector, we need 8-byte alignment of vector loads/stores.
 477             // UseCompactObjectHeaders=false                 UseCompactObjectHeaders=true
 478             // adr = base + 16 + 32*i  ->  always            adr = base + 12 + 32*i  ->  never
 479             // -> vectorize                                  -> no vectorization
 480         }
 481         return new Object[]{ a, b };
 482     }
 483 
 484     @Test
 485     @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
 486                   IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
 487                   IRNode.AND_VI,        IRNode.VECTOR_SIZE_2, "> 0",
 488                   IRNode.AND_VI,        IRNode.VECTOR_SIZE_4, "> 0",
 489                   IRNode.STORE_VECTOR, "> 0"},
 490         applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"},
 491         applyIfPlatform = {"64-bit", "true"},
 492         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 493     @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
 494                   IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
 495                   IRNode.AND_VI,        IRNode.VECTOR_SIZE_2, "> 0",
 496                   IRNode.AND_VI,        IRNode.VECTOR_SIZE_4, "> 0",
 497                   IRNode.STORE_VECTOR, "> 0"},
 498         applyIfAnd = {"MaxVectorSize", ">=32", "UseCompactObjectHeaders", "false"},
 499         applyIfPlatform = {"64-bit", "true"},
 500         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 501     // Split the load
 502     //
 503     //  0 1 2 3 4 5 - -
 504     //  | |  \ \ \ \
 505     //  | |   \ \ \ \
 506     //  | |    \ \ \ \
 507     //  0 1 - - 4 5 6 7
 508     //
         // (top row: positions read from a, bottom row: positions written in b)
         //
         // Per group of 8 ints: the six adjacent values a[i+0..5] are ANDed with mask. The first
         // two go to b[i+0..1], the remaining four go to b[i+4..7]; b[i+2] and b[i+3] are not
         // written. The stores therefore form a 2-element and a 4-element group, and the IR rules
         // expect 2- and 4-element int loads and ANDs. Returns { a, b }.
         //
         // The first IR rule applies when AlignVector is false, the second when
         // UseCompactObjectHeaders is false.
 509     static Object[] test2a(int[] a, int[] b, int mask) {
 510         for (int i = 0; i < RANGE; i+=8) {
 511             int b0 = a[i+0] & mask;
 512             int b1 = a[i+1] & mask;
 513             int b2 = a[i+2] & mask;
 514             int b3 = a[i+3] & mask;
 515             int b4 = a[i+4] & mask;
 516             int b5 = a[i+5] & mask;
 517 
 518             b[i+0] = b0;
 519             b[i+1] = b1;
 520 
             // Values loaded from positions 2..5 are stored two positions further to the right.
 521             b[i+4] = b2;
 522             b[i+5] = b3;
 523             b[i+6] = b4;
 524             b[i+7] = b5;
 525             // With AlignVector, we need 8-byte alignment of vector loads/stores.
 526             // UseCompactObjectHeaders=false                 UseCompactObjectHeaders=true
 527             // adr = base + 16 + 32*i  ->  always            adr = base + 12 + 32*i  ->  never
 528             // -> vectorize                                  -> no vectorization
 529         }
 530         return new Object[]{ a, b };
 531     }
 532 
 533     @Test
 534     @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
 535                   IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
 536                   IRNode.AND_VI,        IRNode.VECTOR_SIZE_2, "> 0",
 537                   IRNode.AND_VI,        IRNode.VECTOR_SIZE_4, "> 0",
 538                   IRNode.STORE_VECTOR, "> 0"},
 539         applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"},
 540         applyIfPlatform = {"64-bit", "true"},
 541         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 542     @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
 543                   IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
 544                   IRNode.AND_VI,        IRNode.VECTOR_SIZE_2, "> 0",
 545                   IRNode.AND_VI,        IRNode.VECTOR_SIZE_4, "> 0",
 546                   IRNode.STORE_VECTOR, "> 0"},
 547         applyIfAnd = {"MaxVectorSize", ">=32", "UseCompactObjectHeaders", "false"},
 548         applyIfPlatform = {"64-bit", "true"},
 549         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 550     // Split the load
 551     //
 552     //  0 1 2 3 4 5 - -
 553     //  | | | |  \ \
 554     //  | | | |   \ \
 555     //  | | | |    \ \
 556     //  0 1 2 3 -- 6 7
 557     //
         // (top row: positions read from a, bottom row: positions written in b)
         //
         // Per group of 8 ints: the six adjacent values a[i+0..5] are ANDed with mask. The first
         // four go to b[i+0..3], the last two go to b[i+6..7]; b[i+4] and b[i+5] are not written.
         // The stores therefore form a 4-element and a 2-element group, and the IR rules expect
         // 2- and 4-element int loads and ANDs. Returns { a, b }.
         //
         // The first IR rule applies when AlignVector is false, the second when
         // UseCompactObjectHeaders is false.
 558     static Object[] test2b(int[] a, int[] b, int mask) {
 559         for (int i = 0; i < RANGE; i+=8) {
 560             int b0 = a[i+0] & mask;
 561             int b1 = a[i+1] & mask;
 562             int b2 = a[i+2] & mask;
 563             int b3 = a[i+3] & mask;
 564             int b4 = a[i+4] & mask;
 565             int b5 = a[i+5] & mask;
 566 
 567             b[i+0] = b0;
 568             b[i+1] = b1;
 569             b[i+2] = b2;
 570             b[i+3] = b3;
 571 
             // Values loaded from positions 4..5 are stored two positions further to the right.
 572             b[i+6] = b4;
 573             b[i+7] = b5;
 574             // With AlignVector, we need 8-byte alignment of vector loads/stores.
 575             // UseCompactObjectHeaders=false                 UseCompactObjectHeaders=true
 576             // adr = base + 16 + 32*i  ->  always            adr = base + 12 + 32*i  ->  never
 577             // -> vectorize                                  -> no vectorization
 578         }
 579         return new Object[]{ a, b };
 580     }
 581 
 582     @Test
 583     @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
 584                   IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
 585                   IRNode.AND_VI,        IRNode.VECTOR_SIZE_2, "> 0",
 586                   IRNode.AND_VI,        IRNode.VECTOR_SIZE_4, "> 0",
 587                   IRNode.STORE_VECTOR, "> 0"},
 588         applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"},
 589         applyIfPlatform = {"64-bit", "true"},
 590         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 591     @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
 592                   IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
 593                   IRNode.AND_VI,        IRNode.VECTOR_SIZE_2, "> 0",
 594                   IRNode.AND_VI,        IRNode.VECTOR_SIZE_4, "> 0",
 595                   IRNode.STORE_VECTOR, "> 0"},
 596         applyIfAnd = {"MaxVectorSize", ">=32", "UseCompactObjectHeaders", "false"},
 597         applyIfPlatform = {"64-bit", "true"},
 598         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 599     // Split the load
 600     //
 601     //  0 1 - - 4 5 6 7
 602     //  | |    / / / /
 603     //  | |   / / / /
 604     //  | |  / / / /
 605     //  0 1 2 3 4 5 - -
 606     //
         // (top row: positions read from a, bottom row: positions written in b)
         //
         // Per group of 8 ints: a[i+0..1] and a[i+4..7] are ANDed with mask (a[i+2] and a[i+3] are
         // not read). The results are stored to the six adjacent positions b[i+0..5]; b[i+6] and
         // b[i+7] are not written. The IR rules expect 2- and 4-element int loads and ANDs.
         // Returns { a, b }.
         //
         // NOTE(review): in this variant the loads already come in two separate groups while the
         // stores are contiguous, so the heading "Split the load" may have been carried over from
         // test2a/test2b - confirm whether "Split the store" was meant.
         //
         // The first IR rule applies when AlignVector is false, the second when
         // UseCompactObjectHeaders is false.
 607     static Object[] test2c(int[] a, int[] b, int mask) {
 608         for (int i = 0; i < RANGE; i+=8) {
 609             int b0 = a[i+0] & mask;
 610             int b1 = a[i+1] & mask;
 611 
 612             int b4 = a[i+4] & mask;
 613             int b5 = a[i+5] & mask;
 614             int b6 = a[i+6] & mask;
 615             int b7 = a[i+7] & mask;
 616 
 617             b[i+0] = b0;
 618             b[i+1] = b1;
             // Values loaded from positions 4..7 are stored two positions further to the left.
 619             b[i+2] = b4;
 620             b[i+3] = b5;
 621             b[i+4] = b6;
 622             b[i+5] = b7;
 623             // With AlignVector, we need 8-byte alignment of vector loads/stores.
 624             // UseCompactObjectHeaders=false                 UseCompactObjectHeaders=true
 625             // adr = base + 16 + 32*i  ->  always            adr = base + 12 + 32*i  ->  never
 626             // -> vectorize                                  -> no vectorization
 627         }
 628         return new Object[]{ a, b };
 629     }
 630 
    // test2d: per 8-element stride, masks a[i+0..3] and a[i+6..7] and stores the six
    // results densely into b[i+0..5]. Mirror image of a 2+4 split: here the first
    // group has four elements and the second group has two.
    // Both @IR rules expect int vector loads and AND_VI nodes with 2 and with 4
    // elements, plus at least one vector store. They differ only in the flag
    // condition: the first needs AlignVector=false, the second needs
    // UseCompactObjectHeaders=false (see the alignment note inside the loop).
    // Returns the two arrays; presumably the caller compares them against a
    // reference run - verify against the runner code outside this section.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
                  IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.AND_VI,        IRNode.VECTOR_SIZE_2, "> 0",
                  IRNode.AND_VI,        IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
                  IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.AND_VI,        IRNode.VECTOR_SIZE_2, "> 0",
                  IRNode.AND_VI,        IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfAnd = {"MaxVectorSize", ">=32", "UseCompactObjectHeaders", "false"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    // Split the load
    //
    //  0 1 2 3 - - 6 7
    //  | | | |    / /
    //  | | | |   / /
    //  | | | |  / /
    //  0 1 2 3 4 5 - -
    //
    static Object[] test2d(int[] a, int[] b, int mask) {
        for (int i = 0; i < RANGE; i+=8) {
            // First group: four adjacent loads at the start of the stride.
            int b0 = a[i+0] & mask;
            int b1 = a[i+1] & mask;
            int b2 = a[i+2] & mask;
            int b3 = a[i+3] & mask;

            // Second group: two adjacent loads; a[i+4] and a[i+5] are skipped.
            int b6 = a[i+6] & mask;
            int b7 = a[i+7] & mask;

            // Six adjacent stores; b[i+6] and b[i+7] are left untouched.
            b[i+0] = b0;
            b[i+1] = b1;
            b[i+2] = b2;
            b[i+3] = b3;
            b[i+4] = b6;
            b[i+5] = b7;
            // With AlignVector, we need 8-byte alignment of vector loads/stores.
            // UseCompactObjectHeaders=false                 UseCompactObjectHeaders=true
            // adr = base + 16 + 32*i  ->  always            adr = base + 12 + 32*i  ->  never
            // -> vectorize                                  -> no vectorization
        }
        return new Object[]{ a, b };
    }
 679 
    // test3a: per 16-element stride, loads a[i+0..7]. a[i+0] and a[i+4..7] are copied
    // to the same index in b, a[i+1..3] are only summed into a scalar, and the
    // loop-invariant val is stored to b[i+3] and b[i+8], directly on either side of
    // the copied 4-element run.
    // Both @IR rules expect a short vector load with 4 elements plus at least one
    // vector store. They differ only in the flag condition: the first needs
    // AlignVector=false, the second needs UseCompactObjectHeaders=false.
    // Returns both arrays and the sum wrapped in a one-element int array.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfAnd = {"MaxVectorSize", ">=32", "UseCompactObjectHeaders", "false"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    // Top row: loaded indices of a. Bottom row: stored indices of b.
    // "+" marks loads that only feed the scalar sum, "v" marks stores of val.
    //
    // 0 1 2 3 4 5 6 7 -
    // | | | | | | | |
    // | + + + | | | |
    // |       | | | |
    // |     v | | | | v
    // |     | | | | | |
    // 0 - - 3 4 5 6 7 8
    static Object[] test3a(short[] a, short[] b, short val) {
        int sum = 0;
        for (int i = 0; i < RANGE; i+=16) {
            short a0 = a[i+0]; // required for alignment / offsets, technical limitation.

            short a1 = a[i+1]; // adjacent to 4-pack, but need to be split off
            short a2 = a[i+2];
            short a3 = a[i+3];

            short a4 = a[i+4]; // 4-pack
            short a5 = a[i+5];
            short a6 = a[i+6];
            short a7 = a[i+7];


            b[i+0] = a0; // required for alignment / offsets, technical limitation.

            sum += a1 + a2 + a3; // not packed

            b[i+3] = val; // adjacent to 4-pack but needs to be split off

            b[i+4] = a4; // 4-pack
            b[i+5] = a5;
            b[i+6] = a6;
            b[i+7] = a7;

            b[i+8] = val; // adjacent to 4-pack but needs to be split off

            // With AlignVector, we need 8-byte alignment of vector loads/stores.
            // UseCompactObjectHeaders=false                 UseCompactObjectHeaders=true
            // adr = base + 16 + 8 + 32*i  ->  always        adr = base + 12 + 8 + 32*i  ->  never
            // -> vectorize                                  -> no vectorization
        }
        return new Object[]{ a, b, new int[]{ sum } };
    }
 733 
    // test4a: copies a[i] to b[i+2] for i in [0, RANGE-64), i.e. a shifted copy with
    // store-to-load distance 2 if a and b refer to the same memory.
    // Rule 1 (speculative aliasing checks off): expects 2-element short vector loads
    // and no multiversioning. Only applied on sse4.1 (no asimd/rvv entry).
    // Rule 2 (speculative aliasing checks on, AlignVector off): expects some short
    // vector load, without a size constraint, and still no multiversioning.
    // NOTE(review): "multiversion = 0" in rule 2 suggests the caller passes
    // non-aliasing arrays here, unlike the *_alias variants - confirm at the call site.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_2, "> 0",
                  IRNode.STORE_VECTOR, "> 0",
                  ".*multiversion.*", "= 0"},
        phase = CompilePhase.PRINT_IDEAL,
        applyIf = {"UseAutoVectorizationSpeculativeAliasingChecks", "false"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true"})
    // Cyclic dependency with distance 2 -> split into 2-packs
    @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
                  IRNode.STORE_VECTOR, "> 0",
                  ".*multiversion.*", "= 0"},
        phase = CompilePhase.PRINT_IDEAL,
        applyIfAnd = {"UseAutoVectorizationSpeculativeAliasingChecks", "true", "AlignVector", "false"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true"})
    // Speculative aliasing check -> full vectorization.
    static Object[] test4a(short[] a, short[] b) {
        // Upper bound RANGE-64 keeps the shifted store index i+2 well inside the array.
        for (int i = 0; i < RANGE-64; i++) {
          b[i+2] = a[i+0];
        }
        return new Object[]{ a, b };
    }
 757 
    // test4b: copies a[i] to b[i+3] for i in [0, RANGE-64), i.e. a shifted copy with
    // store-to-load distance 3 if a and b refer to the same memory.
    // Rule 1 (speculative aliasing checks off): expects 2-element short vector loads
    // and no multiversioning. Only applied on sse4.1 (no asimd/rvv entry).
    // Rule 2 (speculative aliasing checks on): expects some short vector load,
    // without a size constraint, and still no multiversioning.
    // Both rules require AlignVector=false.
    // NOTE(review): "multiversion = 0" in rule 2 suggests the caller passes
    // non-aliasing arrays here, unlike the *_alias variants - confirm at the call site.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_2, "> 0",
                  IRNode.STORE_VECTOR, "> 0",
                  ".*multiversion.*", "= 0"},
        phase = CompilePhase.PRINT_IDEAL,
        applyIfAnd = {"AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "false"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true"})
    // Cyclic dependency with distance 3 -> split into 2-packs
    @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
                  IRNode.STORE_VECTOR, "> 0",
                  ".*multiversion.*", "= 0"},
        phase = CompilePhase.PRINT_IDEAL,
        applyIfAnd = {"AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true"})
    // Speculative aliasing check -> full vectorization.
    static Object[] test4b(short[] a, short[] b) {
        // Upper bound RANGE-64 keeps the shifted store index i+3 well inside the array.
        for (int i = 0; i < RANGE-64; i++) {
          b[i+3] = a[i+0];
        }
        return new Object[]{ a, b };
    }
 781 
    // test4c: copies a[i] to b[i+4] for i in [0, RANGE-64), i.e. a shifted copy with
    // store-to-load distance 4 if a and b refer to the same memory.
    // Rule 1 (speculative aliasing checks off): expects 4-element short vector loads
    // and no multiversioning.
    // Rule 2 (speculative aliasing checks on): expects some short vector load,
    // without a size constraint, and still no multiversioning.
    // Both rules require MaxVectorSize >= 8; neither constrains AlignVector.
    // NOTE(review): "multiversion = 0" in rule 2 suggests the caller passes
    // non-aliasing arrays here, unlike the *_alias variants - confirm at the call site.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0",
                  ".*multiversion.*", "= 0"},
        phase = CompilePhase.PRINT_IDEAL,
        applyIfAnd = {"MaxVectorSize", ">=8", "UseAutoVectorizationSpeculativeAliasingChecks", "false"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    // Cyclic dependency with distance 4 -> split into 4-packs
    @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
                  IRNode.STORE_VECTOR, "> 0",
                  ".*multiversion.*", "= 0"},
        phase = CompilePhase.PRINT_IDEAL,
        applyIfAnd = {"MaxVectorSize", ">=8", "UseAutoVectorizationSpeculativeAliasingChecks", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    // Speculative aliasing check -> full vectorization.
    static Object[] test4c(short[] a, short[] b) {
        // Upper bound RANGE-64 keeps the shifted store index i+4 well inside the array.
        for (int i = 0; i < RANGE-64; i++) {
          b[i+4] = a[i+0];
        }
        return new Object[]{ a, b };
    }
 805 
    // test4d: copies a[i] to b[i+5] for i in [0, RANGE-64), i.e. a shifted copy with
    // store-to-load distance 5 if a and b refer to the same memory.
    // Rule 1 (speculative aliasing checks off): expects 4-element short vector loads
    // and no multiversioning.
    // Rule 2 (speculative aliasing checks on): expects some short vector load,
    // without a size constraint, and still no multiversioning.
    // Both rules require MaxVectorSize >= 8 and AlignVector=false.
    // NOTE(review): "multiversion = 0" in rule 2 suggests the caller passes
    // non-aliasing arrays here, unlike the *_alias variants - confirm at the call site.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0",
                  ".*multiversion.*", "= 0"},
        phase = CompilePhase.PRINT_IDEAL,
        applyIfAnd = {"MaxVectorSize", ">=8", "AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "false"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    // Cyclic dependency with distance 5 -> split into 4-packs
    @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
                  IRNode.STORE_VECTOR, "> 0",
                  ".*multiversion.*", "= 0"},
        phase = CompilePhase.PRINT_IDEAL,
        applyIfAnd = {"MaxVectorSize", ">=8", "AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    // Speculative aliasing check -> full vectorization.
    static Object[] test4d(short[] a, short[] b) {
        // Upper bound RANGE-64 keeps the shifted store index i+5 well inside the array.
        for (int i = 0; i < RANGE-64; i++) {
          b[i+5] = a[i+0];
        }
        return new Object[]{ a, b };
    }
 829 
    // test4e: copies a[i] to b[i+6] for i in [0, RANGE-64), i.e. a shifted copy with
    // store-to-load distance 6 if a and b refer to the same memory.
    // Rule 1 (speculative aliasing checks off): expects 4-element short vector loads
    // and no multiversioning.
    // Rule 2 (speculative aliasing checks on): expects some short vector load,
    // without a size constraint, and still no multiversioning.
    // Both rules require MaxVectorSize >= 8 and AlignVector=false.
    // NOTE(review): "multiversion = 0" in rule 2 suggests the caller passes
    // non-aliasing arrays here, unlike the *_alias variants - confirm at the call site.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0",
                  ".*multiversion.*", "= 0"},
        phase = CompilePhase.PRINT_IDEAL,
        applyIfAnd = {"MaxVectorSize", ">=8", "AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "false"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    // Cyclic dependency with distance 6 -> split into 4-packs
    @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
                  IRNode.STORE_VECTOR, "> 0",
                  ".*multiversion.*", "= 0"},
        phase = CompilePhase.PRINT_IDEAL,
        applyIfAnd = {"MaxVectorSize", ">=8", "AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    // Speculative aliasing check -> full vectorization.
    static Object[] test4e(short[] a, short[] b) {
        // Upper bound RANGE-64 keeps the shifted store index i+6 well inside the array.
        for (int i = 0; i < RANGE-64; i++) {
          b[i+6] = a[i+0];
        }
        return new Object[]{ a, b };
    }
 853 
    // test4f: copies a[i] to b[i+7] for i in [0, RANGE-64), i.e. a shifted copy with
    // store-to-load distance 7 if a and b refer to the same memory.
    // Rule 1 (speculative aliasing checks off): expects 4-element short vector loads
    // and no multiversioning.
    // Rule 2 (speculative aliasing checks on): expects some short vector load,
    // without a size constraint, and still no multiversioning.
    // Both rules require MaxVectorSize >= 8 and AlignVector=false.
    // NOTE(review): "multiversion = 0" in rule 2 suggests the caller passes
    // non-aliasing arrays here, unlike the *_alias variants - confirm at the call site.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0",
                  ".*multiversion.*", "= 0"},
        phase = CompilePhase.PRINT_IDEAL,
        applyIfAnd = {"MaxVectorSize", ">=8", "AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "false"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    // Cyclic dependency with distance 7 -> split into 4-packs
    @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
                  IRNode.STORE_VECTOR, "> 0",
                  ".*multiversion.*", "= 0"},
        phase = CompilePhase.PRINT_IDEAL,
        applyIfAnd = {"MaxVectorSize", ">=8", "AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    // Speculative aliasing check -> full vectorization.
    static Object[] test4f(short[] a, short[] b) {
        // Upper bound RANGE-64 keeps the shifted store index i+7 well inside the array.
        for (int i = 0; i < RANGE-64; i++) {
          b[i+7] = a[i+0];
        }
        return new Object[]{ a, b };
    }
 877 
    // test4g: copies a[i] to b[i+8] for i in [0, RANGE-64), i.e. a shifted copy with
    // store-to-load distance 8 if a and b refer to the same memory.
    // Rule 1 (speculative aliasing checks off): expects 8-element short vector loads
    // and no multiversioning.
    // Rule 2 (speculative aliasing checks on): expects some short vector load,
    // without a size constraint, and still no multiversioning.
    // Both rules require MaxVectorSize >= 32; neither constrains AlignVector.
    // NOTE(review): "multiversion = 0" in rule 2 suggests the caller passes
    // non-aliasing arrays here, unlike the *_alias variants - confirm at the call site.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_8, "> 0",
                  IRNode.STORE_VECTOR, "> 0",
                  ".*multiversion.*", "= 0"},
        phase = CompilePhase.PRINT_IDEAL,
        applyIfAnd = {"MaxVectorSize", ">=32", "UseAutoVectorizationSpeculativeAliasingChecks", "false"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    // Cyclic dependency with distance 8 -> split into 8-packs
    @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
                  IRNode.STORE_VECTOR, "> 0",
                  ".*multiversion.*", "= 0"},
        phase = CompilePhase.PRINT_IDEAL,
        applyIfAnd = {"MaxVectorSize", ">=32", "UseAutoVectorizationSpeculativeAliasingChecks", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    // Speculative aliasing check -> full vectorization.
    static Object[] test4g(short[] a, short[] b) {
        // Upper bound RANGE-64 keeps the shifted store index i+8 well inside the array.
        for (int i = 0; i < RANGE-64; i++) {
          b[i+8] = a[i+0];
        }
        return new Object[]{ a, b };
    }
 901 
    // test4a_alias: same loop as test4a (b[i+2] = a[i]), but the rule for enabled
    // speculative aliasing checks expects multiversioning to be present.
    // Rule 1 (checks off): 2-element short vector loads, no multiversioning.
    // Rule 2 (checks on, AlignVector off): some short vector load AND a 2-element
    // short vector load, with at least one "multiversion" match in the ideal graph.
    // Both rules are only applied on sse4.1 (no asimd/rvv entry).
    // NOTE(review): presumably the caller passes overlapping arrays so that the
    // speculative check fails at runtime - confirm at the call site.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_2, "> 0",
                  IRNode.STORE_VECTOR, "> 0",
                  ".*multiversion.*", "= 0"},
        phase = CompilePhase.PRINT_IDEAL,
        applyIf = {"UseAutoVectorizationSpeculativeAliasingChecks", "false"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true"})
    // Cyclic dependency with distance 2 -> split into 2-packs
    @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
                  IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_2, "> 0",
                  IRNode.STORE_VECTOR, "> 0",
                  ".*multiversion.*", "> 0"},
        phase = CompilePhase.PRINT_IDEAL,
        applyIfAnd = {"UseAutoVectorizationSpeculativeAliasingChecks", "true", "AlignVector", "false"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true"})
    // Speculative aliasing check with multiversioning -> full vectorization & split packs.
    static Object[] test4a_alias(short[] a, short[] b) {
        // Upper bound RANGE-64 keeps the shifted store index i+2 well inside the array.
        for (int i = 0; i < RANGE-64; i++) {
          b[i+2] = a[i+0];
        }
        return new Object[]{ a, b };
    }
 926 
    // test4b_alias: same loop as test4b (b[i+3] = a[i]), but the rule for enabled
    // speculative aliasing checks expects multiversioning to be present.
    // Rule 1 (checks off): 2-element short vector loads, no multiversioning.
    // Rule 2 (checks on): some short vector load AND a 2-element short vector load,
    // with at least one "multiversion" match in the ideal graph.
    // Both rules require AlignVector=false and are only applied on sse4.1.
    // NOTE(review): presumably the caller passes overlapping arrays so that the
    // speculative check fails at runtime - confirm at the call site.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_2, "> 0",
                  IRNode.STORE_VECTOR, "> 0",
                  ".*multiversion.*", "= 0"},
        phase = CompilePhase.PRINT_IDEAL,
        applyIfAnd = {"AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "false"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true"})
    // Cyclic dependency with distance 3 -> split into 2-packs
    @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
                  IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_2, "> 0",
                  IRNode.STORE_VECTOR, "> 0",
                  ".*multiversion.*", "> 0"},
        phase = CompilePhase.PRINT_IDEAL,
        applyIfAnd = {"AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true"})
    // Speculative aliasing check with multiversioning -> full vectorization & split packs.
    static Object[] test4b_alias(short[] a, short[] b) {
        // Upper bound RANGE-64 keeps the shifted store index i+3 well inside the array.
        for (int i = 0; i < RANGE-64; i++) {
          b[i+3] = a[i+0];
        }
        return new Object[]{ a, b };
    }
 951 
    // test4c_alias: same loop as test4c (b[i+4] = a[i]), but the rule for enabled
    // speculative aliasing checks expects multiversioning to be present.
    // Rule 1 (checks off): 4-element short vector loads, no multiversioning.
    // Rule 2 (checks on): some short vector load AND a 4-element short vector load,
    // with at least one "multiversion" match in the ideal graph.
    // Both rules require MaxVectorSize >= 8; neither constrains AlignVector.
    // NOTE(review): presumably the caller passes overlapping arrays so that the
    // speculative check fails at runtime - confirm at the call site.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0",
                  ".*multiversion.*", "= 0"},
        phase = CompilePhase.PRINT_IDEAL,
        applyIfAnd = {"MaxVectorSize", ">=8", "UseAutoVectorizationSpeculativeAliasingChecks", "false"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    // Cyclic dependency with distance 4 -> split into 4-packs
    @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
                  IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0",
                  ".*multiversion.*", "> 0"},
        phase = CompilePhase.PRINT_IDEAL,
        applyIfAnd = {"MaxVectorSize", ">=8", "UseAutoVectorizationSpeculativeAliasingChecks", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    // Speculative aliasing check with multiversioning -> full vectorization & split packs.
    static Object[] test4c_alias(short[] a, short[] b) {
        // Upper bound RANGE-64 keeps the shifted store index i+4 well inside the array.
        for (int i = 0; i < RANGE-64; i++) {
          b[i+4] = a[i+0];
        }
        return new Object[]{ a, b };
    }
 976 
    // test4d_alias: same loop as test4d (b[i+5] = a[i]), but the rule for enabled
    // speculative aliasing checks expects multiversioning to be present.
    // Rule 1 (checks off): 4-element short vector loads, no multiversioning.
    // Rule 2 (checks on): some short vector load AND a 4-element short vector load,
    // with at least one "multiversion" match in the ideal graph.
    // Both rules require MaxVectorSize >= 8 and AlignVector=false.
    // NOTE(review): presumably the caller passes overlapping arrays so that the
    // speculative check fails at runtime - confirm at the call site.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0",
                  ".*multiversion.*", "= 0"},
        phase = CompilePhase.PRINT_IDEAL,
        applyIfAnd = {"MaxVectorSize", ">=8", "AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "false"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    // Cyclic dependency with distance 5 -> split into 4-packs
    @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
                  IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0",
                  ".*multiversion.*", "> 0"},
        phase = CompilePhase.PRINT_IDEAL,
        applyIfAnd = {"MaxVectorSize", ">=8", "AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    // Speculative aliasing check with multiversioning -> full vectorization & split packs.
    static Object[] test4d_alias(short[] a, short[] b) {
        // Upper bound RANGE-64 keeps the shifted store index i+5 well inside the array.
        for (int i = 0; i < RANGE-64; i++) {
          b[i+5] = a[i+0];
        }
        return new Object[]{ a, b };
    }
1001 
    // test4e_alias: same loop as test4e (b[i+6] = a[i]), but the rule for enabled
    // speculative aliasing checks expects multiversioning to be present.
    // Rule 1 (checks off): 4-element short vector loads, no multiversioning.
    // Rule 2 (checks on): some short vector load AND a 4-element short vector load,
    // with at least one "multiversion" match in the ideal graph.
    // Both rules require MaxVectorSize >= 8 and AlignVector=false.
    // NOTE(review): presumably the caller passes overlapping arrays so that the
    // speculative check fails at runtime - confirm at the call site.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0",
                  ".*multiversion.*", "= 0"},
        phase = CompilePhase.PRINT_IDEAL,
        applyIfAnd = {"MaxVectorSize", ">=8", "AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "false"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    // Cyclic dependency with distance 6 -> split into 4-packs
    @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
                  IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0",
                  ".*multiversion.*", "> 0"},
        phase = CompilePhase.PRINT_IDEAL,
        applyIfAnd = {"MaxVectorSize", ">=8", "AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    // Speculative aliasing check with multiversioning -> full vectorization & split packs.
    static Object[] test4e_alias(short[] a, short[] b) {
        // Upper bound RANGE-64 keeps the shifted store index i+6 well inside the array.
        for (int i = 0; i < RANGE-64; i++) {
          b[i+6] = a[i+0];
        }
        return new Object[]{ a, b };
    }
1026 
    // test4f_alias: same loop as test4f (b[i+7] = a[i]), but the rule for enabled
    // speculative aliasing checks expects multiversioning to be present.
    // Rule 1 (checks off): 4-element short vector loads, no multiversioning.
    // Rule 2 (checks on): some short vector load AND a 4-element short vector load,
    // with at least one "multiversion" match in the ideal graph.
    // Both rules require MaxVectorSize >= 8 and AlignVector=false.
    // NOTE(review): presumably the caller passes overlapping arrays so that the
    // speculative check fails at runtime - confirm at the call site.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0",
                  ".*multiversion.*", "= 0"},
        phase = CompilePhase.PRINT_IDEAL,
        applyIfAnd = {"MaxVectorSize", ">=8", "AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "false"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    // Cyclic dependency with distance 7 -> split into 4-packs
    @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
                  IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0",
                  ".*multiversion.*", "> 0"},
        phase = CompilePhase.PRINT_IDEAL,
        applyIfAnd = {"MaxVectorSize", ">=8", "AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    // Speculative aliasing check with multiversioning -> full vectorization & split packs.
    static Object[] test4f_alias(short[] a, short[] b) {
        // Upper bound RANGE-64 keeps the shifted store index i+7 well inside the array.
        for (int i = 0; i < RANGE-64; i++) {
          b[i+7] = a[i+0];
        }
        return new Object[]{ a, b };
    }
1051 
1052     @Test
1053     @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_8, "> 0",
1054                   IRNode.STORE_VECTOR, "> 0",
1055                   ".*multiversion.*", "= 0"},
1056         phase = CompilePhase.PRINT_IDEAL,
1057         applyIfAnd = {"MaxVectorSize", ">=32", "UseAutoVectorizationSpeculativeAliasingChecks", "false"},
1058         applyIfPlatform = {"64-bit", "true"},
1059         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
1060     // Cyclic dependency with distance 8 -> split into 8-packs
1061     @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
1062                   IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_8, "> 0",
1063                   IRNode.STORE_VECTOR, "> 0",
1064                   ".*multiversion.*", "> 0"},
1065         phase = CompilePhase.PRINT_IDEAL,
1066         applyIfAnd = {"MaxVectorSize", ">=32", "UseAutoVectorizationSpeculativeAliasingChecks", "true"},
1067         applyIfPlatform = {"64-bit", "true"},
1068         applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
1069     // Speculative aliasing check with multiversioning -> full vectorization & split packs.
1070     static Object[] test4g_alias(short[] a, short[] b) {
1071         for (int i = 0; i < RANGE-64; i++) {
1072           b[i+8] = a[i+0];
1073         }
1074         return new Object[]{ a, b };
1075     }
1076 
    // test5a: per 16-element stride, writes b[i+k] = (short)(a[i+k] + val) for
    // k = 0..14, i.e. 15 adjacent elements; index i+15 is not touched.
    // Rule 1 (sse4.1): expects short vector loads and ADD_VS nodes with 2, 4 and 8
    // elements, plus at least one vector store.
    // Rule 2 (sve): same without the 2-element expectations (see the aarch64 note
    // between the rules).
    // Both rules require MaxVectorSize >= 32 and AlignVector=false.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_2, "> 0",
                  IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_8, "> 0",
                  IRNode.ADD_VS,        IRNode.VECTOR_SIZE_2, "> 0",
                  IRNode.ADD_VS,        IRNode.VECTOR_SIZE_8, "> 0",
                  IRNode.ADD_VS,        IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeature = {"sse4.1", "true"})
    // aarch64 limits minimum vector size to 8B, thus a vector size of
    // length 2 for type "short" will not be generated
    @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_8, "> 0",
                  IRNode.ADD_VS,        IRNode.VECTOR_SIZE_8, "> 0",
                  IRNode.ADD_VS,        IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeature = {"sve", "true"})
    // Split pack into power-of-2 sizes
    static Object[] test5a(short[] a, short[] b, short val) {
        for (int i = 0; i < RANGE; i+=16) {
            b[i+ 0] = (short)(a[i+ 0] + val); // 8 pack
            b[i+ 1] = (short)(a[i+ 1] + val);
            b[i+ 2] = (short)(a[i+ 2] + val);
            b[i+ 3] = (short)(a[i+ 3] + val);
            b[i+ 4] = (short)(a[i+ 4] + val);
            b[i+ 5] = (short)(a[i+ 5] + val);
            b[i+ 6] = (short)(a[i+ 6] + val);
            b[i+ 7] = (short)(a[i+ 7] + val);

            b[i+ 8] = (short)(a[i+ 8] + val); // 4-pack
            b[i+ 9] = (short)(a[i+ 9] + val);
            b[i+10] = (short)(a[i+10] + val);
            b[i+11] = (short)(a[i+11] + val);

            b[i+12] = (short)(a[i+12] + val); // 2-pack
            b[i+13] = (short)(a[i+13] + val);

            // Fifteenth element: left over after the 8 + 4 + 2 split, has no
            // adjacent partner at i+15 (presumably stays scalar).
            b[i+14] = (short)(a[i+14] + val);
        }
        return new Object[]{ a, b };
    }
1122 
    // test6a: sums into a single int accumulator, per 8-element stride, the products
    // a[i+k] * b[i+k] for k = 0..3 and the bitwise ANDs a[i+k] & b[i+k] for k = 4..7.
    // Both @IR rules expect 4-element int vector loads, MUL_VI, AND_VI and ADD_VI
    // nodes, plus at least one add-reduction node. They differ only in the flag
    // condition: the first needs AlignVector=false, the second needs
    // UseCompactObjectHeaders=false (see the alignment note inside the loop).
    // Returns both arrays and the sum wrapped in a one-element int array.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_I,   IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.MUL_VI,          IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.AND_VI,          IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.ADD_VI,          IRNode.VECTOR_SIZE_4, "> 0", // reduction moved out of loop
                  IRNode.ADD_REDUCTION_V,                       "> 0"},
        applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    @IR(counts = {IRNode.LOAD_VECTOR_I,   IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.MUL_VI,          IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.AND_VI,          IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.ADD_VI,          IRNode.VECTOR_SIZE_4, "> 0", // reduction moved out of loop
                  IRNode.ADD_REDUCTION_V,                       "> 0"},
        applyIfAnd = {"MaxVectorSize", ">=32", "UseCompactObjectHeaders", "false"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    // Split packs including reductions
    static Object[] test6a(int[] a, int[] b) {
        int s = 0;
        for (int i = 0; i < RANGE; i+=8) {
            // First half of the stride: multiply, then accumulate.
            s += a[i+0] * b[i+0];
            s += a[i+1] * b[i+1];
            s += a[i+2] * b[i+2];
            s += a[i+3] * b[i+3];

            // Second half of the stride: bitwise AND, then accumulate into the same sum.
            s += a[i+4] & b[i+4];
            s += a[i+5] & b[i+5];
            s += a[i+6] & b[i+6];
            s += a[i+7] & b[i+7];
            // With AlignVector, we need 8-byte alignment of vector loads/stores.
            // UseCompactObjectHeaders=false                 UseCompactObjectHeaders=true
            // adr = base + 16 + 32*i  ->  always            adr = base + 12 + 32*i  ->  never
            // -> vectorize                                  -> no vectorization
        }
        return new Object[]{ a, b, new int[]{ s } };
    }
1160 
    // test7a: writes a[i] = b[i] * i for every i in [0, RANGE), so the loop index
    // itself is an operand of the multiplication.
    // The @IR rule expects at least one int vector load, one MUL_VI and one
    // POPULATE_INDEX node. It is only applied on 64-bit platforms with avx2, sve
    // or rvv, and has no MaxVectorSize or AlignVector condition.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_I,  "> 0",
                  IRNode.MUL_VI,         "> 0",
                  IRNode.POPULATE_INDEX, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"avx2", "true", "sve", "true", "rvv", "true"})
    // Index Populate:
    // There can be an issue when all the (iv + 1), (iv + 2), ...
    // get packed, but not (iv). Then we have a pack that is one element
    // too short, and we start splitting everything in a bad way.
    static Object[] test7a(int[] a, int[] b) {
        for (int i = 0; i < RANGE; i++) {
            a[i] = b[i] * i;
        }
        return new Object[]{ a, b };
    }
1177 }