1 /*
   2  * Copyright (c) 2024, 2025, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  */
  23 
  24 package compiler.loopopts.superword;
  25 
  26 import compiler.lib.ir_framework.*;
  27 import jdk.test.lib.Utils;
  28 import jdk.test.whitebox.WhiteBox;
  29 import java.lang.reflect.Array;
  30 import java.util.Map;
  31 import java.util.HashMap;
  32 import java.util.Random;
  33 import java.nio.ByteOrder;
  34 
  35 /*
  36  * @test
  37  * @bug 8326139 8348659
  38  * @key randomness
  39  * @summary Test splitting packs in SuperWord
  40  * @library /test/lib /
  41  * @run driver compiler.loopopts.superword.TestSplitPacks nCOH_nAV_ySAC
  42  * @run driver compiler.loopopts.superword.TestSplitPacks nCOH_yAV_ySAC
  43  * @run driver compiler.loopopts.superword.TestSplitPacks yCOH_nAV_ySAC
  44  * @run driver compiler.loopopts.superword.TestSplitPacks yCOH_yAV_ySAC
  45  * @run driver compiler.loopopts.superword.TestSplitPacks nCOH_nAV_nSAC
  46  * @run driver compiler.loopopts.superword.TestSplitPacks nCOH_yAV_nSAC
  47  * @run driver compiler.loopopts.superword.TestSplitPacks yCOH_nAV_nSAC
  48  * @run driver compiler.loopopts.superword.TestSplitPacks yCOH_yAV_nSAC
  49  */
  50 
  51 public class TestSplitPacks {
    // Length of every generated input array; the visible test kernels also use it as loop bound.
    static int RANGE = 1024*8;
    // NOTE(review): not referenced in the visible part of this file - presumably used by kernels further down; confirm.
    static int RANGE_FINAL = 1024*8;
    // Random source for the input data, obtained from the test library.
    private static final Random RANDOM = Utils.getRandomInstance();

    // Inputs
    // The arrays are generated once in the constructor; the registered tests only ever work on clones of them.
    // mI and mS are passed to the kernels as the scalar "mask" operand.
    // NOTE(review): aB/bB/mB and aL/bL/mL are not used by the tests visible here - confirm they are still needed.
    byte[] aB;
    byte[] bB;
    byte mB = (byte)31;
    short[] aS;
    short[] bS;
    short mS = (short)0xF0F0;
    int[] aI;
    int[] bI;
    int mI = 0xF0F0F0F0;
    long[] aL;
    long[] bL;
    long mL = 0xF0F0F0F0F0F0F0F0L;

    // List of tests
    // Maps the test name to a closure that invokes the test kernel on fresh clones of the inputs.
    Map<String,TestFunction> tests = new HashMap<String,TestFunction>();

    // List of gold, the results from the first run before compilation
    // Keyed by the same test names as "tests"; filled in the constructor and read in runTests().
    Map<String,Object[]> golds = new HashMap<String,Object[]>();
  75 
  76     interface TestFunction {
  77         Object[] run();
  78     }
  79 
  80     public static void main(String[] args) {
  81         TestFramework framework = new TestFramework(TestSplitPacks.class);
  82         framework.addFlags("-XX:+IgnoreUnrecognizedVMOptions", "-XX:LoopUnrollLimit=1000");
  83         switch (args[0]) {
  84             case "nCOH_nAV_ySAC" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:-UseCompactObjectHeaders", "-XX:-AlignVector", "-XX:+UseAutoVectorizationSpeculativeAliasingChecks"); }
  85             case "nCOH_yAV_ySAC" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:-UseCompactObjectHeaders", "-XX:+AlignVector", "-XX:+UseAutoVectorizationSpeculativeAliasingChecks"); }
  86             case "yCOH_nAV_ySAC" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:+UseCompactObjectHeaders", "-XX:-AlignVector", "-XX:+UseAutoVectorizationSpeculativeAliasingChecks"); }
  87             case "yCOH_yAV_ySAC" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:+UseCompactObjectHeaders", "-XX:+AlignVector", "-XX:+UseAutoVectorizationSpeculativeAliasingChecks"); }
  88             case "nCOH_nAV_nSAC" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:-UseCompactObjectHeaders", "-XX:-AlignVector", "-XX:-UseAutoVectorizationSpeculativeAliasingChecks"); }
  89             case "nCOH_yAV_nSAC" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:-UseCompactObjectHeaders", "-XX:+AlignVector", "-XX:-UseAutoVectorizationSpeculativeAliasingChecks"); }
  90             case "yCOH_nAV_nSAC" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:+UseCompactObjectHeaders", "-XX:-AlignVector", "-XX:-UseAutoVectorizationSpeculativeAliasingChecks"); }
  91             case "yCOH_yAV_nSAC" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:+UseCompactObjectHeaders", "-XX:+AlignVector", "-XX:-UseAutoVectorizationSpeculativeAliasingChecks"); }
  92             default -> { throw new RuntimeException("Test argument not recognized: " + args[0]); }
  93         };
  94         framework.start();
  95     }
  96 
  97     public TestSplitPacks() {
  98         // Generate input once
  99         aB = generateB();
 100         bB = generateB();
 101         aS = generateS();
 102         bS = generateS();
 103         aI = generateI();
 104         bI = generateI();
 105         aL = generateL();
 106         bL = generateL();
 107 
 108         // Add all tests to list
 109         tests.put("test0",       () -> { return test0(aI.clone(), bI.clone(), mI); });
 110         tests.put("test1a",      () -> { return test1a(aI.clone(), bI.clone(), mI); });
 111         tests.put("test1b",      () -> { return test1b(aI.clone(), bI.clone(), mI); });
 112         tests.put("test1c",      () -> { return test1c(aI.clone(), bI.clone(), mI); });
 113         tests.put("test1d",      () -> { return test1d(aI.clone(), bI.clone(), mI); });
 114         tests.put("test2a",      () -> { return test2a(aI.clone(), bI.clone(), mI); });
 115         tests.put("test2b",      () -> { return test2b(aI.clone(), bI.clone(), mI); });
 116         tests.put("test2c",      () -> { return test2c(aI.clone(), bI.clone(), mI); });
 117         tests.put("test2d",      () -> { return test2d(aI.clone(), bI.clone(), mI); });
 118         tests.put("test3a",      () -> { return test3a(aS.clone(), bS.clone(), mS); });
 119         tests.put("test4a",      () -> { return test4a(aS.clone(), bS.clone()); });
 120         tests.put("test4b",      () -> { return test4b(aS.clone(), bS.clone()); });
 121         tests.put("test4c",      () -> { return test4c(aS.clone(), bS.clone()); });
 122         tests.put("test4d",      () -> { return test4d(aS.clone(), bS.clone()); });
 123         tests.put("test4e",      () -> { return test4e(aS.clone(), bS.clone()); });
 124         tests.put("test4f",      () -> { return test4f(aS.clone(), bS.clone()); });
 125         tests.put("test4g",      () -> { return test4g(aS.clone(), bS.clone()); });
 126         tests.put("test4a_alias",() -> { short[] x = aS.clone(); return test4a_alias(x, x); });
 127         tests.put("test4b_alias",() -> { short[] x = aS.clone(); return test4b_alias(x, x); });
 128         tests.put("test4c_alias",() -> { short[] x = aS.clone(); return test4c_alias(x, x); });
 129         tests.put("test4d_alias",() -> { short[] x = aS.clone(); return test4d_alias(x, x); });
 130         tests.put("test4e_alias",() -> { short[] x = aS.clone(); return test4e_alias(x, x); });
 131         tests.put("test4f_alias",() -> { short[] x = aS.clone(); return test4f_alias(x, x); });
 132         tests.put("test4g_alias",() -> { short[] x = aS.clone(); return test4g_alias(x, x); });
 133         tests.put("test5a",      () -> { return test5a(aS.clone(), bS.clone(), mS); });
 134         tests.put("test6a",      () -> { return test6a(aI.clone(), bI.clone()); });
 135         tests.put("test7a",      () -> { return test7a(aI.clone(), bI.clone()); });
 136 
 137         // Compute gold value for all test methods before compilation
 138         for (Map.Entry<String,TestFunction> entry : tests.entrySet()) {
 139             String name = entry.getKey();
 140             TestFunction test = entry.getValue();
 141             Object[] gold = test.run();
 142             golds.put(name, gold);
 143         }
 144     }
 145 
 146     @Warmup(100)
 147     @Run(test = {"test0",
 148                  "test1a",
 149                  "test1b",
 150                  "test1c",
 151                  "test1d",
 152                  "test2a",
 153                  "test2b",
 154                  "test2c",
 155                  "test2d",
 156                  "test3a",
 157                  "test4a",
 158                  "test4b",
 159                  "test4c",
 160                  "test4d",
 161                  "test4e",
 162                  "test4f",
 163                  "test4g",
 164                  "test4a_alias",
 165                  "test4b_alias",
 166                  "test4c_alias",
 167                  "test4d_alias",
 168                  "test4e_alias",
 169                  "test4f_alias",
 170                  "test4g_alias",
 171                  "test5a",
 172                  "test6a",
 173                  "test7a"})
 174     public void runTests() {
 175         for (Map.Entry<String,TestFunction> entry : tests.entrySet()) {
 176             String name = entry.getKey();
 177             TestFunction test = entry.getValue();
 178             // Recall gold value from before compilation
 179             Object[] gold = golds.get(name);
 180             // Compute new result
 181             Object[] result = test.run();
 182             // Compare gold and new result
 183             verify(name, gold, result);
 184         }
 185     }
 186 
 187     static byte[] generateB() {
 188         byte[] a = new byte[RANGE];
 189         for (int i = 0; i < a.length; i++) {
 190             a[i] = (byte)RANDOM.nextInt();
 191         }
 192         return a;
 193     }
 194 
 195     static short[] generateS() {
 196         short[] a = new short[RANGE];
 197         for (int i = 0; i < a.length; i++) {
 198             a[i] = (short)RANDOM.nextInt();
 199         }
 200         return a;
 201     }
 202 
 203     static int[] generateI() {
 204         int[] a = new int[RANGE];
 205         for (int i = 0; i < a.length; i++) {
 206             a[i] = RANDOM.nextInt();
 207         }
 208         return a;
 209     }
 210 
 211     static long[] generateL() {
 212         long[] a = new long[RANGE];
 213         for (int i = 0; i < a.length; i++) {
 214             a[i] = RANDOM.nextLong();
 215         }
 216         return a;
 217     }
 218 
 219     static void verify(String name, Object[] gold, Object[] result) {
 220         if (gold.length != result.length) {
 221             throw new RuntimeException("verify " + name + ": not the same number of outputs: gold.length = " +
 222                                        gold.length + ", result.length = " + result.length);
 223         }
 224         for (int i = 0; i < gold.length; i++) {
 225             Object g = gold[i];
 226             Object r = result[i];
 227             if (g.getClass() != r.getClass() || !g.getClass().isArray() || !r.getClass().isArray()) {
 228                 throw new RuntimeException("verify " + name + ": must both be array of same type:" +
 229                                            " gold[" + i + "].getClass() = " + g.getClass().getSimpleName() +
 230                                            " result[" + i + "].getClass() = " + r.getClass().getSimpleName());
 231             }
 232             if (g == r) {
 233                 throw new RuntimeException("verify " + name + ": should be two separate arrays (with identical content):" +
 234                                            " gold[" + i + "] == result[" + i + "]");
 235             }
 236             if (Array.getLength(g) != Array.getLength(r)) {
 237                     throw new RuntimeException("verify " + name + ": arrays must have same length:" +
 238                                            " gold[" + i + "].length = " + Array.getLength(g) +
 239                                            " result[" + i + "].length = " + Array.getLength(r));
 240             }
 241             Class c = g.getClass().getComponentType();
 242             if (c == byte.class) {
 243                 verifyB(name, i, (byte[])g, (byte[])r);
 244             } else if (c == short.class) {
 245                 verifyS(name, i, (short[])g, (short[])r);
 246             } else if (c == int.class) {
 247                 verifyI(name, i, (int[])g, (int[])r);
 248             } else if (c == long.class) {
 249                 verifyL(name, i, (long[])g, (long[])r);
 250             } else {
 251                 throw new RuntimeException("verify " + name + ": array type not supported for verify:" +
 252                                        " gold[" + i + "].getClass() = " + g.getClass().getSimpleName() +
 253                                        " result[" + i + "].getClass() = " + r.getClass().getSimpleName());
 254             }
 255         }
 256     }
 257 
 258     static void verifyB(String name, int i, byte[] g, byte[] r) {
 259         for (int j = 0; j < g.length; j++) {
 260             if (g[j] != r[j]) {
 261                 throw new RuntimeException("verify " + name + ": arrays must have same content:" +
 262                                            " gold[" + i + "][" + j + "] = " + g[j] +
 263                                            " result[" + i + "][" + j + "] = " + r[j]);
 264             }
 265         }
 266     }
 267 
 268     static void verifyS(String name, int i, short[] g, short[] r) {
 269         for (int j = 0; j < g.length; j++) {
 270             if (g[j] != r[j]) {
 271                 throw new RuntimeException("verify " + name + ": arrays must have same content:" +
 272                                            " gold[" + i + "][" + j + "] = " + g[j] +
 273                                            " result[" + i + "][" + j + "] = " + r[j]);
 274             }
 275         }
 276     }
 277 
 278     static void verifyI(String name, int i, int[] g, int[] r) {
 279         for (int j = 0; j < g.length; j++) {
 280             if (g[j] != r[j]) {
 281                 throw new RuntimeException("verify " + name + ": arrays must have same content:" +
 282                                            " gold[" + i + "][" + j + "] = " + g[j] +
 283                                            " result[" + i + "][" + j + "] = " + r[j]);
 284             }
 285         }
 286     }
 287 
 288     static void verifyL(String name, int i, long[] g, long[] r) {
 289         for (int j = 0; j < g.length; j++) {
 290             if (g[j] != r[j]) {
 291                 throw new RuntimeException("verify " + name + ": arrays must have same content:" +
 292                                            " gold[" + i + "][" + j + "] = " + g[j] +
 293                                            " result[" + i + "][" + j + "] = " + r[j]);
 294             }
 295         }
 296     }
 297 
    // Kernel with two separate runs of adjacent int accesses per iteration (2 and 4 elements wide).
    // Both IR rules expect int vector loads and ANDs of size 2 and of size 4, plus at least one vector store.
    // Rule 1 is conditioned on AlignVector=false, rule 2 on UseCompactObjectHeaders=false; both also
    // require MaxVectorSize>=32, a 64-bit platform and one of the listed CPU features.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
                  IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.AND_VI,        IRNode.VECTOR_SIZE_2, "> 0",
                  IRNode.AND_VI,        IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
                  IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.AND_VI,        IRNode.VECTOR_SIZE_2, "> 0",
                  IRNode.AND_VI,        IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfAnd = {"MaxVectorSize", ">=32", "UseCompactObjectHeaders", "false"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    // Load and store are already split
    //
    //  0 1 - - 4 5 6 7
    //  | |     | | | |
    //  0 1 - - 4 5 6 7
    //
    // (top row: elements loaded from a, bottom row: elements stored to b, per group of 8)
    static Object[] test0(int[] a, int[] b, int mask) {
        for (int i = 0; i < RANGE; i+=8) {
            // Elements 2 and 3 of each group are deliberately neither loaded nor stored.
            int b0 = a[i+0] & mask;
            int b1 = a[i+1] & mask;

            int b4 = a[i+4] & mask;
            int b5 = a[i+5] & mask;
            int b6 = a[i+6] & mask;
            int b7 = a[i+7] & mask;

            b[i+0] = b0;
            b[i+1] = b1;

            b[i+4] = b4;
            b[i+5] = b5;
            b[i+6] = b6;
            b[i+7] = b7;
            // With AlignVector, we need 8-byte alignment of vector loads/stores.
            // UseCompactObjectHeaders=false                 UseCompactObjectHeaders=true
            // adr = base + 16 + 32*i  ->  always            adr = base + 12 + 32*i  ->  never
            // -> vectorize                                  -> no vectorization
        }
        // Both arrays are returned so that the caller can compare them against the gold result.
        return new Object[]{ a, b };
    }
 344 
    // Kernel with 6 adjacent int loads/stores per iteration, where the first 4 elements use Add
    // and the following 2 use Mul. Both IR rules expect ADD_VI of size 4 and MUL_VI of size 2.
    // Rule 1 is conditioned on AlignVector=false, rule 2 on UseCompactObjectHeaders=false; both also
    // require MaxVectorSize>=32, a 64-bit platform and one of the listed CPU features.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
                  IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.ADD_VI,        IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.MUL_VI,        IRNode.VECTOR_SIZE_2, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
                  IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.ADD_VI,        IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.MUL_VI,        IRNode.VECTOR_SIZE_2, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfAnd = {"MaxVectorSize", ">=32", "UseCompactObjectHeaders", "false"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    // Adjacent Load and Store, but split by Add/Mul
    static Object[] test1a(int[] a, int[] b, int mask) {
        for (int i = 0; i < RANGE; i+=8) {
            // Elements 0..3: Add. Elements 4..5: Mul. Elements 6 and 7 are left untouched.
            b[i+0] = a[i+0] + mask; // Add
            b[i+1] = a[i+1] + mask;
            b[i+2] = a[i+2] + mask;
            b[i+3] = a[i+3] + mask;

            b[i+4] = a[i+4] * mask; // Mul
            b[i+5] = a[i+5] * mask;
            // With AlignVector, we need 8-byte alignment of vector loads/stores.
            // UseCompactObjectHeaders=false                 UseCompactObjectHeaders=true
            // adr = base + 16 + 32*i  ->  always            adr = base + 12 + 32*i  ->  never
            // -> vectorize                                  -> no vectorization
        }
        // Both arrays are returned so that the caller can compare them against the gold result.
        return new Object[]{ a, b };
    }
 379 
    // Kernel with 6 adjacent int loads/stores per iteration, where the first 4 elements use Mul
    // and the following 2 use Add. Both IR rules expect ADD_VI of size 2 and MUL_VI of size 4.
    // Rule 1 is conditioned on AlignVector=false, rule 2 on UseCompactObjectHeaders=false; both also
    // require MaxVectorSize>=32, a 64-bit platform and one of the listed CPU features.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
                  IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.ADD_VI,        IRNode.VECTOR_SIZE_2, "> 0",
                  IRNode.MUL_VI,        IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
                  IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.ADD_VI,        IRNode.VECTOR_SIZE_2, "> 0",
                  IRNode.MUL_VI,        IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfAnd = {"MaxVectorSize", ">=32", "UseCompactObjectHeaders", "false"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    // Adjacent Load and Store, but split by Add/Mul
    static Object[] test1b(int[] a, int[] b, int mask) {
        for (int i = 0; i < RANGE; i+=8) {
            // Elements 0..3: Mul. Elements 4..5: Add. Elements 6 and 7 are left untouched.
            b[i+0] = a[i+0] * mask; // Mul
            b[i+1] = a[i+1] * mask;
            b[i+2] = a[i+2] * mask;
            b[i+3] = a[i+3] * mask;

            b[i+4] = a[i+4] + mask; // Add
            b[i+5] = a[i+5] + mask;
            // With AlignVector, we need 8-byte alignment of vector loads/stores.
            // UseCompactObjectHeaders=false                 UseCompactObjectHeaders=true
            // adr = base + 16 + 32*i  ->  always            adr = base + 12 + 32*i  ->  never
            // -> vectorize                                  -> no vectorization
        }
        // Both arrays are returned so that the caller can compare them against the gold result.
        return new Object[]{ a, b };
    }
 414 
    // Kernel with 6 adjacent int loads/stores per iteration, where the first 2 elements use Add
    // and the following 4 use Mul. Both IR rules expect ADD_VI of size 2 and MUL_VI of size 4.
    // Rule 1 is conditioned on AlignVector=false, rule 2 on UseCompactObjectHeaders=false; both also
    // require MaxVectorSize>=32 and a 64-bit platform. Note that the CPU feature list names avx2
    // here, not sse4.1.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
                  IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.ADD_VI,        IRNode.VECTOR_SIZE_2, "> 0",
                  IRNode.MUL_VI,        IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
    @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
                  IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.ADD_VI,        IRNode.VECTOR_SIZE_2, "> 0",
                  IRNode.MUL_VI,        IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfAnd = {"MaxVectorSize", ">=32", "UseCompactObjectHeaders", "false"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
    // Adjacent Load and Store, but split by Add/Mul
    static Object[] test1c(int[] a, int[] b, int mask) {
        for (int i = 0; i < RANGE; i+=8) {
            // Elements 0..1: Add. Elements 2..5: Mul. Elements 6 and 7 are left untouched.
            b[i+0] = a[i+0] + mask; // Add
            b[i+1] = a[i+1] + mask;

            b[i+2] = a[i+2] * mask; // Mul
            b[i+3] = a[i+3] * mask;
            b[i+4] = a[i+4] * mask;
            b[i+5] = a[i+5] * mask;
            // With AlignVector, we need 8-byte alignment of vector loads/stores.
            // UseCompactObjectHeaders=false                 UseCompactObjectHeaders=true
            // adr = base + 16 + 32*i  ->  always            adr = base + 12 + 32*i  ->  never
            // -> vectorize                                  -> no vectorization
        }
        // Both arrays are returned so that the caller can compare them against the gold result.
        return new Object[]{ a, b };
    }
 449 
    // Kernel with 6 adjacent int loads/stores per iteration, where the first 2 elements use Mul
    // and the following 4 use Add. Both IR rules expect ADD_VI of size 4 and MUL_VI of size 2.
    // Rule 1 is conditioned on AlignVector=false, rule 2 on UseCompactObjectHeaders=false; both also
    // require MaxVectorSize>=32 and a 64-bit platform. Note that the CPU feature list names avx2
    // here, not sse4.1.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
                  IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.ADD_VI,        IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.MUL_VI,        IRNode.VECTOR_SIZE_2, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
    @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
                  IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.ADD_VI,        IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.MUL_VI,        IRNode.VECTOR_SIZE_2, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfAnd = {"MaxVectorSize", ">=32", "UseCompactObjectHeaders", "false"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
    // Adjacent Load and Store, but split by Add/Mul
    static Object[] test1d(int[] a, int[] b, int mask) {
        for (int i = 0; i < RANGE; i+=8) {
            // Elements 0..1: Mul. Elements 2..5: Add. Elements 6 and 7 are left untouched.
            b[i+0] = a[i+0] * mask; // Mul
            b[i+1] = a[i+1] * mask;

            b[i+2] = a[i+2] + mask; // Add
            b[i+3] = a[i+3] + mask;
            b[i+4] = a[i+4] + mask;
            b[i+5] = a[i+5] + mask;
            // With AlignVector, we need 8-byte alignment of vector loads/stores.
            // UseCompactObjectHeaders=false                 UseCompactObjectHeaders=true
            // adr = base + 16 + 32*i  ->  always            adr = base + 12 + 32*i  ->  never
            // -> vectorize                                  -> no vectorization
        }
        // Both arrays are returned so that the caller can compare them against the gold result.
        return new Object[]{ a, b };
    }
 484 
    // Kernel with 6 adjacent int loads per iteration whose values are stored as a run of 2 and,
    // after a gap, a run of 4. Both IR rules expect int vector loads and ANDs of size 2 and of size 4.
    // Rule 1 is conditioned on AlignVector=false, rule 2 on UseCompactObjectHeaders=false; both also
    // require MaxVectorSize>=32, a 64-bit platform and one of the listed CPU features.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
                  IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.AND_VI,        IRNode.VECTOR_SIZE_2, "> 0",
                  IRNode.AND_VI,        IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
                  IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.AND_VI,        IRNode.VECTOR_SIZE_2, "> 0",
                  IRNode.AND_VI,        IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfAnd = {"MaxVectorSize", ">=32", "UseCompactObjectHeaders", "false"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    // Split the load
    //
    //  0 1 2 3 4 5 - -
    //  | |  \ \ \ \
    //  | |   \ \ \ \
    //  | |    \ \ \ \
    //  0 1 - - 4 5 6 7
    //
    // (top row: elements loaded from a, bottom row: elements stored to b, per group of 8)
    static Object[] test2a(int[] a, int[] b, int mask) {
        for (int i = 0; i < RANGE; i+=8) {
            int b0 = a[i+0] & mask;
            int b1 = a[i+1] & mask;
            int b2 = a[i+2] & mask;
            int b3 = a[i+3] & mask;
            int b4 = a[i+4] & mask;
            int b5 = a[i+5] & mask;

            b[i+0] = b0;
            b[i+1] = b1;

            // The values loaded from elements 2..5 are stored two positions further right, at 4..7.
            b[i+4] = b2;
            b[i+5] = b3;
            b[i+6] = b4;
            b[i+7] = b5;
            // With AlignVector, we need 8-byte alignment of vector loads/stores.
            // UseCompactObjectHeaders=false                 UseCompactObjectHeaders=true
            // adr = base + 16 + 32*i  ->  always            adr = base + 12 + 32*i  ->  never
            // -> vectorize                                  -> no vectorization
        }
        // Both arrays are returned so that the caller can compare them against the gold result.
        return new Object[]{ a, b };
    }
 533 
    // Kernel with 6 adjacent int loads per iteration whose values are stored as a run of 4 and,
    // after a gap, a run of 2. Both IR rules expect int vector loads and ANDs of size 2 and of size 4.
    // Rule 1 is conditioned on AlignVector=false, rule 2 on UseCompactObjectHeaders=false; both also
    // require MaxVectorSize>=32, a 64-bit platform and one of the listed CPU features.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
                  IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.AND_VI,        IRNode.VECTOR_SIZE_2, "> 0",
                  IRNode.AND_VI,        IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
                  IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.AND_VI,        IRNode.VECTOR_SIZE_2, "> 0",
                  IRNode.AND_VI,        IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfAnd = {"MaxVectorSize", ">=32", "UseCompactObjectHeaders", "false"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    // Split the load
    //
    //  0 1 2 3 4 5 - -
    //  | | | |  \ \
    //  | | | |   \ \
    //  | | | |    \ \
    //  0 1 2 3 - - 6 7
    //
    // (top row: elements loaded from a, bottom row: elements stored to b, per group of 8)
    static Object[] test2b(int[] a, int[] b, int mask) {
        for (int i = 0; i < RANGE; i+=8) {
            int b0 = a[i+0] & mask;
            int b1 = a[i+1] & mask;
            int b2 = a[i+2] & mask;
            int b3 = a[i+3] & mask;
            int b4 = a[i+4] & mask;
            int b5 = a[i+5] & mask;

            b[i+0] = b0;
            b[i+1] = b1;
            b[i+2] = b2;
            b[i+3] = b3;

            // The values loaded from elements 4..5 are stored two positions further right, at 6..7.
            b[i+6] = b4;
            b[i+7] = b5;
            // With AlignVector, we need 8-byte alignment of vector loads/stores.
            // UseCompactObjectHeaders=false                 UseCompactObjectHeaders=true
            // adr = base + 16 + 32*i  ->  always            adr = base + 12 + 32*i  ->  never
            // -> vectorize                                  -> no vectorization
        }
        // Both arrays are returned so that the caller can compare them against the gold result.
        return new Object[]{ a, b };
    }
 582 
    // Kernel that loads a run of 2 and, after a gap, a run of 4 ints per iteration and stores the
    // values as 6 adjacent elements. Both IR rules expect int vector loads and ANDs of size 2 and 4.
    // Rule 1 is conditioned on AlignVector=false, rule 2 on UseCompactObjectHeaders=false; both also
    // require MaxVectorSize>=32, a 64-bit platform and one of the listed CPU features.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
                  IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.AND_VI,        IRNode.VECTOR_SIZE_2, "> 0",
                  IRNode.AND_VI,        IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
                  IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.AND_VI,        IRNode.VECTOR_SIZE_2, "> 0",
                  IRNode.AND_VI,        IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfAnd = {"MaxVectorSize", ">=32", "UseCompactObjectHeaders", "false"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    // Split the store
    //
    //  0 1 - - 4 5 6 7
    //  | |    / / / /
    //  | |   / / / /
    //  | |  / / / /
    //  0 1 2 3 4 5 - -
    //
    // (top row: elements loaded from a, bottom row: elements stored to b, per group of 8)
    static Object[] test2c(int[] a, int[] b, int mask) {
        for (int i = 0; i < RANGE; i+=8) {
            // Elements 2 and 3 of each group are deliberately not loaded.
            int b0 = a[i+0] & mask;
            int b1 = a[i+1] & mask;

            int b4 = a[i+4] & mask;
            int b5 = a[i+5] & mask;
            int b6 = a[i+6] & mask;
            int b7 = a[i+7] & mask;

            // The values loaded from elements 4..7 are stored two positions further left, at 2..5.
            b[i+0] = b0;
            b[i+1] = b1;
            b[i+2] = b4;
            b[i+3] = b5;
            b[i+4] = b6;
            b[i+5] = b7;
            // With AlignVector, we need 8-byte alignment of vector loads/stores.
            // UseCompactObjectHeaders=false                 UseCompactObjectHeaders=true
            // adr = base + 16 + 32*i  ->  always            adr = base + 12 + 32*i  ->  never
            // -> vectorize                                  -> no vectorization
        }
        // Both arrays are returned so that the caller can compare them against the gold result.
        return new Object[]{ a, b };
    }
 631 
    // test2d: per stride of 8 ints, loads a[i+0..i+3] and a[i+6], a[i+7] (a[i+4] and
    // a[i+5] are never read), ANDs each value with mask, and stores the six results
    // contiguously into b[i+0..i+5]. Returns { a, b }.
    //
    // Both IR rules expect the same shape: int load vectors and AND vectors of size 2
    // and of size 4, plus at least one store vector. They differ only in their flag
    // preconditions.
    @Test
    // Rule 1: MaxVectorSize >= 32 and AlignVector disabled.
    @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
                  IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.AND_VI,        IRNode.VECTOR_SIZE_2, "> 0",
                  IRNode.AND_VI,        IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    // Rule 2: MaxVectorSize >= 32 and UseCompactObjectHeaders disabled
    // (see the alignment note at the end of the loop body).
    @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
                  IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.AND_VI,        IRNode.VECTOR_SIZE_2, "> 0",
                  IRNode.AND_VI,        IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfAnd = {"MaxVectorSize", ">=32", "UseCompactObjectHeaders", "false"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    // Split the load
    //
    // Top row: element offsets loaded from a. Bottom row: element offsets stored to b.
    //
    //  0 1 2 3 - - 6 7
    //  | | | |    / /
    //  | | | |   / /
    //  | | | |  / /
    //  0 1 2 3 4 5 - -
    //
    static Object[] test2d(int[] a, int[] b, int mask) {
        for (int i = 0; i < RANGE; i+=8) {
            // First group of loads: offsets 0..3.
            int b0 = a[i+0] & mask;
            int b1 = a[i+1] & mask;
            int b2 = a[i+2] & mask;
            int b3 = a[i+3] & mask;

            // Second group of loads: offsets 6 and 7 (offsets 4 and 5 are skipped).
            int b6 = a[i+6] & mask;
            int b7 = a[i+7] & mask;

            // Six contiguous stores; the values from offsets 6..7 land at offsets 4..5.
            b[i+0] = b0;
            b[i+1] = b1;
            b[i+2] = b2;
            b[i+3] = b3;
            b[i+4] = b6;
            b[i+5] = b7;
            // With AlignVector, we need 8-byte alignment of vector loads/stores.
            // UseCompactObjectHeaders=false                 UseCompactObjectHeaders=true
            // adr = base + 16 + 32*i  ->  always            adr = base + 12 + 32*i  ->  never
            // -> vectorize                                  -> no vectorization
        }
        return new Object[]{ a, b };
    }
 680 
    // test3a: per stride of 16 shorts, copies a[i+0] and a[i+4..i+7] to the same
    // offsets in b, stores val to b[i+3] and b[i+8], and adds a[i+1] + a[i+2] + a[i+3]
    // to a scalar sum. Returns { a, b, { sum } }.
    //
    // Both IR rules expect a short load vector of size 4 and at least one store vector.
    // They differ only in their flag preconditions.
    @Test
    // Rule 1: MaxVectorSize >= 32 and AlignVector disabled.
    @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    // Rule 2: MaxVectorSize >= 32 and UseCompactObjectHeaders disabled
    // (see the alignment note at the end of the loop body).
    @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfAnd = {"MaxVectorSize", ">=32", "UseCompactObjectHeaders", "false"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    // Top row: element offsets loaded from a. Bottom row: element offsets stored to b.
    // "+" marks the loads that are summed into sum; "v" marks the stores of val.
    //
    // 0 1 2 3 4 5 6 7 -
    // | | | | | | | |
    // | + + + | | | |
    // |       | | | |
    // |     v | | | | v
    // |     | | | | | |
    // 0 - - 3 4 5 6 7 8
    static Object[] test3a(short[] a, short[] b, short val) {
        // Scalar accumulator; handed back to the caller through the result array.
        int sum = 0;
        for (int i = 0; i < RANGE; i+=16) {
            short a0 = a[i+0]; // required for alignment / offsets, technical limitation.

            short a1 = a[i+1]; // adjacent to 4-pack, but need to be split off
            short a2 = a[i+2];
            short a3 = a[i+3];

            short a4 = a[i+4]; // 4-pack
            short a5 = a[i+5];
            short a6 = a[i+6];
            short a7 = a[i+7];


            b[i+0] = a0; // required for alignment / offsets, technical limitation.

            sum += a1 + a2 + a3; // not packed

            b[i+3] = val; // adjacent to 4-pack but needs to be split off

            b[i+4] = a4; // 4-pack
            b[i+5] = a5;
            b[i+6] = a6;
            b[i+7] = a7;

            b[i+8] = val; // adjacent to 4-pack but needs to be split off

            // With AlignVector, we need 8-byte alignment of vector loads/stores.
            // UseCompactObjectHeaders=false                 UseCompactObjectHeaders=true
            // adr = base + 16 + 8 + 32*i  ->  always        adr = base + 12 + 8 + 32*i  ->  never
            // -> vectorize                                  -> no vectorization
        }
        return new Object[]{ a, b, new int[]{ sum } };
    }
 734 
    // test4a: copies a[i+0] into b[i+2] for every i in [0, RANGE-64). Returns { a, b }.
    // The store runs 2 elements ahead of the load, which is the "distance 2" that the
    // rule comments refer to. The comment following each @IR rule describes that rule.
    //
    // Both rules are evaluated on the PRINT_IDEAL phase and require that the regex
    // ".*multiversion.*" does not match. Both are restricted to sse4.1.
    @Test
    // Rule 1: UseAutoVectorizationSpeculativeAliasingChecks disabled.
    @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_2, "> 0",
                  IRNode.STORE_VECTOR, "> 0",
                  ".*multiversion.*", "= 0"},
        phase = CompilePhase.PRINT_IDEAL,
        applyIf = {"UseAutoVectorizationSpeculativeAliasingChecks", "false"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true"})
    // Cyclic dependency with distance 2 -> split into 2-packs
    // Rule 2: UseAutoVectorizationSpeculativeAliasingChecks enabled and AlignVector
    // disabled; the short load vector is matched without an explicit size.
    @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
                  IRNode.STORE_VECTOR, "> 0",
                  ".*multiversion.*", "= 0"},
        phase = CompilePhase.PRINT_IDEAL,
        applyIfAnd = {"UseAutoVectorizationSpeculativeAliasingChecks", "true", "AlignVector", "false"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true"})
    // Speculative aliasing check -> full vectorization.
    static Object[] test4a(short[] a, short[] b) {
        for (int i = 0; i < RANGE-64; i++) {
          b[i+2] = a[i+0];
        }
        return new Object[]{ a, b };
    }
 758 
    // test4b: copies a[i+0] into b[i+3] for every i in [0, RANGE-64). Returns { a, b }.
    // The store runs 3 elements ahead of the load ("distance 3" in the rule comments).
    // The comment following each @IR rule describes that rule.
    //
    // Both rules are evaluated on the PRINT_IDEAL phase, require AlignVector to be
    // disabled, require that ".*multiversion.*" does not match, and are restricted to
    // sse4.1.
    @Test
    // Rule 1: UseAutoVectorizationSpeculativeAliasingChecks disabled.
    @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_2, "> 0",
                  IRNode.STORE_VECTOR, "> 0",
                  ".*multiversion.*", "= 0"},
        phase = CompilePhase.PRINT_IDEAL,
        applyIfAnd = {"AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "false"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true"})
    // Cyclic dependency with distance 3 -> split into 2-packs
    // Rule 2: UseAutoVectorizationSpeculativeAliasingChecks enabled; the short load
    // vector is matched without an explicit size.
    @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
                  IRNode.STORE_VECTOR, "> 0",
                  ".*multiversion.*", "= 0"},
        phase = CompilePhase.PRINT_IDEAL,
        applyIfAnd = {"AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true"})
    // Speculative aliasing check -> full vectorization.
    static Object[] test4b(short[] a, short[] b) {
        for (int i = 0; i < RANGE-64; i++) {
          b[i+3] = a[i+0];
        }
        return new Object[]{ a, b };
    }
 782 
    // test4c: copies a[i+0] into b[i+4] for every i in [0, RANGE-64). Returns { a, b }.
    // The store runs 4 elements ahead of the load ("distance 4" in the rule comments).
    // The comment following each @IR rule describes that rule.
    //
    // Both rules are evaluated on the PRINT_IDEAL phase, require MaxVectorSize >= 8,
    // and require that ".*multiversion.*" does not match.
    @Test
    // Rule 1: UseAutoVectorizationSpeculativeAliasingChecks disabled.
    @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0",
                  ".*multiversion.*", "= 0"},
        phase = CompilePhase.PRINT_IDEAL,
        applyIfAnd = {"MaxVectorSize", ">=8", "UseAutoVectorizationSpeculativeAliasingChecks", "false"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    // Cyclic dependency with distance 4 -> split into 4-packs
    // Rule 2: UseAutoVectorizationSpeculativeAliasingChecks enabled; the short load
    // vector is matched without an explicit size.
    @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
                  IRNode.STORE_VECTOR, "> 0",
                  ".*multiversion.*", "= 0"},
        phase = CompilePhase.PRINT_IDEAL,
        applyIfAnd = {"MaxVectorSize", ">=8", "UseAutoVectorizationSpeculativeAliasingChecks", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    // Speculative aliasing check -> full vectorization.
    static Object[] test4c(short[] a, short[] b) {
        for (int i = 0; i < RANGE-64; i++) {
          b[i+4] = a[i+0];
        }
        return new Object[]{ a, b };
    }
 806 
    // test4d: copies a[i+0] into b[i+5] for every i in [0, RANGE-64). Returns { a, b }.
    // The store runs 5 elements ahead of the load ("distance 5" in the rule comments).
    // The comment following each @IR rule describes that rule.
    //
    // Both rules are evaluated on the PRINT_IDEAL phase, require MaxVectorSize >= 8 and
    // AlignVector disabled, and require that ".*multiversion.*" does not match.
    @Test
    // Rule 1: UseAutoVectorizationSpeculativeAliasingChecks disabled.
    @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0",
                  ".*multiversion.*", "= 0"},
        phase = CompilePhase.PRINT_IDEAL,
        applyIfAnd = {"MaxVectorSize", ">=8", "AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "false"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    // Cyclic dependency with distance 5 -> split into 4-packs
    // Rule 2: UseAutoVectorizationSpeculativeAliasingChecks enabled; the short load
    // vector is matched without an explicit size.
    @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
                  IRNode.STORE_VECTOR, "> 0",
                  ".*multiversion.*", "= 0"},
        phase = CompilePhase.PRINT_IDEAL,
        applyIfAnd = {"MaxVectorSize", ">=8", "AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    // Speculative aliasing check -> full vectorization.
    static Object[] test4d(short[] a, short[] b) {
        for (int i = 0; i < RANGE-64; i++) {
          b[i+5] = a[i+0];
        }
        return new Object[]{ a, b };
    }
 830 
    // test4e: copies a[i+0] into b[i+6] for every i in [0, RANGE-64). Returns { a, b }.
    // The store runs 6 elements ahead of the load ("distance 6" in the rule comments).
    // The comment following each @IR rule describes that rule.
    //
    // Both rules are evaluated on the PRINT_IDEAL phase, require MaxVectorSize >= 8 and
    // AlignVector disabled, and require that ".*multiversion.*" does not match.
    @Test
    // Rule 1: UseAutoVectorizationSpeculativeAliasingChecks disabled.
    @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0",
                  ".*multiversion.*", "= 0"},
        phase = CompilePhase.PRINT_IDEAL,
        applyIfAnd = {"MaxVectorSize", ">=8", "AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "false"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    // Cyclic dependency with distance 6 -> split into 4-packs
    // Rule 2: UseAutoVectorizationSpeculativeAliasingChecks enabled; the short load
    // vector is matched without an explicit size.
    @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
                  IRNode.STORE_VECTOR, "> 0",
                  ".*multiversion.*", "= 0"},
        phase = CompilePhase.PRINT_IDEAL,
        applyIfAnd = {"MaxVectorSize", ">=8", "AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    // Speculative aliasing check -> full vectorization.
    static Object[] test4e(short[] a, short[] b) {
        for (int i = 0; i < RANGE-64; i++) {
          b[i+6] = a[i+0];
        }
        return new Object[]{ a, b };
    }
 854 
    // test4f: copies a[i+0] into b[i+7] for every i in [0, RANGE-64). Returns { a, b }.
    // The store runs 7 elements ahead of the load ("distance 7" in the rule comments).
    // The comment following each @IR rule describes that rule.
    //
    // Both rules are evaluated on the PRINT_IDEAL phase, require MaxVectorSize >= 8 and
    // AlignVector disabled, and require that ".*multiversion.*" does not match.
    @Test
    // Rule 1: UseAutoVectorizationSpeculativeAliasingChecks disabled.
    @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0",
                  ".*multiversion.*", "= 0"},
        phase = CompilePhase.PRINT_IDEAL,
        applyIfAnd = {"MaxVectorSize", ">=8", "AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "false"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    // Cyclic dependency with distance 7 -> split into 4-packs
    // Rule 2: UseAutoVectorizationSpeculativeAliasingChecks enabled; the short load
    // vector is matched without an explicit size.
    @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
                  IRNode.STORE_VECTOR, "> 0",
                  ".*multiversion.*", "= 0"},
        phase = CompilePhase.PRINT_IDEAL,
        applyIfAnd = {"MaxVectorSize", ">=8", "AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    // Speculative aliasing check -> full vectorization.
    static Object[] test4f(short[] a, short[] b) {
        for (int i = 0; i < RANGE-64; i++) {
          b[i+7] = a[i+0];
        }
        return new Object[]{ a, b };
    }
 878 
    // test4g: copies a[i+0] into b[i+8] for every i in [0, RANGE-64). Returns { a, b }.
    // The store runs 8 elements ahead of the load ("distance 8" in the rule comments).
    // The comment following each @IR rule describes that rule.
    //
    // Both rules are evaluated on the PRINT_IDEAL phase, require MaxVectorSize >= 32,
    // and require that ".*multiversion.*" does not match.
    @Test
    // Rule 1: UseAutoVectorizationSpeculativeAliasingChecks disabled.
    @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_8, "> 0",
                  IRNode.STORE_VECTOR, "> 0",
                  ".*multiversion.*", "= 0"},
        phase = CompilePhase.PRINT_IDEAL,
        applyIfAnd = {"MaxVectorSize", ">=32", "UseAutoVectorizationSpeculativeAliasingChecks", "false"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    // Cyclic dependency with distance 8 -> split into 8-packs
    // Rule 2: UseAutoVectorizationSpeculativeAliasingChecks enabled; the short load
    // vector is matched without an explicit size.
    @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
                  IRNode.STORE_VECTOR, "> 0",
                  ".*multiversion.*", "= 0"},
        phase = CompilePhase.PRINT_IDEAL,
        applyIfAnd = {"MaxVectorSize", ">=32", "UseAutoVectorizationSpeculativeAliasingChecks", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    // Speculative aliasing check -> full vectorization.
    static Object[] test4g(short[] a, short[] b) {
        for (int i = 0; i < RANGE-64; i++) {
          b[i+8] = a[i+0];
        }
        return new Object[]{ a, b };
    }
 902 
    // test4a_alias: copies a[i+0] into b[i+2] for every i in [0, RANGE-64).
    // Returns { a, b }. The loop body is the same as in test4a; only the IR
    // expectations differ (rule 2 here requires ".*multiversion.*" to match).
    // NOTE(review): presumably the runner invokes this variant with aliasing arrays;
    // the call site is outside this view -- confirm.
    //
    // Both rules are evaluated on the PRINT_IDEAL phase and are restricted to sse4.1.
    // The comment following each @IR rule describes that rule.
    @Test
    // Rule 1: UseAutoVectorizationSpeculativeAliasingChecks disabled; no multiversion
    // match expected.
    @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_2, "> 0",
                  IRNode.STORE_VECTOR, "> 0",
                  ".*multiversion.*", "= 0"},
        phase = CompilePhase.PRINT_IDEAL,
        applyIf = {"UseAutoVectorizationSpeculativeAliasingChecks", "false"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true"})
    // Cyclic dependency with distance 2 -> split into 2-packs
    // Rule 2: UseAutoVectorizationSpeculativeAliasingChecks enabled and AlignVector
    // disabled. Expects both an unsized short load vector and one of size 2, and at
    // least one multiversion match.
    @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
                  IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_2, "> 0",
                  IRNode.STORE_VECTOR, "> 0",
                  ".*multiversion.*", "> 0"},
        phase = CompilePhase.PRINT_IDEAL,
        applyIfAnd = {"UseAutoVectorizationSpeculativeAliasingChecks", "true", "AlignVector", "false"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true"})
    // Speculative aliasing check with multiversioning -> full vectorization & split packs.
    static Object[] test4a_alias(short[] a, short[] b) {
        for (int i = 0; i < RANGE-64; i++) {
          b[i+2] = a[i+0];
        }
        return new Object[]{ a, b };
    }
 927 
    // test4b_alias: copies a[i+0] into b[i+3] for every i in [0, RANGE-64).
    // Returns { a, b }. The loop body is the same as in test4b; only the IR
    // expectations differ (rule 2 here requires ".*multiversion.*" to match).
    // NOTE(review): presumably the runner invokes this variant with aliasing arrays;
    // the call site is outside this view -- confirm.
    //
    // Both rules are evaluated on the PRINT_IDEAL phase, require AlignVector to be
    // disabled, and are restricted to sse4.1. The comment following each @IR rule
    // describes that rule.
    @Test
    // Rule 1: UseAutoVectorizationSpeculativeAliasingChecks disabled; no multiversion
    // match expected.
    @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_2, "> 0",
                  IRNode.STORE_VECTOR, "> 0",
                  ".*multiversion.*", "= 0"},
        phase = CompilePhase.PRINT_IDEAL,
        applyIfAnd = {"AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "false"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true"})
    // Cyclic dependency with distance 3 -> split into 2-packs
    // Rule 2: UseAutoVectorizationSpeculativeAliasingChecks enabled. Expects both an
    // unsized short load vector and one of size 2, and at least one multiversion match.
    @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
                  IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_2, "> 0",
                  IRNode.STORE_VECTOR, "> 0",
                  ".*multiversion.*", "> 0"},
        phase = CompilePhase.PRINT_IDEAL,
        applyIfAnd = {"AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true"})
    // Speculative aliasing check with multiversioning -> full vectorization & split packs.
    static Object[] test4b_alias(short[] a, short[] b) {
        for (int i = 0; i < RANGE-64; i++) {
          b[i+3] = a[i+0];
        }
        return new Object[]{ a, b };
    }
 952 
    // test4c_alias: copies a[i+0] into b[i+4] for every i in [0, RANGE-64).
    // Returns { a, b }. The loop body is the same as in test4c; only the IR
    // expectations differ (rule 2 here requires ".*multiversion.*" to match).
    // NOTE(review): presumably the runner invokes this variant with aliasing arrays;
    // the call site is outside this view -- confirm.
    //
    // Both rules are evaluated on the PRINT_IDEAL phase and require MaxVectorSize >= 8.
    // The comment following each @IR rule describes that rule.
    @Test
    // Rule 1: UseAutoVectorizationSpeculativeAliasingChecks disabled; no multiversion
    // match expected.
    @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0",
                  ".*multiversion.*", "= 0"},
        phase = CompilePhase.PRINT_IDEAL,
        applyIfAnd = {"MaxVectorSize", ">=8", "UseAutoVectorizationSpeculativeAliasingChecks", "false"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    // Cyclic dependency with distance 4 -> split into 4-packs
    // Rule 2: UseAutoVectorizationSpeculativeAliasingChecks enabled. Expects both an
    // unsized short load vector and one of size 4, and at least one multiversion match.
    @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
                  IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0",
                  ".*multiversion.*", "> 0"},
        phase = CompilePhase.PRINT_IDEAL,
        applyIfAnd = {"MaxVectorSize", ">=8", "UseAutoVectorizationSpeculativeAliasingChecks", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    // Speculative aliasing check with multiversioning -> full vectorization & split packs.
    static Object[] test4c_alias(short[] a, short[] b) {
        for (int i = 0; i < RANGE-64; i++) {
          b[i+4] = a[i+0];
        }
        return new Object[]{ a, b };
    }
 977 
    // test4d_alias: copies a[i+0] into b[i+5] for every i in [0, RANGE-64).
    // Returns { a, b }. The loop body is the same as in test4d; only the IR
    // expectations differ (rule 2 here requires ".*multiversion.*" to match).
    // NOTE(review): presumably the runner invokes this variant with aliasing arrays;
    // the call site is outside this view -- confirm.
    //
    // Both rules are evaluated on the PRINT_IDEAL phase and require MaxVectorSize >= 8
    // and AlignVector disabled. The comment following each @IR rule describes that rule.
    @Test
    // Rule 1: UseAutoVectorizationSpeculativeAliasingChecks disabled; no multiversion
    // match expected.
    @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0",
                  ".*multiversion.*", "= 0"},
        phase = CompilePhase.PRINT_IDEAL,
        applyIfAnd = {"MaxVectorSize", ">=8", "AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "false"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    // Cyclic dependency with distance 5 -> split into 4-packs
    // Rule 2: UseAutoVectorizationSpeculativeAliasingChecks enabled. Expects both an
    // unsized short load vector and one of size 4, and at least one multiversion match.
    @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
                  IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0",
                  ".*multiversion.*", "> 0"},
        phase = CompilePhase.PRINT_IDEAL,
        applyIfAnd = {"MaxVectorSize", ">=8", "AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    // Speculative aliasing check with multiversioning -> full vectorization & split packs.
    static Object[] test4d_alias(short[] a, short[] b) {
        for (int i = 0; i < RANGE-64; i++) {
          b[i+5] = a[i+0];
        }
        return new Object[]{ a, b };
    }
1002 
    // test4e_alias: copies a[i+0] into b[i+6] for every i in [0, RANGE-64).
    // Returns { a, b }. The loop body is the same as in test4e; only the IR
    // expectations differ (rule 2 here requires ".*multiversion.*" to match).
    // NOTE(review): presumably the runner invokes this variant with aliasing arrays;
    // the call site is outside this view -- confirm.
    //
    // Both rules are evaluated on the PRINT_IDEAL phase and require MaxVectorSize >= 8
    // and AlignVector disabled. The comment following each @IR rule describes that rule.
    @Test
    // Rule 1: UseAutoVectorizationSpeculativeAliasingChecks disabled; no multiversion
    // match expected.
    @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0",
                  ".*multiversion.*", "= 0"},
        phase = CompilePhase.PRINT_IDEAL,
        applyIfAnd = {"MaxVectorSize", ">=8", "AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "false"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    // Cyclic dependency with distance 6 -> split into 4-packs
    // Rule 2: UseAutoVectorizationSpeculativeAliasingChecks enabled. Expects both an
    // unsized short load vector and one of size 4, and at least one multiversion match.
    @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
                  IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0",
                  ".*multiversion.*", "> 0"},
        phase = CompilePhase.PRINT_IDEAL,
        applyIfAnd = {"MaxVectorSize", ">=8", "AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    // Speculative aliasing check with multiversioning -> full vectorization & split packs.
    static Object[] test4e_alias(short[] a, short[] b) {
        for (int i = 0; i < RANGE-64; i++) {
          b[i+6] = a[i+0];
        }
        return new Object[]{ a, b };
    }
1027 
    // test4f_alias: copies a[i+0] into b[i+7] for every i in [0, RANGE-64).
    // Returns { a, b }. The loop body is the same as in test4f; only the IR
    // expectations differ (rule 2 here requires ".*multiversion.*" to match).
    // NOTE(review): presumably the runner invokes this variant with aliasing arrays;
    // the call site is outside this view -- confirm.
    //
    // Both rules are evaluated on the PRINT_IDEAL phase and require MaxVectorSize >= 8
    // and AlignVector disabled. The comment following each @IR rule describes that rule.
    @Test
    // Rule 1: UseAutoVectorizationSpeculativeAliasingChecks disabled; no multiversion
    // match expected.
    @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0",
                  ".*multiversion.*", "= 0"},
        phase = CompilePhase.PRINT_IDEAL,
        applyIfAnd = {"MaxVectorSize", ">=8", "AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "false"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    // Cyclic dependency with distance 7 -> split into 4-packs
    // Rule 2: UseAutoVectorizationSpeculativeAliasingChecks enabled. Expects both an
    // unsized short load vector and one of size 4, and at least one multiversion match.
    @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
                  IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0",
                  ".*multiversion.*", "> 0"},
        phase = CompilePhase.PRINT_IDEAL,
        applyIfAnd = {"MaxVectorSize", ">=8", "AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    // Speculative aliasing check with multiversioning -> full vectorization & split packs.
    static Object[] test4f_alias(short[] a, short[] b) {
        for (int i = 0; i < RANGE-64; i++) {
          b[i+7] = a[i+0];
        }
        return new Object[]{ a, b };
    }
1052 
    // test4g_alias: copies a[i+0] into b[i+8] for every i in [0, RANGE-64).
    // Returns { a, b }. The loop body is the same as in test4g; only the IR
    // expectations differ (rule 2 here requires ".*multiversion.*" to match).
    // NOTE(review): presumably the runner invokes this variant with aliasing arrays;
    // the call site is outside this view -- confirm.
    //
    // Both rules are evaluated on the PRINT_IDEAL phase and require MaxVectorSize >= 32.
    // The comment following each @IR rule describes that rule.
    @Test
    // Rule 1: UseAutoVectorizationSpeculativeAliasingChecks disabled; no multiversion
    // match expected.
    @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_8, "> 0",
                  IRNode.STORE_VECTOR, "> 0",
                  ".*multiversion.*", "= 0"},
        phase = CompilePhase.PRINT_IDEAL,
        applyIfAnd = {"MaxVectorSize", ">=32", "UseAutoVectorizationSpeculativeAliasingChecks", "false"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    // Cyclic dependency with distance 8 -> split into 8-packs
    // Rule 2: UseAutoVectorizationSpeculativeAliasingChecks enabled. Expects both an
    // unsized short load vector and one of size 8, and at least one multiversion match.
    // Unlike rule 1, the x86 feature required here is avx2 rather than sse4.1.
    // NOTE(review): presumably because the full-width short vector must be longer than
    // 8 elements for distance 8 to require an aliasing check at all -- confirm.
    @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
                  IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_8, "> 0",
                  IRNode.STORE_VECTOR, "> 0",
                  ".*multiversion.*", "> 0"},
        phase = CompilePhase.PRINT_IDEAL,
        applyIfAnd = {"MaxVectorSize", ">=32", "UseAutoVectorizationSpeculativeAliasingChecks", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
    // Speculative aliasing check with multiversioning -> full vectorization & split packs.
    static Object[] test4g_alias(short[] a, short[] b) {
        for (int i = 0; i < RANGE-64; i++) {
          b[i+8] = a[i+0];
        }
        return new Object[]{ a, b };
    }
1077 
    // test5a: per stride of 16 shorts, stores (short)(a[i+k] + val) into b[i+k] for
    // k = 0..14, i.e. 15 contiguous elements (offset 15 is not touched).
    // Returns { a, b }.
    //
    // Both rules require MaxVectorSize >= 32 and AlignVector disabled.
    @Test
    // Rule 1 (sse4.1): short load and add vectors of sizes 2, 4 and 8, plus a store
    // vector.
    @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_2, "> 0",
                  IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_8, "> 0",
                  IRNode.ADD_VS,        IRNode.VECTOR_SIZE_2, "> 0",
                  IRNode.ADD_VS,        IRNode.VECTOR_SIZE_8, "> 0",
                  IRNode.ADD_VS,        IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeature = {"sse4.1", "true"})
    // Rule 2 (sve): same as rule 1 but without the size-2 expectations.
    // aarch64 limits minimum vector size to 8B, thus a vector size of
    // length 2 for type "short" will not be generated
    @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_8, "> 0",
                  IRNode.ADD_VS,        IRNode.VECTOR_SIZE_8, "> 0",
                  IRNode.ADD_VS,        IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeature = {"sve", "true"})
    // Split pack into power-of-2 sizes
    static Object[] test5a(short[] a, short[] b, short val) {
        for (int i = 0; i < RANGE; i+=16) {
            b[i+ 0] = (short)(a[i+ 0] + val); // 8 pack
            b[i+ 1] = (short)(a[i+ 1] + val);
            b[i+ 2] = (short)(a[i+ 2] + val);
            b[i+ 3] = (short)(a[i+ 3] + val);
            b[i+ 4] = (short)(a[i+ 4] + val);
            b[i+ 5] = (short)(a[i+ 5] + val);
            b[i+ 6] = (short)(a[i+ 6] + val);
            b[i+ 7] = (short)(a[i+ 7] + val);

            b[i+ 8] = (short)(a[i+ 8] + val); // 4-pack
            b[i+ 9] = (short)(a[i+ 9] + val);
            b[i+10] = (short)(a[i+10] + val);
            b[i+11] = (short)(a[i+11] + val);

            b[i+12] = (short)(a[i+12] + val); // 2-pack
            b[i+13] = (short)(a[i+13] + val);

            // Single remaining element: 8 + 4 + 2 + 1 = 15 elements per stride.
            b[i+14] = (short)(a[i+14] + val);
        }
        return new Object[]{ a, b };
    }
1123 
    // test6a: per stride of 8 ints, adds a[i+k] * b[i+k] for k = 0..3 and
    // a[i+k] & b[i+k] for k = 4..7 into the scalar s. Neither array is written.
    // Returns { a, b, { s } }.
    //
    // Both IR rules expect size-4 int load, multiply, AND and add vectors plus an add
    // reduction. They differ only in their flag preconditions.
    @Test
    // Rule 1: MaxVectorSize >= 32 and AlignVector disabled.
    @IR(counts = {IRNode.LOAD_VECTOR_I,   IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.MUL_VI,          IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.AND_VI,          IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.ADD_VI,          IRNode.VECTOR_SIZE_4, "> 0", // reduction moved out of loop
                  IRNode.ADD_REDUCTION_V,                       "> 0"},
        applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    // Rule 2: MaxVectorSize >= 32 and UseCompactObjectHeaders disabled
    // (see the alignment note at the end of the loop body).
    @IR(counts = {IRNode.LOAD_VECTOR_I,   IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.MUL_VI,          IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.AND_VI,          IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.ADD_VI,          IRNode.VECTOR_SIZE_4, "> 0", // reduction moved out of loop
                  IRNode.ADD_REDUCTION_V,                       "> 0"},
        applyIfAnd = {"MaxVectorSize", ">=32", "UseCompactObjectHeaders", "false"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    // Split packs including reductions
    static Object[] test6a(int[] a, int[] b) {
        // Scalar accumulator; handed back to the caller through the result array.
        int s = 0;
        for (int i = 0; i < RANGE; i+=8) {
            // First four additions into s use products.
            s += a[i+0] * b[i+0];
            s += a[i+1] * b[i+1];
            s += a[i+2] * b[i+2];
            s += a[i+3] * b[i+3];

            // Last four additions into s use bitwise ANDs.
            s += a[i+4] & b[i+4];
            s += a[i+5] & b[i+5];
            s += a[i+6] & b[i+6];
            s += a[i+7] & b[i+7];
            // With AlignVector, we need 8-byte alignment of vector loads/stores.
            // UseCompactObjectHeaders=false                 UseCompactObjectHeaders=true
            // adr = base + 16 + 32*i  ->  always            adr = base + 12 + 32*i  ->  never
            // -> vectorize                                  -> no vectorization
        }
        return new Object[]{ a, b, new int[]{ s } };
    }
1161 
    // test7a: stores b[i] * i into a[i] for every i in [0, RANGE). Returns { a, b }.
    //
    // The single IR rule has no flag preconditions. On 64-bit platforms with avx2, sve
    // or rvv it expects an int load vector, an int multiply vector and a
    // POPULATE_INDEX node.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_I,  "> 0",
                  IRNode.MUL_VI,         "> 0",
                  IRNode.POPULATE_INDEX, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"avx2", "true", "sve", "true", "rvv", "true"})
    // Index Populate:
    // There can be an issue when all the (iv + 1), (iv + 2), ...
    // get packed, but not (iv). Then we have a pack that is one element
    // too short, and we start splitting everything in a bad way.
    static Object[] test7a(int[] a, int[] b) {
        for (int i = 0; i < RANGE; i++) {
            // The loop index itself is the second multiplication operand.
            a[i] = b[i] * i;
        }
        return new Object[]{ a, b };
    }
1178 }