1 /*
   2  * Copyright (c) 2024, 2025, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  */
  23 
  24 package compiler.loopopts.superword;
  25 
  26 import compiler.lib.ir_framework.*;
  27 import jdk.test.lib.Utils;
  28 import jdk.test.whitebox.WhiteBox;
  29 import java.lang.reflect.Array;
  30 import java.util.Map;
  31 import java.util.HashMap;
  32 import java.util.Random;
  33 import java.nio.ByteOrder;
  34 
  35 /*
  36  * @test
  37  * @bug 8326139 8348659
  38  * @summary Test splitting packs in SuperWord
  39  * @library /test/lib /
  40  * @run driver compiler.loopopts.superword.TestSplitPacks nCOH_nAV_ySAC
  41  * @run driver compiler.loopopts.superword.TestSplitPacks nCOH_yAV_ySAC
  42  * @run driver compiler.loopopts.superword.TestSplitPacks yCOH_nAV_ySAC
  43  * @run driver compiler.loopopts.superword.TestSplitPacks yCOH_yAV_ySAC
  44  * @run driver compiler.loopopts.superword.TestSplitPacks nCOH_nAV_nSAC
  45  * @run driver compiler.loopopts.superword.TestSplitPacks nCOH_yAV_nSAC
  46  * @run driver compiler.loopopts.superword.TestSplitPacks yCOH_nAV_nSAC
  47  * @run driver compiler.loopopts.superword.TestSplitPacks yCOH_yAV_nSAC
  48  */
  49 
  50 public class TestSplitPacks {
  51     static int RANGE = 1024*8;
  52     static int RANGE_FINAL = 1024*8;
  53     private static final Random RANDOM = Utils.getRandomInstance();
  54 
  55     // Inputs
  56     byte[] aB;
  57     byte[] bB;
  58     byte mB = (byte)31;
  59     short[] aS;
  60     short[] bS;
  61     short mS = (short)0xF0F0;
  62     int[] aI;
  63     int[] bI;
  64     int mI = 0xF0F0F0F0;
  65     long[] aL;
  66     long[] bL;
  67     long mL = 0xF0F0F0F0F0F0F0F0L;
  68 
  69     // List of tests
  70     Map<String,TestFunction> tests = new HashMap<String,TestFunction>();
  71 
  72     // List of gold, the results from the first run before compilation
  73     Map<String,Object[]> golds = new HashMap<String,Object[]>();
  74 
  75     interface TestFunction {
  76         Object[] run();
  77     }
  78 
  79     public static void main(String[] args) {
  80         TestFramework framework = new TestFramework(TestSplitPacks.class);
  81         framework.addFlags("-XX:+IgnoreUnrecognizedVMOptions", "-XX:LoopUnrollLimit=1000");
  82         switch (args[0]) {
  83             case "nCOH_nAV_ySAC" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:-UseCompactObjectHeaders", "-XX:-AlignVector", "-XX:+UseAutoVectorizationSpeculativeAliasingChecks"); }
  84             case "nCOH_yAV_ySAC" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:-UseCompactObjectHeaders", "-XX:+AlignVector", "-XX:+UseAutoVectorizationSpeculativeAliasingChecks"); }
  85             case "yCOH_nAV_ySAC" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:+UseCompactObjectHeaders", "-XX:-AlignVector", "-XX:+UseAutoVectorizationSpeculativeAliasingChecks"); }
  86             case "yCOH_yAV_ySAC" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:+UseCompactObjectHeaders", "-XX:+AlignVector", "-XX:+UseAutoVectorizationSpeculativeAliasingChecks"); }
  87             case "nCOH_nAV_nSAC" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:-UseCompactObjectHeaders", "-XX:-AlignVector", "-XX:-UseAutoVectorizationSpeculativeAliasingChecks"); }
  88             case "nCOH_yAV_nSAC" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:-UseCompactObjectHeaders", "-XX:+AlignVector", "-XX:-UseAutoVectorizationSpeculativeAliasingChecks"); }
  89             case "yCOH_nAV_nSAC" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:+UseCompactObjectHeaders", "-XX:-AlignVector", "-XX:-UseAutoVectorizationSpeculativeAliasingChecks"); }
  90             case "yCOH_yAV_nSAC" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:+UseCompactObjectHeaders", "-XX:+AlignVector", "-XX:-UseAutoVectorizationSpeculativeAliasingChecks"); }
  91             default -> { throw new RuntimeException("Test argument not recognized: " + args[0]); }
  92         };
  93         framework.start();
  94     }
  95 
  96     public TestSplitPacks() {
  97         // Generate input once
  98         aB = generateB();
  99         bB = generateB();
 100         aS = generateS();
 101         bS = generateS();
 102         aI = generateI();
 103         bI = generateI();
 104         aL = generateL();
 105         bL = generateL();
 106 
 107         // Add all tests to list
 108         tests.put("test0",       () -> { return test0(aI.clone(), bI.clone(), mI); });
 109         tests.put("test1a",      () -> { return test1a(aI.clone(), bI.clone(), mI); });
 110         tests.put("test1b",      () -> { return test1b(aI.clone(), bI.clone(), mI); });
 111         tests.put("test1c",      () -> { return test1c(aI.clone(), bI.clone(), mI); });
 112         tests.put("test1d",      () -> { return test1d(aI.clone(), bI.clone(), mI); });
 113         tests.put("test2a",      () -> { return test2a(aI.clone(), bI.clone(), mI); });
 114         tests.put("test2b",      () -> { return test2b(aI.clone(), bI.clone(), mI); });
 115         tests.put("test2c",      () -> { return test2c(aI.clone(), bI.clone(), mI); });
 116         tests.put("test2d",      () -> { return test2d(aI.clone(), bI.clone(), mI); });
 117         tests.put("test3a",      () -> { return test3a(aS.clone(), bS.clone(), mS); });
 118         tests.put("test4a",      () -> { return test4a(aS.clone(), bS.clone()); });
 119         tests.put("test4b",      () -> { return test4b(aS.clone(), bS.clone()); });
 120         tests.put("test4c",      () -> { return test4c(aS.clone(), bS.clone()); });
 121         tests.put("test4d",      () -> { return test4d(aS.clone(), bS.clone()); });
 122         tests.put("test4e",      () -> { return test4e(aS.clone(), bS.clone()); });
 123         tests.put("test4f",      () -> { return test4f(aS.clone(), bS.clone()); });
 124         tests.put("test4g",      () -> { return test4g(aS.clone(), bS.clone()); });
 125         tests.put("test4a_alias",() -> { short[] x = aS.clone(); return test4a_alias(x, x); });
 126         tests.put("test4b_alias",() -> { short[] x = aS.clone(); return test4b_alias(x, x); });
 127         tests.put("test4c_alias",() -> { short[] x = aS.clone(); return test4c_alias(x, x); });
 128         tests.put("test4d_alias",() -> { short[] x = aS.clone(); return test4d_alias(x, x); });
 129         tests.put("test4e_alias",() -> { short[] x = aS.clone(); return test4e_alias(x, x); });
 130         tests.put("test4f_alias",() -> { short[] x = aS.clone(); return test4f_alias(x, x); });
 131         tests.put("test4g_alias",() -> { short[] x = aS.clone(); return test4g_alias(x, x); });
 132         tests.put("test5a",      () -> { return test5a(aS.clone(), bS.clone(), mS); });
 133         tests.put("test6a",      () -> { return test6a(aI.clone(), bI.clone()); });
 134         tests.put("test7a",      () -> { return test7a(aI.clone(), bI.clone()); });
 135 
 136         // Compute gold value for all test methods before compilation
 137         for (Map.Entry<String,TestFunction> entry : tests.entrySet()) {
 138             String name = entry.getKey();
 139             TestFunction test = entry.getValue();
 140             Object[] gold = test.run();
 141             golds.put(name, gold);
 142         }
 143     }
 144 
 145     @Warmup(100)
 146     @Run(test = {"test0",
 147                  "test1a",
 148                  "test1b",
 149                  "test1c",
 150                  "test1d",
 151                  "test2a",
 152                  "test2b",
 153                  "test2c",
 154                  "test2d",
 155                  "test3a",
 156                  "test4a",
 157                  "test4b",
 158                  "test4c",
 159                  "test4d",
 160                  "test4e",
 161                  "test4f",
 162                  "test4g",
 163                  "test4a_alias",
 164                  "test4b_alias",
 165                  "test4c_alias",
 166                  "test4d_alias",
 167                  "test4e_alias",
 168                  "test4f_alias",
 169                  "test4g_alias",
 170                  "test5a",
 171                  "test6a",
 172                  "test7a"})
 173     public void runTests() {
 174         for (Map.Entry<String,TestFunction> entry : tests.entrySet()) {
 175             String name = entry.getKey();
 176             TestFunction test = entry.getValue();
 177             // Recall gold value from before compilation
 178             Object[] gold = golds.get(name);
 179             // Compute new result
 180             Object[] result = test.run();
 181             // Compare gold and new result
 182             verify(name, gold, result);
 183         }
 184     }
 185 
 186     static byte[] generateB() {
 187         byte[] a = new byte[RANGE];
 188         for (int i = 0; i < a.length; i++) {
 189             a[i] = (byte)RANDOM.nextInt();
 190         }
 191         return a;
 192     }
 193 
 194     static short[] generateS() {
 195         short[] a = new short[RANGE];
 196         for (int i = 0; i < a.length; i++) {
 197             a[i] = (short)RANDOM.nextInt();
 198         }
 199         return a;
 200     }
 201 
 202     static int[] generateI() {
 203         int[] a = new int[RANGE];
 204         for (int i = 0; i < a.length; i++) {
 205             a[i] = RANDOM.nextInt();
 206         }
 207         return a;
 208     }
 209 
 210     static long[] generateL() {
 211         long[] a = new long[RANGE];
 212         for (int i = 0; i < a.length; i++) {
 213             a[i] = RANDOM.nextLong();
 214         }
 215         return a;
 216     }
 217 
 218     static void verify(String name, Object[] gold, Object[] result) {
 219         if (gold.length != result.length) {
 220             throw new RuntimeException("verify " + name + ": not the same number of outputs: gold.length = " +
 221                                        gold.length + ", result.length = " + result.length);
 222         }
 223         for (int i = 0; i < gold.length; i++) {
 224             Object g = gold[i];
 225             Object r = result[i];
 226             if (g.getClass() != r.getClass() || !g.getClass().isArray() || !r.getClass().isArray()) {
 227                 throw new RuntimeException("verify " + name + ": must both be array of same type:" +
 228                                            " gold[" + i + "].getClass() = " + g.getClass().getSimpleName() +
 229                                            " result[" + i + "].getClass() = " + r.getClass().getSimpleName());
 230             }
 231             if (g == r) {
 232                 throw new RuntimeException("verify " + name + ": should be two separate arrays (with identical content):" +
 233                                            " gold[" + i + "] == result[" + i + "]");
 234             }
 235             if (Array.getLength(g) != Array.getLength(r)) {
 236                     throw new RuntimeException("verify " + name + ": arrays must have same length:" +
 237                                            " gold[" + i + "].length = " + Array.getLength(g) +
 238                                            " result[" + i + "].length = " + Array.getLength(r));
 239             }
 240             Class c = g.getClass().getComponentType();
 241             if (c == byte.class) {
 242                 verifyB(name, i, (byte[])g, (byte[])r);
 243             } else if (c == short.class) {
 244                 verifyS(name, i, (short[])g, (short[])r);
 245             } else if (c == int.class) {
 246                 verifyI(name, i, (int[])g, (int[])r);
 247             } else if (c == long.class) {
 248                 verifyL(name, i, (long[])g, (long[])r);
 249             } else {
 250                 throw new RuntimeException("verify " + name + ": array type not supported for verify:" +
 251                                        " gold[" + i + "].getClass() = " + g.getClass().getSimpleName() +
 252                                        " result[" + i + "].getClass() = " + r.getClass().getSimpleName());
 253             }
 254         }
 255     }
 256 
 257     static void verifyB(String name, int i, byte[] g, byte[] r) {
 258         for (int j = 0; j < g.length; j++) {
 259             if (g[j] != r[j]) {
 260                 throw new RuntimeException("verify " + name + ": arrays must have same content:" +
 261                                            " gold[" + i + "][" + j + "] = " + g[j] +
 262                                            " result[" + i + "][" + j + "] = " + r[j]);
 263             }
 264         }
 265     }
 266 
 267     static void verifyS(String name, int i, short[] g, short[] r) {
 268         for (int j = 0; j < g.length; j++) {
 269             if (g[j] != r[j]) {
 270                 throw new RuntimeException("verify " + name + ": arrays must have same content:" +
 271                                            " gold[" + i + "][" + j + "] = " + g[j] +
 272                                            " result[" + i + "][" + j + "] = " + r[j]);
 273             }
 274         }
 275     }
 276 
 277     static void verifyI(String name, int i, int[] g, int[] r) {
 278         for (int j = 0; j < g.length; j++) {
 279             if (g[j] != r[j]) {
 280                 throw new RuntimeException("verify " + name + ": arrays must have same content:" +
 281                                            " gold[" + i + "][" + j + "] = " + g[j] +
 282                                            " result[" + i + "][" + j + "] = " + r[j]);
 283             }
 284         }
 285     }
 286 
 287     static void verifyL(String name, int i, long[] g, long[] r) {
 288         for (int j = 0; j < g.length; j++) {
 289             if (g[j] != r[j]) {
 290                 throw new RuntimeException("verify " + name + ": arrays must have same content:" +
 291                                            " gold[" + i + "][" + j + "] = " + g[j] +
 292                                            " result[" + i + "][" + j + "] = " + r[j]);
 293             }
 294         }
 295     }
 296 
    // test0: in strides of 8 ints, AND elements 0, 1 and 4..7 of a with mask and store
    // each to the same index of b. Indices 2 and 3 are touched on neither side.
    // The IR rule (MaxVectorSize >= 32, 64-bit, sse4.1/asimd/rvv) requires int vector
    // loads and AndV nodes of both size 2 and size 4, plus at least one vector store.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
                  IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.AND_VI,        IRNode.VECTOR_SIZE_2, "> 0",
                  IRNode.AND_VI,        IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIf = {"MaxVectorSize", ">=32"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    // Load and store are already split
    // (top row: indices read from a, bottom row: indices written to b)
    //
    //  0 1 - - 4 5 6 7
    //  | |     | | | |
    //  0 1 - - 4 5 6 7
    static Object[] test0(int[] a, int[] b, int mask) {
        for (int i = 0; i < RANGE; i+=8) {
            // All loads first: a group of 2 and a group of 4, separated by a gap.
            int b0 = a[i+0] & mask;
            int b1 = a[i+1] & mask;

            int b4 = a[i+4] & mask;
            int b5 = a[i+5] & mask;
            int b6 = a[i+6] & mask;
            int b7 = a[i+7] & mask;

            // Then all stores, to the same indices the values came from.
            b[i+0] = b0;
            b[i+1] = b1;

            b[i+4] = b4;
            b[i+5] = b5;
            b[i+6] = b6;
            b[i+7] = b7;
        }
        // Both arrays are returned so they can be compared against the gold run.
        return new Object[]{ a, b };
    }
 331 
    // test1a: in strides of 8 ints, b[i+0..3] = a[i+0..3] + mask and
    // b[i+4..5] = a[i+4..5] * mask; indices 6 and 7 are not touched.
    // The IR rule requires int vector loads of size 2 and 4, an AddV of size 4 and
    // a MulV of size 2.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
                  IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.ADD_VI,        IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.MUL_VI,        IRNode.VECTOR_SIZE_2, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIf = {"MaxVectorSize", ">=32"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    // Adjacent Load and Store, but split by Add/Mul:
    // the six loads and six stores are contiguous, only the operation differs.
    static Object[] test1a(int[] a, int[] b, int mask) {
        for (int i = 0; i < RANGE; i+=8) {
            b[i+0] = a[i+0] + mask; // Add
            b[i+1] = a[i+1] + mask;
            b[i+2] = a[i+2] + mask;
            b[i+3] = a[i+3] + mask;

            b[i+4] = a[i+4] * mask; // Mul
            b[i+5] = a[i+5] * mask;
        }
        // Both arrays are returned so they can be compared against the gold run.
        return new Object[]{ a, b };
    }
 354 
    // test1b: in strides of 8 ints, b[i+0..3] = a[i+0..3] * mask and
    // b[i+4..5] = a[i+4..5] + mask; indices 6 and 7 are not touched.
    // The IR rule requires int vector loads of size 2 and 4, a MulV of size 4 and
    // an AddV of size 2.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
                  IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.ADD_VI,        IRNode.VECTOR_SIZE_2, "> 0",
                  IRNode.MUL_VI,        IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIf = {"MaxVectorSize", ">=32"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    // Adjacent Load and Store, but split by Add/Mul:
    // the six loads and six stores are contiguous, only the operation differs.
    static Object[] test1b(int[] a, int[] b, int mask) {
        for (int i = 0; i < RANGE; i+=8) {
            b[i+0] = a[i+0] * mask; // Mul
            b[i+1] = a[i+1] * mask;
            b[i+2] = a[i+2] * mask;
            b[i+3] = a[i+3] * mask;

            b[i+4] = a[i+4] + mask; // Add
            b[i+5] = a[i+5] + mask;
        }
        // Both arrays are returned so they can be compared against the gold run.
        return new Object[]{ a, b };
    }
 377 
    // test1c: in strides of 8 ints, b[i+0..1] = a[i+0..1] + mask and
    // b[i+2..5] = a[i+2..5] * mask; indices 6 and 7 are not touched.
    // The IR rule requires int vector loads of size 2 and 4, an AddV of size 2 and
    // a MulV of size 4.
    // NOTE(review): this rule lists avx2 where test0/test1a/test1b list sse4.1;
    // the reason is not stated in the visible code.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
                  IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.ADD_VI,        IRNode.VECTOR_SIZE_2, "> 0",
                  IRNode.MUL_VI,        IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIf = {"MaxVectorSize", ">=32"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
    // Adjacent Load and Store, but split by Add/Mul:
    // the six loads and six stores are contiguous, only the operation differs.
    static Object[] test1c(int[] a, int[] b, int mask) {
        for (int i = 0; i < RANGE; i+=8) {
            b[i+0] = a[i+0] + mask; // Add
            b[i+1] = a[i+1] + mask;

            b[i+2] = a[i+2] * mask; // Mul
            b[i+3] = a[i+3] * mask;
            b[i+4] = a[i+4] * mask;
            b[i+5] = a[i+5] * mask;
        }
        // Both arrays are returned so they can be compared against the gold run.
        return new Object[]{ a, b };
    }
 400 
    // test1d: in strides of 8 ints, b[i+0..1] = a[i+0..1] * mask and
    // b[i+2..5] = a[i+2..5] + mask; indices 6 and 7 are not touched.
    // The IR rule requires int vector loads of size 2 and 4, a MulV of size 2 and
    // an AddV of size 4.
    // NOTE(review): this rule lists avx2 where test0/test1a/test1b list sse4.1;
    // the reason is not stated in the visible code.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
                  IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.ADD_VI,        IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.MUL_VI,        IRNode.VECTOR_SIZE_2, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIf = {"MaxVectorSize", ">=32"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
    // Adjacent Load and Store, but split by Add/Mul:
    // the six loads and six stores are contiguous, only the operation differs.
    static Object[] test1d(int[] a, int[] b, int mask) {
        for (int i = 0; i < RANGE; i+=8) {
            b[i+0] = a[i+0] * mask; // Mul
            b[i+1] = a[i+1] * mask;

            b[i+2] = a[i+2] + mask; // Add
            b[i+3] = a[i+3] + mask;
            b[i+4] = a[i+4] + mask;
            b[i+5] = a[i+5] + mask;
        }
        // Both arrays are returned so they can be compared against the gold run.
        return new Object[]{ a, b };
    }
 423 
    // test2a: in strides of 8 ints, read the six adjacent elements a[i+0..5], AND each
    // with mask, and store the first two to b[i+0..1] and the remaining four to
    // b[i+4..7] (shifted by two). b[i+2] and b[i+3] are not written.
    // The IR rule requires int vector loads and AndV nodes of both size 2 and size 4.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
                  IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.AND_VI,        IRNode.VECTOR_SIZE_2, "> 0",
                  IRNode.AND_VI,        IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIf = {"MaxVectorSize", ">=32"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    // Split the load
    // (top row: indices read from a, bottom row: indices written to b)
    //
    //  0 1 2 3 4 5 - -
    //  | |  \ \ \ \
    //  | |   \ \ \ \
    //  | |    \ \ \ \
    //  0 1 - - 4 5 6 7
    //
    static Object[] test2a(int[] a, int[] b, int mask) {
        for (int i = 0; i < RANGE; i+=8) {
            // Six adjacent loads.
            int b0 = a[i+0] & mask;
            int b1 = a[i+1] & mask;
            int b2 = a[i+2] & mask;
            int b3 = a[i+3] & mask;
            int b4 = a[i+4] & mask;
            int b5 = a[i+5] & mask;

            // Stores form a group of 2 and a group of 4 with a gap in between.
            b[i+0] = b0;
            b[i+1] = b1;

            b[i+4] = b2;
            b[i+5] = b3;
            b[i+6] = b4;
            b[i+7] = b5;
        }
        // Both arrays are returned so they can be compared against the gold run.
        return new Object[]{ a, b };
    }
 460 
    // test2b: in strides of 8 ints, read the six adjacent elements a[i+0..5], AND each
    // with mask, and store the first four to b[i+0..3] and the last two to b[i+6..7]
    // (shifted by two). b[i+4] and b[i+5] are not written.
    // The IR rule requires int vector loads and AndV nodes of both size 2 and size 4.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
                  IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.AND_VI,        IRNode.VECTOR_SIZE_2, "> 0",
                  IRNode.AND_VI,        IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIf = {"MaxVectorSize", ">=32"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    // Split the load
    // (top row: indices read from a, bottom row: indices written to b)
    //
    //  0 1 2 3 4 5 - -
    //  | | | |  \ \
    //  | | | |   \ \
    //  | | | |    \ \
    //  0 1 2 3 - - 6 7
    //
    static Object[] test2b(int[] a, int[] b, int mask) {
        for (int i = 0; i < RANGE; i+=8) {
            // Six adjacent loads.
            int b0 = a[i+0] & mask;
            int b1 = a[i+1] & mask;
            int b2 = a[i+2] & mask;
            int b3 = a[i+3] & mask;
            int b4 = a[i+4] & mask;
            int b5 = a[i+5] & mask;

            // Stores form a group of 4 and a group of 2 with a gap in between.
            b[i+0] = b0;
            b[i+1] = b1;
            b[i+2] = b2;
            b[i+3] = b3;

            b[i+6] = b4;
            b[i+7] = b5;
        }
        // Both arrays are returned so they can be compared against the gold run.
        return new Object[]{ a, b };
    }
 497 
    // test2c: in strides of 8 ints, read a[i+0..1] and a[i+4..7] (a gap at 2 and 3),
    // AND each with mask, and store the six values to the adjacent slots b[i+0..5].
    // The IR rule requires int vector loads and AndV nodes of both size 2 and size 4.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
                  IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.AND_VI,        IRNode.VECTOR_SIZE_2, "> 0",
                  IRNode.AND_VI,        IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIf = {"MaxVectorSize", ">=32"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    // Split the store: the loads already come in a group of 2 and a group of 4,
    // while the six stores are adjacent.
    // (top row: indices read from a, bottom row: indices written to b)
    //
    //  0 1 - - 4 5 6 7
    //  | |    / / / /
    //  | |   / / / /
    //  | |  / / / /
    //  0 1 2 3 4 5 - -
    //
    static Object[] test2c(int[] a, int[] b, int mask) {
        for (int i = 0; i < RANGE; i+=8) {
            // Loads: a group of 2 and a group of 4, separated by a gap.
            int b0 = a[i+0] & mask;
            int b1 = a[i+1] & mask;

            int b4 = a[i+4] & mask;
            int b5 = a[i+5] & mask;
            int b6 = a[i+6] & mask;
            int b7 = a[i+7] & mask;

            // Six adjacent stores; the last four values move down by two slots.
            b[i+0] = b0;
            b[i+1] = b1;
            b[i+2] = b4;
            b[i+3] = b5;
            b[i+4] = b6;
            b[i+5] = b7;
        }
        // Both arrays are returned so they can be compared against the gold run.
        return new Object[]{ a, b };
    }
 534 
    // test2d: in strides of 8 ints, read a[i+0..3] and a[i+6..7] (a gap at 4 and 5),
    // AND each with mask, and store the six values to the adjacent slots b[i+0..5].
    // The IR rule requires int vector loads and AndV nodes of both size 2 and size 4.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
                  IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.AND_VI,        IRNode.VECTOR_SIZE_2, "> 0",
                  IRNode.AND_VI,        IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIf = {"MaxVectorSize", ">=32"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    // Split the store: the loads already come in a group of 4 and a group of 2,
    // while the six stores are adjacent.
    // (top row: indices read from a, bottom row: indices written to b)
    //
    //  0 1 2 3 - - 6 7
    //  | | | |    / /
    //  | | | |   / /
    //  | | | |  / /
    //  0 1 2 3 4 5 - -
    //
    static Object[] test2d(int[] a, int[] b, int mask) {
        for (int i = 0; i < RANGE; i+=8) {
            // Loads: a group of 4 and a group of 2, separated by a gap.
            int b0 = a[i+0] & mask;
            int b1 = a[i+1] & mask;
            int b2 = a[i+2] & mask;
            int b3 = a[i+3] & mask;

            int b6 = a[i+6] & mask;
            int b7 = a[i+7] & mask;

            // Six adjacent stores; the last two values move down by two slots.
            b[i+0] = b0;
            b[i+1] = b1;
            b[i+2] = b2;
            b[i+3] = b3;
            b[i+4] = b6;
            b[i+5] = b7;
        }
        // Both arrays are returned so they can be compared against the gold run.
        return new Object[]{ a, b };
    }
 571 
    // test3a: in strides of 16 shorts, read a[i+0..7]; copy a[i+0] to b[i+0] and
    // a[i+4..7] to b[i+4..7]; add a[i+1] + a[i+2] + a[i+3] into a scalar sum; and write
    // val to b[i+3] and b[i+8], the two slots directly next to the 4-element copy.
    // The IR rule requires a short vector load of size 4 and at least one vector store.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIf = {"MaxVectorSize", ">=32"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    // Top row: indices read from a. Bottom row: indices written to b.
    // "+" marks the three loads that are summed, "v" marks the stores of val.
    //
    // 0 1 2 3 4 5 6 7 -
    // | | | | | | | |
    // | + + + | | | |
    // |       | | | |
    // |     v | | | | v
    // |     | | | | | |
    // 0 - - 3 4 5 6 7 8
    static Object[] test3a(short[] a, short[] b, short val) {
        int sum = 0;
        for (int i = 0; i < RANGE; i+=16) {
            short a0 = a[i+0]; // required for alignment / offsets, technical limitation.

            short a1 = a[i+1]; // adjacent to 4-pack, but needs to be split off
            short a2 = a[i+2];
            short a3 = a[i+3];

            short a4 = a[i+4]; // 4-pack
            short a5 = a[i+5];
            short a6 = a[i+6];
            short a7 = a[i+7];


            b[i+0] = a0; // required for alignment / offsets, technical limitation.

            sum += a1 + a2 + a3; // not packed

            b[i+3] = val; // adjacent to 4-pack but needs to be split off

            b[i+4] = a4; // 4-pack
            b[i+5] = a5;
            b[i+6] = a6;
            b[i+7] = a7;

            b[i+8] = val; // adjacent to 4-pack but needs to be split off
        }
        // The scalar sum is wrapped in a one-element int array so that it can be
        // returned and compared together with the two arrays.
        return new Object[]{ a, b, new int[]{ sum } };
    }
 615 
    // test4a: copies a[i] to b[i+2] for i in [0, RANGE-64). If a and b were the same
    // array, the value read at iteration i+2 would be the one written at iteration i.
    // Two IR rules, selected by UseAutoVectorizationSpeculativeAliasingChecks; both
    // require zero matches of ".*multiversion.*".
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_2, "> 0",
                  IRNode.STORE_VECTOR, "> 0",
                  ".*multiversion.*", "= 0"},
        phase = CompilePhase.PRINT_IDEAL,
        applyIf = {"UseAutoVectorizationSpeculativeAliasingChecks", "false"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true"})
    // Without the speculative aliasing check (rule above):
    // Cyclic dependency with distance 2 -> split into 2-packs
    @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
                  IRNode.STORE_VECTOR, "> 0",
                  ".*multiversion.*", "= 0"},
        phase = CompilePhase.PRINT_IDEAL,
        applyIfAnd = {"UseAutoVectorizationSpeculativeAliasingChecks", "true", "AlignVector", "false"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true"})
    // With the speculative aliasing check and AlignVector off (rule above):
    // Speculative aliasing check -> full vectorization.
    static Object[] test4a(short[] a, short[] b) {
        for (int i = 0; i < RANGE-64; i++) {
          b[i+2] = a[i+0];
        }
        // Both arrays are returned so they can be compared against the gold run.
        return new Object[]{ a, b };
    }
 639 
    // test4b: copies a[i] to b[i+3] for i in [0, RANGE-64). If a and b were the same
    // array, the value read at iteration i+3 would be the one written at iteration i.
    // Two IR rules, both restricted to AlignVector off and selected by
    // UseAutoVectorizationSpeculativeAliasingChecks; both require zero matches of
    // ".*multiversion.*".
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_2, "> 0",
                  IRNode.STORE_VECTOR, "> 0",
                  ".*multiversion.*", "= 0"},
        phase = CompilePhase.PRINT_IDEAL,
        applyIfAnd = {"AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "false"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true"})
    // Without the speculative aliasing check (rule above):
    // Cyclic dependency with distance 3 -> split into 2-packs
    @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
                  IRNode.STORE_VECTOR, "> 0",
                  ".*multiversion.*", "= 0"},
        phase = CompilePhase.PRINT_IDEAL,
        applyIfAnd = {"AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true"})
    // With the speculative aliasing check (rule above):
    // Speculative aliasing check -> full vectorization.
    static Object[] test4b(short[] a, short[] b) {
        for (int i = 0; i < RANGE-64; i++) {
          b[i+3] = a[i+0];
        }
        // Both arrays are returned so they can be compared against the gold run.
        return new Object[]{ a, b };
    }
 663 
    // test4c: copies a[i] to b[i+4] for i in [0, RANGE-64). If a and b were the same
    // array, the value read at iteration i+4 would be the one written at iteration i.
    // Two IR rules, both requiring MaxVectorSize >= 8 and selected by
    // UseAutoVectorizationSpeculativeAliasingChecks; both require zero matches of
    // ".*multiversion.*".
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0",
                  ".*multiversion.*", "= 0"},
        phase = CompilePhase.PRINT_IDEAL,
        applyIfAnd = {"MaxVectorSize", ">=8", "UseAutoVectorizationSpeculativeAliasingChecks", "false"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    // Without the speculative aliasing check (rule above):
    // Cyclic dependency with distance 4 -> split into 4-packs
    @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
                  IRNode.STORE_VECTOR, "> 0",
                  ".*multiversion.*", "= 0"},
        phase = CompilePhase.PRINT_IDEAL,
        applyIfAnd = {"MaxVectorSize", ">=8", "UseAutoVectorizationSpeculativeAliasingChecks", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    // With the speculative aliasing check (rule above):
    // Speculative aliasing check -> full vectorization.
    static Object[] test4c(short[] a, short[] b) {
        for (int i = 0; i < RANGE-64; i++) {
          b[i+4] = a[i+0];
        }
        // Both arrays are returned so they can be compared against the gold run.
        return new Object[]{ a, b };
    }
 687 
    // test4d: copies a[i] to b[i+5] for i in [0, RANGE-64). If a and b were the same
    // array, the value read at iteration i+5 would be the one written at iteration i.
    // Two IR rules, both requiring MaxVectorSize >= 8 and AlignVector off, selected by
    // UseAutoVectorizationSpeculativeAliasingChecks; both require zero matches of
    // ".*multiversion.*".
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0",
                  ".*multiversion.*", "= 0"},
        phase = CompilePhase.PRINT_IDEAL,
        applyIfAnd = {"MaxVectorSize", ">=8", "AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "false"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    // Without the speculative aliasing check (rule above):
    // Cyclic dependency with distance 5 -> split into 4-packs
    @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
                  IRNode.STORE_VECTOR, "> 0",
                  ".*multiversion.*", "= 0"},
        phase = CompilePhase.PRINT_IDEAL,
        applyIfAnd = {"MaxVectorSize", ">=8", "AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    // With the speculative aliasing check (rule above):
    // Speculative aliasing check -> full vectorization.
    static Object[] test4d(short[] a, short[] b) {
        for (int i = 0; i < RANGE-64; i++) {
          b[i+5] = a[i+0];
        }
        // Both arrays are returned so they can be compared against the gold run.
        return new Object[]{ a, b };
    }
 711 
 712     @Test
         // Shifted copy of shorts: b[i+6] = a[i] for i in [0, RANGE-64), so every store lands
         // 6 elements ahead of the load performed in the same iteration.
         //
         // Rule 1 - MaxVectorSize >= 8, AlignVector off, speculative aliasing checks disabled:
         // at least one short load vector of size 4 and at least one vector store must show up,
         // and the multiversion regex must not match the PRINT_IDEAL output.
 713     @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
 714                   IRNode.STORE_VECTOR, "> 0",
 715                   ".*multiversion.*", "= 0"},
 716         phase = CompilePhase.PRINT_IDEAL,
 717         applyIfAnd = {"MaxVectorSize", ">=8", "AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "false"},
 718         applyIfPlatform = {"64-bit", "true"},
 719         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 720     // Cyclic dependency with distance 6 -> split into 4-packs
         // (4 is the largest power of two that does not exceed the distance of 6.)
         //
         // Rule 2 - same conditions, but with speculative aliasing checks enabled:
         // short load vectors of any size and a vector store are required, still with zero
         // matches of the multiversion regex.
 721     @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
 722                   IRNode.STORE_VECTOR, "> 0",
 723                   ".*multiversion.*", "= 0"},
 724         phase = CompilePhase.PRINT_IDEAL,
 725         applyIfAnd = {"MaxVectorSize", ">=8", "AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "true"},
 726         applyIfPlatform = {"64-bit", "true"},
 727         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 728     // Speculative aliasing check -> full vectorization.
         // NOTE(review): the distance-6 dependency only exists if a and b can be the same array;
         // the caller and RANGE are defined outside this view - confirm how arguments are set up.
         // Returns the two input arrays wrapped in an Object[] (a first, then b).
 729     static Object[] test4e(short[] a, short[] b) {
 730         for (int i = 0; i < RANGE-64; i++) {
 731           b[i+6] = a[i+0];
 732         }
 733         return new Object[]{ a, b };
 734     }
 735 
 736     @Test
         // Shifted copy of shorts: b[i+7] = a[i] for i in [0, RANGE-64), so every store lands
         // 7 elements ahead of the load performed in the same iteration.
         //
         // Rule 1 - MaxVectorSize >= 8, AlignVector off, speculative aliasing checks disabled:
         // at least one short load vector of size 4 and at least one vector store must show up,
         // and the multiversion regex must not match the PRINT_IDEAL output.
 737     @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
 738                   IRNode.STORE_VECTOR, "> 0",
 739                   ".*multiversion.*", "= 0"},
 740         phase = CompilePhase.PRINT_IDEAL,
 741         applyIfAnd = {"MaxVectorSize", ">=8", "AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "false"},
 742         applyIfPlatform = {"64-bit", "true"},
 743         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 744     // Cyclic dependency with distance 7 -> split into 4-packs
         // (4 is the largest power of two that does not exceed the distance of 7.)
         //
         // Rule 2 - same conditions, but with speculative aliasing checks enabled:
         // short load vectors of any size and a vector store are required, still with zero
         // matches of the multiversion regex.
 745     @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
 746                   IRNode.STORE_VECTOR, "> 0",
 747                   ".*multiversion.*", "= 0"},
 748         phase = CompilePhase.PRINT_IDEAL,
 749         applyIfAnd = {"MaxVectorSize", ">=8", "AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "true"},
 750         applyIfPlatform = {"64-bit", "true"},
 751         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 752     // Speculative aliasing check -> full vectorization.
         // NOTE(review): the distance-7 dependency only exists if a and b can be the same array;
         // the caller and RANGE are defined outside this view - confirm how arguments are set up.
         // Returns the two input arrays wrapped in an Object[] (a first, then b).
 753     static Object[] test4f(short[] a, short[] b) {
 754         for (int i = 0; i < RANGE-64; i++) {
 755           b[i+7] = a[i+0];
 756         }
 757         return new Object[]{ a, b };
 758     }
 759 
 760     @Test
         // Shifted copy of shorts: b[i+8] = a[i] for i in [0, RANGE-64), so every store lands
         // 8 elements ahead of the load performed in the same iteration.
         //
         // Rule 1 - MaxVectorSize >= 32, speculative aliasing checks disabled:
         // at least one short load vector of size 8 and at least one vector store must show up,
         // and the multiversion regex must not match the PRINT_IDEAL output.
 761     @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_8, "> 0",
 762                   IRNode.STORE_VECTOR, "> 0",
 763                   ".*multiversion.*", "= 0"},
 764         phase = CompilePhase.PRINT_IDEAL,
 765         applyIfAnd = {"MaxVectorSize", ">=32", "UseAutoVectorizationSpeculativeAliasingChecks", "false"},
 766         applyIfPlatform = {"64-bit", "true"},
 767         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 768     // Cyclic dependency with distance 8 -> split into 8-packs
         //
         // Rule 2 - same conditions, but with speculative aliasing checks enabled:
         // short load vectors of any size and a vector store are required, still with zero
         // matches of the multiversion regex.
 769     @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
 770                   IRNode.STORE_VECTOR, "> 0",
 771                   ".*multiversion.*", "= 0"},
 772         phase = CompilePhase.PRINT_IDEAL,
 773         applyIfAnd = {"MaxVectorSize", ">=32", "UseAutoVectorizationSpeculativeAliasingChecks", "true"},
 774         applyIfPlatform = {"64-bit", "true"},
 775         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 776     // Speculative aliasing check -> full vectorization.
         // NOTE(review): the distance-8 dependency only exists if a and b can be the same array;
         // the caller and RANGE are defined outside this view - confirm how arguments are set up.
         // Returns the two input arrays wrapped in an Object[] (a first, then b).
 777     static Object[] test4g(short[] a, short[] b) {
 778         for (int i = 0; i < RANGE-64; i++) {
 779           b[i+8] = a[i+0];
 780         }
 781         return new Object[]{ a, b };
 782     }
 783 
 784     @Test
         // Shifted copy of shorts: b[i+2] = a[i] for i in [0, RANGE-64). Unlike the other rules in
         // this group, both rules here are restricted to sse4.1 CPUs on 64-bit platforms.
         //
         // Rule 1 - speculative aliasing checks disabled:
         // at least one short load vector of size 2 and at least one vector store must show up,
         // and the multiversion regex must not match the PRINT_IDEAL output.
 785     @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_2, "> 0",
 786                   IRNode.STORE_VECTOR, "> 0",
 787                   ".*multiversion.*", "= 0"},
 788         phase = CompilePhase.PRINT_IDEAL,
 789         applyIf = {"UseAutoVectorizationSpeculativeAliasingChecks", "false"},
 790         applyIfPlatform = {"64-bit", "true"},
 791         applyIfCPUFeatureOr = {"sse4.1", "true"})
 792     // Cyclic dependency with distance 2 -> split into 2-packs
         //
         // Rule 2 - speculative aliasing checks enabled and AlignVector off:
         // short load vectors of any size AND of size 2 are both required, plus a vector store
         // and at least one match of the multiversion regex.
 793     @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
 794                   IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_2, "> 0",
 795                   IRNode.STORE_VECTOR, "> 0",
 796                   ".*multiversion.*", "> 0"},
 797         phase = CompilePhase.PRINT_IDEAL,
 798         applyIfAnd = {"UseAutoVectorizationSpeculativeAliasingChecks", "true", "AlignVector", "false"},
 799         applyIfPlatform = {"64-bit", "true"},
 800         applyIfCPUFeatureOr = {"sse4.1", "true"})
 801     // Speculative aliasing check with multiversioning -> full vectorization & split packs.
         // NOTE(review): presumably this variant is invoked with a and b referring to the same
         // array, so the speculative check can fail at runtime - the caller is outside this view,
         // confirm against the test setup.
         // Returns the two input arrays wrapped in an Object[] (a first, then b).
 802     static Object[] test4a_alias(short[] a, short[] b) {
 803         for (int i = 0; i < RANGE-64; i++) {
 804           b[i+2] = a[i+0];
 805         }
 806         return new Object[]{ a, b };
 807     }
 808 
 809     @Test
         // Shifted copy of shorts: b[i+3] = a[i] for i in [0, RANGE-64). Both rules are restricted
         // to sse4.1 CPUs on 64-bit platforms and require AlignVector to be off.
         //
         // Rule 1 - speculative aliasing checks disabled:
         // at least one short load vector of size 2 and at least one vector store must show up,
         // and the multiversion regex must not match the PRINT_IDEAL output.
 810     @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_2, "> 0",
 811                   IRNode.STORE_VECTOR, "> 0",
 812                   ".*multiversion.*", "= 0"},
 813         phase = CompilePhase.PRINT_IDEAL,
 814         applyIfAnd = {"AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "false"},
 815         applyIfPlatform = {"64-bit", "true"},
 816         applyIfCPUFeatureOr = {"sse4.1", "true"})
 817     // Cyclic dependency with distance 3 -> split into 2-packs
         // (2 is the largest power of two that does not exceed the distance of 3.)
         //
         // Rule 2 - speculative aliasing checks enabled:
         // short load vectors of any size AND of size 2 are both required, plus a vector store
         // and at least one match of the multiversion regex.
 818     @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
 819                   IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_2, "> 0",
 820                   IRNode.STORE_VECTOR, "> 0",
 821                   ".*multiversion.*", "> 0"},
 822         phase = CompilePhase.PRINT_IDEAL,
 823         applyIfAnd = {"AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "true"},
 824         applyIfPlatform = {"64-bit", "true"},
 825         applyIfCPUFeatureOr = {"sse4.1", "true"})
 826     // Speculative aliasing check with multiversioning -> full vectorization & split packs.
         // NOTE(review): presumably this variant is invoked with a and b referring to the same
         // array, so the speculative check can fail at runtime - the caller is outside this view,
         // confirm against the test setup.
         // Returns the two input arrays wrapped in an Object[] (a first, then b).
 827     static Object[] test4b_alias(short[] a, short[] b) {
 828         for (int i = 0; i < RANGE-64; i++) {
 829           b[i+3] = a[i+0];
 830         }
 831         return new Object[]{ a, b };
 832     }
 833 
 834     @Test
         // Shifted copy of shorts: b[i+4] = a[i] for i in [0, RANGE-64); the loop body is the same
         // as in test4c, only the expectations under speculative aliasing checks differ.
         //
         // Rule 1 - MaxVectorSize >= 8, speculative aliasing checks disabled:
         // at least one short load vector of size 4 and at least one vector store must show up,
         // and the multiversion regex must not match the PRINT_IDEAL output.
 835     @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
 836                   IRNode.STORE_VECTOR, "> 0",
 837                   ".*multiversion.*", "= 0"},
 838         phase = CompilePhase.PRINT_IDEAL,
 839         applyIfAnd = {"MaxVectorSize", ">=8", "UseAutoVectorizationSpeculativeAliasingChecks", "false"},
 840         applyIfPlatform = {"64-bit", "true"},
 841         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 842     // Cyclic dependency with distance 4 -> split into 4-packs
         //
         // Rule 2 - same conditions, but with speculative aliasing checks enabled:
         // short load vectors of any size AND of size 4 are both required, plus a vector store
         // and at least one match of the multiversion regex.
 843     @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
 844                   IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
 845                   IRNode.STORE_VECTOR, "> 0",
 846                   ".*multiversion.*", "> 0"},
 847         phase = CompilePhase.PRINT_IDEAL,
 848         applyIfAnd = {"MaxVectorSize", ">=8", "UseAutoVectorizationSpeculativeAliasingChecks", "true"},
 849         applyIfPlatform = {"64-bit", "true"},
 850         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 851     // Speculative aliasing check with multiversioning -> full vectorization & split packs.
         // NOTE(review): presumably this variant is invoked with a and b referring to the same
         // array, so the speculative check can fail at runtime - the caller is outside this view,
         // confirm against the test setup.
         // Returns the two input arrays wrapped in an Object[] (a first, then b).
 852     static Object[] test4c_alias(short[] a, short[] b) {
 853         for (int i = 0; i < RANGE-64; i++) {
 854           b[i+4] = a[i+0];
 855         }
 856         return new Object[]{ a, b };
 857     }
 858 
 859     @Test
         // Shifted copy of shorts: b[i+5] = a[i] for i in [0, RANGE-64); the loop body is the same
         // as in test4d, only the expectations under speculative aliasing checks differ.
         //
         // Rule 1 - MaxVectorSize >= 8, AlignVector off, speculative aliasing checks disabled:
         // at least one short load vector of size 4 and at least one vector store must show up,
         // and the multiversion regex must not match the PRINT_IDEAL output.
 860     @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
 861                   IRNode.STORE_VECTOR, "> 0",
 862                   ".*multiversion.*", "= 0"},
 863         phase = CompilePhase.PRINT_IDEAL,
 864         applyIfAnd = {"MaxVectorSize", ">=8", "AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "false"},
 865         applyIfPlatform = {"64-bit", "true"},
 866         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 867     // Cyclic dependency with distance 5 -> split into 4-packs
         // (4 is the largest power of two that does not exceed the distance of 5.)
         //
         // Rule 2 - same conditions, but with speculative aliasing checks enabled:
         // short load vectors of any size AND of size 4 are both required, plus a vector store
         // and at least one match of the multiversion regex.
 868     @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
 869                   IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
 870                   IRNode.STORE_VECTOR, "> 0",
 871                   ".*multiversion.*", "> 0"},
 872         phase = CompilePhase.PRINT_IDEAL,
 873         applyIfAnd = {"MaxVectorSize", ">=8", "AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "true"},
 874         applyIfPlatform = {"64-bit", "true"},
 875         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 876     // Speculative aliasing check with multiversioning -> full vectorization & split packs.
         // NOTE(review): presumably this variant is invoked with a and b referring to the same
         // array, so the speculative check can fail at runtime - the caller is outside this view,
         // confirm against the test setup.
         // Returns the two input arrays wrapped in an Object[] (a first, then b).
 877     static Object[] test4d_alias(short[] a, short[] b) {
 878         for (int i = 0; i < RANGE-64; i++) {
 879           b[i+5] = a[i+0];
 880         }
 881         return new Object[]{ a, b };
 882     }
 883 
 884     @Test
         // Shifted copy of shorts: b[i+6] = a[i] for i in [0, RANGE-64); the loop body is the same
         // as in test4e, only the expectations under speculative aliasing checks differ.
         //
         // Rule 1 - MaxVectorSize >= 8, AlignVector off, speculative aliasing checks disabled:
         // at least one short load vector of size 4 and at least one vector store must show up,
         // and the multiversion regex must not match the PRINT_IDEAL output.
 885     @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
 886                   IRNode.STORE_VECTOR, "> 0",
 887                   ".*multiversion.*", "= 0"},
 888         phase = CompilePhase.PRINT_IDEAL,
 889         applyIfAnd = {"MaxVectorSize", ">=8", "AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "false"},
 890         applyIfPlatform = {"64-bit", "true"},
 891         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 892     // Cyclic dependency with distance 6 -> split into 4-packs
         // (4 is the largest power of two that does not exceed the distance of 6.)
         //
         // Rule 2 - same conditions, but with speculative aliasing checks enabled:
         // short load vectors of any size AND of size 4 are both required, plus a vector store
         // and at least one match of the multiversion regex.
 893     @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
 894                   IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
 895                   IRNode.STORE_VECTOR, "> 0",
 896                   ".*multiversion.*", "> 0"},
 897         phase = CompilePhase.PRINT_IDEAL,
 898         applyIfAnd = {"MaxVectorSize", ">=8", "AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "true"},
 899         applyIfPlatform = {"64-bit", "true"},
 900         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 901     // Speculative aliasing check with multiversioning -> full vectorization & split packs.
         // NOTE(review): presumably this variant is invoked with a and b referring to the same
         // array, so the speculative check can fail at runtime - the caller is outside this view,
         // confirm against the test setup.
         // Returns the two input arrays wrapped in an Object[] (a first, then b).
 902     static Object[] test4e_alias(short[] a, short[] b) {
 903         for (int i = 0; i < RANGE-64; i++) {
 904           b[i+6] = a[i+0];
 905         }
 906         return new Object[]{ a, b };
 907     }
 908 
 909     @Test
         // Shifted copy of shorts: b[i+7] = a[i] for i in [0, RANGE-64); the loop body is the same
         // as in test4f, only the expectations under speculative aliasing checks differ.
         //
         // Rule 1 - MaxVectorSize >= 8, AlignVector off, speculative aliasing checks disabled:
         // at least one short load vector of size 4 and at least one vector store must show up,
         // and the multiversion regex must not match the PRINT_IDEAL output.
 910     @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
 911                   IRNode.STORE_VECTOR, "> 0",
 912                   ".*multiversion.*", "= 0"},
 913         phase = CompilePhase.PRINT_IDEAL,
 914         applyIfAnd = {"MaxVectorSize", ">=8", "AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "false"},
 915         applyIfPlatform = {"64-bit", "true"},
 916         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 917     // Cyclic dependency with distance 7 -> split into 4-packs
         // (4 is the largest power of two that does not exceed the distance of 7.)
         //
         // Rule 2 - same conditions, but with speculative aliasing checks enabled:
         // short load vectors of any size AND of size 4 are both required, plus a vector store
         // and at least one match of the multiversion regex.
 918     @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
 919                   IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
 920                   IRNode.STORE_VECTOR, "> 0",
 921                   ".*multiversion.*", "> 0"},
 922         phase = CompilePhase.PRINT_IDEAL,
 923         applyIfAnd = {"MaxVectorSize", ">=8", "AlignVector", "false", "UseAutoVectorizationSpeculativeAliasingChecks", "true"},
 924         applyIfPlatform = {"64-bit", "true"},
 925         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 926     // Speculative aliasing check with multiversioning -> full vectorization & split packs.
         // NOTE(review): presumably this variant is invoked with a and b referring to the same
         // array, so the speculative check can fail at runtime - the caller is outside this view,
         // confirm against the test setup.
         // Returns the two input arrays wrapped in an Object[] (a first, then b).
 927     static Object[] test4f_alias(short[] a, short[] b) {
 928         for (int i = 0; i < RANGE-64; i++) {
 929           b[i+7] = a[i+0];
 930         }
 931         return new Object[]{ a, b };
 932     }
 933 
 934     @Test
         // Shifted copy of shorts: b[i+8] = a[i] for i in [0, RANGE-64); the loop body is the same
         // as in test4g, only the expectations under speculative aliasing checks differ.
         //
         // Rule 1 - MaxVectorSize >= 32, speculative aliasing checks disabled:
         // at least one short load vector of size 8 and at least one vector store must show up,
         // and the multiversion regex must not match the PRINT_IDEAL output.
 935     @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_8, "> 0",
 936                   IRNode.STORE_VECTOR, "> 0",
 937                   ".*multiversion.*", "= 0"},
 938         phase = CompilePhase.PRINT_IDEAL,
 939         applyIfAnd = {"MaxVectorSize", ">=32", "UseAutoVectorizationSpeculativeAliasingChecks", "false"},
 940         applyIfPlatform = {"64-bit", "true"},
 941         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 942     // Cyclic dependency with distance 8 -> split into 8-packs
         //
         // Rule 2 - same conditions, but with speculative aliasing checks enabled:
         // short load vectors of any size AND of size 8 are both required, plus a vector store
         // and at least one match of the multiversion regex.
         // The CPU feature list is kept identical to rule 1 and to both rules of test4g, so the
         // rule is not silently skipped on x86 hosts that have sse4.1 but lack avx2.
 943     @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
 944                   IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_8, "> 0",
 945                   IRNode.STORE_VECTOR, "> 0",
 946                   ".*multiversion.*", "> 0"},
 947         phase = CompilePhase.PRINT_IDEAL,
 948         applyIfAnd = {"MaxVectorSize", ">=32", "UseAutoVectorizationSpeculativeAliasingChecks", "true"},
 949         applyIfPlatform = {"64-bit", "true"},
 950         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 951     // Speculative aliasing check with multiversioning -> full vectorization & split packs.
         // NOTE(review): presumably this variant is invoked with a and b referring to the same
         // array, so the speculative check can fail at runtime - the caller is outside this view,
         // confirm against the test setup.
         // Returns the two input arrays wrapped in an Object[] (a first, then b).
 952     static Object[] test4g_alias(short[] a, short[] b) {
 953         for (int i = 0; i < RANGE-64; i++) {
 954           b[i+8] = a[i+0];
 955         }
 956         return new Object[]{ a, b };
 957     }
 958 
 959     @Test
         // Adds val to 15 of every 16 consecutive shorts of a and stores the results into b.
         // The statements are grouped so that the 15 adjacent accesses have to be split into
         // power-of-2 sized packs (8 + 4 + 2), leaving one trailing scalar statement.
         //
         // Rule 1 - sse4.1, MaxVectorSize >= 32, AlignVector off:
         // short load vectors and short add vectors of sizes 2, 4 and 8 must all be present,
         // together with at least one vector store.
 960     @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_2, "> 0",
 961                   IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
 962                   IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_8, "> 0",
 963                   IRNode.ADD_VS,        IRNode.VECTOR_SIZE_2, "> 0",
 964                   IRNode.ADD_VS,        IRNode.VECTOR_SIZE_8, "> 0",
 965                   IRNode.ADD_VS,        IRNode.VECTOR_SIZE_4, "> 0",
 966                   IRNode.STORE_VECTOR, "> 0"},
 967         applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"},
 968         applyIfPlatform = {"64-bit", "true"},
 969         applyIfCPUFeature = {"sse4.1", "true"})
 970     // aarch64 limits minimum vector size to 8B, thus a vector size of
 971     // length 2 for type "short" will not be generated
         //
         // Rule 2 - sve, same flag conditions: only the size-4 and size-8 load/add vectors are
         // required; no expectation is placed on size-2 vectors.
 972     @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
 973                   IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_8, "> 0",
 974                   IRNode.ADD_VS,        IRNode.VECTOR_SIZE_8, "> 0",
 975                   IRNode.ADD_VS,        IRNode.VECTOR_SIZE_4, "> 0",
 976                   IRNode.STORE_VECTOR, "> 0"},
 977         applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"},
 978         applyIfPlatform = {"64-bit", "true"},
 979         applyIfCPUFeature = {"sve", "true"})
 980     // Split pack into power-of-2 sizes
         // Returns the two input arrays wrapped in an Object[] (a first, then b).
 981     static Object[] test5a(short[] a, short[] b, short val) {
             // Stride of 16: offsets 0..14 are written below, offset 15 is never touched.
 982         for (int i = 0; i < RANGE; i+=16) {
 983             b[i+ 0] = (short)(a[i+ 0] + val); // 8 pack
 984             b[i+ 1] = (short)(a[i+ 1] + val);
 985             b[i+ 2] = (short)(a[i+ 2] + val);
 986             b[i+ 3] = (short)(a[i+ 3] + val);
 987             b[i+ 4] = (short)(a[i+ 4] + val);
 988             b[i+ 5] = (short)(a[i+ 5] + val);
 989             b[i+ 6] = (short)(a[i+ 6] + val);
 990             b[i+ 7] = (short)(a[i+ 7] + val);
 991
 992             b[i+ 8] = (short)(a[i+ 8] + val); // 4-pack
 993             b[i+ 9] = (short)(a[i+ 9] + val);
 994             b[i+10] = (short)(a[i+10] + val);
 995             b[i+11] = (short)(a[i+11] + val);
 996
 997             b[i+12] = (short)(a[i+12] + val); // 2-pack
 998             b[i+13] = (short)(a[i+13] + val);
 999
                 // Single leftover element: it has no adjacent partner at offset 15.
1000             b[i+14] = (short)(a[i+14] + val);
1001         }
1002         return new Object[]{ a, b };
1003     }
1004 
1005     @Test
         // Sum reduction over two different element-wise operations: per group of 8 ints, the
         // first 4 lanes contribute a*b and the last 4 lanes contribute a&b to the same sum s.
         //
         // The rule (MaxVectorSize >= 32) requires size-4 int load, multiply, and, and add
         // vectors, plus at least one add-reduction vector node of any size.
1006     @IR(counts = {IRNode.LOAD_VECTOR_I,   IRNode.VECTOR_SIZE_4, "> 0",
1007                   IRNode.MUL_VI,          IRNode.VECTOR_SIZE_4, "> 0",
1008                   IRNode.AND_VI,          IRNode.VECTOR_SIZE_4, "> 0",
1009                   IRNode.ADD_VI,          IRNode.VECTOR_SIZE_4, "> 0", // reduction moved out of loop
1010                   IRNode.ADD_REDUCTION_V,                       "> 0"},
1011         applyIf = {"MaxVectorSize", ">=32"},
1012         applyIfPlatform = {"64-bit", "true"},
1013         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
1014     // Split packs including reductions
         // Returns a, b and the final sum boxed in a one-element int[], all wrapped in an Object[].
1015     static Object[] test6a(int[] a, int[] b) {
1016         int s = 0;
             // Stride of 8: offsets 0..3 use multiplication, offsets 4..7 use bitwise and.
1017         for (int i = 0; i < RANGE; i+=8) {
1018             s += a[i+0] * b[i+0];
1019             s += a[i+1] * b[i+1];
1020             s += a[i+2] * b[i+2];
1021             s += a[i+3] * b[i+3];
1022
1023             s += a[i+4] & b[i+4];
1024             s += a[i+5] & b[i+5];
1025             s += a[i+6] & b[i+6];
1026             s += a[i+7] & b[i+7];
1027         }
1028         return new Object[]{ a, b, new int[]{ s } };
1029     }
1030 
1031     @Test
         // Multiplies every element of b by its own index and stores the product into a.
         //
         // The rule has no flag conditions; on 64-bit platforms with avx2, sve or rvv it requires
         // at least one int load vector, one int multiply vector and one populate-index node.
1032     @IR(counts = {IRNode.LOAD_VECTOR_I,  "> 0",
1033                   IRNode.MUL_VI,         "> 0",
1034                   IRNode.POPULATE_INDEX, "> 0"},
1035         applyIfPlatform = {"64-bit", "true"},
1036         applyIfCPUFeatureOr = {"avx2", "true", "sve", "true", "rvv", "true"})
1037     // Index Populate:
1038     // There can be an issue when all the (iv + 1), (iv + 2), ...
1039     // get packed, but not (iv). Then we have a pack that is one element
1040     // too short, and we start splitting everything in a bad way.
         // Returns the two input arrays wrapped in an Object[] (a first, then b).
1041     static Object[] test7a(int[] a, int[] b) {
1042         for (int i = 0; i < RANGE; i++) {
                 // The induction variable itself is an operand of the multiplication.
1043             a[i] = b[i] * i;
1044         }
1045         return new Object[]{ a, b };
1046     }
1047 }