1 /*
   2  * Copyright (c) 2024, 2025, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  */
  23 
  24 package compiler.loopopts.superword;
  25 
  26 import compiler.lib.ir_framework.*;
  27 import jdk.test.lib.Utils;
  28 import jdk.test.whitebox.WhiteBox;
  29 import jdk.internal.misc.Unsafe;
  30 import java.lang.reflect.Array;
  31 import java.util.Map;
  32 import java.util.HashMap;
  33 import java.util.Random;
  34 import java.nio.ByteOrder;
  35 
  36 /*
  37  * @test id=NoAlignVector
  38  * @bug 8310190
  39  * @key randomness
  40  * @summary Test AlignVector with various loop init, stride, scale, invar, etc.
  41  * @modules java.base/jdk.internal.misc
  42  * @library /test/lib /
  43  * @run driver compiler.loopopts.superword.TestAlignVector NoAlignVector
  44  */
  45 
  46 /*
  47  * @test id=AlignVector
 * @bug 8310190
 * @key randomness
  49  * @summary Test AlignVector with various loop init, stride, scale, invar, etc.
  50  * @modules java.base/jdk.internal.misc
  51  * @library /test/lib /
  52  * @run driver compiler.loopopts.superword.TestAlignVector AlignVector
  53  */
  54 
  55 /*
  56  * @test id=VerifyAlignVector
 * @bug 8310190
 * @key randomness
  58  * @summary Test AlignVector with various loop init, stride, scale, invar, etc.
  59  * @modules java.base/jdk.internal.misc
  60  * @library /test/lib /
  61  * @run driver compiler.loopopts.superword.TestAlignVector VerifyAlignVector
  62  */
  63 
  64 /*
  65  * @test id=NoAlignVector-COH
 * @bug 8310190
 * @key randomness
  67  * @summary Test AlignVector with various loop init, stride, scale, invar, etc.
  68  * @modules java.base/jdk.internal.misc
  69  * @library /test/lib /
  70  * @run driver compiler.loopopts.superword.TestAlignVector NoAlignVector-COH
  71  */
  72 
  73 /*
  74  * @test id=VerifyAlignVector-COH
 * @bug 8310190
 * @key randomness
  76  * @summary Test AlignVector with various loop init, stride, scale, invar, etc.
  77  * @modules java.base/jdk.internal.misc
  78  * @library /test/lib /
  79  * @run driver compiler.loopopts.superword.TestAlignVector VerifyAlignVector-COH
  80  */
  81 
  82 public class TestAlignVector {
    // Length of every generated input array (see the generate* helpers); the test loops
    // visible in this part of the file also derive their bounds from it.
    static int RANGE = 1024*8;
    // NOTE(review): despite its name this field is not declared final, and it is not
    // referenced in the part of the file reviewed here - confirm whether it is meant
    // to be a compile-time constant.
    static int RANGE_FINAL = 1024*8;
    // Handle to jdk.internal.misc.Unsafe; presumably used by tests further down - TODO confirm.
    private static final Unsafe UNSAFE = Unsafe.getUnsafe();
    // Random source for the input generators, obtained from the test library.
    private static final Random RANDOM = Utils.getRandomInstance();

    // Inputs
    // Naming: a* / b* are the two input arrays per element type, m* is the mask operand
    // that the constructor passes to the tests of that element type.
    byte[] aB;
    byte[] bB;
    byte mB = (byte)31;
    short[] aS;
    short[] bS;
    short mS = (short)0xF0F0;
    int[] aI;
    int[] bI;
    int mI = 0xF0F0F0F0;
    long[] aL;
    long[] bL;
    long mL = 0xF0F0F0F0F0F0F0F0L;

    // List of tests
    // Keyed by scenario name; each value invokes one test method (filled in by the constructor).
    Map<String,TestFunction> tests = new HashMap<String,TestFunction>();

    // List of gold, the results from the first run before compilation
    // Keyed by the same scenario names as "tests".
    Map<String,Object[]> golds = new HashMap<String,Object[]>();
 107 
 108     interface TestFunction {
 109         Object[] run();
 110     }
 111 
 112     public static void main(String[] args) {
 113         TestFramework framework = new TestFramework(TestAlignVector.class);
 114         framework.addFlags("--add-modules", "java.base", "--add-exports", "java.base/jdk.internal.misc=ALL-UNNAMED",
 115                            "-XX:+IgnoreUnrecognizedVMOptions", "-XX:LoopUnrollLimit=250");
 116 
 117         switch (args[0]) {
 118             case "NoAlignVector"         -> { framework.addFlags("-XX:-UseCompactObjectHeaders", "-XX:-AlignVector"); }
 119             case "AlignVector"           -> { framework.addFlags("-XX:-UseCompactObjectHeaders", "-XX:+AlignVector"); }
 120             case "VerifyAlignVector"     -> { framework.addFlags("-XX:-UseCompactObjectHeaders", "-XX:+AlignVector", "-XX:+IgnoreUnrecognizedVMOptions", "-XX:+VerifyAlignVector"); }
 121             case "NoAlignVector-COH"     -> { framework.addFlags("-XX:+UseCompactObjectHeaders", "-XX:-AlignVector"); }
 122             case "VerifyAlignVector-COH" -> { framework.addFlags("-XX:+UseCompactObjectHeaders", "-XX:+AlignVector", "-XX:+IgnoreUnrecognizedVMOptions", "-XX:+VerifyAlignVector"); }
 123             default -> { throw new RuntimeException("Test argument not recognized: " + args[0]); }
 124         }
 125         framework.start();
 126     }
 127 
 128     public TestAlignVector() {
 129         // Generate input once
 130         aB = generateB();
 131         bB = generateB();
 132         aS = generateS();
 133         bS = generateS();
 134         aI = generateI();
 135         bI = generateI();
 136         aL = generateL();
 137         bL = generateL();
 138 
 139         // Add all tests to list
 140         tests.put("test0",       () -> { return test0(aB.clone(), bB.clone(), mB); });
 141         tests.put("test1",       () -> { return test1(aB.clone(), bB.clone(), mB); });
 142         tests.put("test2",       () -> { return test2(aB.clone(), bB.clone(), mB); });
 143         tests.put("test3",       () -> { return test3(aB.clone(), bB.clone(), mB); });
 144         tests.put("test4",       () -> { return test4(aB.clone(), bB.clone(), mB); });
 145         tests.put("test5",       () -> { return test5(aB.clone(), bB.clone(), mB, 0); });
 146         tests.put("test6",       () -> { return test6(aB.clone(), bB.clone(), mB); });
 147         tests.put("test7",       () -> { return test7(aS.clone(), bS.clone(), mS); });
 148         tests.put("test8",       () -> { return test8(aB.clone(), bB.clone(), mB, 0); });
 149         tests.put("test8",       () -> { return test8(aB.clone(), bB.clone(), mB, 1); });
 150         tests.put("test9",       () -> { return test9(aB.clone(), bB.clone(), mB); });
 151 
 152         tests.put("test10a",     () -> { return test10a(aB.clone(), bB.clone(), mB); });
 153         tests.put("test10b",     () -> { return test10b(aB.clone(), bB.clone(), mB); });
 154         tests.put("test10c",     () -> { return test10c(aS.clone(), bS.clone(), mS); });
 155         tests.put("test10d",     () -> { return test10d(aS.clone(), bS.clone(), mS); });
 156 
 157         tests.put("test11aB",    () -> { return test11aB(aB.clone(), bB.clone(), mB); });
 158         tests.put("test11aS",    () -> { return test11aS(aS.clone(), bS.clone(), mS); });
 159         tests.put("test11aI",    () -> { return test11aI(aI.clone(), bI.clone(), mI); });
 160         tests.put("test11aL",    () -> { return test11aL(aL.clone(), bL.clone(), mL); });
 161 
 162         tests.put("test11bB",    () -> { return test11bB(aB.clone(), bB.clone(), mB); });
 163         tests.put("test11bS",    () -> { return test11bS(aS.clone(), bS.clone(), mS); });
 164         tests.put("test11bI",    () -> { return test11bI(aI.clone(), bI.clone(), mI); });
 165         tests.put("test11bL",    () -> { return test11bL(aL.clone(), bL.clone(), mL); });
 166 
 167         tests.put("test11cB",    () -> { return test11cB(aB.clone(), bB.clone(), mB); });
 168         tests.put("test11cS",    () -> { return test11cS(aS.clone(), bS.clone(), mS); });
 169         tests.put("test11cI",    () -> { return test11cI(aI.clone(), bI.clone(), mI); });
 170         tests.put("test11cL",    () -> { return test11cL(aL.clone(), bL.clone(), mL); });
 171 
 172         tests.put("test11dB",    () -> { return test11dB(aB.clone(), bB.clone(), mB, 0); });
 173         tests.put("test11dS",    () -> { return test11dS(aS.clone(), bS.clone(), mS, 0); });
 174         tests.put("test11dI",    () -> { return test11dI(aI.clone(), bI.clone(), mI, 0); });
 175         tests.put("test11dL",    () -> { return test11dL(aL.clone(), bL.clone(), mL, 0); });
 176 
 177         tests.put("test12",      () -> { return test12(aB.clone(), bB.clone(), mB); });
 178 
 179         tests.put("test13aIL",   () -> { return test13aIL(aI.clone(), aL.clone()); });
 180         tests.put("test13aIB",   () -> { return test13aIB(aI.clone(), aB.clone()); });
 181         tests.put("test13aIS",   () -> { return test13aIS(aI.clone(), aS.clone()); });
 182         tests.put("test13aBSIL", () -> { return test13aBSIL(aB.clone(), aS.clone(), aI.clone(), aL.clone()); });
 183 
 184         tests.put("test13bIL",   () -> { return test13bIL(aI.clone(), aL.clone()); });
 185         tests.put("test13bIB",   () -> { return test13bIB(aI.clone(), aB.clone()); });
 186         tests.put("test13bIS",   () -> { return test13bIS(aI.clone(), aS.clone()); });
 187         tests.put("test13bBSIL", () -> { return test13bBSIL(aB.clone(), aS.clone(), aI.clone(), aL.clone()); });
 188 
 189         tests.put("test14aB",    () -> { return test14aB(aB.clone()); });
 190         tests.put("test14bB",    () -> { return test14bB(aB.clone()); });
 191         tests.put("test14cB",    () -> { return test14cB(aB.clone()); });
 192         tests.put("test14dB",    () -> { return test14dB(aB.clone()); });
 193         tests.put("test14eB",    () -> { return test14eB(aB.clone()); });
 194         tests.put("test14fB",    () -> { return test14fB(aB.clone()); });
 195 
 196         tests.put("test15aB",    () -> { return test15aB(aB.clone()); });
 197         tests.put("test15bB",    () -> { return test15bB(aB.clone()); });
 198         tests.put("test15cB",    () -> { return test15cB(aB.clone()); });
 199 
 200         tests.put("test16a",     () -> { return test16a(aB.clone(), aS.clone()); });
 201         tests.put("test16b",     () -> { return test16b(aB.clone()); });
 202 
 203         tests.put("test17a",     () -> { return test17a(aL.clone()); });
 204         tests.put("test17b",     () -> { return test17b(aL.clone()); });
 205         tests.put("test17c",     () -> { return test17c(aL.clone()); });
 206         tests.put("test17d",     () -> { return test17d(aL.clone()); });
 207 
 208         tests.put("test18a",     () -> { return test18a(aB.clone(), aI.clone()); });
 209         tests.put("test18b",     () -> { return test18b(aB.clone(), aI.clone()); });
 210 
 211         tests.put("test19",      () -> { return test19(aI.clone(), bI.clone()); });
 212         tests.put("test20",      () -> { return test20(aB.clone()); });
 213 
 214         // Compute gold value for all test methods before compilation
 215         for (Map.Entry<String,TestFunction> entry : tests.entrySet()) {
 216             String name = entry.getKey();
 217             TestFunction test = entry.getValue();
 218             Object[] gold = test.run();
 219             golds.put(name, gold);
 220         }
 221     }
 222 
 223     @Warmup(100)
 224     @Run(test = {"test0",
 225                  "test1",
 226                  "test2",
 227                  "test3",
 228                  "test4",
 229                  "test5",
 230                  "test6",
 231                  "test7",
 232                  "test8",
 233                  "test9",
 234                  "test10a",
 235                  "test10b",
 236                  "test10c",
 237                  "test10d",
 238                  "test11aB",
 239                  "test11aS",
 240                  "test11aI",
 241                  "test11aL",
 242                  "test11bB",
 243                  "test11bS",
 244                  "test11bI",
 245                  "test11bL",
 246                  "test11cB",
 247                  "test11cS",
 248                  "test11cI",
 249                  "test11cL",
 250                  "test11dB",
 251                  "test11dS",
 252                  "test11dI",
 253                  "test11dL",
 254                  "test12",
 255                  "test13aIL",
 256                  "test13aIB",
 257                  "test13aIS",
 258                  "test13aBSIL",
 259                  "test13bIL",
 260                  "test13bIB",
 261                  "test13bIS",
 262                  "test13bBSIL",
 263                  "test14aB",
 264                  "test14bB",
 265                  "test14cB",
 266                  "test14dB",
 267                  "test14eB",
 268                  "test14fB",
 269                  "test15aB",
 270                  "test15bB",
 271                  "test15cB",
 272                  "test16a",
 273                  "test16b",
 274                  "test17a",
 275                  "test17b",
 276                  "test17c",
 277                  "test17d",
 278                  "test18a",
 279                  "test18b",
 280                  "test19",
 281                  "test20"})
 282     public void runTests() {
 283         for (Map.Entry<String,TestFunction> entry : tests.entrySet()) {
 284             String name = entry.getKey();
 285             TestFunction test = entry.getValue();
 286             // Recall gold value from before compilation
 287             Object[] gold = golds.get(name);
 288             // Compute new result
 289             Object[] result = test.run();
 290             // Compare gold and new result
 291             verify(name, gold, result);
 292         }
 293     }
 294 
 295     static byte[] generateB() {
 296         byte[] a = new byte[RANGE];
 297         for (int i = 0; i < a.length; i++) {
 298             a[i] = (byte)RANDOM.nextInt();
 299         }
 300         return a;
 301     }
 302 
 303     static short[] generateS() {
 304         short[] a = new short[RANGE];
 305         for (int i = 0; i < a.length; i++) {
 306             a[i] = (short)RANDOM.nextInt();
 307         }
 308         return a;
 309     }
 310 
 311     static int[] generateI() {
 312         int[] a = new int[RANGE];
 313         for (int i = 0; i < a.length; i++) {
 314             a[i] = RANDOM.nextInt();
 315         }
 316         return a;
 317     }
 318 
 319     static long[] generateL() {
 320         long[] a = new long[RANGE];
 321         for (int i = 0; i < a.length; i++) {
 322             a[i] = RANDOM.nextLong();
 323         }
 324         return a;
 325     }
 326 
 327     static void verify(String name, Object[] gold, Object[] result) {
 328         if (gold.length != result.length) {
 329             throw new RuntimeException("verify " + name + ": not the same number of outputs: gold.length = " +
 330                                        gold.length + ", result.length = " + result.length);
 331         }
 332         for (int i = 0; i < gold.length; i++) {
 333             Object g = gold[i];
 334             Object r = result[i];
 335             if (g.getClass() != r.getClass() || !g.getClass().isArray() || !r.getClass().isArray()) {
 336                 throw new RuntimeException("verify " + name + ": must both be array of same type:" +
 337                                            " gold[" + i + "].getClass() = " + g.getClass().getSimpleName() +
 338                                            " result[" + i + "].getClass() = " + r.getClass().getSimpleName());
 339             }
 340             if (g == r) {
 341                 throw new RuntimeException("verify " + name + ": should be two separate arrays (with identical content):" +
 342                                            " gold[" + i + "] == result[" + i + "]");
 343             }
 344             if (Array.getLength(g) != Array.getLength(r)) {
 345                     throw new RuntimeException("verify " + name + ": arrays must have same length:" +
 346                                            " gold[" + i + "].length = " + Array.getLength(g) +
 347                                            " result[" + i + "].length = " + Array.getLength(r));
 348             }
 349             Class c = g.getClass().getComponentType();
 350             if (c == byte.class) {
 351                 verifyB(name, i, (byte[])g, (byte[])r);
 352             } else if (c == short.class) {
 353                 verifyS(name, i, (short[])g, (short[])r);
 354             } else if (c == int.class) {
 355                 verifyI(name, i, (int[])g, (int[])r);
 356             } else if (c == long.class) {
 357                 verifyL(name, i, (long[])g, (long[])r);
 358             } else {
 359                 throw new RuntimeException("verify " + name + ": array type not supported for verify:" +
 360                                        " gold[" + i + "].getClass() = " + g.getClass().getSimpleName() +
 361                                        " result[" + i + "].getClass() = " + r.getClass().getSimpleName());
 362             }
 363         }
 364     }
 365 
 366     static void verifyB(String name, int i, byte[] g, byte[] r) {
 367         for (int j = 0; j < g.length; j++) {
 368             if (g[j] != r[j]) {
 369                 throw new RuntimeException("verify " + name + ": arrays must have same content:" +
 370                                            " gold[" + i + "][" + j + "] = " + g[j] +
 371                                            " result[" + i + "][" + j + "] = " + r[j]);
 372             }
 373         }
 374     }
 375 
 376     static void verifyS(String name, int i, short[] g, short[] r) {
 377         for (int j = 0; j < g.length; j++) {
 378             if (g[j] != r[j]) {
 379                 throw new RuntimeException("verify " + name + ": arrays must have same content:" +
 380                                            " gold[" + i + "][" + j + "] = " + g[j] +
 381                                            " result[" + i + "][" + j + "] = " + r[j]);
 382             }
 383         }
 384     }
 385 
 386     static void verifyI(String name, int i, int[] g, int[] r) {
 387         for (int j = 0; j < g.length; j++) {
 388             if (g[j] != r[j]) {
 389                 throw new RuntimeException("verify " + name + ": arrays must have same content:" +
 390                                            " gold[" + i + "][" + j + "] = " + g[j] +
 391                                            " result[" + i + "][" + j + "] = " + r[j]);
 392             }
 393         }
 394     }
 395 
 396     static void verifyL(String name, int i, long[] g, long[] r) {
 397         for (int j = 0; j < g.length; j++) {
 398             if (g[j] != r[j]) {
 399                 throw new RuntimeException("verify " + name + ": arrays must have same content:" +
 400                                            " gold[" + i + "][" + j + "] = " + g[j] +
 401                                            " result[" + i + "][" + j + "] = " + r[j]);
 402             }
 403         }
 404     }
 405 
    // Kernel: in steps of 8, ANDs the 4 consecutive bytes a[i+0..i+3] with mask and
    // stores them to the same positions of b.
    // IR expectation: 4-element byte load/and vectors and vector stores whenever
    // MaxVectorSize >= 8 on the listed 64-bit CPUs; the rule has no AlignVector
    // condition, so it applies with the flag on or off.
    // Returns { a, b } for comparison against the gold result.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.AND_VB,        IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIf = {"MaxVectorSize", ">=8"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    static Object[] test0(byte[] a, byte[] b, byte mask) {
        for (int i = 0; i < RANGE; i+=8) {
            // Safe to vectorize with AlignVector
            b[i+0] = (byte)(a[i+0] & mask); // offset 0, align 0
            b[i+1] = (byte)(a[i+1] & mask);
            b[i+2] = (byte)(a[i+2] & mask);
            b[i+3] = (byte)(a[i+3] & mask);
        }
        return new Object[]{ a, b };
    }
 423 
    // Kernel: in steps of 8, ANDs all 8 consecutive bytes a[i+0..i+7] with mask into b.
    // IR expectation: byte load/and vectors and vector stores (no vector size is pinned)
    // on the listed 64-bit CPUs; the rule has neither an AlignVector nor a MaxVectorSize
    // condition.
    // Returns { a, b } for comparison against the gold result.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
                  IRNode.AND_VB, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
    static Object[] test1(byte[] a, byte[] b, byte mask) {
        for (int i = 0; i < RANGE; i+=8) {
            b[i+0] = (byte)(a[i+0] & mask); // adr = base + UNSAFE.ARRAY_BYTE_BASE_OFFSET + 0 + iter*8
            b[i+1] = (byte)(a[i+1] & mask);
            b[i+2] = (byte)(a[i+2] & mask);
            b[i+3] = (byte)(a[i+3] & mask);
            b[i+4] = (byte)(a[i+4] & mask);
            b[i+5] = (byte)(a[i+5] & mask);
            b[i+6] = (byte)(a[i+6] & mask);
            b[i+7] = (byte)(a[i+7] & mask);
        }
        return new Object[]{ a, b };
    }
 443 
    // Kernel: in steps of 8, ANDs the 4 consecutive bytes a[i+3..i+6] with mask into b,
    // i.e. the group starts 3 bytes into each 8-byte step.
    // IR expectation: with AlignVector off (and MaxVectorSize >= 8) 4-element byte
    // vectors are required; with AlignVector on no vector nodes may appear at all.
    // Returns { a, b } for comparison against the gold result.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.AND_VB,        IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
                  IRNode.AND_VB, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "true"})
    static Object[] test2(byte[] a, byte[] b, byte mask) {
        for (int i = 0; i < RANGE; i+=8) {
            // Cannot align with AlignVector: 3 + x * 8 % 8 = 3
            b[i+3] = (byte)(a[i+3] & mask); // at alignment 3
            b[i+4] = (byte)(a[i+4] & mask);
            b[i+5] = (byte)(a[i+5] & mask);
            b[i+6] = (byte)(a[i+6] & mask);
        }
        return new Object[]{ a, b };
    }
 467 
    // Kernel: like test2 (4 bytes at i+3..i+6 per step of 8) plus one additional
    // single-byte access at i+0 in the same iteration.
    // IR expectation: with AlignVector off (and MaxVectorSize >= 8) 4-element byte
    // vectors are required; with AlignVector on no vector nodes may appear at all.
    // Returns { a, b } for comparison against the gold result.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.AND_VB,        IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
                  IRNode.AND_VB, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "true"})
    static Object[] test3(byte[] a, byte[] b, byte mask) {
        for (int i = 0; i < RANGE; i+=8) {
            // Cannot align with AlignVector: 3 + x * 8 % 8 = 3

            // Problematic for AlignVector
            b[i+0] = (byte)(a[i+0] & mask); // best_memref, align 0

            b[i+3] = (byte)(a[i+3] & mask); // pack at offset 3 bytes
            b[i+4] = (byte)(a[i+4] & mask);
            b[i+5] = (byte)(a[i+5] & mask);
            b[i+6] = (byte)(a[i+6] & mask);
        }
        return new Object[]{ a, b };
    }
 495 
    // Kernel: each iteration covers a 16-byte window (index i*16) and processes two
    // groups in it: 4 consecutive bytes at offsets 0..3 and 8 consecutive bytes at
    // offsets 5..12.
    // IR expectation (both rules need MaxVectorSize >= 16): with AlignVector off both
    // 4-element and 8-element byte vectors are required; with AlignVector on only the
    // 4-element vectors are required and 8-element vectors must not appear.
    // Returns { a, b } for comparison against the gold result.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_8, "> 0",
                  IRNode.AND_VB,        IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.AND_VB,        IRNode.VECTOR_SIZE_8, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=16"})
    @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_8, "= 0",// unaligned
                  IRNode.AND_VB,        IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.AND_VB,        IRNode.VECTOR_SIZE_8, "= 0",// unaligned
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">=16"})
    static Object[] test4(byte[] a, byte[] b, byte mask) {
        for (int i = 0; i < RANGE/16; i++) {
            // Problematic for AlignVector
            b[i*16 + 0 ] = (byte)(a[i*16 + 0 ] & mask); // 4 pack, 0 aligned
            b[i*16 + 1 ] = (byte)(a[i*16 + 1 ] & mask);
            b[i*16 + 2 ] = (byte)(a[i*16 + 2 ] & mask);
            b[i*16 + 3 ] = (byte)(a[i*16 + 3 ] & mask);

            b[i*16 + 5 ] = (byte)(a[i*16 + 5 ] & mask); // 8 pack, 5 aligned
            b[i*16 + 6 ] = (byte)(a[i*16 + 6 ] & mask);
            b[i*16 + 7 ] = (byte)(a[i*16 + 7 ] & mask);
            b[i*16 + 8 ] = (byte)(a[i*16 + 8 ] & mask);
            b[i*16 + 9 ] = (byte)(a[i*16 + 9 ] & mask);
            b[i*16 + 10] = (byte)(a[i*16 + 10] & mask);
            b[i*16 + 11] = (byte)(a[i*16 + 11] & mask);
            b[i*16 + 12] = (byte)(a[i*16 + 12] & mask);
        }
        return new Object[]{ a, b };
    }
 532 
    // Kernel: same access pattern as test3 (one byte at +0, four bytes at +3..+6 per
    // step of 8), but every index is additionally shifted by the loop-invariant
    // parameter inv. The constructor registers this test with inv = 0.
    // IR expectation: with AlignVector off (and MaxVectorSize >= 8) 4-element byte
    // vectors are required; with AlignVector on no vector nodes may appear at all.
    // Returns { a, b } for comparison against the gold result.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.AND_VB,        IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
                  IRNode.AND_VB, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "true"})
    static Object[] test5(byte[] a, byte[] b, byte mask, int inv) {
        for (int i = 0; i < RANGE; i+=8) {
            // Cannot align with AlignVector because of invariant
            b[i+inv+0] = (byte)(a[i+inv+0] & mask);

            b[i+inv+3] = (byte)(a[i+inv+3] & mask);
            b[i+inv+4] = (byte)(a[i+inv+4] & mask);
            b[i+inv+5] = (byte)(a[i+inv+5] & mask);
            b[i+inv+6] = (byte)(a[i+inv+6] & mask);
        }
        return new Object[]{ a, b };
    }
 558 
    // Kernel: i advances by 2 up to RANGE/8 and indexes with i*4, so each iteration
    // moves 8 bytes forward; it touches one byte at +0 and four bytes at +3..+6.
    // IR expectation: with AlignVector off (and MaxVectorSize >= 8) 4-element byte
    // vectors are required; with AlignVector on no vector nodes may appear at all.
    // Returns { a, b } for comparison against the gold result.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.AND_VB,        IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
                  IRNode.AND_VB, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "true"})
    static Object[] test6(byte[] a, byte[] b, byte mask) {
        for (int i = 0; i < RANGE/8; i+=2) {
            // Cannot align with AlignVector because offset is odd
            b[i*4+0] = (byte)(a[i*4+0] & mask);

            b[i*4+3] = (byte)(a[i*4+3] & mask);
            b[i*4+4] = (byte)(a[i*4+4] & mask);
            b[i*4+5] = (byte)(a[i*4+5] & mask);
            b[i*4+6] = (byte)(a[i*4+6] & mask);
        }
        return new Object[]{ a, b };
    }
 584 
    // Kernel: short-array counterpart of test6 - i advances by 2 up to RANGE/8 and
    // indexes with i*4; one element at +0 and four elements at +3..+6 per iteration.
    // IR expectation: with AlignVector off (and MaxVectorSize >= 16) 4-element short
    // vectors are required; with AlignVector on no vector nodes may appear at all.
    // NOTE(review): the first rule requires avx2 while the second rule (and the
    // neighbouring byte tests) use sse4.1 - confirm the difference is intentional.
    // Returns { a, b } for comparison against the gold result.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.AND_VS,        IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=16"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
    @IR(counts = {IRNode.LOAD_VECTOR_S, "= 0",
                  IRNode.AND_VS, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "true"})
    static Object[] test7(short[] a, short[] b, short mask) {
        for (int i = 0; i < RANGE/8; i+=2) {
            // Cannot align with AlignVector because offset is odd
            b[i*4+0] = (short)(a[i*4+0] & mask);

            b[i*4+3] = (short)(a[i*4+3] & mask);
            b[i*4+4] = (short)(a[i*4+4] & mask);
            b[i*4+5] = (short)(a[i*4+5] & mask);
            b[i*4+6] = (short)(a[i*4+6] & mask);
        }
        return new Object[]{ a, b };
    }
 610 
    // Kernel: same access pattern as test3 (one byte at +0, four bytes at +3..+6 per
    // step of 8), but the loop starts at the parameter init instead of a constant.
    // The constructor registers this test with init = 0 and with init = 1.
    // IR expectation: with AlignVector off (and MaxVectorSize >= 8) 4-element byte
    // vectors are required; with AlignVector on no vector nodes may appear at all.
    // Returns { a, b } for comparison against the gold result.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.AND_VB,        IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
                  IRNode.AND_VB, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "true"})
    static Object[] test8(byte[] a, byte[] b, byte mask, int init) {
        for (int i = init; i < RANGE; i+=8) {
            // Cannot align with AlignVector because of invariant (variable init becomes invar)
            b[i+0] = (byte)(a[i+0] & mask);

            b[i+3] = (byte)(a[i+3] & mask);
            b[i+4] = (byte)(a[i+4] & mask);
            b[i+5] = (byte)(a[i+5] & mask);
            b[i+6] = (byte)(a[i+6] & mask);
        }
        return new Object[]{ a, b };
    }
 636 
    // Kernel: same access pattern as test3 (one byte at +0, four bytes at +3..+6 per
    // step of 8), but the loop starts at the constant 13 and stops before RANGE-8;
    // the 4-byte group therefore begins at index 16 in the first iteration.
    // IR expectation: 4-element byte vectors whenever MaxVectorSize >= 8 on the listed
    // 64-bit CPUs; the rule has no AlignVector condition, so it applies with the flag
    // on or off.
    // Returns { a, b } for comparison against the gold result.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.AND_VB,        IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIf = {"MaxVectorSize", ">=8"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
    static Object[] test9(byte[] a, byte[] b, byte mask) {
        // known non-zero init value does not affect offset, but has implicit effect on iv
        for (int i = 13; i < RANGE-8; i+=8) {
            b[i+0] = (byte)(a[i+0] & mask);

            b[i+3] = (byte)(a[i+3] & mask);
            b[i+4] = (byte)(a[i+4] & mask);
            b[i+5] = (byte)(a[i+5] & mask);
            b[i+6] = (byte)(a[i+6] & mask);
        }
        return new Object[]{ a, b };
    }
 656 
 657     @Test
         // Byte AND-with-mask loop: constant init 3, stride 8, four adjacent accesses
         // (i+0..i+3) per iteration.
         // Rule 1 (AlignVector=false, MaxVectorSize>=8): expects 4-element byte vector
         //         load/AND nodes and at least one vector store.
         // Rule 2 (AlignVector=true): expects no vector load/AND/store at all.
 658     @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
 659                   IRNode.AND_VB,        IRNode.VECTOR_SIZE_4, "> 0",
 660                   IRNode.STORE_VECTOR, "> 0"},
 661         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
 662         applyIfPlatform = {"64-bit", "true"},
 663         applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"})
 664     @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
 665                   IRNode.AND_VB, "= 0",
 666                   IRNode.STORE_VECTOR, "= 0"},
 667         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
 668         applyIfPlatform = {"64-bit", "true"},
 669         applyIf = {"AlignVector", "true"})
 670     static Object[] test10a(byte[] a, byte[] b, byte mask) {
 671         // This is not alignable with pre-loop, because of odd init.
 672         for (int i = 3; i < RANGE-8; i+=8) {
 673             b[i+0] = (byte)(a[i+0] & mask);
 674             b[i+1] = (byte)(a[i+1] & mask);
 675             b[i+2] = (byte)(a[i+2] & mask);
 676             b[i+3] = (byte)(a[i+3] & mask);
 677         }
              // Hand both arrays back to the caller.
 678         return new Object[]{ a, b };
 679     }
 680 
 681     @Test
         // Same shape as a stride-8 loop with four adjacent byte accesses (i+0..i+3),
         // but with constant init 13.
         // Rule 1 (AlignVector=false, MaxVectorSize>=8): expects 4-element byte vector
         //         load/AND nodes and at least one vector store.
         // Rule 2 (AlignVector=true): expects no vector load/AND/store at all.
 682     @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
 683                   IRNode.AND_VB,        IRNode.VECTOR_SIZE_4, "> 0",
 684                   IRNode.STORE_VECTOR, "> 0"},
 685         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
 686         applyIfPlatform = {"64-bit", "true"},
 687         applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"})
 688     @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
 689                   IRNode.AND_VB, "= 0",
 690                   IRNode.STORE_VECTOR, "= 0"},
 691         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
 692         applyIfPlatform = {"64-bit", "true"},
 693         applyIf = {"AlignVector", "true"})
 694     static Object[] test10b(byte[] a, byte[] b, byte mask) {
 695         // This is not alignable with pre-loop, because of odd init.
 696         // Seems not correctly handled.
 697         for (int i = 13; i < RANGE-8; i+=8) {
 698             b[i+0] = (byte)(a[i+0] & mask);
 699             b[i+1] = (byte)(a[i+1] & mask);
 700             b[i+2] = (byte)(a[i+2] & mask);
 701             b[i+3] = (byte)(a[i+3] & mask);
 702         }
              // Hand both arrays back to the caller.
 703         return new Object[]{ a, b };
 704     }
 705 
 706     @Test
         // Short-array AND-with-mask loop: constant init 13, stride 8 elements,
         // four adjacent accesses (i+0..i+3) per iteration.
         // Rule 1 (AlignVector=false, MaxVectorSize>=16): expects 4-element short vector
         //         load/AND nodes and at least one vector store.
         // Rule 2 (AlignVector=true): expects no vector load/AND/store at all.
 707     @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
 708                   IRNode.AND_VS,        IRNode.VECTOR_SIZE_4, "> 0",
 709                   IRNode.STORE_VECTOR, "> 0"},
 710         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
 711         applyIfPlatform = {"64-bit", "true"},
 712         applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=16"})
 713     @IR(counts = {IRNode.LOAD_VECTOR_S, "= 0",
 714                   IRNode.AND_VS, "= 0",
 715                   IRNode.STORE_VECTOR, "= 0"},
 716         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
 717         applyIfPlatform = {"64-bit", "true"},
 718         applyIf = {"AlignVector", "true"})
 719     static Object[] test10c(short[] a, short[] b, short mask) {
 720         // This is not alignable with pre-loop, because of odd init.
 721         // Seems not correctly handled with MaxVectorSize >= 32.
 722         for (int i = 13; i < RANGE-8; i+=8) {
 723             b[i+0] = (short)(a[i+0] & mask);
 724             b[i+1] = (short)(a[i+1] & mask);
 725             b[i+2] = (short)(a[i+2] & mask);
 726             b[i+3] = (short)(a[i+3] & mask);
 727         }
              // Hand both arrays back to the caller.
 728         return new Object[]{ a, b };
 729     }
 730 
 731     @Test
         // Short-array AND-with-mask loop: constant init 13, stride 8 elements, and an
         // extra constant +3 on every index, so the first element touched is 13 + 3 = 16.
         // The single rule has no AlignVector condition: with MaxVectorSize>=16 on
         // avx2/asimd/rvv it expects 4-element short vector load/AND nodes and a vector store.
 732     @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
 733                   IRNode.AND_VS,        IRNode.VECTOR_SIZE_4, "> 0",
 734                   IRNode.STORE_VECTOR, "> 0"},
 735         applyIf = {"MaxVectorSize", ">=16"},
 736         applyIfPlatform = {"64-bit", "true"},
 737         applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
 738     static Object[] test10d(short[] a, short[] b, short mask) {
              // The bound is RANGE-16 (not RANGE-8) because of the extra +3 index offset.
 739         for (int i = 13; i < RANGE-16; i+=8) {
 740             // adr = base + UNSAFE.ARRAY_SHORT_BASE_OFFSET + 2*(3 + 13) + iter*16
 741             b[i+0+3] = (short)(a[i+0+3] & mask);
 742             b[i+1+3] = (short)(a[i+1+3] & mask);
 743             b[i+2+3] = (short)(a[i+2+3] & mask);
 744             b[i+3+3] = (short)(a[i+3+3] & mask);
 745         }
              // Hand both arrays back to the caller.
 746         return new Object[]{ a, b };
 747     }
 748 
 749     @Test
         // Simplest byte case: init 0, stride 1, one load and one store at index i.
         // The rule carries no AlignVector condition and expects byte vector
         // load/AND nodes and a vector store.
 750     @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
 751                   IRNode.AND_VB, "> 0",
 752                   IRNode.STORE_VECTOR, "> 0"},
 753         applyIfPlatform = {"64-bit", "true"},
 754         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 755     static Object[] test11aB(byte[] a, byte[] b, byte mask) {
 756         for (int i = 0; i < RANGE; i++) {
 757             // always alignable
 758             b[i+0] = (byte)(a[i+0] & mask);
 759         }
              // Hand both arrays back to the caller.
 760         return new Object[]{ a, b };
 761     }
 762 
 763     @Test
         // Simplest short case: init 0, stride 1, one load and one store at index i.
         // The rule carries no AlignVector condition and expects short vector
         // load/AND nodes and a vector store.
 764     @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
 765                   IRNode.AND_VS, "> 0",
 766                   IRNode.STORE_VECTOR, "> 0"},
 767         applyIfPlatform = {"64-bit", "true"},
 768         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 769     static Object[] test11aS(short[] a, short[] b, short mask) {
 770         for (int i = 0; i < RANGE; i++) {
 771             // always alignable
 772             b[i+0] = (short)(a[i+0] & mask);
 773         }
              // Hand both arrays back to the caller.
 774         return new Object[]{ a, b };
 775     }
 776 
 777     @Test
         // Simplest int case: init 0, stride 1, one load and one store at index i.
         // The rule carries no AlignVector condition and expects int vector
         // load/AND nodes and a vector store.
 778     @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
 779                   IRNode.AND_VI, "> 0",
 780                   IRNode.STORE_VECTOR, "> 0"},
 781         applyIfPlatform = {"64-bit", "true"},
 782         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 783     static Object[] test11aI(int[] a, int[] b, int mask) {
 784         for (int i = 0; i < RANGE; i++) {
 785             // always alignable
 786             b[i+0] = (int)(a[i+0] & mask);
 787         }
              // Hand both arrays back to the caller.
 788         return new Object[]{ a, b };
 789     }
 790 
 791     @Test
         // Simplest long case: init 0, stride 1, one load and one store at index i.
         // The rule carries no AlignVector condition and expects long vector
         // load/AND nodes and a vector store.
 792     @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0",
 793                   IRNode.AND_VL, "> 0",
 794                   IRNode.STORE_VECTOR, "> 0"},
 795         applyIfPlatform = {"64-bit", "true"},
 796         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 797     static Object[] test11aL(long[] a, long[] b, long mask) {
 798         for (int i = 0; i < RANGE; i++) {
 799             // always alignable
 800             b[i+0] = (long)(a[i+0] & mask);
 801         }
              // Hand both arrays back to the caller.
 802         return new Object[]{ a, b };
 803     }
 804 
 805     @Test
         // Byte loop with init 1 instead of 0; stride 1, load and store both at index i.
         // The rule carries no AlignVector condition and expects byte vector
         // load/AND nodes and a vector store.
 806     @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
 807                   IRNode.AND_VB, "> 0",
 808                   IRNode.STORE_VECTOR, "> 0"},
 809         applyIfPlatform = {"64-bit", "true"},
 810         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 811     static Object[] test11bB(byte[] a, byte[] b, byte mask) {
 812         for (int i = 1; i < RANGE; i++) {
 813             // always alignable
 814             b[i+0] = (byte)(a[i+0] & mask);
 815         }
              // Hand both arrays back to the caller.
 816         return new Object[]{ a, b };
 817     }
 818 
 819     @Test
         // Short loop with init 1 instead of 0; stride 1, load and store both at index i.
         // The rule carries no AlignVector condition and expects short vector
         // load/AND nodes and a vector store.
 820     @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
 821                   IRNode.AND_VS, "> 0",
 822                   IRNode.STORE_VECTOR, "> 0"},
 823         applyIfPlatform = {"64-bit", "true"},
 824         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 825     static Object[] test11bS(short[] a, short[] b, short mask) {
 826         for (int i = 1; i < RANGE; i++) {
 827             // always alignable
 828             b[i+0] = (short)(a[i+0] & mask);
 829         }
              // Hand both arrays back to the caller.
 830         return new Object[]{ a, b };
 831     }
 832 
 833     @Test
         // Int loop with init 1 instead of 0; stride 1, load and store both at index i.
         // The rule carries no AlignVector condition and expects int vector
         // load/AND nodes and a vector store.
 834     @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
 835                   IRNode.AND_VI, "> 0",
 836                   IRNode.STORE_VECTOR, "> 0"},
 837         applyIfPlatform = {"64-bit", "true"},
 838         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 839     static Object[] test11bI(int[] a, int[] b, int mask) {
 840         for (int i = 1; i < RANGE; i++) {
 841             // always alignable
 842             b[i+0] = (int)(a[i+0] & mask);
 843         }
              // Hand both arrays back to the caller.
 844         return new Object[]{ a, b };
 845     }
 846 
 847     @Test
         // Long loop with init 1 instead of 0; stride 1, load and store both at index i.
         // The rule carries no AlignVector condition and expects long vector
         // load/AND nodes and a vector store.
 848     @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0",
 849                   IRNode.AND_VL, "> 0",
 850                   IRNode.STORE_VECTOR, "> 0"},
 851         applyIfPlatform = {"64-bit", "true"},
 852         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 853     static Object[] test11bL(long[] a, long[] b, long mask) {
 854         for (int i = 1; i < RANGE; i++) {
 855             // always alignable
 856             b[i+0] = (long)(a[i+0] & mask);
 857         }
              // Hand both arrays back to the caller.
 858         return new Object[]{ a, b };
 859     }
 860 
 861     @Test
         // Byte loop where the load index (i+1) is one element ahead of the store index (i).
         // Rule 1 (AlignVector=false): expects byte vector load/AND nodes and a vector store.
         // Rule 2 (AlignVector=true): expects no vector load/AND/store at all.
 862     @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
 863                   IRNode.AND_VB, "> 0",
 864                   IRNode.STORE_VECTOR, "> 0"},
 865         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
 866         applyIfPlatform = {"64-bit", "true"},
 867         applyIf = {"AlignVector", "false"})
 868     @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
 869                   IRNode.AND_VB, "= 0",
 870                   IRNode.STORE_VECTOR, "= 0"},
 871         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
 872         applyIfPlatform = {"64-bit", "true"},
 873         applyIf = {"AlignVector", "true"})
 874     static Object[] test11cB(byte[] a, byte[] b, byte mask) {
              // Bound is RANGE-1 because the loop reads a[i+1].
 875         for (int i = 1; i < RANGE-1; i++) {
 876             // 1 byte offset -> not alignable with AlignVector
 877             b[i+0] = (byte)(a[i+1] & mask);
 878         }
              // Hand both arrays back to the caller.
 879         return new Object[]{ a, b };
 880     }
 881 
 882     @Test
         // Short loop where the load index (i+1) is one element ahead of the store index (i).
         // Rule 1 (AlignVector=false): expects short vector load/AND nodes and a vector store.
         // Rule 2 (AlignVector=true): expects no vector load/AND/store at all.
 883     @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
 884                   IRNode.AND_VS, "> 0",
 885                   IRNode.STORE_VECTOR, "> 0"},
 886         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
 887         applyIfPlatform = {"64-bit", "true"},
 888         applyIf = {"AlignVector", "false"})
 889     @IR(counts = {IRNode.LOAD_VECTOR_S, "= 0",
 890                   IRNode.AND_VS, "= 0",
 891                   IRNode.STORE_VECTOR, "= 0"},
 892         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
 893         applyIfPlatform = {"64-bit", "true"},
 894         applyIf = {"AlignVector", "true"})
 895     static Object[] test11cS(short[] a, short[] b, short mask) {
              // Bound is RANGE-1 because the loop reads a[i+1].
 896         for (int i = 1; i < RANGE-1; i++) {
 897             // 2 byte offset -> not alignable with AlignVector
 898             b[i+0] = (short)(a[i+1] & mask);
 899         }
              // Hand both arrays back to the caller.
 900         return new Object[]{ a, b };
 901     }
 902 
 903     @Test
         // Int loop where the load index (i+1) is one element ahead of the store index (i).
         // Rule 1 (AlignVector=false): expects int vector load/AND nodes and a vector store.
         // Rule 2 (AlignVector=true): expects no vector load/AND/store at all.
 904     @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
 905                   IRNode.AND_VI, "> 0",
 906                   IRNode.STORE_VECTOR, "> 0"},
 907         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
 908         applyIfPlatform = {"64-bit", "true"},
 909         applyIf = {"AlignVector", "false"})
 910     @IR(counts = {IRNode.LOAD_VECTOR_I, "= 0",
 911                   IRNode.AND_VI, "= 0",
 912                   IRNode.STORE_VECTOR, "= 0"},
 913         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
 914         applyIfPlatform = {"64-bit", "true"},
 915         applyIf = {"AlignVector", "true"})
 916     static Object[] test11cI(int[] a, int[] b, int mask) {
              // Bound is RANGE-1 because the loop reads a[i+1].
 917         for (int i = 1; i < RANGE-1; i++) {
 918             // 4 byte offset -> not alignable with AlignVector
 919             b[i+0] = (int)(a[i+1] & mask);
 920         }
              // Hand both arrays back to the caller.
 921         return new Object[]{ a, b };
 922     }
 923 
 924     @Test
         // Long loop where the load index (i+1) is one element ahead of the store index (i).
         // Unlike the byte/short/int variants, there is a single rule with no AlignVector
         // condition: long vector load/AND nodes and a vector store are always expected.
 925     @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0",
 926                   IRNode.AND_VL, "> 0",
 927                   IRNode.STORE_VECTOR, "> 0"},
 928         applyIfPlatform = {"64-bit", "true"},
 929         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 930     static Object[] test11cL(long[] a, long[] b, long mask) {
              // Bound is RANGE-1 because the loop reads a[i+1].
 931         for (int i = 1; i < RANGE-1; i++) {
 932             // always alignable (8 byte offset)
 933             b[i+0] = (long)(a[i+1] & mask);
 934         }
              // Hand both arrays back to the caller.
 935         return new Object[]{ a, b };
 936     }
 937 
 938     @Test
         // Byte loop whose index adds the loop-invariant parameter invar to i
         // (same index expression for load and store). Init 0, stride 1.
         // The single rule has no AlignVector condition and expects byte vector
         // load/AND nodes and a vector store.
 939     @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
 940                   IRNode.AND_VB, "> 0",
 941                   IRNode.STORE_VECTOR, "> 0"},
 942         applyIfPlatform = {"64-bit", "true"},
 943         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 944     static Object[] test11dB(byte[] a, byte[] b, byte mask, int invar) {
              // NOTE(review): i runs up to RANGE-1, so the caller presumably passes an invar
              // that keeps i+invar within the array bounds - confirm against the test runner.
 945         for (int i = 0; i < RANGE; i++) {
 946             b[i+0+invar] = (byte)(a[i+0+invar] & mask);
 947         }
              // Hand both arrays back to the caller.
 948         return new Object[]{ a, b };
 949     }
 950 
 951     @Test
         // Short loop whose index adds the loop-invariant parameter invar to i
         // (same index expression for load and store). Init 0, stride 1.
         // The single rule has no AlignVector condition and expects short vector
         // load/AND nodes and a vector store.
 952     @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
 953                   IRNode.AND_VS, "> 0",
 954                   IRNode.STORE_VECTOR, "> 0"},
 955         applyIfPlatform = {"64-bit", "true"},
 956         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 957     static Object[] test11dS(short[] a, short[] b, short mask, int invar) {
              // NOTE(review): i runs up to RANGE-1, so the caller presumably passes an invar
              // that keeps i+invar within the array bounds - confirm against the test runner.
 958         for (int i = 0; i < RANGE; i++) {
 959             b[i+0+invar] = (short)(a[i+0+invar] & mask);
 960         }
              // Hand both arrays back to the caller.
 961         return new Object[]{ a, b };
 962     }
 963 
 964     @Test
         // Int loop whose index adds the loop-invariant parameter invar to i
         // (same index expression for load and store). Init 0, stride 1.
         // The single rule has no AlignVector condition and expects int vector
         // load/AND nodes and a vector store.
 965     @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
 966                   IRNode.AND_VI, "> 0",
 967                   IRNode.STORE_VECTOR, "> 0"},
 968         applyIfPlatform = {"64-bit", "true"},
 969         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 970     static Object[] test11dI(int[] a, int[] b, int mask, int invar) {
              // NOTE(review): i runs up to RANGE-1, so the caller presumably passes an invar
              // that keeps i+invar within the array bounds - confirm against the test runner.
 971         for (int i = 0; i < RANGE; i++) {
 972             b[i+0+invar] = (int)(a[i+0+invar] & mask);
 973         }
              // Hand both arrays back to the caller.
 974         return new Object[]{ a, b };
 975     }
 976 
 977     @Test
         // Long loop whose index adds the loop-invariant parameter invar to i
         // (same index expression for load and store). Init 0, stride 1.
         // The single rule has no AlignVector condition and expects long vector
         // load/AND nodes and a vector store.
 978     @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0",
 979                   IRNode.AND_VL, "> 0",
 980                   IRNode.STORE_VECTOR, "> 0"},
 981         applyIfPlatform = {"64-bit", "true"},
 982         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 983     static Object[] test11dL(long[] a, long[] b, long mask, int invar) {
              // NOTE(review): i runs up to RANGE-1, so the caller presumably passes an invar
              // that keeps i+invar within the array bounds - confirm against the test runner.
 984         for (int i = 0; i < RANGE; i++) {
 985             b[i+0+invar] = (long)(a[i+0+invar] & mask);
 986         }
              // Hand both arrays back to the caller.
 987         return new Object[]{ a, b };
 988     }
 989 
 990     @Test
         // Byte loop with index scale 6: each iteration touches i*6+0..i*6+3 and skips
         // i*6+4 and i*6+5. Only one rule exists, and it applies to AlignVector=false:
         // it expects byte vector load/AND nodes of size min(max_byte, 4) and a vector store.
         // Nothing is asserted for AlignVector=true.
 991     @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE + "min(max_byte, 4)", "> 0",
 992                   IRNode.AND_VB,        IRNode.VECTOR_SIZE + "min(max_byte, 4)", "> 0",
 993                   IRNode.STORE_VECTOR,                                           "> 0"},
 994         applyIfPlatform = {"64-bit", "true"},
 995         applyIf = {"AlignVector", "false"},
 996         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 997     static Object[] test12(byte[] a, byte[] b, byte mask) {
              // With i < RANGE/16 the largest index, i*6+3, stays below RANGE.
 998         for (int i = 0; i < RANGE/16; i++) {
 999             // Non-power-of-2 stride. Vectorization of 4 bytes, then 2-bytes gap.
1000             b[i*6 + 0 ] = (byte)(a[i*6 + 0 ] & mask);
1001             b[i*6 + 1 ] = (byte)(a[i*6 + 1 ] & mask);
1002             b[i*6 + 2 ] = (byte)(a[i*6 + 2 ] & mask);
1003             b[i*6 + 3 ] = (byte)(a[i*6 + 3 ] & mask);
1004         }
              // Hand both arrays back to the caller.
1005         return new Object[]{ a, b };
1006     }
1007 
1008     @Test
         // Mixed element sizes in one loop: increments an int array and a long array
         // at the same index, starting at init 0 with stride 1.
         // Rule 1 (64-bit, avx2) and rule 2 (riscv64, rvv, MaxVectorSize>=32) check the
         // same thing: int and long vector load/ADD nodes of size
         // min(max_int, max_long), plus at least one vector store.
1009     @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
1010                   IRNode.LOAD_VECTOR_L, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
1011                   IRNode.ADD_VI, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
1012                   IRNode.ADD_VL, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
1013                   IRNode.STORE_VECTOR, "> 0"},
1014         applyIfPlatform = {"64-bit", "true"},
1015         applyIfCPUFeature = {"avx2", "true"})
1016     // require avx to ensure vectors are larger than what unrolling produces
1017     @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
1018                   IRNode.LOAD_VECTOR_L, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
1019                   IRNode.ADD_VI, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
1020                   IRNode.ADD_VL, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
1021                   IRNode.STORE_VECTOR, "> 0"},
1022         applyIfPlatform = {"riscv64", "true"},
1023         applyIfCPUFeature = {"rvv", "true"},
1024         applyIf = {"MaxVectorSize", ">=32"})
1025     static Object[] test13aIL(int[] a, long[] b) {
1026         for (int i = 0; i < RANGE; i++) {
1027             a[i]++;
1028             b[i]++;
1029         }
              // Hand both arrays back to the caller.
1030         return new Object[]{ a, b };
1031     }
1032 
1033     @Test
         // Mixed element sizes in one loop: increments an int array and a byte array
         // at the same index, starting at init 0 with stride 1.
         // The single rule has no AlignVector condition and expects byte and int
         // vector load/ADD nodes plus at least one vector store.
1034     @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
1035                   IRNode.LOAD_VECTOR_I, "> 0",
1036                   IRNode.ADD_VB, "> 0",
1037                   IRNode.ADD_VI, "> 0",
1038                   IRNode.STORE_VECTOR, "> 0"},
1039         applyIfPlatform = {"64-bit", "true"},
1040         applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
1041     static Object[] test13aIB(int[] a, byte[] b) {
1042         for (int i = 0; i < RANGE; i++) {
1043             a[i]++;
1044             b[i]++;
1045         }
              // Hand both arrays back to the caller.
1046         return new Object[]{ a, b };
1047     }
1048 
1049     @Test
         // Mixed element sizes in one loop: increments an int array and a short array
         // at the same index, starting at init 0 with stride 1.
         // The single rule has no AlignVector condition and expects int and short
         // vector load/ADD nodes plus at least one vector store.
1050     @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
1051                   IRNode.LOAD_VECTOR_S, "> 0",
1052                   IRNode.ADD_VI, "> 0",
1053                   IRNode.ADD_VS, "> 0",
1054                   IRNode.STORE_VECTOR, "> 0"},
1055         applyIfPlatform = {"64-bit", "true"},
1056         applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
1057     static Object[] test13aIS(int[] a, short[] b) {
1058         for (int i = 0; i < RANGE; i++) {
1059             a[i]++;
1060             b[i]++;
1061         }
              // Hand both arrays back to the caller.
1062         return new Object[]{ a, b };
1063     }
1064 
1065     @Test
         // Four element sizes in one loop: increments a byte, a short, an int and a long
         // array at the same index, starting at init 0 with stride 1.
         // The single rule has no AlignVector condition and expects vector load/ADD
         // nodes for all four types plus at least one vector store.
1066     @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
1067                   IRNode.LOAD_VECTOR_S, "> 0",
1068                   IRNode.LOAD_VECTOR_I, "> 0",
1069                   IRNode.LOAD_VECTOR_L, "> 0",
1070                   IRNode.ADD_VB, "> 0",
1071                   IRNode.ADD_VS, "> 0",
1072                   IRNode.ADD_VI, "> 0",
1073                   IRNode.ADD_VL, "> 0",
1074                   IRNode.STORE_VECTOR, "> 0"},
1075         applyIfPlatform = {"64-bit", "true"},
1076         applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
1077     static Object[] test13aBSIL(byte[] a, short[] b, int[] c, long[] d) {
1078         for (int i = 0; i < RANGE; i++) {
1079             a[i]++;
1080             b[i]++;
1081             c[i]++;
1082             d[i]++;
1083         }
              // Hand all four arrays back to the caller.
1084         return new Object[]{ a, b, c, d };
1085     }
1086 
1087     @Test
         // Mixed element sizes with init 1: increments an int array and a long array
         // at the same index, stride 1.
         // Rule 1 (64-bit, avx2) and rule 2 (riscv64, rvv, MaxVectorSize>=32) check the
         // same thing: int and long vector load/ADD nodes of size
         // min(max_int, max_long), plus at least one vector store.
1088     @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
1089                   IRNode.LOAD_VECTOR_L, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
1090                   IRNode.ADD_VI, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
1091                   IRNode.ADD_VL, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
1092                   IRNode.STORE_VECTOR, "> 0"},
1093         applyIfPlatform = {"64-bit", "true"},
1094         applyIfCPUFeature = {"avx2", "true"})
1095     // require avx to ensure vectors are larger than what unrolling produces
1096     @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
1097                   IRNode.LOAD_VECTOR_L, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
1098                   IRNode.ADD_VI, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
1099                   IRNode.ADD_VL, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
1100                   IRNode.STORE_VECTOR, "> 0"},
1101         applyIfPlatform = {"riscv64", "true"},
1102         applyIfCPUFeature = {"rvv", "true"},
1103         applyIf = {"MaxVectorSize", ">=32"})
1104     static Object[] test13bIL(int[] a, long[] b) {
1105         for (int i = 1; i < RANGE; i++) {
1106             a[i]++;
1107             b[i]++;
1108         }
              // Hand both arrays back to the caller.
1109         return new Object[]{ a, b };
1110     }
1111 
1112     @Test
         // Mixed element sizes with init 1: increments an int array and a byte array
         // at the same index, stride 1.
         // The single rule has no AlignVector condition and expects byte and int
         // vector load/ADD nodes plus at least one vector store.
1113     @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
1114                   IRNode.LOAD_VECTOR_I, "> 0",
1115                   IRNode.ADD_VB, "> 0",
1116                   IRNode.ADD_VI, "> 0",
1117                   IRNode.STORE_VECTOR, "> 0"},
1118         applyIfPlatform = {"64-bit", "true"},
1119         applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
1120     static Object[] test13bIB(int[] a, byte[] b) {
1121         for (int i = 1; i < RANGE; i++) {
1122             a[i]++;
1123             b[i]++;
1124         }
              // Hand both arrays back to the caller.
1125         return new Object[]{ a, b };
1126     }
1127 
1128     @Test
         // Mixed element sizes with init 1: increments an int array and a short array
         // at the same index, stride 1.
         // The single rule has no AlignVector condition and expects int and short
         // vector load/ADD nodes plus at least one vector store.
1129     @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
1130                   IRNode.LOAD_VECTOR_S, "> 0",
1131                   IRNode.ADD_VI, "> 0",
1132                   IRNode.ADD_VS, "> 0",
1133                   IRNode.STORE_VECTOR, "> 0"},
1134         applyIfPlatform = {"64-bit", "true"},
1135         applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
1136     static Object[] test13bIS(int[] a, short[] b) {
1137         for (int i = 1; i < RANGE; i++) {
1138             a[i]++;
1139             b[i]++;
1140         }
              // Hand both arrays back to the caller.
1141         return new Object[]{ a, b };
1142     }
1143 
1144     @Test
         // Four element sizes with init 1: increments a byte, a short, an int and a long
         // array at the same index, stride 1.
         // The single rule has no AlignVector condition and expects vector load/ADD
         // nodes for all four types plus at least one vector store.
1145     @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
1146                   IRNode.LOAD_VECTOR_S, "> 0",
1147                   IRNode.LOAD_VECTOR_I, "> 0",
1148                   IRNode.LOAD_VECTOR_L, "> 0",
1149                   IRNode.ADD_VB, "> 0",
1150                   IRNode.ADD_VS, "> 0",
1151                   IRNode.ADD_VI, "> 0",
1152                   IRNode.ADD_VL, "> 0",
1153                   IRNode.STORE_VECTOR, "> 0"},
1154         applyIfPlatform = {"64-bit", "true"},
1155         applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
1156     static Object[] test13bBSIL(byte[] a, short[] b, int[] c, long[] d) {
1157         for (int i = 1; i < RANGE; i++) {
1158             a[i]++;
1159             b[i]++;
1160             c[i]++;
1161             d[i]++;
1162         }
              // Hand all four arrays back to the caller.
1163         return new Object[]{ a, b, c, d };
1164     }
1165 
1166     @Test
         // In-place byte increments with stride 9 and 16 adjacent accesses (i+0..i+15)
         // per iteration, so offsets i+9..i+15 are touched again by the next iteration.
         // Both rules (AlignVector=false and AlignVector=true) assert the same thing:
         // no byte vector load/ADD and no vector store.
1167     @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
1168                   IRNode.ADD_VB, "= 0",
1169                   IRNode.STORE_VECTOR, "= 0"},
1170         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
1171         applyIfPlatform = {"64-bit", "true"},
1172         applyIf = {"AlignVector", "false"})
1173     @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
1174                   IRNode.ADD_VB, "= 0",
1175                   IRNode.STORE_VECTOR, "= 0"},
1176         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
1177         applyIfPlatform = {"64-bit", "true"},
1178         applyIf = {"AlignVector", "true"})
1179     static Object[] test14aB(byte[] a) {
1180         // non-power-of-2 stride
1181         for (int i = 0; i < RANGE-20; i+=9) {
1182             // Since the stride is shorter than the vector length, there will be always
1183             // partial overlap of loads with previous stores, this leads to failure in
1184             // store-to-load-forwarding -> vectorization not profitable.
1185             a[i+0]++;
1186             a[i+1]++;
1187             a[i+2]++;
1188             a[i+3]++;
1189             a[i+4]++;
1190             a[i+5]++;
1191             a[i+6]++;
1192             a[i+7]++;
1193             a[i+8]++;
1194             a[i+9]++;
1195             a[i+10]++;
1196             a[i+11]++;
1197             a[i+12]++;
1198             a[i+13]++;
1199             a[i+14]++;
1200             a[i+15]++;
1201         }
              // Hand the modified array back to the caller.
1202         return new Object[]{ a };
1203     }
1204 
1205     @Test
         // In-place byte increments with stride 3 and 16 adjacent accesses (i+0..i+15)
         // per iteration, so offsets i+3..i+15 are touched again by later iterations.
         // Both rules (AlignVector=false and AlignVector=true) assert the same thing:
         // no byte vector load/ADD and no vector store.
1206     @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
1207                   IRNode.ADD_VB, "= 0",
1208                   IRNode.STORE_VECTOR, "= 0"},
1209         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
1210         applyIfPlatform = {"64-bit", "true"},
1211         applyIf = {"AlignVector", "false"})
1212     @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
1213                   IRNode.ADD_VB, "= 0",
1214                   IRNode.STORE_VECTOR, "= 0"},
1215         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
1216         applyIfPlatform = {"64-bit", "true"},
1217         applyIf = {"AlignVector", "true"})
1218     static Object[] test14bB(byte[] a) {
1219         // non-power-of-2 stride
1220         for (int i = 0; i < RANGE-20; i+=3) {
1221             // Since the stride is shorter than the vector length, there will be always
1222             // partial overlap of loads with previous stores, this leads to failure in
1223             // store-to-load-forwarding -> vectorization not profitable.
1224             a[i+0]++;
1225             a[i+1]++;
1226             a[i+2]++;
1227             a[i+3]++;
1228             a[i+4]++;
1229             a[i+5]++;
1230             a[i+6]++;
1231             a[i+7]++;
1232             a[i+8]++;
1233             a[i+9]++;
1234             a[i+10]++;
1235             a[i+11]++;
1236             a[i+12]++;
1237             a[i+13]++;
1238             a[i+14]++;
1239             a[i+15]++;
1240         }
              // Hand the modified array back to the caller.
1241         return new Object[]{ a };
1242     }
1243 
1244     @Test
         // In-place byte increments with stride 5 and 16 adjacent accesses (i+0..i+15)
         // per iteration, so offsets i+5..i+15 are touched again by later iterations.
         // Both rules (AlignVector=false and AlignVector=true) assert the same thing:
         // no byte vector load/ADD and no vector store.
1245     @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
1246                   IRNode.ADD_VB, "= 0",
1247                   IRNode.STORE_VECTOR, "= 0"},
1248         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
1249         applyIfPlatform = {"64-bit", "true"},
1250         applyIf = {"AlignVector", "false"})
1251     @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
1252                   IRNode.ADD_VB, "= 0",
1253                   IRNode.STORE_VECTOR, "= 0"},
1254         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
1255         applyIfPlatform = {"64-bit", "true"},
1256         applyIf = {"AlignVector", "true"})
1257     static Object[] test14cB(byte[] a) {
1258         // non-power-of-2 stride
1259         for (int i = 0; i < RANGE-20; i+=5) {
1260             // Since the stride is shorter than the vector length, there will be always
1261             // partial overlap of loads with previous stores, this leads to failure in
1262             // store-to-load-forwarding -> vectorization not profitable.
1263             a[i+0]++;
1264             a[i+1]++;
1265             a[i+2]++;
1266             a[i+3]++;
1267             a[i+4]++;
1268             a[i+5]++;
1269             a[i+6]++;
1270             a[i+7]++;
1271             a[i+8]++;
1272             a[i+9]++;
1273             a[i+10]++;
1274             a[i+11]++;
1275             a[i+12]++;
1276             a[i+13]++;
1277             a[i+14]++;
1278             a[i+15]++;
1279         }
              // Hand the modified array back to the caller.
1280         return new Object[]{ a };
1281     }
1282 
1283     @Test
         // In-place byte increments with stride 9 but only 8 adjacent accesses
         // (i+0..i+7) per iteration, so successive iterations touch disjoint elements.
         // Rule 1 (AlignVector=false): expects byte vector load/ADD nodes of size
         //         min(max_byte, 8) and at least one vector store.
         // Rule 2 (AlignVector=true): expects no vector load/ADD/store at all.
1284     @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE + "min(max_byte, 8)", "> 0",
1285                   IRNode.ADD_VB,        IRNode.VECTOR_SIZE + "min(max_byte, 8)", "> 0",
1286                   IRNode.STORE_VECTOR,                                           "> 0"},
1287         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
1288         applyIfPlatform = {"64-bit", "true"},
1289         applyIf = {"AlignVector", "false"})
1290     @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
1291                   IRNode.ADD_VB, "= 0",
1292                   IRNode.STORE_VECTOR, "= 0"},
1293         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
1294         applyIfPlatform = {"64-bit", "true"},
1295         applyIf = {"AlignVector", "true"})
1296     static Object[] test14dB(byte[] a) {
1297         // non-power-of-2 stride
1298         for (int i = 0; i < RANGE-20; i+=9) {
1299             a[i+0]++;
1300             a[i+1]++;
1301             a[i+2]++;
1302             a[i+3]++;
1303             a[i+4]++;
1304             a[i+5]++;
1305             a[i+6]++;
1306             a[i+7]++;
1307         }
              // Hand the modified array back to the caller.
1308         return new Object[]{ a };
1309     }
1310 
1311     @Test
         // In-place byte increments with stride 11 and 8 adjacent accesses
         // (i+0..i+7) per iteration, so successive iterations touch disjoint elements.
         // Rule 1 (AlignVector=false): expects byte vector load/ADD nodes of size
         //         min(max_byte, 8) and at least one vector store.
         // Rule 2 (AlignVector=true): expects no vector load/ADD/store at all.
1312     @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE + "min(max_byte, 8)", "> 0",
1313                   IRNode.ADD_VB,        IRNode.VECTOR_SIZE + "min(max_byte, 8)", "> 0",
1314                   IRNode.STORE_VECTOR,                                           "> 0"},
1315         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
1316         applyIfPlatform = {"64-bit", "true"},
1317         applyIf = {"AlignVector", "false"})
1318     @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
1319                   IRNode.ADD_VB, "= 0",
1320                   IRNode.STORE_VECTOR, "= 0"},
1321         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
1322         applyIfPlatform = {"64-bit", "true"},
1323         applyIf = {"AlignVector", "true"})
1324     static Object[] test14eB(byte[] a) {
1325         // non-power-of-2 stride
1326         for (int i = 0; i < RANGE-32; i+=11) {
1327             a[i+0]++;
1328             a[i+1]++;
1329             a[i+2]++;
1330             a[i+3]++;
1331             a[i+4]++;
1332             a[i+5]++;
1333             a[i+6]++;
1334             a[i+7]++;
1335         }
              // Hand the modified array back to the caller.
1336         return new Object[]{ a };
1337     }
1338 
1339     @Test
         // In-place byte increments with stride 12 and 8 adjacent accesses
         // (i+0..i+7) per iteration, so successive iterations touch disjoint elements.
         // Rule 1 (AlignVector=false): expects byte vector load/ADD nodes of size
         //         min(max_byte, 8) and at least one vector store.
         // Rule 2 (AlignVector=true): expects no vector load/ADD/store at all.
1340     @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE + "min(max_byte, 8)", "> 0",
1341                   IRNode.ADD_VB,        IRNode.VECTOR_SIZE + "min(max_byte, 8)", "> 0",
1342                   IRNode.STORE_VECTOR,                                           "> 0"},
1343         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
1344         applyIfPlatform = {"64-bit", "true"},
1345         applyIf = {"AlignVector", "false"})
1346     @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
1347                   IRNode.ADD_VB, "= 0",
1348                   IRNode.STORE_VECTOR, "= 0"},
1349         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
1350         applyIfPlatform = {"64-bit", "true"},
1351         applyIf = {"AlignVector", "true"})
1352     static Object[] test14fB(byte[] a) {
1353         // non-power-of-2 stride
1354         for (int i = 0; i < RANGE-40; i+=12) {
1355             a[i+0]++;
1356             a[i+1]++;
1357             a[i+2]++;
1358             a[i+3]++;
1359             a[i+4]++;
1360             a[i+5]++;
1361             a[i+6]++;
1362             a[i+7]++;
1363         }
              // Hand the modified array back to the caller.
1364         return new Object[]{ a };
1365     }
1366 
1367     @Test
         // In-place byte increments with index scale 53 and 16 adjacent accesses
         // (53*i+0..53*i+15) per iteration. This test carries no @IR rule.
1368     // IR rules difficult because of modulo wrapping with offset after peeling.
1369     static Object[] test15aB(byte[] a) {
1370         // non-power-of-2 scale
1371         for (int i = 0; i < RANGE/64-20; i++) {
1372             a[53*i+0]++;
1373             a[53*i+1]++;
1374             a[53*i+2]++;
1375             a[53*i+3]++;
1376             a[53*i+4]++;
1377             a[53*i+5]++;
1378             a[53*i+6]++;
1379             a[53*i+7]++;
1380             a[53*i+8]++;
1381             a[53*i+9]++;
1382             a[53*i+10]++;
1383             a[53*i+11]++;
1384             a[53*i+12]++;
1385             a[53*i+13]++;
1386             a[53*i+14]++;
1387             a[53*i+15]++;
1388         }
              // Hand the modified array back to the caller.
1389         return new Object[]{ a };
1390     }
1391 
1392     @Test // No @IR annotation on this test; the next comment gives the reason.
1393     // IR rules difficult because of modulo wrapping with offset after peeling.
1394     static Object[] test15bB(byte[] a) {
1395         // non-power-of-2 scale: every index has the form 25*i + k
1396         for (int i = 0; i < RANGE/64-20; i++) {
1397             a[25*i+0]++; // k = 0 .. 15 from here down: 16 adjacent bytes starting at 25*i
1398             a[25*i+1]++;
1399             a[25*i+2]++;
1400             a[25*i+3]++;
1401             a[25*i+4]++;
1402             a[25*i+5]++;
1403             a[25*i+6]++;
1404             a[25*i+7]++;
1405             a[25*i+8]++;
1406             a[25*i+9]++;
1407             a[25*i+10]++;
1408             a[25*i+11]++;
1409             a[25*i+12]++;
1410             a[25*i+13]++;
1411             a[25*i+14]++;
1412             a[25*i+15]++;
1413         }
1414         return new Object[]{ a }; // the same array instance, wrapped in an Object[]
1415     }
1416 
1417     @Test // No @IR annotation on this test; the next comment gives the reason.
1418     // IR rules difficult because of modulo wrapping with offset after peeling.
1419     static Object[] test15cB(byte[] a) {
1420         // non-power-of-2 scale: every index has the form 19*i + k
1421         for (int i = 0; i < RANGE/64-20; i++) {
1422             a[19*i+0]++; // k = 0 .. 15 from here down: 16 adjacent bytes starting at 19*i
1423             a[19*i+1]++;
1424             a[19*i+2]++;
1425             a[19*i+3]++;
1426             a[19*i+4]++;
1427             a[19*i+5]++;
1428             a[19*i+6]++;
1429             a[19*i+7]++;
1430             a[19*i+8]++;
1431             a[19*i+9]++;
1432             a[19*i+10]++;
1433             a[19*i+11]++;
1434             a[19*i+12]++;
1435             a[19*i+13]++;
1436             a[19*i+14]++;
1437             a[19*i+15]++;
1438         }
1439         return new Object[]{ a }; // the same array instance, wrapped in an Object[]
1440     }
1441 
1442     @Test // No @IR annotation on this test.
1443     static Object[] test16a(byte[] a, short[] b) {
1444         // infinite loop issues (NOTE(review): no further detail visible here; presumably a regression case - confirm)
1445         for (int i = 0; i < RANGE/2-20; i++) {
1446             a[2*i+0]++; // 15 byte increments, a[2*i+0] .. a[2*i+14]; the next iteration starts at 2*i+2, so ranges overlap
1447             a[2*i+1]++;
1448             a[2*i+2]++;
1449             a[2*i+3]++;
1450             a[2*i+4]++;
1451             a[2*i+5]++;
1452             a[2*i+6]++;
1453             a[2*i+7]++;
1454             a[2*i+8]++;
1455             a[2*i+9]++;
1456             a[2*i+10]++;
1457             a[2*i+11]++;
1458             a[2*i+12]++;
1459             a[2*i+13]++;
1460             a[2*i+14]++;
1461 
1462             b[2*i+0]++; // 4 short increments, b[2*i+0] .. b[2*i+3], using the same index scale of 2
1463             b[2*i+1]++;
1464             b[2*i+2]++;
1465             b[2*i+3]++;
1466         }
1467         return new Object[]{ a, b }; // both arrays (same instances), in parameter order
1468     }
1469 
1470     @Test // No @IR annotation on this test.
1471     static Object[] test16b(byte[] a) {
1472         // infinite loop issues (NOTE(review): no further detail visible here; presumably a regression case - confirm)
1473         for (int i = 0; i < RANGE/2-20; i++) {
1474             a[2*i+0]++; // 15 byte increments, a[2*i+0] .. a[2*i+14]; the next iteration starts at 2*i+2, so ranges overlap
1475             a[2*i+1]++;
1476             a[2*i+2]++;
1477             a[2*i+3]++;
1478             a[2*i+4]++;
1479             a[2*i+5]++;
1480             a[2*i+6]++;
1481             a[2*i+7]++;
1482             a[2*i+8]++;
1483             a[2*i+9]++;
1484             a[2*i+10]++;
1485             a[2*i+11]++;
1486             a[2*i+12]++;
1487             a[2*i+13]++;
1488             a[2*i+14]++;
1489         }
1490         return new Object[]{ a }; // the same array instance, wrapped in an Object[]
1491     }
1492 
1493     @Test // Adds 1 to a long through Unsafe at each 8-byte step; the @IR rule asks for long vector nodes (count > 0).
1494     @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0",
1495                   IRNode.ADD_VL, "> 0",
1496                   IRNode.STORE_VECTOR, "> 0"},
1497         applyIfPlatform = {"64-bit", "true"},
1498         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
1499     static Object[] test17a(long[] a) {
1500         // Unsafe: vectorizes with profiling (not xcomp)
1501         for (int i = 0; i < RANGE; i++) {
1502             long adr = UNSAFE.ARRAY_LONG_BASE_OFFSET + 8L * i; // byte offset: long[] base offset plus 8 bytes per step of i
1503             long v = UNSAFE.getLongUnaligned(a, adr);
1504             UNSAFE.putLongUnaligned(a, adr, v + 1); // write back to the same offset that was read
1505         }
1506         return new Object[]{ a }; // the same array instance, wrapped in an Object[]
1507     }
1508 
1509     @Test // No @IR annotation on this test; the next comment gives the reason.
1510     // Difficult to write good IR rule. Modulo calculus overflow can create non-power-of-2 packs.
1511     static Object[] test17b(long[] a) {
1512         // Not alignable: every access starts 1 byte past an 8-byte step from the long[] base offset
1513         for (int i = 0; i < RANGE-1; i++) {
1514             long adr = UNSAFE.ARRAY_LONG_BASE_OFFSET + 8L * i + 1; // the trailing +1 is the one-byte shift
1515             long v = UNSAFE.getLongUnaligned(a, adr);
1516             UNSAFE.putLongUnaligned(a, adr, v + 1); // write back to the same offset that was read
1517         }
1518         return new Object[]{ a }; // the same array instance, wrapped in an Object[]
1519     }
1520 
1521     @Test // Two adjacent longs per iteration via Unsafe, loop stride 4; the @IR rule asks for 2-element long vectors.
1522     @IR(counts = {IRNode.LOAD_VECTOR_L, IRNode.VECTOR_SIZE_2, "> 0",
1523                   IRNode.ADD_VL,        IRNode.VECTOR_SIZE_2, "> 0",
1524                   IRNode.STORE_VECTOR, "> 0"},
1525         applyIf = {"MaxVectorSize", ">=32"},
1526         applyIfPlatform = {"64-bit", "true"},
1527         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
1528     static Object[] test17c(long[] a) {
1529         // Unsafe: aligned vectorizes
1530         for (int i = 0; i < RANGE-1; i+=4) {
1531             long adr = UNSAFE.ARRAY_LONG_BASE_OFFSET + 8L * i; // byte offset: long[] base offset plus 8 bytes per step of i
1532             long v0 = UNSAFE.getLongUnaligned(a, adr + 0);
1533             long v1 = UNSAFE.getLongUnaligned(a, adr + 8); // the long 8 bytes after v0
1534             UNSAFE.putLongUnaligned(a, adr + 0, v0 + 1); // both loads are done before either store
1535             UNSAFE.putLongUnaligned(a, adr + 8, v1 + 1);
1536         }
1537         return new Object[]{ a }; // the same array instance, wrapped in an Object[]
1538     }
1539 
1540     @Test // Same access pattern as adr+0 / adr+8 with stride 4, but shifted by 1 byte; expectations are in the two @IR rules.
1541     @IR(counts = {IRNode.LOAD_VECTOR_L, IRNode.VECTOR_SIZE_2, "> 0",
1542                   IRNode.ADD_VL,        IRNode.VECTOR_SIZE_2, "> 0",
1543                   IRNode.STORE_VECTOR, "> 0"},
1544         applyIfCPUFeatureOr = {"avx512", "true", "asimd", "true", "rvv", "true"},
1545         applyIfPlatform = {"64-bit", "true"},
1546         applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=64"})
1547     // Ensure vector width is large enough to fit 64 byte for longs:
1548     // The offsets are: 25, 33, 57, 65
1549     // In modulo 32:    25,  1, 25,  1  -> does not vectorize
1550     // In modulo 64:    25, 33, 57,  1  -> at least first pair vectorizes
1551     // This problem is because we compute modulo vector width in memory_alignment.
1552     @IR(counts = {IRNode.LOAD_VECTOR_L, "= 0",
1553                   IRNode.ADD_VL, "= 0",
1554                   IRNode.STORE_VECTOR, "= 0"},
1555         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"},
1556         applyIfPlatform = {"64-bit", "true"},
1557         applyIf = {"AlignVector", "true"}) // rule 2: AlignVector on -> none of these vector nodes allowed (count = 0)
1558     static Object[] test17d(long[] a) {
1559         // Not alignable: every access starts 1 byte past an 8-byte step from the long[] base offset
1560         for (int i = 0; i < RANGE-1; i+=4) {
1561             long adr = UNSAFE.ARRAY_LONG_BASE_OFFSET + 8L * i + 1; // the trailing +1 is the one-byte shift
1562             long v0 = UNSAFE.getLongUnaligned(a, adr + 0);
1563             long v1 = UNSAFE.getLongUnaligned(a, adr + 8); // the long 8 bytes after v0
1564             UNSAFE.putLongUnaligned(a, adr + 0, v0 + 1); // both loads are done before either store
1565             UNSAFE.putLongUnaligned(a, adr + 8, v1 + 1);
1566         }
1567         return new Object[]{ a }; // the same array instance, wrapped in an Object[]
1568     }
1569 
1570     @Test // No @IR annotation on this test.
1571     static Object[] test18a(byte[] a, int[] b) {
1572         // scale = 0  -->  no iv: the two byte stores use constant indices, only the int store is indexed by i
1573         for (int i = 0; i < RANGE; i++) {
1574             a[0] = 1; // same element on every iteration
1575             b[i] = 2; // placed between the two constant-index byte stores
1576             a[1] = 1; // same element on every iteration
1577         }
1578         return new Object[]{ a, b }; // both arrays (same instances), in parameter order
1579     }
1580 
1581     @Test // No @IR annotation on this test.
1582     static Object[] test18b(byte[] a, int[] b) {
1583         // scale = 0  -->  no iv: the two byte stores use constant indices (1 and 2), only the int store is indexed by i
1584         for (int i = 0; i < RANGE; i++) {
1585             a[1] = 1; // same element on every iteration
1586             b[i] = 2; // placed between the two constant-index byte stores
1587             a[2] = 1; // same element on every iteration
1588         }
1589         return new Object[]{ a, b }; // both arrays (same instances), in parameter order
1590     }
1591 
1592     @Test // No @IR annotation on this test.
1593     static Object[] test19(int[] a, int[] b) {
1594         for (int i = 5000; i > 0; i--) { // i counts down 5000 .. 1, so the index RANGE_FINAL - i counts up
1595             a[RANGE_FINAL - i] = b[RANGE_FINAL - i]; // copy one int from b to a at the same index
1596         }
1597         return new Object[]{ a, b }; // both arrays (same instances), in parameter order
1598     }
1599 
1600     @Test // No @IR annotation on this test.
1601     static Object[] test20(byte[] a) {
1602         // Example where it is easy to pass alignment check,
1603         // but used to fail the alignment calculation
1604         for (int i = 1; i < RANGE/2-50; i++) { // note: i starts at 1, not 0
1605             a[2*i+0+30]++; // 4 adjacent bytes starting at 2*i+30: index scale 2 plus a constant offset of 30
1606             a[2*i+1+30]++;
1607             a[2*i+2+30]++;
1608             a[2*i+3+30]++;
1609         }
1610         return new Object[]{ a }; // the same array instance, wrapped in an Object[]
1611     }
1612 }