1 /*
   2  * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  */
  23 
  24 package compiler.loopopts.superword;
  25 
  26 import compiler.lib.ir_framework.*;
  27 import jdk.test.lib.Utils;
  28 import jdk.test.whitebox.WhiteBox;
  29 import jdk.internal.misc.Unsafe;
  30 import java.lang.reflect.Array;
  31 import java.util.Map;
  32 import java.util.HashMap;
  33 import java.util.Random;
  34 import java.nio.ByteOrder;
  35 
  36 /*
  37  * @test id=NoAlignVector
  38  * @bug 8310190
  39  * @summary Test AlignVector with various loop init, stride, scale, invar, etc.
  40  * @modules java.base/jdk.internal.misc
  41  * @library /test/lib /
  42  * @run driver compiler.loopopts.superword.TestAlignVector NoAlignVector
  43  */
  44 
  45 /*
  46  * @test id=AlignVector
  47  * @bug 8310190
  48  * @summary Test AlignVector with various loop init, stride, scale, invar, etc.
  49  * @modules java.base/jdk.internal.misc
  50  * @library /test/lib /
  51  * @run driver compiler.loopopts.superword.TestAlignVector AlignVector
  52  */
  53 
  54 /*
  55  * @test id=VerifyAlignVector
  56  * @bug 8310190
  57  * @summary Test AlignVector with various loop init, stride, scale, invar, etc.
  58  * @modules java.base/jdk.internal.misc
  59  * @library /test/lib /
  60  * @run driver compiler.loopopts.superword.TestAlignVector VerifyAlignVector
  61  */
  62 
  63 /*
  64  * @test id=NoAlignVector-COH
  65  * @bug 8310190
  66  * @summary Test AlignVector with various loop init, stride, scale, invar, etc.
  67  * @modules java.base/jdk.internal.misc
  68  * @library /test/lib /
  69  * @run driver compiler.loopopts.superword.TestAlignVector NoAlignVector-COH
  70  */
  71 
  72 /*
  73  * @test id=VerifyAlignVector-COH
  74  * @bug 8310190
  75  * @summary Test AlignVector with various loop init, stride, scale, invar, etc.
  76  * @modules java.base/jdk.internal.misc
  77  * @library /test/lib /
  78  * @run driver compiler.loopopts.superword.TestAlignVector VerifyAlignVector-COH
  79  */
  80 
  81 public class TestAlignVector {
  82     static int RANGE = 1024*8;
  83     static int RANGE_FINAL = 1024*8;
  84     private static final Unsafe UNSAFE = Unsafe.getUnsafe();
  85     private static final Random RANDOM = Utils.getRandomInstance();
  86 
  87     // Inputs
  88     byte[] aB;
  89     byte[] bB;
  90     byte mB = (byte)31;
  91     short[] aS;
  92     short[] bS;
  93     short mS = (short)0xF0F0;
  94     int[] aI;
  95     int[] bI;
  96     int mI = 0xF0F0F0F0;
  97     long[] aL;
  98     long[] bL;
  99     long mL = 0xF0F0F0F0F0F0F0F0L;
 100 
 101     // List of tests
 102     Map<String,TestFunction> tests = new HashMap<String,TestFunction>();
 103 
 104     // List of gold, the results from the first run before compilation
 105     Map<String,Object[]> golds = new HashMap<String,Object[]>();
 106 
 107     interface TestFunction {
 108         Object[] run();
 109     }
 110 
 111     public static void main(String[] args) {
 112         TestFramework framework = new TestFramework(TestAlignVector.class);
 113         framework.addFlags("--add-modules", "java.base", "--add-exports", "java.base/jdk.internal.misc=ALL-UNNAMED",
 114                            "-XX:+IgnoreUnrecognizedVMOptions", "-XX:LoopUnrollLimit=250");
 115 
 116         switch (args[0]) {
 117             case "NoAlignVector"         -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:-UseCompactObjectHeaders", "-XX:-AlignVector"); }
 118             case "AlignVector"           -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:-UseCompactObjectHeaders", "-XX:+AlignVector"); }
 119             case "VerifyAlignVector"     -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:-UseCompactObjectHeaders", "-XX:+AlignVector", "-XX:+IgnoreUnrecognizedVMOptions", "-XX:+VerifyAlignVector"); }
 120             case "NoAlignVector-COH"     -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:+UseCompactObjectHeaders", "-XX:-AlignVector"); }
 121             case "VerifyAlignVector-COH" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:+UseCompactObjectHeaders", "-XX:+AlignVector", "-XX:+IgnoreUnrecognizedVMOptions", "-XX:+VerifyAlignVector"); }
 122             default -> { throw new RuntimeException("Test argument not recognized: " + args[0]); }
 123         }
 124         framework.start();
 125     }
 126 
 127     public TestAlignVector() {
 128         // Generate input once
 129         aB = generateB();
 130         bB = generateB();
 131         aS = generateS();
 132         bS = generateS();
 133         aI = generateI();
 134         bI = generateI();
 135         aL = generateL();
 136         bL = generateL();
 137 
 138         // Add all tests to list
 139         tests.put("test0",       () -> { return test0(aB.clone(), bB.clone(), mB); });
 140         tests.put("test1",       () -> { return test1(aB.clone(), bB.clone(), mB); });
 141         tests.put("test2",       () -> { return test2(aB.clone(), bB.clone(), mB); });
 142         tests.put("test3",       () -> { return test3(aB.clone(), bB.clone(), mB); });
 143         tests.put("test4",       () -> { return test4(aB.clone(), bB.clone(), mB); });
 144         tests.put("test5",       () -> { return test5(aB.clone(), bB.clone(), mB, 0); });
 145         tests.put("test6",       () -> { return test6(aB.clone(), bB.clone(), mB); });
 146         tests.put("test7",       () -> { return test7(aS.clone(), bS.clone(), mS); });
 147         tests.put("test8",       () -> { return test8(aB.clone(), bB.clone(), mB, 0); });
 148         tests.put("test8",       () -> { return test8(aB.clone(), bB.clone(), mB, 1); });
 149         tests.put("test9",       () -> { return test9(aB.clone(), bB.clone(), mB); });
 150 
 151         tests.put("test10a",     () -> { return test10a(aB.clone(), bB.clone(), mB); });
 152         tests.put("test10b",     () -> { return test10b(aB.clone(), bB.clone(), mB); });
 153         tests.put("test10c",     () -> { return test10c(aS.clone(), bS.clone(), mS); });
 154         tests.put("test10d",     () -> { return test10d(aS.clone(), bS.clone(), mS); });
 155 
 156         tests.put("test11aB",    () -> { return test11aB(aB.clone(), bB.clone(), mB); });
 157         tests.put("test11aS",    () -> { return test11aS(aS.clone(), bS.clone(), mS); });
 158         tests.put("test11aI",    () -> { return test11aI(aI.clone(), bI.clone(), mI); });
 159         tests.put("test11aL",    () -> { return test11aL(aL.clone(), bL.clone(), mL); });
 160 
 161         tests.put("test11bB",    () -> { return test11bB(aB.clone(), bB.clone(), mB); });
 162         tests.put("test11bS",    () -> { return test11bS(aS.clone(), bS.clone(), mS); });
 163         tests.put("test11bI",    () -> { return test11bI(aI.clone(), bI.clone(), mI); });
 164         tests.put("test11bL",    () -> { return test11bL(aL.clone(), bL.clone(), mL); });
 165 
 166         tests.put("test11cB",    () -> { return test11cB(aB.clone(), bB.clone(), mB); });
 167         tests.put("test11cS",    () -> { return test11cS(aS.clone(), bS.clone(), mS); });
 168         tests.put("test11cI",    () -> { return test11cI(aI.clone(), bI.clone(), mI); });
 169         tests.put("test11cL",    () -> { return test11cL(aL.clone(), bL.clone(), mL); });
 170 
 171         tests.put("test11dB",    () -> { return test11dB(aB.clone(), bB.clone(), mB, 0); });
 172         tests.put("test11dS",    () -> { return test11dS(aS.clone(), bS.clone(), mS, 0); });
 173         tests.put("test11dI",    () -> { return test11dI(aI.clone(), bI.clone(), mI, 0); });
 174         tests.put("test11dL",    () -> { return test11dL(aL.clone(), bL.clone(), mL, 0); });
 175 
 176         tests.put("test12",      () -> { return test12(aB.clone(), bB.clone(), mB); });
 177 
 178         tests.put("test13aIL",   () -> { return test13aIL(aI.clone(), aL.clone()); });
 179         tests.put("test13aIB",   () -> { return test13aIB(aI.clone(), aB.clone()); });
 180         tests.put("test13aIS",   () -> { return test13aIS(aI.clone(), aS.clone()); });
 181         tests.put("test13aBSIL", () -> { return test13aBSIL(aB.clone(), aS.clone(), aI.clone(), aL.clone()); });
 182 
 183         tests.put("test13bIL",   () -> { return test13bIL(aI.clone(), aL.clone()); });
 184         tests.put("test13bIB",   () -> { return test13bIB(aI.clone(), aB.clone()); });
 185         tests.put("test13bIS",   () -> { return test13bIS(aI.clone(), aS.clone()); });
 186         tests.put("test13bBSIL", () -> { return test13bBSIL(aB.clone(), aS.clone(), aI.clone(), aL.clone()); });
 187 
 188         tests.put("test14aB",    () -> { return test14aB(aB.clone()); });
 189         tests.put("test14bB",    () -> { return test14bB(aB.clone()); });
 190         tests.put("test14cB",    () -> { return test14cB(aB.clone()); });
 191         tests.put("test14dB",    () -> { return test14dB(aB.clone()); });
 192         tests.put("test14eB",    () -> { return test14eB(aB.clone()); });
 193         tests.put("test14fB",    () -> { return test14fB(aB.clone()); });
 194 
 195         tests.put("test15aB",    () -> { return test15aB(aB.clone()); });
 196         tests.put("test15bB",    () -> { return test15bB(aB.clone()); });
 197         tests.put("test15cB",    () -> { return test15cB(aB.clone()); });
 198 
 199         tests.put("test16a",     () -> { return test16a(aB.clone(), aS.clone()); });
 200         tests.put("test16b",     () -> { return test16b(aB.clone()); });
 201 
 202         tests.put("test17a",     () -> { return test17a(aL.clone()); });
 203         tests.put("test17b",     () -> { return test17b(aL.clone()); });
 204         tests.put("test17c",     () -> { return test17c(aL.clone()); });
 205         tests.put("test17d",     () -> { return test17d(aL.clone()); });
 206 
 207         tests.put("test18a",     () -> { return test18a(aB.clone(), aI.clone()); });
 208         tests.put("test18b",     () -> { return test18b(aB.clone(), aI.clone()); });
 209 
 210         tests.put("test19",      () -> { return test19(aI.clone(), bI.clone()); });
 211         tests.put("test20",      () -> { return test20(aB.clone()); });
 212 
 213         // Compute gold value for all test methods before compilation
 214         for (Map.Entry<String,TestFunction> entry : tests.entrySet()) {
 215             String name = entry.getKey();
 216             TestFunction test = entry.getValue();
 217             Object[] gold = test.run();
 218             golds.put(name, gold);
 219         }
 220     }
 221 
 222     @Warmup(100)
 223     @Run(test = {"test0",
 224                  "test1",
 225                  "test2",
 226                  "test3",
 227                  "test4",
 228                  "test5",
 229                  "test6",
 230                  "test7",
 231                  "test8",
 232                  "test9",
 233                  "test10a",
 234                  "test10b",
 235                  "test10c",
 236                  "test10d",
 237                  "test11aB",
 238                  "test11aS",
 239                  "test11aI",
 240                  "test11aL",
 241                  "test11bB",
 242                  "test11bS",
 243                  "test11bI",
 244                  "test11bL",
 245                  "test11cB",
 246                  "test11cS",
 247                  "test11cI",
 248                  "test11cL",
 249                  "test11dB",
 250                  "test11dS",
 251                  "test11dI",
 252                  "test11dL",
 253                  "test12",
 254                  "test13aIL",
 255                  "test13aIB",
 256                  "test13aIS",
 257                  "test13aBSIL",
 258                  "test13bIL",
 259                  "test13bIB",
 260                  "test13bIS",
 261                  "test13bBSIL",
 262                  "test14aB",
 263                  "test14bB",
 264                  "test14cB",
 265                  "test14dB",
 266                  "test14eB",
 267                  "test14fB",
 268                  "test15aB",
 269                  "test15bB",
 270                  "test15cB",
 271                  "test16a",
 272                  "test16b",
 273                  "test17a",
 274                  "test17b",
 275                  "test17c",
 276                  "test17d",
 277                  "test18a",
 278                  "test18b",
 279                  "test19",
 280                  "test20"})
 281     public void runTests() {
 282         for (Map.Entry<String,TestFunction> entry : tests.entrySet()) {
 283             String name = entry.getKey();
 284             TestFunction test = entry.getValue();
 285             // Recall gold value from before compilation
 286             Object[] gold = golds.get(name);
 287             // Compute new result
 288             Object[] result = test.run();
 289             // Compare gold and new result
 290             verify(name, gold, result);
 291         }
 292     }
 293 
 294     static byte[] generateB() {
 295         byte[] a = new byte[RANGE];
 296         for (int i = 0; i < a.length; i++) {
 297             a[i] = (byte)RANDOM.nextInt();
 298         }
 299         return a;
 300     }
 301 
 302     static short[] generateS() {
 303         short[] a = new short[RANGE];
 304         for (int i = 0; i < a.length; i++) {
 305             a[i] = (short)RANDOM.nextInt();
 306         }
 307         return a;
 308     }
 309 
 310     static int[] generateI() {
 311         int[] a = new int[RANGE];
 312         for (int i = 0; i < a.length; i++) {
 313             a[i] = RANDOM.nextInt();
 314         }
 315         return a;
 316     }
 317 
 318     static long[] generateL() {
 319         long[] a = new long[RANGE];
 320         for (int i = 0; i < a.length; i++) {
 321             a[i] = RANDOM.nextLong();
 322         }
 323         return a;
 324     }
 325 
 326     static void verify(String name, Object[] gold, Object[] result) {
 327         if (gold.length != result.length) {
 328             throw new RuntimeException("verify " + name + ": not the same number of outputs: gold.length = " +
 329                                        gold.length + ", result.length = " + result.length);
 330         }
 331         for (int i = 0; i < gold.length; i++) {
 332             Object g = gold[i];
 333             Object r = result[i];
 334             if (g.getClass() != r.getClass() || !g.getClass().isArray() || !r.getClass().isArray()) {
 335                 throw new RuntimeException("verify " + name + ": must both be array of same type:" +
 336                                            " gold[" + i + "].getClass() = " + g.getClass().getSimpleName() +
 337                                            " result[" + i + "].getClass() = " + r.getClass().getSimpleName());
 338             }
 339             if (g == r) {
 340                 throw new RuntimeException("verify " + name + ": should be two separate arrays (with identical content):" +
 341                                            " gold[" + i + "] == result[" + i + "]");
 342             }
 343             if (Array.getLength(g) != Array.getLength(r)) {
 344                     throw new RuntimeException("verify " + name + ": arrays must have same length:" +
 345                                            " gold[" + i + "].length = " + Array.getLength(g) +
 346                                            " result[" + i + "].length = " + Array.getLength(r));
 347             }
 348             Class c = g.getClass().getComponentType();
 349             if (c == byte.class) {
 350                 verifyB(name, i, (byte[])g, (byte[])r);
 351             } else if (c == short.class) {
 352                 verifyS(name, i, (short[])g, (short[])r);
 353             } else if (c == int.class) {
 354                 verifyI(name, i, (int[])g, (int[])r);
 355             } else if (c == long.class) {
 356                 verifyL(name, i, (long[])g, (long[])r);
 357             } else {
 358                 throw new RuntimeException("verify " + name + ": array type not supported for verify:" +
 359                                        " gold[" + i + "].getClass() = " + g.getClass().getSimpleName() +
 360                                        " result[" + i + "].getClass() = " + r.getClass().getSimpleName());
 361             }
 362         }
 363     }
 364 
 365     static void verifyB(String name, int i, byte[] g, byte[] r) {
 366         for (int j = 0; j < g.length; j++) {
 367             if (g[j] != r[j]) {
 368                 throw new RuntimeException("verify " + name + ": arrays must have same content:" +
 369                                            " gold[" + i + "][" + j + "] = " + g[j] +
 370                                            " result[" + i + "][" + j + "] = " + r[j]);
 371             }
 372         }
 373     }
 374 
 375     static void verifyS(String name, int i, short[] g, short[] r) {
 376         for (int j = 0; j < g.length; j++) {
 377             if (g[j] != r[j]) {
 378                 throw new RuntimeException("verify " + name + ": arrays must have same content:" +
 379                                            " gold[" + i + "][" + j + "] = " + g[j] +
 380                                            " result[" + i + "][" + j + "] = " + r[j]);
 381             }
 382         }
 383     }
 384 
 385     static void verifyI(String name, int i, int[] g, int[] r) {
 386         for (int j = 0; j < g.length; j++) {
 387             if (g[j] != r[j]) {
 388                 throw new RuntimeException("verify " + name + ": arrays must have same content:" +
 389                                            " gold[" + i + "][" + j + "] = " + g[j] +
 390                                            " result[" + i + "][" + j + "] = " + r[j]);
 391             }
 392         }
 393     }
 394 
 395     static void verifyL(String name, int i, long[] g, long[] r) {
 396         for (int j = 0; j < g.length; j++) {
 397             if (g[j] != r[j]) {
 398                 throw new RuntimeException("verify " + name + ": arrays must have same content:" +
 399                                            " gold[" + i + "][" + j + "] = " + g[j] +
 400                                            " result[" + i + "][" + j + "] = " + r[j]);
 401             }
 402         }
 403     }
 404 
 405     @Test
 406     @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
 407                   IRNode.AND_VB,        IRNode.VECTOR_SIZE_4, "> 0",
 408                   IRNode.STORE_VECTOR, "> 0"},
 409         applyIf = {"MaxVectorSize", ">=8"},
 410         applyIfPlatform = {"64-bit", "true"},
 411         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
 412     static Object[] test0(byte[] a, byte[] b, byte mask) {
 413         for (int i = 0; i < RANGE; i+=8) {
 414             // Safe to vectorize with AlignVector
 415             b[i+0] = (byte)(a[i+0] & mask); // offset 0, align 0
 416             b[i+1] = (byte)(a[i+1] & mask);
 417             b[i+2] = (byte)(a[i+2] & mask);
 418             b[i+3] = (byte)(a[i+3] & mask);
 419         }
 420         return new Object[]{ a, b };
 421     }
 422 
 423     @Test
 424     @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
 425                   IRNode.AND_VB, "> 0",
 426                   IRNode.STORE_VECTOR, "> 0"},
 427         applyIfPlatform = {"64-bit", "true"},
 428         applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"})
 429     static Object[] test1(byte[] a, byte[] b, byte mask) {
 430         for (int i = 0; i < RANGE; i+=8) {
 431             b[i+0] = (byte)(a[i+0] & mask); // adr = base + UNSAFE.ARRAY_BYTE_BASE_OFFSET + 0 + iter*8
 432             b[i+1] = (byte)(a[i+1] & mask);
 433             b[i+2] = (byte)(a[i+2] & mask);
 434             b[i+3] = (byte)(a[i+3] & mask);
 435             b[i+4] = (byte)(a[i+4] & mask);
 436             b[i+5] = (byte)(a[i+5] & mask);
 437             b[i+6] = (byte)(a[i+6] & mask);
 438             b[i+7] = (byte)(a[i+7] & mask);
 439         }
 440         return new Object[]{ a, b };
 441     }
 442 
 443     @Test
 444     @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
 445                   IRNode.AND_VB,        IRNode.VECTOR_SIZE_4, "> 0",
 446                   IRNode.STORE_VECTOR, "> 0"},
 447         applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"},
 448         applyIfPlatform = {"64-bit", "true"},
 449         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
 450     @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
 451                   IRNode.AND_VB, "= 0",
 452                   IRNode.STORE_VECTOR, "= 0"},
 453         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
 454         applyIfPlatform = {"64-bit", "true"},
 455         applyIf = {"AlignVector", "true"})
 456     static Object[] test2(byte[] a, byte[] b, byte mask) {
 457         for (int i = 0; i < RANGE; i+=8) {
 458             // Cannot align with AlignVector: 3 + x * 8 % 8 = 3
 459             b[i+3] = (byte)(a[i+3] & mask); // at alignment 3
 460             b[i+4] = (byte)(a[i+4] & mask);
 461             b[i+5] = (byte)(a[i+5] & mask);
 462             b[i+6] = (byte)(a[i+6] & mask);
 463         }
 464         return new Object[]{ a, b };
 465     }
 466 
 467     @Test
 468     @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
 469                   IRNode.AND_VB,        IRNode.VECTOR_SIZE_4, "> 0",
 470                   IRNode.STORE_VECTOR, "> 0"},
 471         applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"},
 472         applyIfPlatform = {"64-bit", "true"},
 473         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
 474     @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
 475                   IRNode.AND_VB, "= 0",
 476                   IRNode.STORE_VECTOR, "= 0"},
 477         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
 478         applyIfPlatform = {"64-bit", "true"},
 479         applyIf = {"AlignVector", "true"})
 480     static Object[] test3(byte[] a, byte[] b, byte mask) {
 481         for (int i = 0; i < RANGE; i+=8) {
 482             // Cannot align with AlignVector: 3 + x * 8 % 8 = 3
 483 
 484             // Problematic for AlignVector
 485             b[i+0] = (byte)(a[i+0] & mask); // best_memref, align 0
 486 
 487             b[i+3] = (byte)(a[i+3] & mask); // pack at offset 3 bytes
 488             b[i+4] = (byte)(a[i+4] & mask);
 489             b[i+5] = (byte)(a[i+5] & mask);
 490             b[i+6] = (byte)(a[i+6] & mask);
 491         }
 492         return new Object[]{ a, b };
 493     }
 494 
 495     @Test
 496     @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
 497                   IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_8, "> 0",
 498                   IRNode.AND_VB,        IRNode.VECTOR_SIZE_4, "> 0",
 499                   IRNode.AND_VB,        IRNode.VECTOR_SIZE_8, "> 0",
 500                   IRNode.STORE_VECTOR, "> 0"},
 501         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
 502         applyIfPlatform = {"64-bit", "true"},
 503         applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=16"})
 504     @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
 505                   IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_8, "= 0",// unaligned
 506                   IRNode.AND_VB,        IRNode.VECTOR_SIZE_4, "> 0",
 507                   IRNode.AND_VB,        IRNode.VECTOR_SIZE_8, "= 0",// unaligned
 508                   IRNode.STORE_VECTOR, "> 0"},
 509         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
 510         applyIfPlatform = {"64-bit", "true"},
 511         applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">=16"})
 512     static Object[] test4(byte[] a, byte[] b, byte mask) {
 513         for (int i = 0; i < RANGE/16; i++) {
 514             // Problematic for AlignVector
 515             b[i*16 + 0 ] = (byte)(a[i*16 + 0 ] & mask); // 4 pack, 0 aligned
 516             b[i*16 + 1 ] = (byte)(a[i*16 + 1 ] & mask);
 517             b[i*16 + 2 ] = (byte)(a[i*16 + 2 ] & mask);
 518             b[i*16 + 3 ] = (byte)(a[i*16 + 3 ] & mask);
 519 
 520             b[i*16 + 5 ] = (byte)(a[i*16 + 5 ] & mask); // 8 pack, 5 aligned
 521             b[i*16 + 6 ] = (byte)(a[i*16 + 6 ] & mask);
 522             b[i*16 + 7 ] = (byte)(a[i*16 + 7 ] & mask);
 523             b[i*16 + 8 ] = (byte)(a[i*16 + 8 ] & mask);
 524             b[i*16 + 9 ] = (byte)(a[i*16 + 9 ] & mask);
 525             b[i*16 + 10] = (byte)(a[i*16 + 10] & mask);
 526             b[i*16 + 11] = (byte)(a[i*16 + 11] & mask);
 527             b[i*16 + 12] = (byte)(a[i*16 + 12] & mask);
 528         }
 529         return new Object[]{ a, b };
 530     }
 531 
 532     @Test
 533     @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
 534                   IRNode.AND_VB,        IRNode.VECTOR_SIZE_4, "> 0",
 535                   IRNode.STORE_VECTOR, "> 0"},
 536         applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"},
 537         applyIfPlatform = {"64-bit", "true"},
 538         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
 539     @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
 540                   IRNode.AND_VB, "= 0",
 541                   IRNode.STORE_VECTOR, "= 0"},
 542         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
 543         applyIfPlatform = {"64-bit", "true"},
 544         applyIf = {"AlignVector", "true"})
 545     static Object[] test5(byte[] a, byte[] b, byte mask, int inv) {
 546         for (int i = 0; i < RANGE; i+=8) {
 547             // Cannot align with AlignVector because of invariant
 548             b[i+inv+0] = (byte)(a[i+inv+0] & mask);
 549 
 550             b[i+inv+3] = (byte)(a[i+inv+3] & mask);
 551             b[i+inv+4] = (byte)(a[i+inv+4] & mask);
 552             b[i+inv+5] = (byte)(a[i+inv+5] & mask);
 553             b[i+inv+6] = (byte)(a[i+inv+6] & mask);
 554         }
 555         return new Object[]{ a, b };
 556     }
 557 
 558     @Test
 559     @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
 560                   IRNode.AND_VB,        IRNode.VECTOR_SIZE_4, "> 0",
 561                   IRNode.STORE_VECTOR, "> 0"},
 562         applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"},
 563         applyIfPlatform = {"64-bit", "true"},
 564         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
 565     @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
 566                   IRNode.AND_VB, "= 0",
 567                   IRNode.STORE_VECTOR, "= 0"},
 568         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
 569         applyIfPlatform = {"64-bit", "true"},
 570         applyIf = {"AlignVector", "true"})
 571     static Object[] test6(byte[] a, byte[] b, byte mask) {
 572         for (int i = 0; i < RANGE/8; i+=2) {
 573             // Cannot align with AlignVector because offset is odd
 574             b[i*4+0] = (byte)(a[i*4+0] & mask);
 575 
 576             b[i*4+3] = (byte)(a[i*4+3] & mask);
 577             b[i*4+4] = (byte)(a[i*4+4] & mask);
 578             b[i*4+5] = (byte)(a[i*4+5] & mask);
 579             b[i*4+6] = (byte)(a[i*4+6] & mask);
 580         }
 581         return new Object[]{ a, b };
 582     }
 583 
 584     @Test
 585     @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
 586                   IRNode.AND_VS,        IRNode.VECTOR_SIZE_4, "> 0",
 587                   IRNode.STORE_VECTOR, "> 0"},
 588         applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=16"},
 589         applyIfPlatform = {"64-bit", "true"},
 590         applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"})
 591     @IR(counts = {IRNode.LOAD_VECTOR_S, "= 0",
 592                   IRNode.AND_VS, "= 0",
 593                   IRNode.STORE_VECTOR, "= 0"},
 594         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
 595         applyIfPlatform = {"64-bit", "true"},
 596         applyIf = {"AlignVector", "true"})
 597     static Object[] test7(short[] a, short[] b, short mask) {
 598         for (int i = 0; i < RANGE/8; i+=2) {
 599             // Cannot align with AlignVector because offset is odd
 600             b[i*4+0] = (short)(a[i*4+0] & mask);
 601 
 602             b[i*4+3] = (short)(a[i*4+3] & mask);
 603             b[i*4+4] = (short)(a[i*4+4] & mask);
 604             b[i*4+5] = (short)(a[i*4+5] & mask);
 605             b[i*4+6] = (short)(a[i*4+6] & mask);
 606         }
 607         return new Object[]{ a, b };
 608     }
 609 
 610     @Test
 611     @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
 612                   IRNode.AND_VB,        IRNode.VECTOR_SIZE_4, "> 0",
 613                   IRNode.STORE_VECTOR, "> 0"},
 614         applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"},
 615         applyIfPlatform = {"64-bit", "true"},
 616         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
 617     @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
 618                   IRNode.AND_VB, "= 0",
 619                   IRNode.STORE_VECTOR, "= 0"},
 620         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
 621         applyIfPlatform = {"64-bit", "true"},
 622         applyIf = {"AlignVector", "true"})
 623     static Object[] test8(byte[] a, byte[] b, byte mask, int init) {
 624         for (int i = init; i < RANGE; i+=8) {
 625             // Cannot align with AlignVector because of invariant (variable init becomes invar)
 626             b[i+0] = (byte)(a[i+0] & mask);
 627 
 628             b[i+3] = (byte)(a[i+3] & mask);
 629             b[i+4] = (byte)(a[i+4] & mask);
 630             b[i+5] = (byte)(a[i+5] & mask);
 631             b[i+6] = (byte)(a[i+6] & mask);
 632         }
 633         return new Object[]{ a, b };
 634     }
 635 
 636     @Test
 637     @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
 638                   IRNode.AND_VB,        IRNode.VECTOR_SIZE_4, "> 0",
 639                   IRNode.STORE_VECTOR, "> 0"},
 640         applyIf = {"MaxVectorSize", ">=8"},
 641         applyIfPlatform = {"64-bit", "true"},
 642         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
 643     static Object[] test9(byte[] a, byte[] b, byte mask) {
             // Stores a[k] & mask into b[k] at offsets 0 and 3..6 of every 8-element step,
             // with a constant start index of 13. The IR rule above requires 4-element byte
             // load/and vectors plus a vector store; it carries no AlignVector condition.
             // Returns both arrays wrapped in an Object[].
 644         // known non-zero init value does not affect offset, but has implicit effect on iv
 645         for (int i = 13; i < RANGE-8; i+=8) {
 646             b[i+0] = (byte)(a[i+0] & mask);
 647 
 648             b[i+3] = (byte)(a[i+3] & mask);
 649             b[i+4] = (byte)(a[i+4] & mask);
 650             b[i+5] = (byte)(a[i+5] & mask);
 651             b[i+6] = (byte)(a[i+6] & mask);
 652         }
 653         return new Object[]{ a, b };
 654     }
 655 
 656     @Test
 657     @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
 658                   IRNode.AND_VB,        IRNode.VECTOR_SIZE_4, "> 0",
 659                   IRNode.STORE_VECTOR, "> 0"},
 660         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
 661         applyIfPlatform = {"64-bit", "true"},
 662         applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"})
 663     @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
 664                   IRNode.AND_VB, "= 0",
 665                   IRNode.STORE_VECTOR, "= 0"},
 666         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
 667         applyIfPlatform = {"64-bit", "true"},
 668         applyIf = {"AlignVector", "true"})
 669     static Object[] test10a(byte[] a, byte[] b, byte mask) {
             // Stores a[k] & mask into b[k] at offsets 0..3 of every 8-element step, with a
             // constant start index of 3. Per the two IR rules above: 4-element byte vectors
             // are required when AlignVector is false (and MaxVectorSize >= 8), and no vector
             // load/and/store may appear when AlignVector is true.
             // Returns both arrays wrapped in an Object[].
 670         // This is not alignable with pre-loop, because of odd init.
 671         for (int i = 3; i < RANGE-8; i+=8) {
 672             b[i+0] = (byte)(a[i+0] & mask);
 673             b[i+1] = (byte)(a[i+1] & mask);
 674             b[i+2] = (byte)(a[i+2] & mask);
 675             b[i+3] = (byte)(a[i+3] & mask);
 676         }
 677         return new Object[]{ a, b };
 678     }
 679 
 680     @Test
 681     @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
 682                   IRNode.AND_VB,        IRNode.VECTOR_SIZE_4, "> 0",
 683                   IRNode.STORE_VECTOR, "> 0"},
 684         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
 685         applyIfPlatform = {"64-bit", "true"},
 686         applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"})
 687     @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
 688                   IRNode.AND_VB, "= 0",
 689                   IRNode.STORE_VECTOR, "= 0"},
 690         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
 691         applyIfPlatform = {"64-bit", "true"},
 692         applyIf = {"AlignVector", "true"})
 693     static Object[] test10b(byte[] a, byte[] b, byte mask) {
             // Same access pattern as offsets 0..3 per 8-element step, but with a constant
             // start index of 13. Per the two IR rules above: 4-element byte vectors are
             // required when AlignVector is false (and MaxVectorSize >= 8), and no vector
             // load/and/store may appear when AlignVector is true.
             // Returns both arrays wrapped in an Object[].
 694         // This is not alignable with pre-loop, because of odd init.
 695         // Seems not correctly handled.
 696         for (int i = 13; i < RANGE-8; i+=8) {
 697             b[i+0] = (byte)(a[i+0] & mask);
 698             b[i+1] = (byte)(a[i+1] & mask);
 699             b[i+2] = (byte)(a[i+2] & mask);
 700             b[i+3] = (byte)(a[i+3] & mask);
 701         }
 702         return new Object[]{ a, b };
 703     }
 704 
 705     @Test
 706     @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
 707                   IRNode.AND_VS,        IRNode.VECTOR_SIZE_4, "> 0",
 708                   IRNode.STORE_VECTOR, "> 0"},
 709         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
 710         applyIfPlatform = {"64-bit", "true"},
 711         applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=16"})
 712     @IR(counts = {IRNode.LOAD_VECTOR_S, "= 0",
 713                   IRNode.AND_VS, "= 0",
 714                   IRNode.STORE_VECTOR, "= 0"},
 715         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
 716         applyIfPlatform = {"64-bit", "true"},
 717         applyIf = {"AlignVector", "true"})
 718     static Object[] test10c(short[] a, short[] b, short mask) {
             // short[] variant: stores a[k] & mask into b[k] at offsets 0..3 of every
             // 8-element step, with a constant start index of 13. Per the two IR rules above:
             // 4-element short vectors are required when AlignVector is false (and
             // MaxVectorSize >= 16), and no vector load/and/store may appear when
             // AlignVector is true. Returns both arrays wrapped in an Object[].
 719         // This is not alignable with pre-loop, because of odd init.
 720         // Seems not correctly handled with MaxVectorSize >= 32.
 721         for (int i = 13; i < RANGE-8; i+=8) {
 722             b[i+0] = (short)(a[i+0] & mask);
 723             b[i+1] = (short)(a[i+1] & mask);
 724             b[i+2] = (short)(a[i+2] & mask);
 725             b[i+3] = (short)(a[i+3] & mask);
 726         }
 727         return new Object[]{ a, b };
 728     }
 729 
 730     @Test
 731     @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
 732                   IRNode.AND_VS,        IRNode.VECTOR_SIZE_4, "> 0",
 733                   IRNode.STORE_VECTOR, "> 0"},
 734         applyIf = {"MaxVectorSize", ">=16"},
 735         applyIfPlatform = {"64-bit", "true"},
 736         applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"})
 737     static Object[] test10d(short[] a, short[] b, short mask) {
             // short[] variant: stores a[k] & mask into b[k] at offsets 3..6 of every
             // 8-element step, with a constant start index of 13 (so the first element
             // touched is index 16). The single IR rule requires 4-element short vectors when
             // MaxVectorSize >= 16 and carries no AlignVector condition.
             // Returns both arrays wrapped in an Object[].
 738         for (int i = 13; i < RANGE-16; i+=8) {
 739             // adr = base + UNSAFE.ARRAY_SHORT_BASE_OFFSET + 2*(3 + 13) + iter*16
 740             b[i+0+3] = (short)(a[i+0+3] & mask);
 741             b[i+1+3] = (short)(a[i+1+3] & mask);
 742             b[i+2+3] = (short)(a[i+2+3] & mask);
 743             b[i+3+3] = (short)(a[i+3+3] & mask);
 744         }
 745         return new Object[]{ a, b };
 746     }
 747 
 748     @Test
 749     @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
 750                   IRNode.AND_VB, "> 0",
 751                   IRNode.STORE_VECTOR, "> 0"},
 752         applyIfPlatform = {"64-bit", "true"},
 753         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
 754     static Object[] test11aB(byte[] a, byte[] b, byte mask) {
             // Element-wise b[i] = a[i] & mask over [0, RANGE) on byte arrays. The IR rule
             // above requires vector load/and/store nodes and carries no AlignVector or
             // MaxVectorSize condition. Returns both arrays wrapped in an Object[].
 755         for (int i = 0; i < RANGE; i++) {
 756             // always alignable
 757             b[i+0] = (byte)(a[i+0] & mask);
 758         }
 759         return new Object[]{ a, b };
 760     }
 761 
 762     @Test
 763     @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
 764                   IRNode.AND_VS, "> 0",
 765                   IRNode.STORE_VECTOR, "> 0"},
 766         applyIfPlatform = {"64-bit", "true"},
 767         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
 768     static Object[] test11aS(short[] a, short[] b, short mask) {
             // Element-wise b[i] = a[i] & mask over [0, RANGE) on short arrays. The IR rule
             // above requires vector load/and/store nodes and carries no AlignVector or
             // MaxVectorSize condition. Returns both arrays wrapped in an Object[].
 769         for (int i = 0; i < RANGE; i++) {
 770             // always alignable
 771             b[i+0] = (short)(a[i+0] & mask);
 772         }
 773         return new Object[]{ a, b };
 774     }
 775 
 776     @Test
 777     @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
 778                   IRNode.AND_VI, "> 0",
 779                   IRNode.STORE_VECTOR, "> 0"},
 780         applyIfPlatform = {"64-bit", "true"},
 781         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
 782     static Object[] test11aI(int[] a, int[] b, int mask) {
             // Element-wise b[i] = a[i] & mask over [0, RANGE) on int arrays. The IR rule
             // above requires vector load/and/store nodes and carries no AlignVector or
             // MaxVectorSize condition. Returns both arrays wrapped in an Object[].
 783         for (int i = 0; i < RANGE; i++) {
 784             // always alignable
 785             b[i+0] = (int)(a[i+0] & mask);
 786         }
 787         return new Object[]{ a, b };
 788     }
 789 
 790     @Test
 791     @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0",
 792                   IRNode.AND_VL, "> 0",
 793                   IRNode.STORE_VECTOR, "> 0"},
 794         applyIfPlatform = {"64-bit", "true"},
 795         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
 796     static Object[] test11aL(long[] a, long[] b, long mask) {
             // Element-wise b[i] = a[i] & mask over [0, RANGE) on long arrays. The IR rule
             // above requires vector load/and/store nodes and carries no AlignVector or
             // MaxVectorSize condition. Returns both arrays wrapped in an Object[].
 797         for (int i = 0; i < RANGE; i++) {
 798             // always alignable
 799             b[i+0] = (long)(a[i+0] & mask);
 800         }
 801         return new Object[]{ a, b };
 802     }
 803 
 804     @Test
 805     @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
 806                   IRNode.AND_VB, "> 0",
 807                   IRNode.STORE_VECTOR, "> 0"},
 808         applyIfPlatform = {"64-bit", "true"},
 809         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
 810     static Object[] test11bB(byte[] a, byte[] b, byte mask) {
             // Element-wise b[i] = a[i] & mask on byte arrays, starting at index 1 instead
             // of 0 (range [1, RANGE)). The IR rule above requires vector load/and/store
             // nodes and carries no AlignVector condition.
             // Returns both arrays wrapped in an Object[].
 811         for (int i = 1; i < RANGE; i++) {
 812             // always alignable
 813             b[i+0] = (byte)(a[i+0] & mask);
 814         }
 815         return new Object[]{ a, b };
 816     }
 817 
 818     @Test
 819     @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
 820                   IRNode.AND_VS, "> 0",
 821                   IRNode.STORE_VECTOR, "> 0"},
 822         applyIfPlatform = {"64-bit", "true"},
 823         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
 824     static Object[] test11bS(short[] a, short[] b, short mask) {
             // Element-wise b[i] = a[i] & mask on short arrays, starting at index 1 instead
             // of 0 (range [1, RANGE)). The IR rule above requires vector load/and/store
             // nodes and carries no AlignVector condition.
             // Returns both arrays wrapped in an Object[].
 825         for (int i = 1; i < RANGE; i++) {
 826             // always alignable
 827             b[i+0] = (short)(a[i+0] & mask);
 828         }
 829         return new Object[]{ a, b };
 830     }
 831 
 832     @Test
 833     @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
 834                   IRNode.AND_VI, "> 0",
 835                   IRNode.STORE_VECTOR, "> 0"},
 836         applyIfPlatform = {"64-bit", "true"},
 837         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
 838     static Object[] test11bI(int[] a, int[] b, int mask) {
             // Element-wise b[i] = a[i] & mask on int arrays, starting at index 1 instead
             // of 0 (range [1, RANGE)). The IR rule above requires vector load/and/store
             // nodes and carries no AlignVector condition.
             // Returns both arrays wrapped in an Object[].
 839         for (int i = 1; i < RANGE; i++) {
 840             // always alignable
 841             b[i+0] = (int)(a[i+0] & mask);
 842         }
 843         return new Object[]{ a, b };
 844     }
 845 
 846     @Test
 847     @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0",
 848                   IRNode.AND_VL, "> 0",
 849                   IRNode.STORE_VECTOR, "> 0"},
 850         applyIfPlatform = {"64-bit", "true"},
 851         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
 852     static Object[] test11bL(long[] a, long[] b, long mask) {
             // Element-wise b[i] = a[i] & mask on long arrays, starting at index 1 instead
             // of 0 (range [1, RANGE)). The IR rule above requires vector load/and/store
             // nodes and carries no AlignVector condition.
             // Returns both arrays wrapped in an Object[].
 853         for (int i = 1; i < RANGE; i++) {
 854             // always alignable
 855             b[i+0] = (long)(a[i+0] & mask);
 856         }
 857         return new Object[]{ a, b };
 858     }
 859 
 860     @Test
 861     @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
 862                   IRNode.AND_VB, "> 0",
 863                   IRNode.STORE_VECTOR, "> 0"},
 864         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
 865         applyIfPlatform = {"64-bit", "true"},
 866         applyIf = {"AlignVector", "false"})
 867     @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
 868                   IRNode.AND_VB, "= 0",
 869                   IRNode.STORE_VECTOR, "= 0"},
 870         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
 871         applyIfPlatform = {"64-bit", "true"},
 872         applyIf = {"AlignVector", "true"})
 873     static Object[] test11cB(byte[] a, byte[] b, byte mask) {
             // b[i] = a[i+1] & mask for i in [1, RANGE-1): the load index is one byte element
             // ahead of the store index. Per the two IR rules above, vector nodes are required
             // when AlignVector is false and must be absent when AlignVector is true.
             // Returns both arrays wrapped in an Object[].
 874         for (int i = 1; i < RANGE-1; i++) {
 875             // 1 byte offset -> not alignable with AlignVector
 876             b[i+0] = (byte)(a[i+1] & mask);
 877         }
 878         return new Object[]{ a, b };
 879     }
 880 
 881     @Test
 882     @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
 883                   IRNode.AND_VS, "> 0",
 884                   IRNode.STORE_VECTOR, "> 0"},
 885         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
 886         applyIfPlatform = {"64-bit", "true"},
 887         applyIf = {"AlignVector", "false"})
 888     @IR(counts = {IRNode.LOAD_VECTOR_S, "= 0",
 889                   IRNode.AND_VS, "= 0",
 890                   IRNode.STORE_VECTOR, "= 0"},
 891         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
 892         applyIfPlatform = {"64-bit", "true"},
 893         applyIf = {"AlignVector", "true"})
 894     static Object[] test11cS(short[] a, short[] b, short mask) {
             // b[i] = a[i+1] & mask for i in [1, RANGE-1): the load index is one short
             // element ahead of the store index. Per the two IR rules above, vector nodes are
             // required when AlignVector is false and must be absent when AlignVector is true.
             // Returns both arrays wrapped in an Object[].
 895         for (int i = 1; i < RANGE-1; i++) {
 896             // 2 byte offset -> not alignable with AlignVector
 897             b[i+0] = (short)(a[i+1] & mask);
 898         }
 899         return new Object[]{ a, b };
 900     }
 901 
 902     @Test
 903     @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
 904                   IRNode.AND_VI, "> 0",
 905                   IRNode.STORE_VECTOR, "> 0"},
 906         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
 907         applyIfPlatform = {"64-bit", "true"},
 908         applyIf = {"AlignVector", "false"})
 909     @IR(counts = {IRNode.LOAD_VECTOR_I, "= 0",
 910                   IRNode.AND_VI, "= 0",
 911                   IRNode.STORE_VECTOR, "= 0"},
 912         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
 913         applyIfPlatform = {"64-bit", "true"},
 914         applyIf = {"AlignVector", "true"})
 915     static Object[] test11cI(int[] a, int[] b, int mask) {
             // b[i] = a[i+1] & mask for i in [1, RANGE-1): the load index is one int element
             // ahead of the store index. Per the two IR rules above, vector nodes are required
             // when AlignVector is false and must be absent when AlignVector is true.
             // Returns both arrays wrapped in an Object[].
 916         for (int i = 1; i < RANGE-1; i++) {
 917             // 4 byte offset -> not alignable with AlignVector
 918             b[i+0] = (int)(a[i+1] & mask);
 919         }
 920         return new Object[]{ a, b };
 921     }
 922 
 923     @Test
 924     @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0",
 925                   IRNode.AND_VL, "> 0",
 926                   IRNode.STORE_VECTOR, "> 0"},
 927         applyIfPlatform = {"64-bit", "true"},
 928         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
 929     static Object[] test11cL(long[] a, long[] b, long mask) {
             // b[i] = a[i+1] & mask for i in [1, RANGE-1): the load index is one long element
             // ahead of the store index. Unlike the byte/short/int variants, the single IR
             // rule above requires vector nodes without any AlignVector condition.
             // Returns both arrays wrapped in an Object[].
 930         for (int i = 1; i < RANGE-1; i++) {
 931             // always alignable (8 byte offset)
 932             b[i+0] = (long)(a[i+1] & mask);
 933         }
 934         return new Object[]{ a, b };
 935     }
 936 
 937     @Test
 938     @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
 939                   IRNode.AND_VB, "> 0",
 940                   IRNode.STORE_VECTOR, "> 0"},
 941         applyIfPlatform = {"64-bit", "true"},
 942         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
 943     static Object[] test11dB(byte[] a, byte[] b, byte mask, int invar) {
             // b[i+invar] = a[i+invar] & mask for i in [0, RANGE) on byte arrays, where invar
             // is a method argument that does not change inside the loop. The IR rule above
             // requires vector load/and/store nodes and carries no AlignVector condition.
             // NOTE(review): the caller must pick invar so i+invar stays in bounds; the
             // caller is outside this chunk. Returns both arrays wrapped in an Object[].
 944         for (int i = 0; i < RANGE; i++) {
 945             b[i+0+invar] = (byte)(a[i+0+invar] & mask);
 946         }
 947         return new Object[]{ a, b };
 948     }
 949 
 950     @Test
 951     @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
 952                   IRNode.AND_VS, "> 0",
 953                   IRNode.STORE_VECTOR, "> 0"},
 954         applyIfPlatform = {"64-bit", "true"},
 955         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
 956     static Object[] test11dS(short[] a, short[] b, short mask, int invar) {
             // b[i+invar] = a[i+invar] & mask for i in [0, RANGE) on short arrays, where invar
             // is a method argument that does not change inside the loop. The IR rule above
             // requires vector load/and/store nodes and carries no AlignVector condition.
             // NOTE(review): the caller must pick invar so i+invar stays in bounds; the
             // caller is outside this chunk. Returns both arrays wrapped in an Object[].
 957         for (int i = 0; i < RANGE; i++) {
 958             b[i+0+invar] = (short)(a[i+0+invar] & mask);
 959         }
 960         return new Object[]{ a, b };
 961     }
 962 
 963     @Test
 964     @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
 965                   IRNode.AND_VI, "> 0",
 966                   IRNode.STORE_VECTOR, "> 0"},
 967         applyIfPlatform = {"64-bit", "true"},
 968         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
 969     static Object[] test11dI(int[] a, int[] b, int mask, int invar) {
             // b[i+invar] = a[i+invar] & mask for i in [0, RANGE) on int arrays, where invar
             // is a method argument that does not change inside the loop. The IR rule above
             // requires vector load/and/store nodes and carries no AlignVector condition.
             // NOTE(review): the caller must pick invar so i+invar stays in bounds; the
             // caller is outside this chunk. Returns both arrays wrapped in an Object[].
 970         for (int i = 0; i < RANGE; i++) {
 971             b[i+0+invar] = (int)(a[i+0+invar] & mask);
 972         }
 973         return new Object[]{ a, b };
 974     }
 975 
 976     @Test
 977     @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0",
 978                   IRNode.AND_VL, "> 0",
 979                   IRNode.STORE_VECTOR, "> 0"},
 980         applyIfPlatform = {"64-bit", "true"},
 981         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
 982     static Object[] test11dL(long[] a, long[] b, long mask, int invar) {
             // b[i+invar] = a[i+invar] & mask for i in [0, RANGE) on long arrays, where invar
             // is a method argument that does not change inside the loop. The IR rule above
             // requires vector load/and/store nodes and carries no AlignVector condition.
             // NOTE(review): the caller must pick invar so i+invar stays in bounds; the
             // caller is outside this chunk. Returns both arrays wrapped in an Object[].
 983         for (int i = 0; i < RANGE; i++) {
 984             b[i+0+invar] = (long)(a[i+0+invar] & mask);
 985         }
 986         return new Object[]{ a, b };
 987     }
 988 
 989     @Test
 990     @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
 991                   IRNode.AND_VB, "= 0",
 992                   IRNode.STORE_VECTOR, "= 0"},
 993         applyIfPlatform = {"64-bit", "true"},
 994         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
 995     static Object[] test12(byte[] a, byte[] b, byte mask) {
             // Masks four consecutive bytes at index i*6 + 0..3 for i in [0, RANGE/16), i.e.
             // the index is scaled by 6 per iteration. The IR rule above requires that no
             // vector load/and/store node is generated, independent of AlignVector.
             // Returns both arrays wrapped in an Object[].
 996         for (int i = 0; i < RANGE/16; i++) {
 997             // Currently does not vectorize at all
 998             b[i*6 + 0 ] = (byte)(a[i*6 + 0 ] & mask);
 999             b[i*6 + 1 ] = (byte)(a[i*6 + 1 ] & mask);
1000             b[i*6 + 2 ] = (byte)(a[i*6 + 2 ] & mask);
1001             b[i*6 + 3 ] = (byte)(a[i*6 + 3 ] & mask);
1002         }
1003         return new Object[]{ a, b };
1004     }
1005 
1006     @Test
1007     @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
1008                   IRNode.LOAD_VECTOR_L, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
1009                   IRNode.ADD_VI, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
1010                   IRNode.ADD_VL, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
1011                   IRNode.STORE_VECTOR, "> 0"},
1012         applyIfPlatform = {"64-bit", "true"},
1013         applyIfCPUFeatureOr = {"avx2", "true"})
1014     // require avx2 (see applyIfCPUFeatureOr) to ensure vectors are larger than what unrolling produces
1015     static Object[] test13aIL(int[] a, long[] b) {
             // Increments every element of an int[] and a long[] in one loop over [0, RANGE).
             // The IR rule above requires int and long vector loads/adds whose size satisfies
             // the "min(max_int, max_long)" constraint, plus a vector store.
             // Returns both arrays wrapped in an Object[].
1016         for (int i = 0; i < RANGE; i++) {
1017             a[i]++;
1018             b[i]++;
1019         }
1020         return new Object[]{ a, b };
1021     }
1022 
1023     @Test
1024     @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
1025                   IRNode.LOAD_VECTOR_I, "> 0",
1026                   IRNode.ADD_VB, "> 0",
1027                   IRNode.ADD_VI, "> 0",
1028                   IRNode.STORE_VECTOR, "> 0"},
1029         applyIfPlatform = {"64-bit", "true"},
1030         applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"})
1031     static Object[] test13aIB(int[] a, byte[] b) {
             // Increments every element of an int[] and a byte[] in one loop over [0, RANGE).
             // The IR rule above requires byte and int vector loads/adds plus a vector store,
             // with no AlignVector condition. Returns both arrays wrapped in an Object[].
1032         for (int i = 0; i < RANGE; i++) {
1033             a[i]++;
1034             b[i]++;
1035         }
1036         return new Object[]{ a, b };
1037     }
1038 
1039     @Test
1040     @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
1041                   IRNode.LOAD_VECTOR_S, "> 0",
1042                   IRNode.ADD_VI, "> 0",
1043                   IRNode.ADD_VS, "> 0",
1044                   IRNode.STORE_VECTOR, "> 0"},
1045         applyIfPlatform = {"64-bit", "true"},
1046         applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"})
1047     static Object[] test13aIS(int[] a, short[] b) {
             // Increments every element of an int[] and a short[] in one loop over [0, RANGE).
             // The IR rule above requires int and short vector loads/adds plus a vector store,
             // with no AlignVector condition. Returns both arrays wrapped in an Object[].
1048         for (int i = 0; i < RANGE; i++) {
1049             a[i]++;
1050             b[i]++;
1051         }
1052         return new Object[]{ a, b };
1053     }
1054 
1055     @Test
1056     @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
1057                   IRNode.LOAD_VECTOR_S, "> 0",
1058                   IRNode.LOAD_VECTOR_I, "> 0",
1059                   IRNode.LOAD_VECTOR_L, "> 0",
1060                   IRNode.ADD_VB, "> 0",
1061                   IRNode.ADD_VS, "> 0",
1062                   IRNode.ADD_VI, "> 0",
1063                   IRNode.ADD_VL, "> 0",
1064                   IRNode.STORE_VECTOR, "> 0"},
1065         applyIfPlatform = {"64-bit", "true"},
1066         applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"})
1067     static Object[] test13aBSIL(byte[] a, short[] b, int[] c, long[] d) {
             // Increments every element of a byte[], short[], int[] and long[] in one loop
             // over [0, RANGE). The IR rule above requires vector loads/adds for all four
             // element types plus a vector store, with no AlignVector condition.
             // Returns all four arrays wrapped in an Object[].
1068         for (int i = 0; i < RANGE; i++) {
1069             a[i]++;
1070             b[i]++;
1071             c[i]++;
1072             d[i]++;
1073         }
1074         return new Object[]{ a, b, c, d };
1075     }
1076 
1077     @Test
1078     @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
1079                   IRNode.LOAD_VECTOR_L, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
1080                   IRNode.ADD_VI, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
1081                   IRNode.ADD_VL, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
1082                   IRNode.STORE_VECTOR, "> 0"},
1083         applyIfPlatform = {"64-bit", "true"},
1084         applyIfCPUFeatureOr = {"avx2", "true"})
1085     // require avx2 (see applyIfCPUFeatureOr) to ensure vectors are larger than what unrolling produces
1086     static Object[] test13bIL(int[] a, long[] b) {
             // Increments every element of an int[] and a long[] in one loop, starting at
             // index 1 instead of 0 (range [1, RANGE)). The IR rule above requires int and
             // long vector loads/adds whose size satisfies the "min(max_int, max_long)"
             // constraint, plus a vector store. Returns both arrays wrapped in an Object[].
1087         for (int i = 1; i < RANGE; i++) {
1088             a[i]++;
1089             b[i]++;
1090         }
1091         return new Object[]{ a, b };
1092     }
1093 
1094     @Test
1095     @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
1096                   IRNode.LOAD_VECTOR_I, "> 0",
1097                   IRNode.ADD_VB, "> 0",
1098                   IRNode.ADD_VI, "> 0",
1099                   IRNode.STORE_VECTOR, "> 0"},
1100         applyIfPlatform = {"64-bit", "true"},
1101         applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"})
1102     static Object[] test13bIB(int[] a, byte[] b) {
             // Increments every element of an int[] and a byte[] in one loop, starting at
             // index 1 instead of 0 (range [1, RANGE)). The IR rule above requires byte and
             // int vector loads/adds plus a vector store, with no AlignVector condition.
             // Returns both arrays wrapped in an Object[].
1103         for (int i = 1; i < RANGE; i++) {
1104             a[i]++;
1105             b[i]++;
1106         }
1107         return new Object[]{ a, b };
1108     }
1109 
1110     @Test
1111     @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
1112                   IRNode.LOAD_VECTOR_S, "> 0",
1113                   IRNode.ADD_VI, "> 0",
1114                   IRNode.ADD_VS, "> 0",
1115                   IRNode.STORE_VECTOR, "> 0"},
1116         applyIfPlatform = {"64-bit", "true"},
1117         applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"})
1118     static Object[] test13bIS(int[] a, short[] b) {
             // Increments every element of an int[] and a short[] in one loop, starting at
             // index 1 instead of 0 (range [1, RANGE)). The IR rule above requires int and
             // short vector loads/adds plus a vector store, with no AlignVector condition.
             // Returns both arrays wrapped in an Object[].
1119         for (int i = 1; i < RANGE; i++) {
1120             a[i]++;
1121             b[i]++;
1122         }
1123         return new Object[]{ a, b };
1124     }
1125 
1126     @Test
1127     @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
1128                   IRNode.LOAD_VECTOR_S, "> 0",
1129                   IRNode.LOAD_VECTOR_I, "> 0",
1130                   IRNode.LOAD_VECTOR_L, "> 0",
1131                   IRNode.ADD_VB, "> 0",
1132                   IRNode.ADD_VS, "> 0",
1133                   IRNode.ADD_VI, "> 0",
1134                   IRNode.ADD_VL, "> 0",
1135                   IRNode.STORE_VECTOR, "> 0"},
1136         applyIfPlatform = {"64-bit", "true"},
1137         applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"})
1138     static Object[] test13bBSIL(byte[] a, short[] b, int[] c, long[] d) {
             // Increments every element of a byte[], short[], int[] and long[] in one loop,
             // starting at index 1 instead of 0 (range [1, RANGE)). The IR rule above requires
             // vector loads/adds for all four element types plus a vector store, with no
             // AlignVector condition. Returns all four arrays wrapped in an Object[].
1139         for (int i = 1; i < RANGE; i++) {
1140             a[i]++;
1141             b[i]++;
1142             c[i]++;
1143             d[i]++;
1144         }
1145         return new Object[]{ a, b, c, d };
1146     }
1147 
1148     @Test
1149     @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
1150                   IRNode.ADD_VB, "= 0",
1151                   IRNode.STORE_VECTOR, "= 0"},
1152         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
1153         applyIfPlatform = {"64-bit", "true"},
1154         applyIf = {"AlignVector", "false"})
1155     @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
1156                   IRNode.ADD_VB, "= 0",
1157                   IRNode.STORE_VECTOR, "= 0"},
1158         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
1159         applyIfPlatform = {"64-bit", "true"},
1160         applyIf = {"AlignVector", "true"})
1161     static Object[] test14aB(byte[] a) {
             // Increments a[i+0..15] (16 bytes) per iteration while i advances by 9, so
             // consecutive iterations touch overlapping elements. Both IR rules above (for
             // AlignVector false and true) require that no vector load/add/store is generated.
             // Returns the array wrapped in an Object[].
1162         // non-power-of-2 stride
1163         for (int i = 0; i < RANGE-20; i+=9) {
1164             // Since the stride is shorter than the vector length, there will be always
1165             // partial overlap of loads with previous stores, this leads to failure in
1166             // store-to-load-forwarding -> vectorization not profitable.
1167             a[i+0]++;
1168             a[i+1]++;
1169             a[i+2]++;
1170             a[i+3]++;
1171             a[i+4]++;
1172             a[i+5]++;
1173             a[i+6]++;
1174             a[i+7]++;
1175             a[i+8]++;
1176             a[i+9]++;
1177             a[i+10]++;
1178             a[i+11]++;
1179             a[i+12]++;
1180             a[i+13]++;
1181             a[i+14]++;
1182             a[i+15]++;
1183         }
1184         return new Object[]{ a };
1185     }
1186 
1187     @Test
1188     @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
1189                   IRNode.ADD_VB, "= 0",
1190                   IRNode.STORE_VECTOR, "= 0"},
1191         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
1192         applyIfPlatform = {"64-bit", "true"},
1193         applyIf = {"AlignVector", "false"})
1194     @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
1195                   IRNode.ADD_VB, "= 0",
1196                   IRNode.STORE_VECTOR, "= 0"},
1197         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
1198         applyIfPlatform = {"64-bit", "true"},
1199         applyIf = {"AlignVector", "true"})
1200     static Object[] test14bB(byte[] a) {
             // Increments a[i+0..15] (16 bytes) per iteration while i advances by 3, so
             // consecutive iterations touch overlapping elements. Both IR rules above (for
             // AlignVector false and true) require that no vector load/add/store is generated.
             // Returns the array wrapped in an Object[].
1201         // non-power-of-2 stride
1202         for (int i = 0; i < RANGE-20; i+=3) {
1203             // Since the stride is shorter than the vector length, there will be always
1204             // partial overlap of loads with previous stores, this leads to failure in
1205             // store-to-load-forwarding -> vectorization not profitable.
1206             a[i+0]++;
1207             a[i+1]++;
1208             a[i+2]++;
1209             a[i+3]++;
1210             a[i+4]++;
1211             a[i+5]++;
1212             a[i+6]++;
1213             a[i+7]++;
1214             a[i+8]++;
1215             a[i+9]++;
1216             a[i+10]++;
1217             a[i+11]++;
1218             a[i+12]++;
1219             a[i+13]++;
1220             a[i+14]++;
1221             a[i+15]++;
1222         }
1223         return new Object[]{ a };
1224     }
1225 
1226     @Test
1227     @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
1228                   IRNode.ADD_VB, "= 0",
1229                   IRNode.STORE_VECTOR, "= 0"},
1230         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
1231         applyIfPlatform = {"64-bit", "true"},
1232         applyIf = {"AlignVector", "false"})
1233     @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
1234                   IRNode.ADD_VB, "= 0",
1235                   IRNode.STORE_VECTOR, "= 0"},
1236         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
1237         applyIfPlatform = {"64-bit", "true"},
1238         applyIf = {"AlignVector", "true"})
1239     static Object[] test14cB(byte[] a) {
             // Increments a[i+0..15] (16 bytes) per iteration while i advances by 5, so
             // consecutive iterations touch overlapping elements. Both IR rules above (for
             // AlignVector false and true) require that no vector load/add/store is generated.
             // Returns the array wrapped in an Object[].
1240         // non-power-of-2 stride
1241         for (int i = 0; i < RANGE-20; i+=5) {
1242             // Since the stride is shorter than the vector length, there will be always
1243             // partial overlap of loads with previous stores, this leads to failure in
1244             // store-to-load-forwarding -> vectorization not profitable.
1245             a[i+0]++;
1246             a[i+1]++;
1247             a[i+2]++;
1248             a[i+3]++;
1249             a[i+4]++;
1250             a[i+5]++;
1251             a[i+6]++;
1252             a[i+7]++;
1253             a[i+8]++;
1254             a[i+9]++;
1255             a[i+10]++;
1256             a[i+11]++;
1257             a[i+12]++;
1258             a[i+13]++;
1259             a[i+14]++;
1260             a[i+15]++;
1261         }
1262         return new Object[]{ a };
1263     }
1264 
1265     @Test
1266     @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE + "min(max_byte, 8)", "> 0",
1267                   IRNode.ADD_VB,        IRNode.VECTOR_SIZE + "min(max_byte, 8)", "> 0",
1268                   IRNode.STORE_VECTOR,                                           "> 0"},
1269         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
1270         applyIfPlatform = {"64-bit", "true"},
1271         applyIf = {"AlignVector", "false"})
1272     @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
1273                   IRNode.ADD_VB, "= 0",
1274                   IRNode.STORE_VECTOR, "= 0"},
1275         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
1276         applyIfPlatform = {"64-bit", "true"},
1277         applyIf = {"AlignVector", "true"})
1278     static Object[] test14dB(byte[] a) {
             // Increments a[i+0..7] (8 bytes) per iteration while i advances by 9, so the
             // ranges touched by consecutive iterations do not overlap. Per the IR rules
             // above: byte vectors with size constraint "min(max_byte, 8)" are required when
             // AlignVector is false, and no vector nodes may appear when AlignVector is true.
             // Returns the array wrapped in an Object[].
1279         // non-power-of-2 stride
1280         for (int i = 0; i < RANGE-20; i+=9) {
1281             a[i+0]++;
1282             a[i+1]++;
1283             a[i+2]++;
1284             a[i+3]++;
1285             a[i+4]++;
1286             a[i+5]++;
1287             a[i+6]++;
1288             a[i+7]++;
1289         }
1290         return new Object[]{ a };
1291     }
1292 
1293     @Test
1294     @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE + "min(max_byte, 8)", "> 0",
1295                   IRNode.ADD_VB,        IRNode.VECTOR_SIZE + "min(max_byte, 8)", "> 0",
1296                   IRNode.STORE_VECTOR,                                           "> 0"},
1297         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
1298         applyIfPlatform = {"64-bit", "true"},
1299         applyIf = {"AlignVector", "false"})
1300     @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
1301                   IRNode.ADD_VB, "= 0",
1302                   IRNode.STORE_VECTOR, "= 0"},
1303         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
1304         applyIfPlatform = {"64-bit", "true"},
1305         applyIf = {"AlignVector", "true"})
1306     static Object[] test14eB(byte[] a) {
             // Increments a[i+0..7] (8 bytes) per iteration while i advances by 11, so the
             // ranges touched by consecutive iterations do not overlap. Per the IR rules
             // above: byte vectors with size constraint "min(max_byte, 8)" are required when
             // AlignVector is false, and no vector nodes may appear when AlignVector is true.
             // Returns the array wrapped in an Object[].
1307         // non-power-of-2 stride
1308         for (int i = 0; i < RANGE-32; i+=11) {
1309             a[i+0]++;
1310             a[i+1]++;
1311             a[i+2]++;
1312             a[i+3]++;
1313             a[i+4]++;
1314             a[i+5]++;
1315             a[i+6]++;
1316             a[i+7]++;
1317         }
1318         return new Object[]{ a };
1319     }
1320 
1321     @Test
1322     @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE + "min(max_byte, 8)", "> 0",
1323                   IRNode.ADD_VB,        IRNode.VECTOR_SIZE + "min(max_byte, 8)", "> 0",
1324                   IRNode.STORE_VECTOR,                                           "> 0"},
1325         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
1326         applyIfPlatform = {"64-bit", "true"},
1327         applyIf = {"AlignVector", "false"})
1328     @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
1329                   IRNode.ADD_VB, "= 0",
1330                   IRNode.STORE_VECTOR, "= 0"},
1331         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
1332         applyIfPlatform = {"64-bit", "true"},
1333         applyIf = {"AlignVector", "true"})
1334     static Object[] test14fB(byte[] a) {
             // Increments a[i+0..7] (8 bytes) per iteration while i advances by 12 (even, but
             // not a power of two), so the ranges touched by consecutive iterations do not
             // overlap. Per the IR rules above: byte vectors with size constraint
             // "min(max_byte, 8)" are required when AlignVector is false, and no vector nodes
             // may appear when AlignVector is true. Returns the array wrapped in an Object[].
1335         // non-power-of-2 stride
1336         for (int i = 0; i < RANGE-40; i+=12) {
1337             a[i+0]++;
1338             a[i+1]++;
1339             a[i+2]++;
1340             a[i+3]++;
1341             a[i+4]++;
1342             a[i+5]++;
1343             a[i+6]++;
1344             a[i+7]++;
1345         }
1346         return new Object[]{ a };
1347     }
1348 
1349     @Test
1350     // IR rules difficult because of modulo wrapping with offset after peeling.
1351     static Object[] test15aB(byte[] a) {
             // Increments a[53*i + 0..15] (16 bytes) for i in [0, RANGE/64-20): the index is
             // scaled by the odd constant 53. No @IR rule is attached, so this chunk shows no
             // expectation on the generated code. Returns the array wrapped in an Object[].
1352         // non-power-of-2 scale
1353         for (int i = 0; i < RANGE/64-20; i++) {
1354             a[53*i+0]++;
1355             a[53*i+1]++;
1356             a[53*i+2]++;
1357             a[53*i+3]++;
1358             a[53*i+4]++;
1359             a[53*i+5]++;
1360             a[53*i+6]++;
1361             a[53*i+7]++;
1362             a[53*i+8]++;
1363             a[53*i+9]++;
1364             a[53*i+10]++;
1365             a[53*i+11]++;
1366             a[53*i+12]++;
1367             a[53*i+13]++;
1368             a[53*i+14]++;
1369             a[53*i+15]++;
1370         }
1371         return new Object[]{ a };
1372     }
1373 
1374     @Test
1375     // IR rules difficult because of modulo wrapping with offset after peeling.
1376     static Object[] test15bB(byte[] a) {
             // Increments a[25*i + 0..15] (16 bytes) for i in [0, RANGE/64-20): the index is
             // scaled by the odd constant 25. No @IR rule is attached, so this chunk shows no
             // expectation on the generated code. Returns the array wrapped in an Object[].
1377         // non-power-of-2 scale
1378         for (int i = 0; i < RANGE/64-20; i++) {
1379             a[25*i+0]++;
1380             a[25*i+1]++;
1381             a[25*i+2]++;
1382             a[25*i+3]++;
1383             a[25*i+4]++;
1384             a[25*i+5]++;
1385             a[25*i+6]++;
1386             a[25*i+7]++;
1387             a[25*i+8]++;
1388             a[25*i+9]++;
1389             a[25*i+10]++;
1390             a[25*i+11]++;
1391             a[25*i+12]++;
1392             a[25*i+13]++;
1393             a[25*i+14]++;
1394             a[25*i+15]++;
1395         }
1396         return new Object[]{ a };
1397     }
1398 
1399     @Test
1400     // IR rules difficult because of modulo wrapping with offset after peeling.
1401     static Object[] test15cB(byte[] a) {
             // Increments a[19*i + 0..15] (16 bytes) for i in [0, RANGE/64-20): the index is
             // scaled by the odd constant 19, so consecutive iterations are 19 elements apart.
             // No @IR rule is attached, so this chunk shows no expectation on the generated
             // code. Returns the array wrapped in an Object[].
1402         // non-power-of-2 scale
1403         for (int i = 0; i < RANGE/64-20; i++) {
1404             a[19*i+0]++;
1405             a[19*i+1]++;
1406             a[19*i+2]++;
1407             a[19*i+3]++;
1408             a[19*i+4]++;
1409             a[19*i+5]++;
1410             a[19*i+6]++;
1411             a[19*i+7]++;
1412             a[19*i+8]++;
1413             a[19*i+9]++;
1414             a[19*i+10]++;
1415             a[19*i+11]++;
1416             a[19*i+12]++;
1417             a[19*i+13]++;
1418             a[19*i+14]++;
1419             a[19*i+15]++;
1420         }
1421         return new Object[]{ a };
1422     }
1423 
1424     @Test
         // Kernel: per iteration, increments 15 consecutive bytes
         // a[2*i+0] .. a[2*i+14] and 4 consecutive shorts b[2*i+0] .. b[2*i+3].
         // The index advances by only 2 per iteration, so the windows of
         // neighbouring iterations overlap and most elements are incremented
         // several times over the whole loop.
         // No @IR rule is attached; returns both mutated arrays in an Object[].
1425     static Object[] test16a(byte[] a, short[] b) {
1426         // infinite loop issues
             // NOTE(review): presumably a regression shape that once made the
             // compiler hang - confirm against the bug listed in the test header.
1427         for (int i = 0; i < RANGE/2-20; i++) {
1428             a[2*i+0]++;
1429             a[2*i+1]++;
1430             a[2*i+2]++;
1431             a[2*i+3]++;
1432             a[2*i+4]++;
1433             a[2*i+5]++;
1434             a[2*i+6]++;
1435             a[2*i+7]++;
1436             a[2*i+8]++;
1437             a[2*i+9]++;
1438             a[2*i+10]++;
1439             a[2*i+11]++;
1440             a[2*i+12]++;
1441             a[2*i+13]++;
1442             a[2*i+14]++;
1443 
1444             b[2*i+0]++;
1445             b[2*i+1]++;
1446             b[2*i+2]++;
1447             b[2*i+3]++;
1448         }
1449         return new Object[]{ a, b };
1450     }
1451 
1452     @Test
         // Kernel: byte-only variant of the same loop shape - per iteration,
         // increments 15 consecutive bytes a[2*i+0] .. a[2*i+14]. With an index
         // step of 2 the windows of neighbouring iterations overlap, so most
         // bytes are incremented several times over the whole loop.
         // No @IR rule is attached; returns the mutated array in an Object[].
1453     static Object[] test16b(byte[] a) {
1454         // infinite loop issues
             // NOTE(review): presumably a regression shape that once made the
             // compiler hang - confirm against the bug listed in the test header.
1455         for (int i = 0; i < RANGE/2-20; i++) {
1456             a[2*i+0]++;
1457             a[2*i+1]++;
1458             a[2*i+2]++;
1459             a[2*i+3]++;
1460             a[2*i+4]++;
1461             a[2*i+5]++;
1462             a[2*i+6]++;
1463             a[2*i+7]++;
1464             a[2*i+8]++;
1465             a[2*i+9]++;
1466             a[2*i+10]++;
1467             a[2*i+11]++;
1468             a[2*i+12]++;
1469             a[2*i+13]++;
1470             a[2*i+14]++;
1471         }
1472         return new Object[]{ a };
1473     }
1474 
1475     @Test
1476     @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0",
1477                   IRNode.ADD_VL, "> 0",
1478                   IRNode.STORE_VECTOR, "> 0"},
1479         applyIfPlatform = {"64-bit", "true"},
1480         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
         // The rule above expects at least one long vector load, one long vector
         // add and one vector store, and is only applied on 64-bit platforms
         // with sse4.1 or asimd.
         // Kernel: adds 1 to one long per iteration, accessed through Unsafe at
         // byte offset ARRAY_LONG_BASE_OFFSET + 8*i (a step of 8 bytes per
         // iteration, i.e. one long per step). Returns the mutated array in an
         // Object[].
1481     static Object[] test17a(long[] a) {
1482         // Unsafe: vectorizes with profiling (not xcomp)
1483         for (int i = 0; i < RANGE; i++) {
                 // 8L forces the offset arithmetic to be done in long.
1484             long adr = UNSAFE.ARRAY_LONG_BASE_OFFSET + 8L * i;
1485             long v = UNSAFE.getLongUnaligned(a, adr);
1486             UNSAFE.putLongUnaligned(a, adr, v + 1);
1487         }
1488         return new Object[]{ a };
1489     }
1490 
1491     @Test
1492     // Difficult to write good IR rule. Modulo calculus overflow can create non-power-of-2 packs.
         // Kernel: same read-add-1-write pattern as an 8-byte-stride Unsafe loop,
         // but every address is shifted by one extra byte
         // (ARRAY_LONG_BASE_OFFSET + 8*i + 1), so each 8-byte access starts one
         // byte past an 8-byte element boundary. The loop stops at RANGE-1
         // instead of RANGE.
         // NOTE(review): the shorter bound presumably keeps the last shifted
         // access inside the array, assuming a.length >= RANGE - TODO confirm.
         // No @IR rule is attached; returns the mutated array in an Object[].
1493     static Object[] test17b(long[] a) {
1494         // Not alignable
1495         for (int i = 0; i < RANGE-1; i++) {
1496             long adr = UNSAFE.ARRAY_LONG_BASE_OFFSET + 8L * i + 1;
1497             long v = UNSAFE.getLongUnaligned(a, adr);
1498             UNSAFE.putLongUnaligned(a, adr, v + 1);
1499         }
1500         return new Object[]{ a };
1501     }
1502 
1503     @Test
1504     @IR(counts = {IRNode.LOAD_VECTOR_L, IRNode.VECTOR_SIZE_2, "> 0",
1505                   IRNode.ADD_VL,        IRNode.VECTOR_SIZE_2, "> 0",
1506                   IRNode.STORE_VECTOR, "> 0"},
1507         applyIf = {"MaxVectorSize", ">=32"},
1508         applyIfPlatform = {"64-bit", "true"},
1509         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
         // The rule above expects long vector loads and adds of size 2 plus at
         // least one vector store; it is only applied when MaxVectorSize >= 32,
         // on 64-bit platforms with sse4.1 or asimd.
         // Kernel: the loop advances i by 4 (32 bytes) and, per iteration, adds 1
         // to the two adjacent longs at byte offsets adr and adr + 8, leaving the
         // following 16 bytes of each 32-byte group untouched.
         // Returns the mutated array in an Object[].
1510     static Object[] test17c(long[] a) {
1511         // Unsafe: aligned vectorizes
1512         for (int i = 0; i < RANGE-1; i+=4) {
1513             long adr = UNSAFE.ARRAY_LONG_BASE_OFFSET + 8L * i;
                 // Both loads are issued before both stores.
1514             long v0 = UNSAFE.getLongUnaligned(a, adr + 0);
1515             long v1 = UNSAFE.getLongUnaligned(a, adr + 8);
1516             UNSAFE.putLongUnaligned(a, adr + 0, v0 + 1);
1517             UNSAFE.putLongUnaligned(a, adr + 8, v1 + 1);
1518         }
1519         return new Object[]{ a };
1520     }
1521 
1522     @Test
         // Rule 1 (AlignVector off, MaxVectorSize >= 64, avx512 or asimd, 64-bit):
         // expects size-2 long vector loads/adds and at least one vector store.
1523     @IR(counts = {IRNode.LOAD_VECTOR_L, IRNode.VECTOR_SIZE_2, "> 0",
1524                   IRNode.ADD_VL,        IRNode.VECTOR_SIZE_2, "> 0",
1525                   IRNode.STORE_VECTOR, "> 0"},
1526         applyIfCPUFeatureOr = {"avx512", "true", "asimd", "true"},
1527         applyIfPlatform = {"64-bit", "true"},
1528         applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=64"})
1529     // Ensure vector width is large enough to fit 64 byte for longs:
1530     // The offsets are: 25, 33, 57, 65
1531     // In modulo 32:    25,  1, 25,  1  -> does not vectorize
1532     // In modulo 64:    25, 33, 57,  1  -> at least first pair vectorizes
1533     // This problem is because we compute modulo vector width in memory_alignment.
         // Rule 2 (AlignVector on, sse4.1 or asimd, 64-bit): expects no long
         // vector load, no long vector add and no vector store at all.
1534     @IR(counts = {IRNode.LOAD_VECTOR_L, "= 0",
1535                   IRNode.ADD_VL, "= 0",
1536                   IRNode.STORE_VECTOR, "= 0"},
1537         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
1538         applyIfPlatform = {"64-bit", "true"},
1539         applyIf = {"AlignVector", "true"})
         // Kernel: the loop advances i by 4 (32 bytes) and, per iteration, adds 1
         // to the two adjacent 8-byte values at byte offsets adr and adr + 8,
         // where adr carries an extra +1 byte so neither access starts on an
         // 8-byte element boundary. Returns the mutated array in an Object[].
1540     static Object[] test17d(long[] a) {
1541         // Not alignable
1542         for (int i = 0; i < RANGE-1; i+=4) {
1543             long adr = UNSAFE.ARRAY_LONG_BASE_OFFSET + 8L * i + 1;
                 // Both loads are issued before both stores.
1544             long v0 = UNSAFE.getLongUnaligned(a, adr + 0);
1545             long v1 = UNSAFE.getLongUnaligned(a, adr + 8);
1546             UNSAFE.putLongUnaligned(a, adr + 0, v0 + 1);
1547             UNSAFE.putLongUnaligned(a, adr + 8, v1 + 1);
1548         }
1549         return new Object[]{ a };
1550     }
1551 
1552     @Test
         // Kernel: every iteration stores 1 into the fixed byte slots a[0] and
         // a[1] (indices that do not depend on i) around an int store b[i] = 2
         // that does depend on i. After the loop a[0] == a[1] == 1 and
         // b[0 .. RANGE-1] == 2.
         // No @IR rule is attached; returns both arrays in an Object[].
1553     static Object[] test18a(byte[] a, int[] b) {
1554         // scale = 0  -->  no iv
1555         for (int i = 0; i < RANGE; i++) {
1556             a[0] = 1;
1557             b[i] = 2;
1558             a[1] = 1;
1559         }
1560         return new Object[]{ a, b };
1561     }
1562 
1563     @Test
         // Kernel: same shape as a loop with two loop-invariant byte stores around
         // one iv-dependent int store, but the fixed byte slots are a[1] and a[2]
         // (shifted by one byte). After the loop a[1] == a[2] == 1 and
         // b[0 .. RANGE-1] == 2; a[0] is not written.
         // No @IR rule is attached; returns both arrays in an Object[].
1564     static Object[] test18b(byte[] a, int[] b) {
1565         // scale = 0  -->  no iv
1566         for (int i = 0; i < RANGE; i++) {
1567             a[1] = 1;
1568             b[i] = 2;
1569             a[2] = 1;
1570         }
1571         return new Object[]{ a, b };
1572     }
1573 
1574     @Test
         // Kernel: copies b into a over the index range
         // [RANGE_FINAL - 5000, RANGE_FINAL - 1]. The loop variable counts DOWN
         // from 5000 to 1 while the array index RANGE_FINAL - i counts UP, so the
         // index has a negative dependence on the induction variable.
         // NOTE(review): requires RANGE_FINAL >= 5000 and both arrays to hold at
         // least RANGE_FINAL elements - TODO confirm at the definition/caller.
         // No @IR rule is attached; returns both arrays in an Object[].
1575     static Object[] test19(int[] a, int[] b) {
1576         for (int i = 5000; i > 0; i--) {
1577             a[RANGE_FINAL - i] = b[RANGE_FINAL - i];
1578         }
1579         return new Object[]{ a, b };
1580     }
1581 
1582     @Test
         // Kernel: starting at i = 1, every iteration increments the 4 consecutive
         // bytes a[2*i+30] .. a[2*i+33]. The index advances by 2 per iteration
         // while the window is 4 bytes wide, so neighbouring windows overlap by
         // two bytes and interior bytes are incremented twice.
         // No @IR rule is attached; returns the mutated array in an Object[].
1583     static Object[] test20(byte[] a) {
1584         // Example where it is easy to pass alignment check,
1585         // but used to fail the alignment calculation
1586         for (int i = 1; i < RANGE/2-50; i++) {
1587             a[2*i+0+30]++;
1588             a[2*i+1+30]++;
1589             a[2*i+2+30]++;
1590             a[2*i+3+30]++;
1591         }
1592         return new Object[]{ a };
1593     }
1594 }