1 /*
   2  * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  */
  23 
  24 package compiler.loopopts.superword;
  25 
  26 import compiler.lib.ir_framework.*;
  27 import jdk.test.lib.Utils;
  28 import jdk.test.whitebox.WhiteBox;
  29 import jdk.internal.misc.Unsafe;
  30 import java.lang.reflect.Array;
  31 import java.util.Map;
  32 import java.util.HashMap;
  33 import java.util.Random;
  34 import java.nio.ByteOrder;
  35 
  36 /*
  37  * @test id=NoAlignVector
  38  * @bug 8310190
  39  * @summary Test AlignVector with various loop init, stride, scale, invar, etc.
  40  * @modules java.base/jdk.internal.misc
  41  * @library /test/lib /
  42  * @run driver compiler.loopopts.superword.TestAlignVector NoAlignVector
  43  */
  44 
  45 /*
  46  * @test id=AlignVector
  47  * @bug 8310190
  48  * @summary Test AlignVector with various loop init, stride, scale, invar, etc.
  49  * @modules java.base/jdk.internal.misc
  50  * @library /test/lib /
  51  * @run driver compiler.loopopts.superword.TestAlignVector AlignVector
  52  */
  53 
  54 /*
  55  * @test id=VerifyAlignVector
  56  * @bug 8310190
  57  * @summary Test AlignVector with various loop init, stride, scale, invar, etc.
  58  * @modules java.base/jdk.internal.misc
  59  * @library /test/lib /
  60  * @run driver compiler.loopopts.superword.TestAlignVector VerifyAlignVector
  61  */
  62 
  63 public class TestAlignVector {
  64     static int RANGE = 1024*8;
  65     static int RANGE_FINAL = 1024*8;
  66     private static final Unsafe UNSAFE = Unsafe.getUnsafe();
  67     private static final Random RANDOM = Utils.getRandomInstance();
  68 
  69     // Inputs
  70     byte[] aB;
  71     byte[] bB;
  72     byte mB = (byte)31;
  73     short[] aS;
  74     short[] bS;
  75     short mS = (short)0xF0F0;
  76     int[] aI;
  77     int[] bI;
  78     int mI = 0xF0F0F0F0;
  79     long[] aL;
  80     long[] bL;
  81     long mL = 0xF0F0F0F0F0F0F0F0L;
  82 
  83     // List of tests
  84     Map<String,TestFunction> tests = new HashMap<String,TestFunction>();
  85 
  86     // List of gold, the results from the first run before compilation
  87     Map<String,Object[]> golds = new HashMap<String,Object[]>();
  88 
  89     interface TestFunction {
  90         Object[] run();
  91     }
  92 
  93     public static void main(String[] args) {
  94         TestFramework framework = new TestFramework(TestAlignVector.class);
  95         framework.addFlags("--add-modules", "java.base", "--add-exports", "java.base/jdk.internal.misc=ALL-UNNAMED",
  96                            "-XX:+IgnoreUnrecognizedVMOptions", "-XX:LoopUnrollLimit=250");
  97 
  98         switch (args[0]) {
  99             case "NoAlignVector"     -> { framework.addFlags("-XX:-AlignVector"); }
 100             case "AlignVector"       -> { framework.addFlags("-XX:+AlignVector"); }
 101             case "VerifyAlignVector" -> { framework.addFlags("-XX:+AlignVector", "-XX:+IgnoreUnrecognizedVMOptions", "-XX:+VerifyAlignVector"); }
 102             default -> { throw new RuntimeException("Test argument not recognized: " + args[0]); }
 103         }
 104         framework.start();
 105     }
 106 
 107     public TestAlignVector() {
 108         // Generate input once
 109         aB = generateB();
 110         bB = generateB();
 111         aS = generateS();
 112         bS = generateS();
 113         aI = generateI();
 114         bI = generateI();
 115         aL = generateL();
 116         bL = generateL();
 117 
 118         // Add all tests to list
 119         tests.put("test0",       () -> { return test0(aB.clone(), bB.clone(), mB); });
 120         tests.put("test1",       () -> { return test1(aB.clone(), bB.clone(), mB); });
 121         tests.put("test2",       () -> { return test2(aB.clone(), bB.clone(), mB); });
 122         tests.put("test3",       () -> { return test3(aB.clone(), bB.clone(), mB); });
 123         tests.put("test4",       () -> { return test4(aB.clone(), bB.clone(), mB); });
 124         tests.put("test5",       () -> { return test5(aB.clone(), bB.clone(), mB, 0); });
 125         tests.put("test6",       () -> { return test6(aB.clone(), bB.clone(), mB); });
 126         tests.put("test7",       () -> { return test7(aS.clone(), bS.clone(), mS); });
 127         tests.put("test8",       () -> { return test8(aB.clone(), bB.clone(), mB, 0); });
 128         tests.put("test8",       () -> { return test8(aB.clone(), bB.clone(), mB, 1); });
 129         tests.put("test9",       () -> { return test9(aB.clone(), bB.clone(), mB); });
 130 
 131         tests.put("test10a",     () -> { return test10a(aB.clone(), bB.clone(), mB); });
 132         tests.put("test10b",     () -> { return test10b(aB.clone(), bB.clone(), mB); });
 133         tests.put("test10c",     () -> { return test10c(aS.clone(), bS.clone(), mS); });
 134         tests.put("test10d",     () -> { return test10d(aS.clone(), bS.clone(), mS); });
 135 
 136         tests.put("test11aB",    () -> { return test11aB(aB.clone(), bB.clone(), mB); });
 137         tests.put("test11aS",    () -> { return test11aS(aS.clone(), bS.clone(), mS); });
 138         tests.put("test11aI",    () -> { return test11aI(aI.clone(), bI.clone(), mI); });
 139         tests.put("test11aL",    () -> { return test11aL(aL.clone(), bL.clone(), mL); });
 140 
 141         tests.put("test11bB",    () -> { return test11bB(aB.clone(), bB.clone(), mB); });
 142         tests.put("test11bS",    () -> { return test11bS(aS.clone(), bS.clone(), mS); });
 143         tests.put("test11bI",    () -> { return test11bI(aI.clone(), bI.clone(), mI); });
 144         tests.put("test11bL",    () -> { return test11bL(aL.clone(), bL.clone(), mL); });
 145 
 146         tests.put("test11cB",    () -> { return test11cB(aB.clone(), bB.clone(), mB); });
 147         tests.put("test11cS",    () -> { return test11cS(aS.clone(), bS.clone(), mS); });
 148         tests.put("test11cI",    () -> { return test11cI(aI.clone(), bI.clone(), mI); });
 149         tests.put("test11cL",    () -> { return test11cL(aL.clone(), bL.clone(), mL); });
 150 
 151         tests.put("test11dB",    () -> { return test11dB(aB.clone(), bB.clone(), mB, 0); });
 152         tests.put("test11dS",    () -> { return test11dS(aS.clone(), bS.clone(), mS, 0); });
 153         tests.put("test11dI",    () -> { return test11dI(aI.clone(), bI.clone(), mI, 0); });
 154         tests.put("test11dL",    () -> { return test11dL(aL.clone(), bL.clone(), mL, 0); });
 155 
 156         tests.put("test12",      () -> { return test12(aB.clone(), bB.clone(), mB); });
 157 
 158         tests.put("test13aIL",   () -> { return test13aIL(aI.clone(), aL.clone()); });
 159         tests.put("test13aIB",   () -> { return test13aIB(aI.clone(), aB.clone()); });
 160         tests.put("test13aIS",   () -> { return test13aIS(aI.clone(), aS.clone()); });
 161         tests.put("test13aBSIL", () -> { return test13aBSIL(aB.clone(), aS.clone(), aI.clone(), aL.clone()); });
 162 
 163         tests.put("test13bIL",   () -> { return test13bIL(aI.clone(), aL.clone()); });
 164         tests.put("test13bIB",   () -> { return test13bIB(aI.clone(), aB.clone()); });
 165         tests.put("test13bIS",   () -> { return test13bIS(aI.clone(), aS.clone()); });
 166         tests.put("test13bBSIL", () -> { return test13bBSIL(aB.clone(), aS.clone(), aI.clone(), aL.clone()); });
 167 
 168         tests.put("test14aB",    () -> { return test14aB(aB.clone()); });
 169         tests.put("test14bB",    () -> { return test14bB(aB.clone()); });
 170         tests.put("test14cB",    () -> { return test14cB(aB.clone()); });
 171 
 172         tests.put("test15aB",    () -> { return test15aB(aB.clone()); });
 173         tests.put("test15bB",    () -> { return test15bB(aB.clone()); });
 174         tests.put("test15cB",    () -> { return test15cB(aB.clone()); });
 175 
 176         tests.put("test16a",     () -> { return test16a(aB.clone(), aS.clone()); });
 177         tests.put("test16b",     () -> { return test16b(aB.clone()); });
 178 
 179         tests.put("test17a",     () -> { return test17a(aL.clone()); });
 180         tests.put("test17b",     () -> { return test17b(aL.clone()); });
 181         tests.put("test17c",     () -> { return test17c(aL.clone()); });
 182         tests.put("test17d",     () -> { return test17d(aL.clone()); });
 183 
 184         tests.put("test18a",     () -> { return test18a(aB.clone(), aI.clone()); });
 185         tests.put("test18b",     () -> { return test18b(aB.clone(), aI.clone()); });
 186 
 187         tests.put("test19",      () -> { return test19(aI.clone(), bI.clone()); });
 188         tests.put("test20",      () -> { return test20(aB.clone()); });
 189 
 190         // Compute gold value for all test methods before compilation
 191         for (Map.Entry<String,TestFunction> entry : tests.entrySet()) {
 192             String name = entry.getKey();
 193             TestFunction test = entry.getValue();
 194             Object[] gold = test.run();
 195             golds.put(name, gold);
 196         }
 197     }
 198 
 199     @Warmup(100)
 200     @Run(test = {"test0",
 201                  "test1",
 202                  "test2",
 203                  "test3",
 204                  "test4",
 205                  "test5",
 206                  "test6",
 207                  "test7",
 208                  "test8",
 209                  "test9",
 210                  "test10a",
 211                  "test10b",
 212                  "test10c",
 213                  "test10d",
 214                  "test11aB",
 215                  "test11aS",
 216                  "test11aI",
 217                  "test11aL",
 218                  "test11bB",
 219                  "test11bS",
 220                  "test11bI",
 221                  "test11bL",
 222                  "test11cB",
 223                  "test11cS",
 224                  "test11cI",
 225                  "test11cL",
 226                  "test11dB",
 227                  "test11dS",
 228                  "test11dI",
 229                  "test11dL",
 230                  "test12",
 231                  "test13aIL",
 232                  "test13aIB",
 233                  "test13aIS",
 234                  "test13aBSIL",
 235                  "test13bIL",
 236                  "test13bIB",
 237                  "test13bIS",
 238                  "test13bBSIL",
 239                  "test14aB",
 240                  "test14bB",
 241                  "test14cB",
 242                  "test15aB",
 243                  "test15bB",
 244                  "test15cB",
 245                  "test16a",
 246                  "test16b",
 247                  "test17a",
 248                  "test17b",
 249                  "test17c",
 250                  "test17d",
 251                  "test18a",
 252                  "test18b",
 253                  "test19",
 254                  "test20"})
 255     public void runTests() {
 256         for (Map.Entry<String,TestFunction> entry : tests.entrySet()) {
 257             String name = entry.getKey();
 258             TestFunction test = entry.getValue();
 259             // Recall gold value from before compilation
 260             Object[] gold = golds.get(name);
 261             // Compute new result
 262             Object[] result = test.run();
 263             // Compare gold and new result
 264             verify(name, gold, result);
 265         }
 266     }
 267 
 268     static byte[] generateB() {
 269         byte[] a = new byte[RANGE];
 270         for (int i = 0; i < a.length; i++) {
 271             a[i] = (byte)RANDOM.nextInt();
 272         }
 273         return a;
 274     }
 275 
 276     static short[] generateS() {
 277         short[] a = new short[RANGE];
 278         for (int i = 0; i < a.length; i++) {
 279             a[i] = (short)RANDOM.nextInt();
 280         }
 281         return a;
 282     }
 283 
 284     static int[] generateI() {
 285         int[] a = new int[RANGE];
 286         for (int i = 0; i < a.length; i++) {
 287             a[i] = RANDOM.nextInt();
 288         }
 289         return a;
 290     }
 291 
 292     static long[] generateL() {
 293         long[] a = new long[RANGE];
 294         for (int i = 0; i < a.length; i++) {
 295             a[i] = RANDOM.nextLong();
 296         }
 297         return a;
 298     }
 299 
 300     static void verify(String name, Object[] gold, Object[] result) {
 301         if (gold.length != result.length) {
 302             throw new RuntimeException("verify " + name + ": not the same number of outputs: gold.length = " +
 303                                        gold.length + ", result.length = " + result.length);
 304         }
 305         for (int i = 0; i < gold.length; i++) {
 306             Object g = gold[i];
 307             Object r = result[i];
 308             if (g.getClass() != r.getClass() || !g.getClass().isArray() || !r.getClass().isArray()) {
 309                 throw new RuntimeException("verify " + name + ": must both be array of same type:" +
 310                                            " gold[" + i + "].getClass() = " + g.getClass().getSimpleName() +
 311                                            " result[" + i + "].getClass() = " + r.getClass().getSimpleName());
 312             }
 313             if (g == r) {
 314                 throw new RuntimeException("verify " + name + ": should be two separate arrays (with identical content):" +
 315                                            " gold[" + i + "] == result[" + i + "]");
 316             }
 317             if (Array.getLength(g) != Array.getLength(r)) {
 318                     throw new RuntimeException("verify " + name + ": arrays must have same length:" +
 319                                            " gold[" + i + "].length = " + Array.getLength(g) +
 320                                            " result[" + i + "].length = " + Array.getLength(r));
 321             }
 322             Class c = g.getClass().getComponentType();
 323             if (c == byte.class) {
 324                 verifyB(name, i, (byte[])g, (byte[])r);
 325             } else if (c == short.class) {
 326                 verifyS(name, i, (short[])g, (short[])r);
 327             } else if (c == int.class) {
 328                 verifyI(name, i, (int[])g, (int[])r);
 329             } else if (c == long.class) {
 330                 verifyL(name, i, (long[])g, (long[])r);
 331             } else {
 332                 throw new RuntimeException("verify " + name + ": array type not supported for verify:" +
 333                                        " gold[" + i + "].getClass() = " + g.getClass().getSimpleName() +
 334                                        " result[" + i + "].getClass() = " + r.getClass().getSimpleName());
 335             }
 336         }
 337     }
 338 
 339     static void verifyB(String name, int i, byte[] g, byte[] r) {
 340         for (int j = 0; j < g.length; j++) {
 341             if (g[j] != r[j]) {
 342                 throw new RuntimeException("verify " + name + ": arrays must have same content:" +
 343                                            " gold[" + i + "][" + j + "] = " + g[j] +
 344                                            " result[" + i + "][" + j + "] = " + r[j]);
 345             }
 346         }
 347     }
 348 
 349     static void verifyS(String name, int i, short[] g, short[] r) {
 350         for (int j = 0; j < g.length; j++) {
 351             if (g[j] != r[j]) {
 352                 throw new RuntimeException("verify " + name + ": arrays must have same content:" +
 353                                            " gold[" + i + "][" + j + "] = " + g[j] +
 354                                            " result[" + i + "][" + j + "] = " + r[j]);
 355             }
 356         }
 357     }
 358 
 359     static void verifyI(String name, int i, int[] g, int[] r) {
 360         for (int j = 0; j < g.length; j++) {
 361             if (g[j] != r[j]) {
 362                 throw new RuntimeException("verify " + name + ": arrays must have same content:" +
 363                                            " gold[" + i + "][" + j + "] = " + g[j] +
 364                                            " result[" + i + "][" + j + "] = " + r[j]);
 365             }
 366         }
 367     }
 368 
 369     static void verifyL(String name, int i, long[] g, long[] r) {
 370         for (int j = 0; j < g.length; j++) {
 371             if (g[j] != r[j]) {
 372                 throw new RuntimeException("verify " + name + ": arrays must have same content:" +
 373                                            " gold[" + i + "][" + j + "] = " + g[j] +
 374                                            " result[" + i + "][" + j + "] = " + r[j]);
 375             }
 376         }
 377     }
 378 
 379     @Test
         // test0: for every i in steps of 8, stores a[i+k] & mask into b[i+k] for k = 0..3.
         // The single IR rule is not conditioned on AlignVector (only on MaxVectorSize >= 8,
         // 64-bit and sse4.1/asimd) and requires "> 0" 4-element byte vector loads and ANDs
         // plus "> 0" vector stores.
         // Returns both arrays so the caller can compare them against the gold run.
 380     @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
 381                   IRNode.AND_VB,        IRNode.VECTOR_SIZE_4, "> 0",
 382                   IRNode.STORE_VECTOR, "> 0"},
 383         applyIf = {"MaxVectorSize", ">=8"},
 384         applyIfPlatform = {"64-bit", "true"},
 385         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
 386     static Object[] test0(byte[] a, byte[] b, byte mask) {
 387         for (int i = 0; i < RANGE; i+=8) {
 388             // Safe to vectorize with AlignVector
 389             b[i+0] = (byte)(a[i+0] & mask); // offset 0, align 0
 390             b[i+1] = (byte)(a[i+1] & mask);
 391             b[i+2] = (byte)(a[i+2] & mask);
 392             b[i+3] = (byte)(a[i+3] & mask);
 393         }
 394         return new Object[]{ a, b };
 395     }
 396 
 397     @Test
         // test1: for every i in steps of 8, stores a[i+k] & mask into b[i+k] for k = 0..7,
         // i.e. every element in [0, RANGE) is processed.
         // The IR rule requires "> 0" byte vector loads, ANDs and vector stores without
         // constraining the vector size, and only applies when UseCompactObjectHeaders is false.
         // NOTE(review): the reason for the UseCompactObjectHeaders condition is not stated
         // here - presumably the array base offset differs with compact headers; confirm.
 398     @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
 399                   IRNode.AND_VB, "> 0",
 400                   IRNode.STORE_VECTOR, "> 0"},
 401         applyIf = {"UseCompactObjectHeaders", "false"},
 402         applyIfPlatform = {"64-bit", "true"},
 403         applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"})
 404     static Object[] test1(byte[] a, byte[] b, byte mask) {
 405         for (int i = 0; i < RANGE; i+=8) {
 406             // Safe to vectorize with AlignVector
 407             b[i+0] = (byte)(a[i+0] & mask); // offset 0, align 0
 408             b[i+1] = (byte)(a[i+1] & mask);
 409             b[i+2] = (byte)(a[i+2] & mask);
 410             b[i+3] = (byte)(a[i+3] & mask);
 411             b[i+4] = (byte)(a[i+4] & mask);
 412             b[i+5] = (byte)(a[i+5] & mask);
 413             b[i+6] = (byte)(a[i+6] & mask);
 414             b[i+7] = (byte)(a[i+7] & mask);
 415         }
 416         return new Object[]{ a, b };
 417     }
 418 
 419     @Test
         // test2: for every i in steps of 8, stores a[i+k] & mask into b[i+k] for k = 3..6.
         // First IR rule (AlignVector false, MaxVectorSize >= 8): requires "> 0" 4-element
         // byte vector loads/ANDs and "> 0" vector stores.
         // Second IR rule (AlignVector true): requires "= 0" byte vector loads, ANDs and
         // vector stores.
 420     @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
 421                   IRNode.AND_VB,        IRNode.VECTOR_SIZE_4, "> 0",
 422                   IRNode.STORE_VECTOR, "> 0"},
 423         applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"},
 424         applyIfPlatform = {"64-bit", "true"},
 425         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
 426     @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
 427                   IRNode.AND_VB, "= 0",
 428                   IRNode.STORE_VECTOR, "= 0"},
 429         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
 430         applyIfPlatform = {"64-bit", "true"},
 431         applyIf = {"AlignVector", "true"})
 432     static Object[] test2(byte[] a, byte[] b, byte mask) {
 433         for (int i = 0; i < RANGE; i+=8) {
 434             // Cannot align with AlignVector: (3 + x * 8) % 8 = 3 for every iteration x
 435             b[i+3] = (byte)(a[i+3] & mask); // at alignment 3
 436             b[i+4] = (byte)(a[i+4] & mask);
 437             b[i+5] = (byte)(a[i+5] & mask);
 438             b[i+6] = (byte)(a[i+6] & mask);
 439         }
 440         return new Object[]{ a, b };
 441     }
 442 
 443     @Test
         // test3: like the k = 3..6 pack at stride 8, with one additional access at i+0
         // in the same iteration.
         // First IR rule (AlignVector false, MaxVectorSize >= 8): requires "> 0" 4-element
         // byte vector loads/ANDs and "> 0" vector stores.
         // Second IR rule (AlignVector true): requires "= 0" byte vector loads, ANDs and
         // vector stores.
 444     @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
 445                   IRNode.AND_VB,        IRNode.VECTOR_SIZE_4, "> 0",
 446                   IRNode.STORE_VECTOR, "> 0"},
 447         applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"},
 448         applyIfPlatform = {"64-bit", "true"},
 449         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
 450     @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
 451                   IRNode.AND_VB, "= 0",
 452                   IRNode.STORE_VECTOR, "= 0"},
 453         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
 454         applyIfPlatform = {"64-bit", "true"},
 455         applyIf = {"AlignVector", "true"})
 456     static Object[] test3(byte[] a, byte[] b, byte mask) {
 457         for (int i = 0; i < RANGE; i+=8) {
 458             // Cannot align with AlignVector: (3 + x * 8) % 8 = 3 for every iteration x
 459 
 460             // Problematic for AlignVector
 461             b[i+0] = (byte)(a[i+0] & mask); // best_memref, align 0
 462 
 463             b[i+3] = (byte)(a[i+3] & mask); // pack at offset 3 bytes
 464             b[i+4] = (byte)(a[i+4] & mask);
 465             b[i+5] = (byte)(a[i+5] & mask);
 466             b[i+6] = (byte)(a[i+6] & mask);
 467         }
 468         return new Object[]{ a, b };
 469     }
 470 
 471     @Test
         // test4: per iteration, processes two groups within a 16-element stride:
         // 4 adjacent elements at i*16 + 0..3 and 8 adjacent elements at i*16 + 5..12.
         // First IR rule (AlignVector false, MaxVectorSize >= 16): requires "> 0" byte
         // vector loads/ANDs of both 4 and 8 elements.
         // Second IR rule (AlignVector true, MaxVectorSize >= 16): still requires the
         // 4-element vectors but requires "= 0" of the 8-element ones.
 472     @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
 473                   IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_8, "> 0",
 474                   IRNode.AND_VB,        IRNode.VECTOR_SIZE_4, "> 0",
 475                   IRNode.AND_VB,        IRNode.VECTOR_SIZE_8, "> 0",
 476                   IRNode.STORE_VECTOR, "> 0"},
 477         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
 478         applyIfPlatform = {"64-bit", "true"},
 479         applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=16"})
 480     @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
 481                   IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_8, "= 0",// unaligned
 482                   IRNode.AND_VB,        IRNode.VECTOR_SIZE_4, "> 0",
 483                   IRNode.AND_VB,        IRNode.VECTOR_SIZE_8, "= 0",// unaligned
 484                   IRNode.STORE_VECTOR, "> 0"},
 485         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
 486         applyIfPlatform = {"64-bit", "true"},
 487         applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">=16"})
 488     static Object[] test4(byte[] a, byte[] b, byte mask) {
 489         for (int i = 0; i < RANGE/16; i++) {
 490             // Problematic for AlignVector
 491             b[i*16 + 0 ] = (byte)(a[i*16 + 0 ] & mask); // 4 pack, 0 aligned
 492             b[i*16 + 1 ] = (byte)(a[i*16 + 1 ] & mask);
 493             b[i*16 + 2 ] = (byte)(a[i*16 + 2 ] & mask);
 494             b[i*16 + 3 ] = (byte)(a[i*16 + 3 ] & mask);
 495 
 496             b[i*16 + 5 ] = (byte)(a[i*16 + 5 ] & mask); // 8 pack, 5 aligned
 497             b[i*16 + 6 ] = (byte)(a[i*16 + 6 ] & mask);
 498             b[i*16 + 7 ] = (byte)(a[i*16 + 7 ] & mask);
 499             b[i*16 + 8 ] = (byte)(a[i*16 + 8 ] & mask);
 500             b[i*16 + 9 ] = (byte)(a[i*16 + 9 ] & mask);
 501             b[i*16 + 10] = (byte)(a[i*16 + 10] & mask);
 502             b[i*16 + 11] = (byte)(a[i*16 + 11] & mask);
 503             b[i*16 + 12] = (byte)(a[i*16 + 12] & mask);
 504         }
 505         return new Object[]{ a, b };
 506     }
 507 
 508     @Test
         // test5: same access pattern as an i+0 access plus a pack at i+3..i+6 with stride 8,
         // but every index additionally includes the loop-invariant parameter inv.
         // First IR rule (AlignVector false, MaxVectorSize >= 8): requires "> 0" 4-element
         // byte vector loads/ANDs and "> 0" vector stores.
         // Second IR rule (AlignVector true): requires "= 0" byte vector loads, ANDs and
         // vector stores.
 509     @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
 510                   IRNode.AND_VB,        IRNode.VECTOR_SIZE_4, "> 0",
 511                   IRNode.STORE_VECTOR, "> 0"},
 512         applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"},
 513         applyIfPlatform = {"64-bit", "true"},
 514         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
 515     @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
 516                   IRNode.AND_VB, "= 0",
 517                   IRNode.STORE_VECTOR, "= 0"},
 518         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
 519         applyIfPlatform = {"64-bit", "true"},
 520         applyIf = {"AlignVector", "true"})
 521     static Object[] test5(byte[] a, byte[] b, byte mask, int inv) {
 522         for (int i = 0; i < RANGE; i+=8) {
 523             // Cannot align with AlignVector because of invariant
 524             b[i+inv+0] = (byte)(a[i+inv+0] & mask);
 525 
 526             b[i+inv+3] = (byte)(a[i+inv+3] & mask);
 527             b[i+inv+4] = (byte)(a[i+inv+4] & mask);
 528             b[i+inv+5] = (byte)(a[i+inv+5] & mask);
 529             b[i+inv+6] = (byte)(a[i+inv+6] & mask);
 530         }
 531         return new Object[]{ a, b };
 532     }
 533 
 534     @Test
         // test6: the loop variable advances by 2 and is scaled by 4 in every index, so the
         // element index advances by 8 per iteration; accesses are at i*4 + 0 and i*4 + 3..6.
         // First IR rule (AlignVector false, MaxVectorSize >= 8): requires "> 0" 4-element
         // byte vector loads/ANDs and "> 0" vector stores.
         // Second IR rule (AlignVector true): requires "= 0" byte vector loads, ANDs and
         // vector stores.
 535     @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
 536                   IRNode.AND_VB,        IRNode.VECTOR_SIZE_4, "> 0",
 537                   IRNode.STORE_VECTOR, "> 0"},
 538         applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"},
 539         applyIfPlatform = {"64-bit", "true"},
 540         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
 541     @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
 542                   IRNode.AND_VB, "= 0",
 543                   IRNode.STORE_VECTOR, "= 0"},
 544         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
 545         applyIfPlatform = {"64-bit", "true"},
 546         applyIf = {"AlignVector", "true"})
 547     static Object[] test6(byte[] a, byte[] b, byte mask) {
 548         for (int i = 0; i < RANGE/8; i+=2) {
 549             // Cannot align with AlignVector because offset is odd
 550             b[i*4+0] = (byte)(a[i*4+0] & mask);
 551 
 552             b[i*4+3] = (byte)(a[i*4+3] & mask);
 553             b[i*4+4] = (byte)(a[i*4+4] & mask);
 554             b[i*4+5] = (byte)(a[i*4+5] & mask);
 555             b[i*4+6] = (byte)(a[i*4+6] & mask);
 556         }
 557         return new Object[]{ a, b };
 558     }
 559 
 560     @Test
         // test7: short-array variant of the i*4 + 0 / i*4 + 3..6 access pattern, with the
         // loop variable advancing by 2 (element index advances by 8 per iteration).
         // First IR rule (AlignVector false, MaxVectorSize >= 16, avx2 or asimd): requires
         // "> 0" 4-element short vector loads/ANDs and "> 0" vector stores.
         // Second IR rule (AlignVector true, sse4.1 or asimd): requires "= 0" short vector
         // loads, ANDs and vector stores.
 561     @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
 562                   IRNode.AND_VS,        IRNode.VECTOR_SIZE_4, "> 0",
 563                   IRNode.STORE_VECTOR, "> 0"},
 564         applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=16"},
 565         applyIfPlatform = {"64-bit", "true"},
 566         applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"})
 567     @IR(counts = {IRNode.LOAD_VECTOR_S, "= 0",
 568                   IRNode.AND_VS, "= 0",
 569                   IRNode.STORE_VECTOR, "= 0"},
 570         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
 571         applyIfPlatform = {"64-bit", "true"},
 572         applyIf = {"AlignVector", "true"})
 573     static Object[] test7(short[] a, short[] b, short mask) {
 574         for (int i = 0; i < RANGE/8; i+=2) {
 575             // Cannot align with AlignVector because offset is odd
 576             b[i*4+0] = (short)(a[i*4+0] & mask);
 577 
 578             b[i*4+3] = (short)(a[i*4+3] & mask);
 579             b[i*4+4] = (short)(a[i*4+4] & mask);
 580             b[i*4+5] = (short)(a[i*4+5] & mask);
 581             b[i*4+6] = (short)(a[i*4+6] & mask);
 582         }
 583         return new Object[]{ a, b };
 584     }
 585 
 586     @Test
         // test8: i+0 access plus a pack at i+3..i+6 with stride 8, where the loop starts at
         // the parameter init instead of a constant.
         // First IR rule (AlignVector false, MaxVectorSize >= 8): requires "> 0" 4-element
         // byte vector loads/ANDs and "> 0" vector stores.
         // Second IR rule (AlignVector true): requires "= 0" byte vector loads, ANDs and
         // vector stores.
 587     @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
 588                   IRNode.AND_VB,        IRNode.VECTOR_SIZE_4, "> 0",
 589                   IRNode.STORE_VECTOR, "> 0"},
 590         applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"},
 591         applyIfPlatform = {"64-bit", "true"},
 592         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
 593     @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
 594                   IRNode.AND_VB, "= 0",
 595                   IRNode.STORE_VECTOR, "= 0"},
 596         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
 597         applyIfPlatform = {"64-bit", "true"},
 598         applyIf = {"AlignVector", "true"})
 599     static Object[] test8(byte[] a, byte[] b, byte mask, int init) {
 600         for (int i = init; i < RANGE; i+=8) {
 601             // Cannot align with AlignVector because of invariant (variable init becomes invar)
 602             b[i+0] = (byte)(a[i+0] & mask);
 603 
 604             b[i+3] = (byte)(a[i+3] & mask);
 605             b[i+4] = (byte)(a[i+4] & mask);
 606             b[i+5] = (byte)(a[i+5] & mask);
 607             b[i+6] = (byte)(a[i+6] & mask);
 608         }
 609         return new Object[]{ a, b };
 610     }
 611 
 612     @Test
         // test9: i+0 access plus a pack at i+3..i+6 with stride 8 and a constant loop start
         // of 13. With i = 13 + 8*x the pack starts at index i+3 = 16 + 8*x, a multiple of 8.
         // The single IR rule is not conditioned on AlignVector (only on MaxVectorSize >= 8,
         // 64-bit and sse4.1/asimd) and requires "> 0" 4-element byte vector loads/ANDs and
         // "> 0" vector stores.
 613     @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
 614                   IRNode.AND_VB,        IRNode.VECTOR_SIZE_4, "> 0",
 615                   IRNode.STORE_VECTOR, "> 0"},
 616         applyIf = {"MaxVectorSize", ">=8"},
 617         applyIfPlatform = {"64-bit", "true"},
 618         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
 619     static Object[] test9(byte[] a, byte[] b, byte mask) {
 620         // known non-zero init value does not affect offset, but has implicit effect on iv
 621         for (int i = 13; i < RANGE-8; i+=8) {
 622             b[i+0] = (byte)(a[i+0] & mask);
 623 
 624             b[i+3] = (byte)(a[i+3] & mask);
 625             b[i+4] = (byte)(a[i+4] & mask);
 626             b[i+5] = (byte)(a[i+5] & mask);
 627             b[i+6] = (byte)(a[i+6] & mask);
 628         }
 629         return new Object[]{ a, b };
 630     }
 631 
 632     @Test
         // test10a: pack of 4 adjacent bytes at i+0..i+3 with stride 8 and a constant loop
         // start of 3, so the pack starts at index 3 + 8*x.
         // First IR rule (AlignVector false, MaxVectorSize >= 8): requires "> 0" 4-element
         // byte vector loads/ANDs and "> 0" vector stores.
         // Second IR rule (AlignVector true): requires "= 0" byte vector loads, ANDs and
         // vector stores.
 633     @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
 634                   IRNode.AND_VB,        IRNode.VECTOR_SIZE_4, "> 0",
 635                   IRNode.STORE_VECTOR, "> 0"},
 636         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
 637         applyIfPlatform = {"64-bit", "true"},
 638         applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"})
 639     @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
 640                   IRNode.AND_VB, "= 0",
 641                   IRNode.STORE_VECTOR, "= 0"},
 642         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
 643         applyIfPlatform = {"64-bit", "true"},
 644         applyIf = {"AlignVector", "true"})
 645     static Object[] test10a(byte[] a, byte[] b, byte mask) {
 646         // This is not alignable with pre-loop, because of odd init.
 647         for (int i = 3; i < RANGE-8; i+=8) {
 648             b[i+0] = (byte)(a[i+0] & mask);
 649             b[i+1] = (byte)(a[i+1] & mask);
 650             b[i+2] = (byte)(a[i+2] & mask);
 651             b[i+3] = (byte)(a[i+3] & mask);
 652         }
 653         return new Object[]{ a, b };
 654     }
 655 
 656     @Test
         // test10b: pack of 4 adjacent bytes at i+0..i+3 with stride 8 and a constant loop
         // start of 13, so the pack starts at index 13 + 8*x.
         // First IR rule (AlignVector false, MaxVectorSize >= 8): requires "> 0" 4-element
         // byte vector loads/ANDs and "> 0" vector stores.
         // Second IR rule (AlignVector true): requires "= 0" byte vector loads, ANDs and
         // vector stores.
 657     @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
 658                   IRNode.AND_VB,        IRNode.VECTOR_SIZE_4, "> 0",
 659                   IRNode.STORE_VECTOR, "> 0"},
 660         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
 661         applyIfPlatform = {"64-bit", "true"},
 662         applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"})
 663     @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
 664                   IRNode.AND_VB, "= 0",
 665                   IRNode.STORE_VECTOR, "= 0"},
 666         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
 667         applyIfPlatform = {"64-bit", "true"},
 668         applyIf = {"AlignVector", "true"})
 669     static Object[] test10b(byte[] a, byte[] b, byte mask) {
 670         // This is not alignable with pre-loop, because of odd init.
 671         // Seems not correctly handled.
 672         for (int i = 13; i < RANGE-8; i+=8) {
 673             b[i+0] = (byte)(a[i+0] & mask);
 674             b[i+1] = (byte)(a[i+1] & mask);
 675             b[i+2] = (byte)(a[i+2] & mask);
 676             b[i+3] = (byte)(a[i+3] & mask);
 677         }
 678         return new Object[]{ a, b };
 679     }
 680 
 681     @Test
         // Short-array kernel: b[i+0..i+3] = a[i+0..i+3] & mask, with init 13 and stride 8.
         // Both rules need a 64-bit platform and sse4.1 or asimd.
         // Rule 1 (AlignVector off, MaxVectorSize >= 16): expects 4-element short vectors.
         // Rule 2 (AlignVector on): expects no vector load/and/store nodes.
 682     @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
 683                   IRNode.AND_VS,        IRNode.VECTOR_SIZE_4, "> 0",
 684                   IRNode.STORE_VECTOR, "> 0"},
 685         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
 686         applyIfPlatform = {"64-bit", "true"},
 687         applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=16"})
 688     @IR(counts = {IRNode.LOAD_VECTOR_S, "= 0",
 689                   IRNode.AND_VS, "= 0",
 690                   IRNode.STORE_VECTOR, "= 0"},
 691         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
 692         applyIfPlatform = {"64-bit", "true"},
 693         applyIf = {"AlignVector", "true"})
 694     static Object[] test10c(short[] a, short[] b, short mask) {
 695         // This is not alignable with pre-loop, because of odd init.
 696         // Seems not correctly handled with MaxVectorSize >= 32.
 697         for (int i = 13; i < RANGE-8; i+=8) {
 698             b[i+0] = (short)(a[i+0] & mask);
 699             b[i+1] = (short)(a[i+1] & mask);
 700             b[i+2] = (short)(a[i+2] & mask);
 701             b[i+3] = (short)(a[i+3] & mask);
 702         }
             // Hand back the same array references that were passed in (b modified in place).
 703         return new Object[]{ a, b };
 704     }
 705 
 706     @Test
         // Short-array kernel with init 13, stride 8 and a constant +3 added to every index,
         // so the first accessed element is 13 + 3 = 16.
         // Single rule, with no AlignVector condition: expects 4-element short vectors when
         // MaxVectorSize >= 16, UseCompactObjectHeaders is off, on 64-bit with avx2 or asimd.
 707     @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
 708                   IRNode.AND_VS,        IRNode.VECTOR_SIZE_4, "> 0",
 709                   IRNode.STORE_VECTOR, "> 0"},
 710         applyIfAnd = {"MaxVectorSize", ">=16", "UseCompactObjectHeaders", "false"},
 711         applyIfPlatform = {"64-bit", "true"},
 712         applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"})
 713     static Object[] test10d(short[] a, short[] b, short mask) {
 714         for (int i = 13; i < RANGE-16; i+=8) {
 715             // init + offset -> aligned
 716             b[i+0+3] = (short)(a[i+0+3] & mask);
 717             b[i+1+3] = (short)(a[i+1+3] & mask);
 718             b[i+2+3] = (short)(a[i+2+3] & mask);
 719             b[i+3+3] = (short)(a[i+3+3] & mask);
 720         }
             // Hand back the same array references that were passed in (b modified in place).
 721         return new Object[]{ a, b };
 722     }
 723 
 724     @Test
         // Unit-stride byte kernel from i = 0: b[i] = a[i] & mask.
         // The rule has no AlignVector condition: byte vector nodes are expected
         // on 64-bit with sse4.1 or asimd.
 725     @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
 726                   IRNode.AND_VB, "> 0",
 727                   IRNode.STORE_VECTOR, "> 0"},
 728         applyIfPlatform = {"64-bit", "true"},
 729         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
 730     static Object[] test11aB(byte[] a, byte[] b, byte mask) {
 731         for (int i = 0; i < RANGE; i++) {
 732             // always alignable
 733             b[i+0] = (byte)(a[i+0] & mask);
 734         }
             // Hand back the same array references that were passed in (b modified in place).
 735         return new Object[]{ a, b };
 736     }
 737 
 738     @Test
         // Unit-stride short kernel from i = 0: b[i] = a[i] & mask.
         // The rule has no AlignVector condition: short vector nodes are expected
         // on 64-bit with sse4.1 or asimd.
 739     @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
 740                   IRNode.AND_VS, "> 0",
 741                   IRNode.STORE_VECTOR, "> 0"},
 742         applyIfPlatform = {"64-bit", "true"},
 743         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
 744     static Object[] test11aS(short[] a, short[] b, short mask) {
 745         for (int i = 0; i < RANGE; i++) {
 746             // always alignable
 747             b[i+0] = (short)(a[i+0] & mask);
 748         }
             // Hand back the same array references that were passed in (b modified in place).
 749         return new Object[]{ a, b };
 750     }
 751 
 752     @Test
         // Unit-stride int kernel from i = 0: b[i] = a[i] & mask.
         // The rule has no AlignVector condition: int vector nodes are expected
         // on 64-bit with sse4.1 or asimd.
 753     @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
 754                   IRNode.AND_VI, "> 0",
 755                   IRNode.STORE_VECTOR, "> 0"},
 756         applyIfPlatform = {"64-bit", "true"},
 757         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
 758     static Object[] test11aI(int[] a, int[] b, int mask) {
 759         for (int i = 0; i < RANGE; i++) {
 760             // always alignable
 761             b[i+0] = (int)(a[i+0] & mask);
 762         }
             // Hand back the same array references that were passed in (b modified in place).
 763         return new Object[]{ a, b };
 764     }
 765 
 766     @Test
         // Unit-stride long kernel from i = 0: b[i] = a[i] & mask.
         // The rule has no AlignVector condition: long vector nodes are expected
         // on 64-bit with sse4.1 or asimd.
 767     @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0",
 768                   IRNode.AND_VL, "> 0",
 769                   IRNode.STORE_VECTOR, "> 0"},
 770         applyIfPlatform = {"64-bit", "true"},
 771         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
 772     static Object[] test11aL(long[] a, long[] b, long mask) {
 773         for (int i = 0; i < RANGE; i++) {
 774             // always alignable
 775             b[i+0] = (long)(a[i+0] & mask);
 776         }
             // Hand back the same array references that were passed in (b modified in place).
 777         return new Object[]{ a, b };
 778     }
 779 
 780     @Test
         // Unit-stride byte kernel starting at i = 1 (element 0 is left untouched).
         // The rule has no AlignVector condition: byte vector nodes are expected
         // on 64-bit with sse4.1 or asimd.
 781     @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
 782                   IRNode.AND_VB, "> 0",
 783                   IRNode.STORE_VECTOR, "> 0"},
 784         applyIfPlatform = {"64-bit", "true"},
 785         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
 786     static Object[] test11bB(byte[] a, byte[] b, byte mask) {
 787         for (int i = 1; i < RANGE; i++) {
 788             // always alignable
 789             b[i+0] = (byte)(a[i+0] & mask);
 790         }
             // Hand back the same array references that were passed in (b modified in place).
 791         return new Object[]{ a, b };
 792     }
 793 
 794     @Test
         // Unit-stride short kernel starting at i = 1 (element 0 is left untouched).
         // The rule has no AlignVector condition: short vector nodes are expected
         // on 64-bit with sse4.1 or asimd.
 795     @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
 796                   IRNode.AND_VS, "> 0",
 797                   IRNode.STORE_VECTOR, "> 0"},
 798         applyIfPlatform = {"64-bit", "true"},
 799         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
 800     static Object[] test11bS(short[] a, short[] b, short mask) {
 801         for (int i = 1; i < RANGE; i++) {
 802             // always alignable
 803             b[i+0] = (short)(a[i+0] & mask);
 804         }
             // Hand back the same array references that were passed in (b modified in place).
 805         return new Object[]{ a, b };
 806     }
 807 
 808     @Test
         // Unit-stride int kernel starting at i = 1 (element 0 is left untouched).
         // The rule has no AlignVector condition: int vector nodes are expected
         // on 64-bit with sse4.1 or asimd.
 809     @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
 810                   IRNode.AND_VI, "> 0",
 811                   IRNode.STORE_VECTOR, "> 0"},
 812         applyIfPlatform = {"64-bit", "true"},
 813         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
 814     static Object[] test11bI(int[] a, int[] b, int mask) {
 815         for (int i = 1; i < RANGE; i++) {
 816             // always alignable
 817             b[i+0] = (int)(a[i+0] & mask);
 818         }
             // Hand back the same array references that were passed in (b modified in place).
 819         return new Object[]{ a, b };
 820     }
 821 
 822     @Test
         // Unit-stride long kernel starting at i = 1 (element 0 is left untouched).
         // The rule has no AlignVector condition: long vector nodes are expected
         // on 64-bit with sse4.1 or asimd.
 823     @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0",
 824                   IRNode.AND_VL, "> 0",
 825                   IRNode.STORE_VECTOR, "> 0"},
 826         applyIfPlatform = {"64-bit", "true"},
 827         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
 828     static Object[] test11bL(long[] a, long[] b, long mask) {
 829         for (int i = 1; i < RANGE; i++) {
 830             // always alignable
 831             b[i+0] = (long)(a[i+0] & mask);
 832         }
             // Hand back the same array references that were passed in (b modified in place).
 833         return new Object[]{ a, b };
 834     }
 835 
 836     @Test
         // Byte kernel where the load index is one element ahead of the store index:
         // b[i] = a[i+1] & mask, for i in [1, RANGE-1).
         // Both rules need a 64-bit platform and sse4.1 or asimd.
         // Rule 1 (AlignVector off): expects byte vector nodes.
         // Rule 2 (AlignVector on): expects no vector load/and/store nodes.
 837     @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
 838                   IRNode.AND_VB, "> 0",
 839                   IRNode.STORE_VECTOR, "> 0"},
 840         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
 841         applyIfPlatform = {"64-bit", "true"},
 842         applyIf = {"AlignVector", "false"})
 843     @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
 844                   IRNode.AND_VB, "= 0",
 845                   IRNode.STORE_VECTOR, "= 0"},
 846         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
 847         applyIfPlatform = {"64-bit", "true"},
 848         applyIf = {"AlignVector", "true"})
 849     static Object[] test11cB(byte[] a, byte[] b, byte mask) {
 850         for (int i = 1; i < RANGE-1; i++) {
 851             // 1 byte offset -> not alignable with AlignVector
 852             b[i+0] = (byte)(a[i+1] & mask);
 853         }
             // Hand back the same array references that were passed in (b modified in place).
 854         return new Object[]{ a, b };
 855     }
 856 
 857     @Test
         // Short kernel where the load index is one element ahead of the store index:
         // b[i] = a[i+1] & mask, for i in [1, RANGE-1).
         // Both rules need a 64-bit platform and sse4.1 or asimd.
         // Rule 1 (AlignVector off): expects short vector nodes.
         // Rule 2 (AlignVector on): expects no vector load/and/store nodes.
 858     @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
 859                   IRNode.AND_VS, "> 0",
 860                   IRNode.STORE_VECTOR, "> 0"},
 861         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
 862         applyIfPlatform = {"64-bit", "true"},
 863         applyIf = {"AlignVector", "false"})
 864     @IR(counts = {IRNode.LOAD_VECTOR_S, "= 0",
 865                   IRNode.AND_VS, "= 0",
 866                   IRNode.STORE_VECTOR, "= 0"},
 867         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
 868         applyIfPlatform = {"64-bit", "true"},
 869         applyIf = {"AlignVector", "true"})
 870     static Object[] test11cS(short[] a, short[] b, short mask) {
 871         for (int i = 1; i < RANGE-1; i++) {
 872             // 2 byte offset -> not alignable with AlignVector
 873             b[i+0] = (short)(a[i+1] & mask);
 874         }
             // Hand back the same array references that were passed in (b modified in place).
 875         return new Object[]{ a, b };
 876     }
 877 
 878     @Test
         // Int kernel where the load index is one element ahead of the store index:
         // b[i] = a[i+1] & mask, for i in [1, RANGE-1).
         // Both rules need a 64-bit platform and sse4.1 or asimd.
         // Rule 1 (AlignVector off): expects int vector nodes.
         // Rule 2 (AlignVector on): expects no vector load/and/store nodes.
 879     @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
 880                   IRNode.AND_VI, "> 0",
 881                   IRNode.STORE_VECTOR, "> 0"},
 882         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
 883         applyIfPlatform = {"64-bit", "true"},
 884         applyIf = {"AlignVector", "false"})
 885     @IR(counts = {IRNode.LOAD_VECTOR_I, "= 0",
 886                   IRNode.AND_VI, "= 0",
 887                   IRNode.STORE_VECTOR, "= 0"},
 888         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
 889         applyIfPlatform = {"64-bit", "true"},
 890         applyIf = {"AlignVector", "true"})
 891     static Object[] test11cI(int[] a, int[] b, int mask) {
 892         for (int i = 1; i < RANGE-1; i++) {
 893             // 4 byte offset -> not alignable with AlignVector
 894             b[i+0] = (int)(a[i+1] & mask);
 895         }
             // Hand back the same array references that were passed in (b modified in place).
 896         return new Object[]{ a, b };
 897     }
 898 
 899     @Test
         // Long kernel where the load index is one element ahead of the store index:
         // b[i] = a[i+1] & mask, for i in [1, RANGE-1).
         // Unlike the byte/short/int variants there is a single rule with no AlignVector
         // condition: long vector nodes are expected on 64-bit with sse4.1 or asimd.
 900     @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0",
 901                   IRNode.AND_VL, "> 0",
 902                   IRNode.STORE_VECTOR, "> 0"},
 903         applyIfPlatform = {"64-bit", "true"},
 904         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
 905     static Object[] test11cL(long[] a, long[] b, long mask) {
 906         for (int i = 1; i < RANGE-1; i++) {
 907             // always alignable (8 byte offset)
 908             b[i+0] = (long)(a[i+1] & mask);
 909         }
             // Hand back the same array references that were passed in (b modified in place).
 910         return new Object[]{ a, b };
 911     }
 912 
 913     @Test
         // Unit-stride byte kernel whose load and store index both add the loop-invariant
         // argument invar: b[i+invar] = a[i+invar] & mask.
         // The rule has no AlignVector condition: byte vector nodes are expected
         // on 64-bit with sse4.1 or asimd.
         // NOTE(review): presumably the caller keeps i+invar inside the arrays - confirm.
 914     @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
 915                   IRNode.AND_VB, "> 0",
 916                   IRNode.STORE_VECTOR, "> 0"},
 917         applyIfPlatform = {"64-bit", "true"},
 918         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
 919     static Object[] test11dB(byte[] a, byte[] b, byte mask, int invar) {
 920         for (int i = 0; i < RANGE; i++) {
 921             b[i+0+invar] = (byte)(a[i+0+invar] & mask);
 922         }
             // Hand back the same array references that were passed in (b modified in place).
 923         return new Object[]{ a, b };
 924     }
 925 
 926     @Test
         // Unit-stride short kernel whose load and store index both add the loop-invariant
         // argument invar: b[i+invar] = a[i+invar] & mask.
         // The rule has no AlignVector condition: short vector nodes are expected
         // on 64-bit with sse4.1 or asimd.
         // NOTE(review): presumably the caller keeps i+invar inside the arrays - confirm.
 927     @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
 928                   IRNode.AND_VS, "> 0",
 929                   IRNode.STORE_VECTOR, "> 0"},
 930         applyIfPlatform = {"64-bit", "true"},
 931         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
 932     static Object[] test11dS(short[] a, short[] b, short mask, int invar) {
 933         for (int i = 0; i < RANGE; i++) {
 934             b[i+0+invar] = (short)(a[i+0+invar] & mask);
 935         }
             // Hand back the same array references that were passed in (b modified in place).
 936         return new Object[]{ a, b };
 937     }
 938 
 939     @Test
         // Unit-stride int kernel whose load and store index both add the loop-invariant
         // argument invar: b[i+invar] = a[i+invar] & mask.
         // The rule has no AlignVector condition: int vector nodes are expected
         // on 64-bit with sse4.1 or asimd.
         // NOTE(review): presumably the caller keeps i+invar inside the arrays - confirm.
 940     @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
 941                   IRNode.AND_VI, "> 0",
 942                   IRNode.STORE_VECTOR, "> 0"},
 943         applyIfPlatform = {"64-bit", "true"},
 944         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
 945     static Object[] test11dI(int[] a, int[] b, int mask, int invar) {
 946         for (int i = 0; i < RANGE; i++) {
 947             b[i+0+invar] = (int)(a[i+0+invar] & mask);
 948         }
             // Hand back the same array references that were passed in (b modified in place).
 949         return new Object[]{ a, b };
 950     }
 951 
 952     @Test
         // Unit-stride long kernel whose load and store index both add the loop-invariant
         // argument invar: b[i+invar] = a[i+invar] & mask.
         // The rule has no AlignVector condition: long vector nodes are expected
         // on 64-bit with sse4.1 or asimd.
         // NOTE(review): presumably the caller keeps i+invar inside the arrays - confirm.
 953     @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0",
 954                   IRNode.AND_VL, "> 0",
 955                   IRNode.STORE_VECTOR, "> 0"},
 956         applyIfPlatform = {"64-bit", "true"},
 957         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
 958     static Object[] test11dL(long[] a, long[] b, long mask, int invar) {
 959         for (int i = 0; i < RANGE; i++) {
 960             b[i+0+invar] = (long)(a[i+0+invar] & mask);
 961         }
             // Hand back the same array references that were passed in (b modified in place).
 962         return new Object[]{ a, b };
 963     }
 964 
 965     @Test
         // Byte kernel with index scale 6: each of the RANGE/16 iterations masks the four
         // elements i*6+0 .. i*6+3 (elements i*6+4 and i*6+5 are never touched).
         // The rule asserts the absence of vector load/and/store nodes on 64-bit with
         // sse4.1 or asimd, independent of AlignVector.
 966     @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
 967                   IRNode.AND_VB, "= 0",
 968                   IRNode.STORE_VECTOR, "= 0"},
 969         applyIfPlatform = {"64-bit", "true"},
 970         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
 971     static Object[] test12(byte[] a, byte[] b, byte mask) {
 972         for (int i = 0; i < RANGE/16; i++) {
 973             // Currently does not vectorize at all
 974             b[i*6 + 0 ] = (byte)(a[i*6 + 0 ] & mask);
 975             b[i*6 + 1 ] = (byte)(a[i*6 + 1 ] & mask);
 976             b[i*6 + 2 ] = (byte)(a[i*6 + 2 ] & mask);
 977             b[i*6 + 3 ] = (byte)(a[i*6 + 3 ] & mask);
 978         }
             // Hand back the same array references that were passed in (b modified in place).
 979         return new Object[]{ a, b };
 980     }
 981 
 982     @Test
         // Increments an int array and a long array in the same unit-stride loop from i = 0.
         // The rule applies only on 64-bit with avx2. It expects int and long load/add
         // vectors whose element count is min(max_int, max_long), plus vector stores.
 983     @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
 984                   IRNode.LOAD_VECTOR_L, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
 985                   IRNode.ADD_VI, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
 986                   IRNode.ADD_VL, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
 987                   IRNode.STORE_VECTOR, "> 0"},
 988         applyIfPlatform = {"64-bit", "true"},
 989         applyIfCPUFeatureOr = {"avx2", "true"})
 990     // require avx to ensure vectors are larger than what unrolling produces
 991     static Object[] test13aIL(int[] a, long[] b) {
 992         for (int i = 0; i < RANGE; i++) {
 993             a[i]++;
 994             b[i]++;
 995         }
             // Hand back the same array references that were passed in (both modified in place).
 996         return new Object[]{ a, b };
 997     }
 998 
 999     @Test
         // Increments an int array and a byte array in the same unit-stride loop from i = 0.
         // The rule expects byte and int vector nodes. It applies only with
         // UseCompactObjectHeaders off, on 64-bit with avx2 or asimd.
1000     @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
1001                   IRNode.LOAD_VECTOR_I, "> 0",
1002                   IRNode.ADD_VB, "> 0",
1003                   IRNode.ADD_VI, "> 0",
1004                   IRNode.STORE_VECTOR, "> 0"},
1005         applyIf = {"UseCompactObjectHeaders", "false"},
1006         applyIfPlatform = {"64-bit", "true"},
1007         applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"})
1008     static Object[] test13aIB(int[] a, byte[] b) {
1009         for (int i = 0; i < RANGE; i++) {
1010             a[i]++;
1011             b[i]++;
1012         }
             // Hand back the same array references that were passed in (both modified in place).
1013         return new Object[]{ a, b };
1014     }
1015 
1016     @Test
         // Increments an int array and a short array in the same unit-stride loop from i = 0.
         // The rule expects short and int vector nodes. It applies only with
         // UseCompactObjectHeaders off, on 64-bit with avx2 or asimd.
1017     @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
1018                   IRNode.LOAD_VECTOR_S, "> 0",
1019                   IRNode.ADD_VI, "> 0",
1020                   IRNode.ADD_VS, "> 0",
1021                   IRNode.STORE_VECTOR, "> 0"},
1022         applyIf = {"UseCompactObjectHeaders", "false"},
1023         applyIfPlatform = {"64-bit", "true"},
1024         applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"})
1025     static Object[] test13aIS(int[] a, short[] b) {
1026         for (int i = 0; i < RANGE; i++) {
1027             a[i]++;
1028             b[i]++;
1029         }
             // Hand back the same array references that were passed in (both modified in place).
1030         return new Object[]{ a, b };
1031     }
1032 
1033     @Test
         // Increments a byte, a short, an int and a long array in one unit-stride loop from i = 0.
         // The rule expects load/add vector nodes for all four element types. It applies only
         // with UseCompactObjectHeaders off, on 64-bit with avx2 or asimd.
1034     @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
1035                   IRNode.LOAD_VECTOR_S, "> 0",
1036                   IRNode.LOAD_VECTOR_I, "> 0",
1037                   IRNode.LOAD_VECTOR_L, "> 0",
1038                   IRNode.ADD_VB, "> 0",
1039                   IRNode.ADD_VS, "> 0",
1040                   IRNode.ADD_VI, "> 0",
1041                   IRNode.ADD_VL, "> 0",
1042                   IRNode.STORE_VECTOR, "> 0"},
1043         applyIf = {"UseCompactObjectHeaders", "false"},
1044         applyIfPlatform = {"64-bit", "true"},
1045         applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"})
1046     static Object[] test13aBSIL(byte[] a, short[] b, int[] c, long[] d) {
1047         for (int i = 0; i < RANGE; i++) {
1048             a[i]++;
1049             b[i]++;
1050             c[i]++;
1051             d[i]++;
1052         }
             // Hand back the same array references that were passed in (all modified in place).
1053         return new Object[]{ a, b, c, d };
1054     }
1055 
1056     @Test
         // Increments an int array and a long array in the same unit-stride loop, starting
         // at i = 1 (element 0 is left untouched).
         // The rule applies only on 64-bit with avx2. It expects int and long load/add
         // vectors whose element count is min(max_int, max_long), plus vector stores.
1057     @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
1058                   IRNode.LOAD_VECTOR_L, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
1059                   IRNode.ADD_VI, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
1060                   IRNode.ADD_VL, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
1061                   IRNode.STORE_VECTOR, "> 0"},
1062         applyIfPlatform = {"64-bit", "true"},
1063         applyIfCPUFeatureOr = {"avx2", "true"})
1064     // require avx to ensure vectors are larger than what unrolling produces
1065     static Object[] test13bIL(int[] a, long[] b) {
1066         for (int i = 1; i < RANGE; i++) {
1067             a[i]++;
1068             b[i]++;
1069         }
             // Hand back the same array references that were passed in (both modified in place).
1070         return new Object[]{ a, b };
1071     }
1072 
1073     @Test
         // Increments an int array and a byte array in the same unit-stride loop, starting
         // at i = 1 (element 0 is left untouched).
         // The rule expects byte and int vector nodes. It applies only with
         // UseCompactObjectHeaders off, on 64-bit with avx2 or asimd.
1074     @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
1075                   IRNode.LOAD_VECTOR_I, "> 0",
1076                   IRNode.ADD_VB, "> 0",
1077                   IRNode.ADD_VI, "> 0",
1078                   IRNode.STORE_VECTOR, "> 0"},
1079         applyIf = {"UseCompactObjectHeaders", "false"},
1080         applyIfPlatform = {"64-bit", "true"},
1081         applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"})
1082     static Object[] test13bIB(int[] a, byte[] b) {
1083         for (int i = 1; i < RANGE; i++) {
1084             a[i]++;
1085             b[i]++;
1086         }
             // Hand back the same array references that were passed in (both modified in place).
1087         return new Object[]{ a, b };
1088     }
1089 
1090     @Test
         // Increments an int array and a short array in the same unit-stride loop, starting
         // at i = 1 (element 0 is left untouched).
         // The rule expects short and int vector nodes. It applies only with
         // UseCompactObjectHeaders off, on 64-bit with avx2 or asimd.
1091     @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
1092                   IRNode.LOAD_VECTOR_S, "> 0",
1093                   IRNode.ADD_VI, "> 0",
1094                   IRNode.ADD_VS, "> 0",
1095                   IRNode.STORE_VECTOR, "> 0"},
1096         applyIf = {"UseCompactObjectHeaders", "false"},
1097         applyIfPlatform = {"64-bit", "true"},
1098         applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"})
1099     static Object[] test13bIS(int[] a, short[] b) {
1100         for (int i = 1; i < RANGE; i++) {
1101             a[i]++;
1102             b[i]++;
1103         }
             // Hand back the same array references that were passed in (both modified in place).
1104         return new Object[]{ a, b };
1105     }
1106 
1107     @Test
         // Increments a byte, a short, an int and a long array in one unit-stride loop,
         // starting at i = 1 (element 0 is left untouched).
         // The rule expects load/add vector nodes for all four element types. It applies only
         // with UseCompactObjectHeaders off, on 64-bit with avx2 or asimd.
1108     @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
1109                   IRNode.LOAD_VECTOR_S, "> 0",
1110                   IRNode.LOAD_VECTOR_I, "> 0",
1111                   IRNode.LOAD_VECTOR_L, "> 0",
1112                   IRNode.ADD_VB, "> 0",
1113                   IRNode.ADD_VS, "> 0",
1114                   IRNode.ADD_VI, "> 0",
1115                   IRNode.ADD_VL, "> 0",
1116                   IRNode.STORE_VECTOR, "> 0"},
1117         applyIf = {"UseCompactObjectHeaders", "false"},
1118         applyIfPlatform = {"64-bit", "true"},
1119         applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"})
1120     static Object[] test13bBSIL(byte[] a, short[] b, int[] c, long[] d) {
1121         for (int i = 1; i < RANGE; i++) {
1122             a[i]++;
1123             b[i]++;
1124             c[i]++;
1125             d[i]++;
1126         }
             // Hand back the same array references that were passed in (all modified in place).
1127         return new Object[]{ a, b, c, d };
1128     }
1129 
1130     @Test
         // Increments the 16 bytes a[i+0..i+15] per iteration with stride 9. The stride is
         // smaller than the 16-element window, so consecutive iterations touch overlapping elements.
         // Both rules need a 64-bit platform and sse4.1 or asimd.
         // Rule 1 (AlignVector off): expects byte vector nodes.
         // Rule 2 (AlignVector on): expects no vector load/add/store nodes.
1131     @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
1132                   IRNode.ADD_VB, "> 0",
1133                   IRNode.STORE_VECTOR, "> 0"},
1134         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
1135         applyIfPlatform = {"64-bit", "true"},
1136         applyIf = {"AlignVector", "false"})
1137     @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
1138                   IRNode.ADD_VB, "= 0",
1139                   IRNode.STORE_VECTOR, "= 0"},
1140         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
1141         applyIfPlatform = {"64-bit", "true"},
1142         applyIf = {"AlignVector", "true"})
1143     static Object[] test14aB(byte[] a) {
1144         // non-power-of-2 stride
1145         for (int i = 0; i < RANGE-20; i+=9) {
1146             a[i+0]++;
1147             a[i+1]++;
1148             a[i+2]++;
1149             a[i+3]++;
1150             a[i+4]++;
1151             a[i+5]++;
1152             a[i+6]++;
1153             a[i+7]++;
1154             a[i+8]++;
1155             a[i+9]++;
1156             a[i+10]++;
1157             a[i+11]++;
1158             a[i+12]++;
1159             a[i+13]++;
1160             a[i+14]++;
1161             a[i+15]++;
1162         }
             // Hand back the same array reference that was passed in (modified in place).
1163         return new Object[]{ a };
1164     }
1165 
1166     @Test
         // Increments the 16 bytes a[i+0..i+15] per iteration with stride 3. The stride is
         // smaller than the 16-element window, so consecutive iterations touch overlapping elements.
         // Both rules need a 64-bit platform and sse4.1 or asimd.
         // Rule 1 (AlignVector off): expects byte vector nodes.
         // Rule 2 (AlignVector on): expects no vector load/add/store nodes.
1167     @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
1168                   IRNode.ADD_VB, "> 0",
1169                   IRNode.STORE_VECTOR, "> 0"},
1170         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
1171         applyIfPlatform = {"64-bit", "true"},
1172         applyIf = {"AlignVector", "false"})
1173     @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
1174                   IRNode.ADD_VB, "= 0",
1175                   IRNode.STORE_VECTOR, "= 0"},
1176         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
1177         applyIfPlatform = {"64-bit", "true"},
1178         applyIf = {"AlignVector", "true"})
1179     static Object[] test14bB(byte[] a) {
1180         // non-power-of-2 stride
1181         for (int i = 0; i < RANGE-20; i+=3) {
1182             a[i+0]++;
1183             a[i+1]++;
1184             a[i+2]++;
1185             a[i+3]++;
1186             a[i+4]++;
1187             a[i+5]++;
1188             a[i+6]++;
1189             a[i+7]++;
1190             a[i+8]++;
1191             a[i+9]++;
1192             a[i+10]++;
1193             a[i+11]++;
1194             a[i+12]++;
1195             a[i+13]++;
1196             a[i+14]++;
1197             a[i+15]++;
1198         }
             // Hand back the same array reference that was passed in (modified in place).
1199         return new Object[]{ a };
1200     }
1201 
1202     @Test
         // Increments the 16 bytes a[i+0..i+15] per iteration with stride 5. The stride is
         // smaller than the 16-element window, so consecutive iterations touch overlapping elements.
         // Both rules need a 64-bit platform and sse4.1 or asimd.
         // Rule 1 (AlignVector off): expects byte vector nodes.
         // Rule 2 (AlignVector on): expects no vector load/add/store nodes.
1203     @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
1204                   IRNode.ADD_VB, "> 0",
1205                   IRNode.STORE_VECTOR, "> 0"},
1206         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
1207         applyIfPlatform = {"64-bit", "true"},
1208         applyIf = {"AlignVector", "false"})
1209     @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
1210                   IRNode.ADD_VB, "= 0",
1211                   IRNode.STORE_VECTOR, "= 0"},
1212         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
1213         applyIfPlatform = {"64-bit", "true"},
1214         applyIf = {"AlignVector", "true"})
1215     static Object[] test14cB(byte[] a) {
1216         // non-power-of-2 stride
1217         for (int i = 0; i < RANGE-20; i+=5) {
1218             a[i+0]++;
1219             a[i+1]++;
1220             a[i+2]++;
1221             a[i+3]++;
1222             a[i+4]++;
1223             a[i+5]++;
1224             a[i+6]++;
1225             a[i+7]++;
1226             a[i+8]++;
1227             a[i+9]++;
1228             a[i+10]++;
1229             a[i+11]++;
1230             a[i+12]++;
1231             a[i+13]++;
1232             a[i+14]++;
1233             a[i+15]++;
1234         }
             // Hand back the same array reference that was passed in (modified in place).
1235         return new Object[]{ a };
1236     }
1237 
1238     @Test
         // Increments the 16 bytes a[53*i+0 .. 53*i+15] for each i in [0, RANGE/64-20).
         // No @IR rule is attached, so the IR shape is not checked for this kernel.
1239     // IR rules difficult because of modulo wrapping with offset after peeling.
1240     static Object[] test15aB(byte[] a) {
1241         // non-power-of-2 scale
1242         for (int i = 0; i < RANGE/64-20; i++) {
1243             a[53*i+0]++;
1244             a[53*i+1]++;
1245             a[53*i+2]++;
1246             a[53*i+3]++;
1247             a[53*i+4]++;
1248             a[53*i+5]++;
1249             a[53*i+6]++;
1250             a[53*i+7]++;
1251             a[53*i+8]++;
1252             a[53*i+9]++;
1253             a[53*i+10]++;
1254             a[53*i+11]++;
1255             a[53*i+12]++;
1256             a[53*i+13]++;
1257             a[53*i+14]++;
1258             a[53*i+15]++;
1259         }
             // Hand back the same array reference that was passed in (modified in place).
1260         return new Object[]{ a };
1261     }
1262 
1263     @Test
         // Increments the 16 bytes a[25*i+0 .. 25*i+15] for each i in [0, RANGE/64-20).
         // No @IR rule is attached, so the IR shape is not checked for this kernel.
1264     // IR rules difficult because of modulo wrapping with offset after peeling.
1265     static Object[] test15bB(byte[] a) {
1266         // non-power-of-2 scale
1267         for (int i = 0; i < RANGE/64-20; i++) {
1268             a[25*i+0]++;
1269             a[25*i+1]++;
1270             a[25*i+2]++;
1271             a[25*i+3]++;
1272             a[25*i+4]++;
1273             a[25*i+5]++;
1274             a[25*i+6]++;
1275             a[25*i+7]++;
1276             a[25*i+8]++;
1277             a[25*i+9]++;
1278             a[25*i+10]++;
1279             a[25*i+11]++;
1280             a[25*i+12]++;
1281             a[25*i+13]++;
1282             a[25*i+14]++;
1283             a[25*i+15]++;
1284         }
             // Hand back the same array reference that was passed in (modified in place).
1285         return new Object[]{ a };
1286     }
1287 
1288     @Test
         // Increments the 16 bytes a[19*i+0 .. 19*i+15] for each i in [0, RANGE/64-20).
         // Scale 19 is close to the 16-element window, so consecutive windows are only 3 apart.
         // No @IR rule is attached, so the IR shape is not checked for this kernel.
1289     // IR rules difficult because of modulo wrapping with offset after peeling.
1290     static Object[] test15cB(byte[] a) {
1291         // non-power-of-2 scale
1292         for (int i = 0; i < RANGE/64-20; i++) {
1293             a[19*i+0]++;
1294             a[19*i+1]++;
1295             a[19*i+2]++;
1296             a[19*i+3]++;
1297             a[19*i+4]++;
1298             a[19*i+5]++;
1299             a[19*i+6]++;
1300             a[19*i+7]++;
1301             a[19*i+8]++;
1302             a[19*i+9]++;
1303             a[19*i+10]++;
1304             a[19*i+11]++;
1305             a[19*i+12]++;
1306             a[19*i+13]++;
1307             a[19*i+14]++;
1308             a[19*i+15]++;
1309         }
             // Hand back the same array reference that was passed in (modified in place).
1310         return new Object[]{ a };
1311     }
1312 
1313     @Test
         // Scale-2 kernel over two arrays: per iteration it increments the 15 bytes
         // a[2*i+0 .. 2*i+14] and the 4 shorts b[2*i+0 .. 2*i+3]. Windows of consecutive
         // iterations overlap because the index only advances by 2.
         // No @IR rule is attached, so the IR shape is not checked for this kernel.
1314     static Object[] test16a(byte[] a, short[] b) {
1315         // infinite loop issues
1316         for (int i = 0; i < RANGE/2-20; i++) {
1317             a[2*i+0]++;
1318             a[2*i+1]++;
1319             a[2*i+2]++;
1320             a[2*i+3]++;
1321             a[2*i+4]++;
1322             a[2*i+5]++;
1323             a[2*i+6]++;
1324             a[2*i+7]++;
1325             a[2*i+8]++;
1326             a[2*i+9]++;
1327             a[2*i+10]++;
1328             a[2*i+11]++;
1329             a[2*i+12]++;
1330             a[2*i+13]++;
1331             a[2*i+14]++;
1332
1333             b[2*i+0]++;
1334             b[2*i+1]++;
1335             b[2*i+2]++;
1336             b[2*i+3]++;
1337         }
             // Hand back the same array references that were passed in (both modified in place).
1338         return new Object[]{ a, b };
1339     }
1340 
1341     @Test
         // Single-array variant of the scale-2 kernel: per iteration it increments the
         // 15 bytes a[2*i+0 .. 2*i+14]. Windows of consecutive iterations overlap.
         // No @IR rule is attached, so the IR shape is not checked for this kernel.
1342     static Object[] test16b(byte[] a) {
1343         // infinite loop issues
1344         for (int i = 0; i < RANGE/2-20; i++) {
1345             a[2*i+0]++;
1346             a[2*i+1]++;
1347             a[2*i+2]++;
1348             a[2*i+3]++;
1349             a[2*i+4]++;
1350             a[2*i+5]++;
1351             a[2*i+6]++;
1352             a[2*i+7]++;
1353             a[2*i+8]++;
1354             a[2*i+9]++;
1355             a[2*i+10]++;
1356             a[2*i+11]++;
1357             a[2*i+12]++;
1358             a[2*i+13]++;
1359             a[2*i+14]++;
1360         }
             // Hand back the same array reference that was passed in (modified in place).
1361         return new Object[]{ a };
1362     }
1363 
1364     @Test
         // Increments every long of a through Unsafe unaligned accessors instead of array
         // indexing. The byte address is ARRAY_LONG_BASE_OFFSET + 8*i, i.e. element i.
         // The rule has no AlignVector condition: long vector nodes are expected
         // on 64-bit with sse4.1 or asimd.
1365     @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0",
1366                   IRNode.ADD_VL, "> 0",
1367                   IRNode.STORE_VECTOR, "> 0"},
1368         applyIfPlatform = {"64-bit", "true"},
1369         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
1370     static Object[] test17a(long[] a) {
1371         // Unsafe: vectorizes with profiling (not xcomp)
1372         for (int i = 0; i < RANGE; i++) {
1373             int adr = UNSAFE.ARRAY_LONG_BASE_OFFSET + 8 * i;
1374             long v = UNSAFE.getLongUnaligned(a, adr);
1375             UNSAFE.putLongUnaligned(a, adr, v + 1);
1376         }
             // Hand back the same array reference that was passed in (modified in place).
1377         return new Object[]{ a };
1378     }
1379 
1380     @Test
         // Unsafe read-modify-write of 8-byte values at ARRAY_LONG_BASE_OFFSET + 8*i + 1,
         // i.e. shifted one byte past the start of element i, so each access straddles
         // elements i and i+1. The loop stops at RANGE-1 so the last access stays inside
         // the array. No @IR rule is attached.
1381     // Difficult to write good IR rule. Modulo calculus overflow can create non-power-of-2 packs.
1382     static Object[] test17b(long[] a) {
1383         // Not alignable
1384         for (int i = 0; i < RANGE-1; i++) {
1385             int adr = UNSAFE.ARRAY_LONG_BASE_OFFSET + 8 * i + 1;
1386             long v = UNSAFE.getLongUnaligned(a, adr);
1387             UNSAFE.putLongUnaligned(a, adr, v + 1);
1388         }
             // Hand back the same array reference that was passed in (modified in place).
1389         return new Object[]{ a };
1390     }
1391 
1392     @Test
         // Unsafe kernel with stride 4: each iteration increments the two longs at byte
         // offsets +0 and +8 from ARRAY_LONG_BASE_OFFSET + 8*i (elements i and i+1).
         // Both loads are issued before both stores.
         // The rule expects 2-element long vectors when MaxVectorSize >= 32,
         // on 64-bit with sse4.1 or asimd.
1393     @IR(counts = {IRNode.LOAD_VECTOR_L, IRNode.VECTOR_SIZE_2, "> 0",
1394                   IRNode.ADD_VL,        IRNode.VECTOR_SIZE_2, "> 0",
1395                   IRNode.STORE_VECTOR, "> 0"},
1396         applyIf = {"MaxVectorSize", ">=32"},
1397         applyIfPlatform = {"64-bit", "true"},
1398         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
1399     static Object[] test17c(long[] a) {
1400         // Unsafe: aligned vectorizes
1401         for (int i = 0; i < RANGE-1; i+=4) {
1402             int adr = UNSAFE.ARRAY_LONG_BASE_OFFSET + 8 * i;
1403             long v0 = UNSAFE.getLongUnaligned(a, adr + 0);
1404             long v1 = UNSAFE.getLongUnaligned(a, adr + 8);
1405             UNSAFE.putLongUnaligned(a, adr + 0, v0 + 1);
1406             UNSAFE.putLongUnaligned(a, adr + 8, v1 + 1);
1407         }
             // Hand back the same array reference that was passed in (modified in place).
1408         return new Object[]{ a };
1409     }
1410 
1411     @Test
         // Unsafe kernel with stride 4 and a one-byte shift: each iteration increments the
         // two 8-byte values at offsets +0 and +8 from ARRAY_LONG_BASE_OFFSET + 8*i + 1.
         // Rule 1 (AlignVector off, MaxVectorSize >= 64, avx512 or asimd): expects
         // 2-element long vectors.
         // Rule 2 (AlignVector on, sse4.1 or asimd): expects no vector load/add/store nodes.
1412     @IR(counts = {IRNode.LOAD_VECTOR_L, IRNode.VECTOR_SIZE_2, "> 0",
1413                   IRNode.ADD_VL,        IRNode.VECTOR_SIZE_2, "> 0",
1414                   IRNode.STORE_VECTOR, "> 0"},
1415         applyIfCPUFeatureOr = {"avx512", "true", "asimd", "true"},
1416         applyIfPlatform = {"64-bit", "true"},
1417         applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=64"})
1418     // Ensure vector width is large enough to fit 64 byte for longs:
1419     // The offsets are: 25, 33, 57, 65
1420     // In modulo 32:    25,  1, 25,  1  -> does not vectorize
1421     // In modulo 64:    25, 33, 57,  1  -> at least first pair vectorizes
1422     // This problem is because we compute modulo vector width in memory_alignment.
1423     @IR(counts = {IRNode.LOAD_VECTOR_L, "= 0",
1424                   IRNode.ADD_VL, "= 0",
1425                   IRNode.STORE_VECTOR, "= 0"},
1426         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
1427         applyIfPlatform = {"64-bit", "true"},
1428         applyIf = {"AlignVector", "true"})
1429     static Object[] test17d(long[] a) {
1430         // Not alignable
1431         for (int i = 0; i < RANGE-1; i+=4) {
1432             int adr = UNSAFE.ARRAY_LONG_BASE_OFFSET + 8 * i + 1;
1433             long v0 = UNSAFE.getLongUnaligned(a, adr + 0);
1434             long v1 = UNSAFE.getLongUnaligned(a, adr + 8);
1435             UNSAFE.putLongUnaligned(a, adr + 0, v0 + 1);
1436             UNSAFE.putLongUnaligned(a, adr + 8, v1 + 1);
1437         }
             // Hand back the same array reference that was passed in (modified in place).
1438         return new Object[]{ a };
1439     }
1440 
1441     @Test
         // Mixes loop-invariant byte stores with an induction-variable int store: every
         // iteration writes 1 to a[0], 2 to b[i], then 1 to a[1].
         // No @IR rule is attached, so the IR shape is not checked for this kernel.
1442     static Object[] test18a(byte[] a, int[] b) {
1443         // scale = 0  -->  no iv
1444         for (int i = 0; i < RANGE; i++) {
1445             a[0] = 1;
1446             b[i] = 2;
1447             a[1] = 1;
1448         }
             // Hand back the same array references that were passed in (both modified in place).
1449         return new Object[]{ a, b };
1450     }
1451 
1452     @Test
         // Mixes loop-invariant byte stores with an induction-variable int store, with the
         // constant indices shifted by one: every iteration writes 1 to a[1], 2 to b[i],
         // then 1 to a[2].
         // No @IR rule is attached, so the IR shape is not checked for this kernel.
1453     static Object[] test18b(byte[] a, int[] b) {
1454         // scale = 0  -->  no iv
1455         for (int i = 0; i < RANGE; i++) {
1456             a[1] = 1;
1457             b[i] = 2;
1458             a[2] = 1;
1459         }
             // Hand back the same array references that were passed in (both modified in place).
1460         return new Object[]{ a, b };
1461     }
1462 
1463     @Test
         // Down-counting copy loop: i runs from 5000 to 1, while the array index
         // RANGE_FINAL - i rises from RANGE_FINAL-5000 to RANGE_FINAL-1.
         // No @IR rule is attached, so the IR shape is not checked for this kernel.
         // NOTE(review): assumes RANGE_FINAL >= 5000 and both arrays hold at least
         // RANGE_FINAL elements - confirm against the field definition and the caller.
1464     static Object[] test19(int[] a, int[] b) {
1465         for (int i = 5000; i > 0; i--) {
1466             a[RANGE_FINAL - i] = b[RANGE_FINAL - i];
1467         }
             // Hand back the same array references that were passed in (a modified in place).
1468         return new Object[]{ a, b };
1469     }
1470 
1471     @Test
         // Scale-2 byte kernel with init 1 and a constant +30 in every index: each iteration
         // increments a[2*i+30 .. 2*i+33], so windows of consecutive iterations overlap by two.
         // No @IR rule is attached, so the IR shape is not checked for this kernel.
1472     static Object[] test20(byte[] a) {
1473         // Example where it is easy to pass alignment check,
1474         // but used to fail the alignment calculation
1475         for (int i = 1; i < RANGE/2-50; i++) {
1476             a[2*i+0+30]++;
1477             a[2*i+1+30]++;
1478             a[2*i+2+30]++;
1479             a[2*i+3+30]++;
1480         }
             // Hand back the same array reference that was passed in (modified in place).
1481         return new Object[]{ a };
1482     }
1483 }