1 /* 2 * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 */ 23 24 package compiler.loopopts.superword; 25 26 import compiler.lib.ir_framework.*; 27 import jdk.test.lib.Utils; 28 import jdk.test.whitebox.WhiteBox; 29 import jdk.internal.misc.Unsafe; 30 import java.lang.reflect.Array; 31 import java.util.Map; 32 import java.util.HashMap; 33 import java.util.Random; 34 import java.nio.ByteOrder; 35 36 /* 37 * @test id=NoAlignVector 38 * @bug 8310190 39 * @summary Test AlignVector with various loop init, stride, scale, invar, etc. 40 * @modules java.base/jdk.internal.misc 41 * @library /test/lib / 42 * @requires vm.compiler2.enabled 43 * @run driver compiler.loopopts.superword.TestAlignVector NoAlignVector 44 */ 45 46 /* 47 * @test id=AlignVector 48 * @bug 8310190 49 * @summary Test AlignVector with various loop init, stride, scale, invar, etc. 
50 * @modules java.base/jdk.internal.misc 51 * @library /test/lib / 52 * @requires vm.compiler2.enabled 53 * @run driver compiler.loopopts.superword.TestAlignVector AlignVector 54 */ 55 56 /* 57 * @test id=VerifyAlignVector 58 * @bug 8310190 59 * @summary Test AlignVector with various loop init, stride, scale, invar, etc. 60 * @modules java.base/jdk.internal.misc 61 * @library /test/lib / 62 * @requires vm.compiler2.enabled 63 * @run driver compiler.loopopts.superword.TestAlignVector VerifyAlignVector 64 */ 65 66 public class TestAlignVector { 67 static int RANGE = 1024*8; 68 static int RANGE_FINAL = 1024*8; 69 private static final Unsafe UNSAFE = Unsafe.getUnsafe(); 70 private static final Random RANDOM = Utils.getRandomInstance(); 71 72 // Inputs 73 byte[] aB; 74 byte[] bB; 75 byte mB = (byte)31; 76 short[] aS; 77 short[] bS; 78 short mS = (short)0xF0F0; 79 int[] aI; 80 int[] bI; 81 int mI = 0xF0F0F0F0; 82 long[] aL; 83 long[] bL; 84 long mL = 0xF0F0F0F0F0F0F0F0L; 85 86 // List of tests 87 Map<String,TestFunction> tests = new HashMap<String,TestFunction>(); 88 89 // List of gold, the results from the first run before compilation 90 Map<String,Object[]> golds = new HashMap<String,Object[]>(); 91 92 interface TestFunction { 93 Object[] run(); 94 } 95 96 public static void main(String[] args) { 97 TestFramework framework = new TestFramework(TestAlignVector.class); 98 framework.addFlags("--add-modules", "java.base", "--add-exports", "java.base/jdk.internal.misc=ALL-UNNAMED", 99 "-XX:LoopUnrollLimit=250"); 100 101 switch (args[0]) { 102 case "NoAlignVector" -> { framework.addFlags("-XX:-AlignVector"); } 103 case "AlignVector" -> { framework.addFlags("-XX:+AlignVector"); } 104 case "VerifyAlignVector" -> { framework.addFlags("-XX:+AlignVector", "-XX:+IgnoreUnrecognizedVMOptions", "-XX:+VerifyAlignVector"); } 105 default -> { throw new RuntimeException("Test argument not recognized: " + args[0]); } 106 } 107 framework.start(); 108 } 109 110 public TestAlignVector() { 
111 // Generate input once 112 aB = generateB(); 113 bB = generateB(); 114 aS = generateS(); 115 bS = generateS(); 116 aI = generateI(); 117 bI = generateI(); 118 aL = generateL(); 119 bL = generateL(); 120 121 // Add all tests to list 122 tests.put("test0", () -> { return test0(aB.clone(), bB.clone(), mB); }); 123 tests.put("test1", () -> { return test1(aB.clone(), bB.clone(), mB); }); 124 tests.put("test2", () -> { return test2(aB.clone(), bB.clone(), mB); }); 125 tests.put("test3", () -> { return test3(aB.clone(), bB.clone(), mB); }); 126 tests.put("test4", () -> { return test4(aB.clone(), bB.clone(), mB); }); 127 tests.put("test5", () -> { return test5(aB.clone(), bB.clone(), mB, 0); }); 128 tests.put("test6", () -> { return test6(aB.clone(), bB.clone(), mB); }); 129 tests.put("test7", () -> { return test7(aS.clone(), bS.clone(), mS); }); 130 tests.put("test8", () -> { return test8(aB.clone(), bB.clone(), mB, 0); }); 131 tests.put("test8", () -> { return test8(aB.clone(), bB.clone(), mB, 1); }); 132 tests.put("test9", () -> { return test9(aB.clone(), bB.clone(), mB); }); 133 134 tests.put("test10a", () -> { return test10a(aB.clone(), bB.clone(), mB); }); 135 tests.put("test10b", () -> { return test10b(aB.clone(), bB.clone(), mB); }); 136 tests.put("test10c", () -> { return test10c(aS.clone(), bS.clone(), mS); }); 137 tests.put("test10d", () -> { return test10d(aS.clone(), bS.clone(), mS); }); 138 139 tests.put("test11aB", () -> { return test11aB(aB.clone(), bB.clone(), mB); }); 140 tests.put("test11aS", () -> { return test11aS(aS.clone(), bS.clone(), mS); }); 141 tests.put("test11aI", () -> { return test11aI(aI.clone(), bI.clone(), mI); }); 142 tests.put("test11aL", () -> { return test11aL(aL.clone(), bL.clone(), mL); }); 143 144 tests.put("test11bB", () -> { return test11bB(aB.clone(), bB.clone(), mB); }); 145 tests.put("test11bS", () -> { return test11bS(aS.clone(), bS.clone(), mS); }); 146 tests.put("test11bI", () -> { return test11bI(aI.clone(), bI.clone(), 
mI); }); 147 tests.put("test11bL", () -> { return test11bL(aL.clone(), bL.clone(), mL); }); 148 149 tests.put("test11cB", () -> { return test11cB(aB.clone(), bB.clone(), mB); }); 150 tests.put("test11cS", () -> { return test11cS(aS.clone(), bS.clone(), mS); }); 151 tests.put("test11cI", () -> { return test11cI(aI.clone(), bI.clone(), mI); }); 152 tests.put("test11cL", () -> { return test11cL(aL.clone(), bL.clone(), mL); }); 153 154 tests.put("test11dB", () -> { return test11dB(aB.clone(), bB.clone(), mB, 0); }); 155 tests.put("test11dS", () -> { return test11dS(aS.clone(), bS.clone(), mS, 0); }); 156 tests.put("test11dI", () -> { return test11dI(aI.clone(), bI.clone(), mI, 0); }); 157 tests.put("test11dL", () -> { return test11dL(aL.clone(), bL.clone(), mL, 0); }); 158 159 tests.put("test12", () -> { return test12(aB.clone(), bB.clone(), mB); }); 160 161 tests.put("test13aIL", () -> { return test13aIL(aI.clone(), aL.clone()); }); 162 tests.put("test13aIB", () -> { return test13aIB(aI.clone(), aB.clone()); }); 163 tests.put("test13aIS", () -> { return test13aIS(aI.clone(), aS.clone()); }); 164 tests.put("test13aBSIL", () -> { return test13aBSIL(aB.clone(), aS.clone(), aI.clone(), aL.clone()); }); 165 166 tests.put("test13bIL", () -> { return test13bIL(aI.clone(), aL.clone()); }); 167 tests.put("test13bIB", () -> { return test13bIB(aI.clone(), aB.clone()); }); 168 tests.put("test13bIS", () -> { return test13bIS(aI.clone(), aS.clone()); }); 169 tests.put("test13bBSIL", () -> { return test13bBSIL(aB.clone(), aS.clone(), aI.clone(), aL.clone()); }); 170 171 tests.put("test14aB", () -> { return test14aB(aB.clone()); }); 172 tests.put("test14bB", () -> { return test14bB(aB.clone()); }); 173 tests.put("test14cB", () -> { return test14cB(aB.clone()); }); 174 175 tests.put("test15aB", () -> { return test15aB(aB.clone()); }); 176 tests.put("test15bB", () -> { return test15bB(aB.clone()); }); 177 tests.put("test15cB", () -> { return test15cB(aB.clone()); }); 178 179 
tests.put("test16a", () -> { return test16a(aB.clone(), aS.clone()); }); 180 tests.put("test16b", () -> { return test16b(aB.clone()); }); 181 182 tests.put("test17a", () -> { return test17a(aL.clone()); }); 183 tests.put("test17b", () -> { return test17b(aL.clone()); }); 184 tests.put("test17c", () -> { return test17c(aL.clone()); }); 185 tests.put("test17d", () -> { return test17d(aL.clone()); }); 186 187 tests.put("test18a", () -> { return test18a(aB.clone(), aI.clone()); }); 188 tests.put("test18b", () -> { return test18b(aB.clone(), aI.clone()); }); 189 190 tests.put("test19", () -> { return test19(aI.clone(), bI.clone()); }); 191 tests.put("test20", () -> { return test20(aB.clone()); }); 192 193 // Compute gold value for all test methods before compilation 194 for (Map.Entry<String,TestFunction> entry : tests.entrySet()) { 195 String name = entry.getKey(); 196 TestFunction test = entry.getValue(); 197 Object[] gold = test.run(); 198 golds.put(name, gold); 199 } 200 } 201 202 @Warmup(100) 203 @Run(test = {"test0", 204 "test1", 205 "test2", 206 "test3", 207 "test4", 208 "test5", 209 "test6", 210 "test7", 211 "test8", 212 "test9", 213 "test10a", 214 "test10b", 215 "test10c", 216 "test10d", 217 "test11aB", 218 "test11aS", 219 "test11aI", 220 "test11aL", 221 "test11bB", 222 "test11bS", 223 "test11bI", 224 "test11bL", 225 "test11cB", 226 "test11cS", 227 "test11cI", 228 "test11cL", 229 "test11dB", 230 "test11dS", 231 "test11dI", 232 "test11dL", 233 "test12", 234 "test13aIL", 235 "test13aIB", 236 "test13aIS", 237 "test13aBSIL", 238 "test13bIL", 239 "test13bIB", 240 "test13bIS", 241 "test13bBSIL", 242 "test14aB", 243 "test14bB", 244 "test14cB", 245 "test15aB", 246 "test15bB", 247 "test15cB", 248 "test16a", 249 "test16b", 250 "test17a", 251 "test17b", 252 "test17c", 253 "test17d", 254 "test18a", 255 "test18b", 256 "test19", 257 "test20"}) 258 public void runTests() { 259 for (Map.Entry<String,TestFunction> entry : tests.entrySet()) { 260 String name = entry.getKey(); 
261 TestFunction test = entry.getValue(); 262 // Recall gold value from before compilation 263 Object[] gold = golds.get(name); 264 // Compute new result 265 Object[] result = test.run(); 266 // Compare gold and new result 267 verify(name, gold, result); 268 } 269 } 270 271 static byte[] generateB() { 272 byte[] a = new byte[RANGE]; 273 for (int i = 0; i < a.length; i++) { 274 a[i] = (byte)RANDOM.nextInt(); 275 } 276 return a; 277 } 278 279 static short[] generateS() { 280 short[] a = new short[RANGE]; 281 for (int i = 0; i < a.length; i++) { 282 a[i] = (short)RANDOM.nextInt(); 283 } 284 return a; 285 } 286 287 static int[] generateI() { 288 int[] a = new int[RANGE]; 289 for (int i = 0; i < a.length; i++) { 290 a[i] = RANDOM.nextInt(); 291 } 292 return a; 293 } 294 295 static long[] generateL() { 296 long[] a = new long[RANGE]; 297 for (int i = 0; i < a.length; i++) { 298 a[i] = RANDOM.nextLong(); 299 } 300 return a; 301 } 302 303 static void verify(String name, Object[] gold, Object[] result) { 304 if (gold.length != result.length) { 305 throw new RuntimeException("verify " + name + ": not the same number of outputs: gold.length = " + 306 gold.length + ", result.length = " + result.length); 307 } 308 for (int i = 0; i < gold.length; i++) { 309 Object g = gold[i]; 310 Object r = result[i]; 311 if (g.getClass() != r.getClass() || !g.getClass().isArray() || !r.getClass().isArray()) { 312 throw new RuntimeException("verify " + name + ": must both be array of same type:" + 313 " gold[" + i + "].getClass() = " + g.getClass().getSimpleName() + 314 " result[" + i + "].getClass() = " + r.getClass().getSimpleName()); 315 } 316 if (g == r) { 317 throw new RuntimeException("verify " + name + ": should be two separate arrays (with identical content):" + 318 " gold[" + i + "] == result[" + i + "]"); 319 } 320 if (Array.getLength(g) != Array.getLength(r)) { 321 throw new RuntimeException("verify " + name + ": arrays must have same length:" + 322 " gold[" + i + "].length = " + 
Array.getLength(g) + 323 " result[" + i + "].length = " + Array.getLength(r)); 324 } 325 Class c = g.getClass().getComponentType(); 326 if (c == byte.class) { 327 verifyB(name, i, (byte[])g, (byte[])r); 328 } else if (c == short.class) { 329 verifyS(name, i, (short[])g, (short[])r); 330 } else if (c == int.class) { 331 verifyI(name, i, (int[])g, (int[])r); 332 } else if (c == long.class) { 333 verifyL(name, i, (long[])g, (long[])r); 334 } else { 335 throw new RuntimeException("verify " + name + ": array type not supported for verify:" + 336 " gold[" + i + "].getClass() = " + g.getClass().getSimpleName() + 337 " result[" + i + "].getClass() = " + r.getClass().getSimpleName()); 338 } 339 } 340 } 341 342 static void verifyB(String name, int i, byte[] g, byte[] r) { 343 for (int j = 0; j < g.length; j++) { 344 if (g[j] != r[j]) { 345 throw new RuntimeException("verify " + name + ": arrays must have same content:" + 346 " gold[" + i + "][" + j + "] = " + g[j] + 347 " result[" + i + "][" + j + "] = " + r[j]); 348 } 349 } 350 } 351 352 static void verifyS(String name, int i, short[] g, short[] r) { 353 for (int j = 0; j < g.length; j++) { 354 if (g[j] != r[j]) { 355 throw new RuntimeException("verify " + name + ": arrays must have same content:" + 356 " gold[" + i + "][" + j + "] = " + g[j] + 357 " result[" + i + "][" + j + "] = " + r[j]); 358 } 359 } 360 } 361 362 static void verifyI(String name, int i, int[] g, int[] r) { 363 for (int j = 0; j < g.length; j++) { 364 if (g[j] != r[j]) { 365 throw new RuntimeException("verify " + name + ": arrays must have same content:" + 366 " gold[" + i + "][" + j + "] = " + g[j] + 367 " result[" + i + "][" + j + "] = " + r[j]); 368 } 369 } 370 } 371 372 static void verifyL(String name, int i, long[] g, long[] r) { 373 for (int j = 0; j < g.length; j++) { 374 if (g[j] != r[j]) { 375 throw new RuntimeException("verify " + name + ": arrays must have same content:" + 376 " gold[" + i + "][" + j + "] = " + g[j] + 377 " result[" + i + "][" 
+ j + "] = " + r[j]); 378 } 379 } 380 } 381 382 @Test 383 @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0", 384 IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0", 385 IRNode.STORE_VECTOR, "> 0"}, 386 applyIf = {"MaxVectorSize", ">=8"}, 387 applyIfPlatform = {"64-bit", "true"}, 388 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) 389 static Object[] test0(byte[] a, byte[] b, byte mask) { 390 for (int i = 0; i < RANGE; i+=8) { 391 // Safe to vectorize with AlignVector 392 b[i+0] = (byte)(a[i+0] & mask); // offset 0, align 0 393 b[i+1] = (byte)(a[i+1] & mask); 394 b[i+2] = (byte)(a[i+2] & mask); 395 b[i+3] = (byte)(a[i+3] & mask); 396 } 397 return new Object[]{ a, b }; 398 } 399 400 @Test 401 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", 402 IRNode.AND_VB, "> 0", 403 IRNode.STORE_VECTOR, "> 0"}, 404 applyIfPlatform = {"64-bit", "true"}, 405 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"}) 406 static Object[] test1(byte[] a, byte[] b, byte mask) { 407 for (int i = 0; i < RANGE; i+=8) { 408 // Safe to vectorize with AlignVector 409 b[i+0] = (byte)(a[i+0] & mask); // offset 0, align 0 410 b[i+1] = (byte)(a[i+1] & mask); 411 b[i+2] = (byte)(a[i+2] & mask); 412 b[i+3] = (byte)(a[i+3] & mask); 413 b[i+4] = (byte)(a[i+4] & mask); 414 b[i+5] = (byte)(a[i+5] & mask); 415 b[i+6] = (byte)(a[i+6] & mask); 416 b[i+7] = (byte)(a[i+7] & mask); 417 } 418 return new Object[]{ a, b }; 419 } 420 421 @Test 422 @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0", 423 IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0", 424 IRNode.STORE_VECTOR, "> 0"}, 425 applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"}, 426 applyIfPlatform = {"64-bit", "true"}, 427 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) 428 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0", 429 IRNode.AND_VB, "= 0", 430 IRNode.STORE_VECTOR, "= 0"}, 431 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, 432 applyIfPlatform = {"64-bit", "true"}, 433 applyIf = {"AlignVector", 
"true"}) 434 static Object[] test2(byte[] a, byte[] b, byte mask) { 435 for (int i = 0; i < RANGE; i+=8) { 436 // Cannot align with AlignVector: 3 + x * 8 % 8 = 3 437 b[i+3] = (byte)(a[i+3] & mask); // at alignment 3 438 b[i+4] = (byte)(a[i+4] & mask); 439 b[i+5] = (byte)(a[i+5] & mask); 440 b[i+6] = (byte)(a[i+6] & mask); 441 } 442 return new Object[]{ a, b }; 443 } 444 445 @Test 446 @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0", 447 IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0", 448 IRNode.STORE_VECTOR, "> 0"}, 449 applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"}, 450 applyIfPlatform = {"64-bit", "true"}, 451 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) 452 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0", 453 IRNode.AND_VB, "= 0", 454 IRNode.STORE_VECTOR, "= 0"}, 455 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, 456 applyIfPlatform = {"64-bit", "true"}, 457 applyIf = {"AlignVector", "true"}) 458 static Object[] test3(byte[] a, byte[] b, byte mask) { 459 for (int i = 0; i < RANGE; i+=8) { 460 // Cannot align with AlignVector: 3 + x * 8 % 8 = 3 461 462 // Problematic for AlignVector 463 b[i+0] = (byte)(a[i+0] & mask); // best_memref, align 0 464 465 b[i+3] = (byte)(a[i+3] & mask); // pack at offset 3 bytes 466 b[i+4] = (byte)(a[i+4] & mask); 467 b[i+5] = (byte)(a[i+5] & mask); 468 b[i+6] = (byte)(a[i+6] & mask); 469 } 470 return new Object[]{ a, b }; 471 } 472 473 @Test 474 @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0", 475 IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_8, "> 0", 476 IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0", 477 IRNode.AND_VB, IRNode.VECTOR_SIZE_8, "> 0", 478 IRNode.STORE_VECTOR, "> 0"}, 479 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, 480 applyIfPlatform = {"64-bit", "true"}, 481 applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=16"}) 482 @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0", 483 IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_8, "= 0",// 
unaligned 484 IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0", 485 IRNode.AND_VB, IRNode.VECTOR_SIZE_8, "= 0",// unaligned 486 IRNode.STORE_VECTOR, "> 0"}, 487 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, 488 applyIfPlatform = {"64-bit", "true"}, 489 applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">=16"}) 490 static Object[] test4(byte[] a, byte[] b, byte mask) { 491 for (int i = 0; i < RANGE/16; i++) { 492 // Problematic for AlignVector 493 b[i*16 + 0 ] = (byte)(a[i*16 + 0 ] & mask); // 4 pack, 0 aligned 494 b[i*16 + 1 ] = (byte)(a[i*16 + 1 ] & mask); 495 b[i*16 + 2 ] = (byte)(a[i*16 + 2 ] & mask); 496 b[i*16 + 3 ] = (byte)(a[i*16 + 3 ] & mask); 497 498 b[i*16 + 5 ] = (byte)(a[i*16 + 5 ] & mask); // 8 pack, 5 aligned 499 b[i*16 + 6 ] = (byte)(a[i*16 + 6 ] & mask); 500 b[i*16 + 7 ] = (byte)(a[i*16 + 7 ] & mask); 501 b[i*16 + 8 ] = (byte)(a[i*16 + 8 ] & mask); 502 b[i*16 + 9 ] = (byte)(a[i*16 + 9 ] & mask); 503 b[i*16 + 10] = (byte)(a[i*16 + 10] & mask); 504 b[i*16 + 11] = (byte)(a[i*16 + 11] & mask); 505 b[i*16 + 12] = (byte)(a[i*16 + 12] & mask); 506 } 507 return new Object[]{ a, b }; 508 } 509 510 @Test 511 @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0", 512 IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0", 513 IRNode.STORE_VECTOR, "> 0"}, 514 applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"}, 515 applyIfPlatform = {"64-bit", "true"}, 516 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) 517 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0", 518 IRNode.AND_VB, "= 0", 519 IRNode.STORE_VECTOR, "= 0"}, 520 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, 521 applyIfPlatform = {"64-bit", "true"}, 522 applyIf = {"AlignVector", "true"}) 523 static Object[] test5(byte[] a, byte[] b, byte mask, int inv) { 524 for (int i = 0; i < RANGE; i+=8) { 525 // Cannot align with AlignVector because of invariant 526 b[i+inv+0] = (byte)(a[i+inv+0] & mask); 527 528 b[i+inv+3] = (byte)(a[i+inv+3] & mask); 529 b[i+inv+4] = 
(byte)(a[i+inv+4] & mask); 530 b[i+inv+5] = (byte)(a[i+inv+5] & mask); 531 b[i+inv+6] = (byte)(a[i+inv+6] & mask); 532 } 533 return new Object[]{ a, b }; 534 } 535 536 @Test 537 @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0", 538 IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0", 539 IRNode.STORE_VECTOR, "> 0"}, 540 applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"}, 541 applyIfPlatform = {"64-bit", "true"}, 542 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) 543 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0", 544 IRNode.AND_VB, "= 0", 545 IRNode.STORE_VECTOR, "= 0"}, 546 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, 547 applyIfPlatform = {"64-bit", "true"}, 548 applyIf = {"AlignVector", "true"}) 549 static Object[] test6(byte[] a, byte[] b, byte mask) { 550 for (int i = 0; i < RANGE/8; i+=2) { 551 // Cannot align with AlignVector because offset is odd 552 b[i*4+0] = (byte)(a[i*4+0] & mask); 553 554 b[i*4+3] = (byte)(a[i*4+3] & mask); 555 b[i*4+4] = (byte)(a[i*4+4] & mask); 556 b[i*4+5] = (byte)(a[i*4+5] & mask); 557 b[i*4+6] = (byte)(a[i*4+6] & mask); 558 } 559 return new Object[]{ a, b }; 560 } 561 562 @Test 563 @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0", 564 IRNode.AND_VS, IRNode.VECTOR_SIZE_4, "> 0", 565 IRNode.STORE_VECTOR, "> 0"}, 566 applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=16"}, 567 applyIfPlatform = {"64-bit", "true"}, 568 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"}) 569 @IR(counts = {IRNode.LOAD_VECTOR_S, "= 0", 570 IRNode.AND_VS, "= 0", 571 IRNode.STORE_VECTOR, "= 0"}, 572 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, 573 applyIfPlatform = {"64-bit", "true"}, 574 applyIf = {"AlignVector", "true"}) 575 static Object[] test7(short[] a, short[] b, short mask) { 576 for (int i = 0; i < RANGE/8; i+=2) { 577 // Cannot align with AlignVector because offset is odd 578 b[i*4+0] = (short)(a[i*4+0] & mask); 579 580 b[i*4+3] = (short)(a[i*4+3] & mask); 581 b[i*4+4] 
= (short)(a[i*4+4] & mask); 582 b[i*4+5] = (short)(a[i*4+5] & mask); 583 b[i*4+6] = (short)(a[i*4+6] & mask); 584 } 585 return new Object[]{ a, b }; 586 } 587 588 @Test 589 @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0", 590 IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0", 591 IRNode.STORE_VECTOR, "> 0"}, 592 applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"}, 593 applyIfPlatform = {"64-bit", "true"}, 594 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) 595 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0", 596 IRNode.AND_VB, "= 0", 597 IRNode.STORE_VECTOR, "= 0"}, 598 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, 599 applyIfPlatform = {"64-bit", "true"}, 600 applyIf = {"AlignVector", "true"}) 601 static Object[] test8(byte[] a, byte[] b, byte mask, int init) { 602 for (int i = init; i < RANGE; i+=8) { 603 // Cannot align with AlignVector because of invariant (variable init becomes invar) 604 b[i+0] = (byte)(a[i+0] & mask); 605 606 b[i+3] = (byte)(a[i+3] & mask); 607 b[i+4] = (byte)(a[i+4] & mask); 608 b[i+5] = (byte)(a[i+5] & mask); 609 b[i+6] = (byte)(a[i+6] & mask); 610 } 611 return new Object[]{ a, b }; 612 } 613 614 @Test 615 @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0", 616 IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0", 617 IRNode.STORE_VECTOR, "> 0"}, 618 applyIf = {"MaxVectorSize", ">=8"}, 619 applyIfPlatform = {"64-bit", "true"}, 620 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) 621 static Object[] test9(byte[] a, byte[] b, byte mask) { 622 // known non-zero init value does not affect offset, but has implicit effect on iv 623 for (int i = 13; i < RANGE-8; i+=8) { 624 b[i+0] = (byte)(a[i+0] & mask); 625 626 b[i+3] = (byte)(a[i+3] & mask); 627 b[i+4] = (byte)(a[i+4] & mask); 628 b[i+5] = (byte)(a[i+5] & mask); 629 b[i+6] = (byte)(a[i+6] & mask); 630 } 631 return new Object[]{ a, b }; 632 } 633 634 @Test 635 @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0", 636 IRNode.AND_VB, 
IRNode.VECTOR_SIZE_4, "> 0", 637 IRNode.STORE_VECTOR, "> 0"}, 638 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, 639 applyIfPlatform = {"64-bit", "true"}, 640 applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"}) 641 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0", 642 IRNode.AND_VB, "= 0", 643 IRNode.STORE_VECTOR, "= 0"}, 644 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, 645 applyIfPlatform = {"64-bit", "true"}, 646 applyIf = {"AlignVector", "true"}) 647 static Object[] test10a(byte[] a, byte[] b, byte mask) { 648 // This is not alignable with pre-loop, because of odd init. 649 for (int i = 3; i < RANGE-8; i+=8) { 650 b[i+0] = (byte)(a[i+0] & mask); 651 b[i+1] = (byte)(a[i+1] & mask); 652 b[i+2] = (byte)(a[i+2] & mask); 653 b[i+3] = (byte)(a[i+3] & mask); 654 } 655 return new Object[]{ a, b }; 656 } 657 658 @Test 659 @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0", 660 IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0", 661 IRNode.STORE_VECTOR, "> 0"}, 662 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, 663 applyIfPlatform = {"64-bit", "true"}, 664 applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"}) 665 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0", 666 IRNode.AND_VB, "= 0", 667 IRNode.STORE_VECTOR, "= 0"}, 668 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, 669 applyIfPlatform = {"64-bit", "true"}, 670 applyIf = {"AlignVector", "true"}) 671 static Object[] test10b(byte[] a, byte[] b, byte mask) { 672 // This is not alignable with pre-loop, because of odd init. 673 // Seems not correctly handled. 
674 for (int i = 13; i < RANGE-8; i+=8) { 675 b[i+0] = (byte)(a[i+0] & mask); 676 b[i+1] = (byte)(a[i+1] & mask); 677 b[i+2] = (byte)(a[i+2] & mask); 678 b[i+3] = (byte)(a[i+3] & mask); 679 } 680 return new Object[]{ a, b }; 681 } 682 683 @Test 684 @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0", 685 IRNode.AND_VS, IRNode.VECTOR_SIZE_4, "> 0", 686 IRNode.STORE_VECTOR, "> 0"}, 687 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, 688 applyIfPlatform = {"64-bit", "true"}, 689 applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=16"}) 690 @IR(counts = {IRNode.LOAD_VECTOR_S, "= 0", 691 IRNode.AND_VS, "= 0", 692 IRNode.STORE_VECTOR, "= 0"}, 693 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, 694 applyIfPlatform = {"64-bit", "true"}, 695 applyIf = {"AlignVector", "true"}) 696 static Object[] test10c(short[] a, short[] b, short mask) { 697 // This is not alignable with pre-loop, because of odd init. 698 // Seems not correctly handled with MaxVectorSize >= 32. 
699 for (int i = 13; i < RANGE-8; i+=8) { 700 b[i+0] = (short)(a[i+0] & mask); 701 b[i+1] = (short)(a[i+1] & mask); 702 b[i+2] = (short)(a[i+2] & mask); 703 b[i+3] = (short)(a[i+3] & mask); 704 } 705 return new Object[]{ a, b }; 706 } 707 708 @Test 709 @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0", 710 IRNode.AND_VS, IRNode.VECTOR_SIZE_4, "> 0", 711 IRNode.STORE_VECTOR, "> 0"}, 712 applyIf = {"MaxVectorSize", ">=16"}, 713 applyIfPlatform = {"64-bit", "true"}, 714 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"}) 715 static Object[] test10d(short[] a, short[] b, short mask) { 716 for (int i = 13; i < RANGE-16; i+=8) { 717 // init + offset -> aligned 718 b[i+0+3] = (short)(a[i+0+3] & mask); 719 b[i+1+3] = (short)(a[i+1+3] & mask); 720 b[i+2+3] = (short)(a[i+2+3] & mask); 721 b[i+3+3] = (short)(a[i+3+3] & mask); 722 } 723 return new Object[]{ a, b }; 724 } 725 726 @Test 727 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", 728 IRNode.AND_VB, "> 0", 729 IRNode.STORE_VECTOR, "> 0"}, 730 applyIfPlatform = {"64-bit", "true"}, 731 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) 732 static Object[] test11aB(byte[] a, byte[] b, byte mask) { 733 for (int i = 0; i < RANGE; i++) { 734 // always alignable 735 b[i+0] = (byte)(a[i+0] & mask); 736 } 737 return new Object[]{ a, b }; 738 } 739 740 @Test 741 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", 742 IRNode.AND_VS, "> 0", 743 IRNode.STORE_VECTOR, "> 0"}, 744 applyIfPlatform = {"64-bit", "true"}, 745 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) 746 static Object[] test11aS(short[] a, short[] b, short mask) { 747 for (int i = 0; i < RANGE; i++) { 748 // always alignable 749 b[i+0] = (short)(a[i+0] & mask); 750 } 751 return new Object[]{ a, b }; 752 } 753 754 @Test 755 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", 756 IRNode.AND_VI, "> 0", 757 IRNode.STORE_VECTOR, "> 0"}, 758 applyIfPlatform = {"64-bit", "true"}, 759 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) 760 static Object[] 
test11aI(int[] a, int[] b, int mask) {
        for (int i = 0; i < RANGE; i++) {
            // always alignable
            b[i+0] = (int)(a[i+0] & mask);
        }
        return new Object[]{ a, b };
    }

    // test11a, long variant: b[i] = a[i] & mask, loop starts at i = 0.
    // IR rule: vector load / and / store counts must be > 0.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0",
                  IRNode.AND_VL, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
    static Object[] test11aL(long[] a, long[] b, long mask) {
        for (int i = 0; i < RANGE; i++) {
            // always alignable
            b[i+0] = (long)(a[i+0] & mask);
        }
        return new Object[]{ a, b };
    }

    // test11b, byte variant: same access pattern as test11a, but the loop
    // starts at i = 1. IR rule: vector load / and / store counts must be > 0.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
                  IRNode.AND_VB, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
    static Object[] test11bB(byte[] a, byte[] b, byte mask) {
        for (int i = 1; i < RANGE; i++) {
            // always alignable
            b[i+0] = (byte)(a[i+0] & mask);
        }
        return new Object[]{ a, b };
    }

    // test11b, short variant: loop starts at i = 1, load and store use the same index.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
                  IRNode.AND_VS, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
    static Object[] test11bS(short[] a, short[] b, short mask) {
        for (int i = 1; i < RANGE; i++) {
            // always alignable
            b[i+0] = (short)(a[i+0] & mask);
        }
        return new Object[]{ a, b };
    }

    // test11b, int variant: loop starts at i = 1, load and store use the same index.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
                  IRNode.AND_VI, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
    static Object[] test11bI(int[] a, int[] b, int mask) {
        for (int i = 1; i < RANGE; i++) {
            // always alignable
            b[i+0] = (int)(a[i+0] & mask);
        }
        return new Object[]{ a, b };
    }

    // test11b, long variant: loop starts at i = 1, load and store use the same index.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0",
                  IRNode.AND_VL, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
    static Object[] test11bL(long[] a, long[] b, long mask) {
        for (int i = 1; i < RANGE; i++) {
            // always alignable
            b[i+0] = (long)(a[i+0] & mask);
        }
        return new Object[]{ a, b };
    }

    // test11c, byte variant: the store goes to b[i] while the load reads a[i+1],
    // so the two accesses are one element apart.
    // First IR rule (AlignVector = false): vector node counts must be > 0.
    // Second IR rule (AlignVector = true): vector node counts must be 0.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
                  IRNode.AND_VB, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "false"})
    @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
                  IRNode.AND_VB, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "true"})
    static Object[] test11cB(byte[] a, byte[] b, byte mask) {
        for (int i = 1; i < RANGE-1; i++) {
            // 1 byte offset -> not alignable with AlignVector
            b[i+0] = (byte)(a[i+1] & mask);
        }
        return new Object[]{ a, b };
    }

    // test11c, short variant: store to b[i], load from a[i+1].
    // Same pair of IR rules as test11cB, keyed on the AlignVector flag.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
                  IRNode.AND_VS, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "false"})
    @IR(counts = {IRNode.LOAD_VECTOR_S, "= 0",
                  IRNode.AND_VS, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "true"})
    static Object[] test11cS(short[] a, short[] b, short mask) {
        for (int i = 1; i < RANGE-1; i++) {
            // 2 byte offset -> not alignable with AlignVector
            b[i+0] = (short)(a[i+1] & mask);
        }
        return new Object[]{ a, b };
    }

    // test11c, int variant: store to b[i], load from a[i+1].
    // Same pair of IR rules as test11cB, keyed on the AlignVector flag.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
                  IRNode.AND_VI, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "false"})
    @IR(counts = {IRNode.LOAD_VECTOR_I, "= 0",
                  IRNode.AND_VI, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "true"})
    static Object[] test11cI(int[] a, int[] b, int mask) {
        for (int i = 1; i < RANGE-1; i++) {
            // 4 byte offset -> not alignable with AlignVector
            b[i+0] = (int)(a[i+1] & mask);
        }
        return new Object[]{ a, b };
    }

    // test11c, long variant: store to b[i], load from a[i+1].
    // Unlike the B/S/I variants there is a single IR rule that does not depend
    // on AlignVector: vector node counts must be > 0.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0",
                  IRNode.AND_VL, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
    static Object[] test11cL(long[] a, long[] b, long mask) {
        for (int i = 1; i < RANGE-1; i++) {
            // always alignable (8 byte offset)
            b[i+0] = (long)(a[i+1] & mask);
        }
        return new Object[]{ a, b };
    }

    // test11d, byte variant: load and store both use index i + invar, where
    // invar is a method argument that does not change inside the loop.
    // IR rule: vector load / and / store counts must be > 0.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
                  IRNode.AND_VB, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
    static Object[] test11dB(byte[] a, byte[] b, byte mask, int invar) {
        for (int i = 0; i < RANGE; i++) {
            b[i+0+invar] = (byte)(a[i+0+invar] & mask);
        }
        return new Object[]{ a, b };
    }

    // test11d, short variant: load and store both use index i + invar.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
                  IRNode.AND_VS, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
    static Object[] test11dS(short[] a, short[] b, short mask, int invar) {
        for (int i = 0; i < RANGE; i++) {
            b[i+0+invar] = (short)(a[i+0+invar] & mask);
        }
        return new Object[]{ a, b };
    }

    // test11d, int variant: load and store both use index i + invar.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
                  IRNode.AND_VI, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
    static Object[] test11dI(int[] a, int[] b, int mask, int invar) {
        for (int i = 0; i < RANGE; i++) {
            b[i+0+invar] = (int)(a[i+0+invar] & mask);
        }
        return new Object[]{ a, b };
    }

    // test11d, long variant: load and store both use index i + invar.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0",
                  IRNode.AND_VL, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
    static Object[] test11dL(long[] a, long[] b, long mask, int invar) {
        for (int i = 0; i < RANGE; i++) {
            b[i+0+invar] = (long)(a[i+0+invar] & mask);
        }
        return new Object[]{ a, b };
    }

    // test12: each iteration masks four consecutive bytes starting at index 6*i,
    // leaving a gap of two elements before the next group.
    // IR rule: vector load / and / store counts must be 0.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
                  IRNode.AND_VB, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
    static Object[] test12(byte[] a, byte[] b, byte mask) {
        for (int i = 0; i < RANGE/16; i++) {
            // Currently does not vectorize at all
            b[i*6 + 0 ] = (byte)(a[i*6 + 0 ] & mask);
            b[i*6 + 1 ] = (byte)(a[i*6 + 1 ] & mask);
            b[i*6 + 2 ] = (byte)(a[i*6 + 2 ] & mask);
            b[i*6 + 3 ] = (byte)(a[i*6 + 3 ] & mask);
        }
        return new Object[]{ a, b };
    }

    // test13a, int + long: increments a[i] and b[i] in the same loop, so two
    // element sizes are accessed per iteration. Loop starts at i = 0.
    // IR rule: int and long vector load / add nodes of the stated vector size,
    // plus vector stores, must all have counts > 0.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
                  IRNode.LOAD_VECTOR_L, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
                  IRNode.ADD_VI, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
                  IRNode.ADD_VL, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"avx2", "true"})
    // require avx2 to ensure vectors are larger than what unrolling produces
    static Object[] test13aIL(int[] a, long[] b) {
        for (int i = 0; i < RANGE; i++) {
            a[i]++;
            b[i]++;
        }
        return new Object[]{ a, b };
    }

    // test13a, int + byte: increments a[i] and b[i] in the same loop, starting at i = 0.
    // IR rule: byte and int vector load / add counts and vector store count must be > 0.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
                  IRNode.LOAD_VECTOR_I, "> 0",
                  IRNode.ADD_VB, "> 0",
                  IRNode.ADD_VI, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"})
    static Object[] test13aIB(int[] a, byte[] b) {
        for (int i = 0; i < RANGE; i++) {
            a[i]++;
            b[i]++;
        }
        return new Object[]{ a, b };
    }

    // test13a, int + short: increments a[i] and b[i] in the same loop, starting at i = 0.
    // IR rule: int and short vector load / add counts and vector store count must be > 0.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
                  IRNode.LOAD_VECTOR_S, "> 0",
                  IRNode.ADD_VI, "> 0",
                  IRNode.ADD_VS, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"})
    static Object[] test13aIS(int[] a, short[] b) {
        for (int i = 0; i < RANGE; i++) {
            a[i]++;
            b[i]++;
        }
        return new Object[]{ a, b };
    }

    // test13a, byte + short + int + long: increments one element of each of the
    // four arrays per iteration, starting at i = 0.
    // IR rule: vector load / add counts for all four types and the vector store
    // count must be > 0.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
                  IRNode.LOAD_VECTOR_S, "> 0",
                  IRNode.LOAD_VECTOR_I, "> 0",
                  IRNode.LOAD_VECTOR_L, "> 0",
                  IRNode.ADD_VB, "> 0",
                  IRNode.ADD_VS, "> 0",
                  IRNode.ADD_VI, "> 0",
                  IRNode.ADD_VL, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"})
    static Object[] test13aBSIL(byte[] a, short[] b, int[] c, long[] d) {
        for (int i = 0; i < RANGE; i++) {
            a[i]++;
            b[i]++;
            c[i]++;
            d[i]++;
        }
        return new Object[]{ a, b, c, d };
    }

    // test13b, int + long: same body as test13aIL, but the loop starts at i = 1.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
                  IRNode.LOAD_VECTOR_L, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
                  IRNode.ADD_VI, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
                  IRNode.ADD_VL, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"avx2", "true"})
    // require avx2 to ensure vectors are larger than what unrolling produces
    static Object[] test13bIL(int[] a, long[] b) {
        for (int i = 1; i < RANGE; i++) {
            a[i]++;
            b[i]++;
        }
        return new Object[]{ a, b };
    }

    // test13b, int + byte: same body as test13aIB, but the loop starts at i = 1.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
                  IRNode.LOAD_VECTOR_I, "> 0",
                  IRNode.ADD_VB, "> 0",
                  IRNode.ADD_VI, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"})
    static Object[] test13bIB(int[] a, byte[] b) {
        for (int i = 1; i < RANGE; i++) {
            a[i]++;
            b[i]++;
        }
        return new Object[]{ a, b };
    }

    // test13b, int + short: same body as test13aIS, but the loop starts at i = 1.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
                  IRNode.LOAD_VECTOR_S, "> 0",
                  IRNode.ADD_VI, "> 0",
                  IRNode.ADD_VS, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"})
    static Object[] test13bIS(int[] a, short[] b) {
        for (int i = 1; i < RANGE; i++) {
            a[i]++;
            b[i]++;
        }
        return new Object[]{ a, b };
    }

    // test13b, byte + short + int + long: same body as test13aBSIL, but the
    // loop starts at i = 1.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
                  IRNode.LOAD_VECTOR_S, "> 0",
                  IRNode.LOAD_VECTOR_I, "> 0",
                  IRNode.LOAD_VECTOR_L, "> 0",
                  IRNode.ADD_VB, "> 0",
                  IRNode.ADD_VS, "> 0",
                  IRNode.ADD_VI, "> 0",
                  IRNode.ADD_VL, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"})
    static Object[] test13bBSIL(byte[] a, short[] b, int[] c, long[] d) {
        for (int i = 1; i < RANGE; i++) {
            a[i]++;
            b[i]++;
            c[i]++;
            d[i]++;
        }
        return new Object[]{ a, b, c, d };
    }

    // test14a: increments 16 consecutive bytes a[i+0..i+15] per iteration while
    // the induction variable advances by 9, so consecutive iterations touch
    // overlapping elements.
    // First IR rule (AlignVector = false): vector node counts must be > 0.
    // Second IR rule (AlignVector = true): vector node counts must be 0.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
                  IRNode.ADD_VB, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "false"})
    @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
                  IRNode.ADD_VB, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "true"})
    static Object[] test14aB(byte[] a) {
        // non-power-of-2 stride
        for (int i = 0; i < RANGE-20; i+=9) {
            a[i+0]++;
            a[i+1]++;
            a[i+2]++;
            a[i+3]++;
            a[i+4]++;
            a[i+5]++;
            a[i+6]++;
            a[i+7]++;
            a[i+8]++;
            a[i+9]++;
            a[i+10]++;
            a[i+11]++;
            a[i+12]++;
            a[i+13]++;
            a[i+14]++;
            a[i+15]++;
        }
        return new Object[]{ a };
    }

    // test14b: same body and IR rules as test14aB, with the induction variable
    // advancing by 3.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
                  IRNode.ADD_VB, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "false"})
    @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
                  IRNode.ADD_VB, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "true"})
    static Object[] test14bB(byte[] a) {
        // non-power-of-2 stride
        for (int i = 0; i < RANGE-20; i+=3) {
            a[i+0]++;
            a[i+1]++;
            a[i+2]++;
            a[i+3]++;
            a[i+4]++;
            a[i+5]++;
            a[i+6]++;
            a[i+7]++;
            a[i+8]++;
            a[i+9]++;
            a[i+10]++;
            a[i+11]++;
            a[i+12]++;
            a[i+13]++;
            a[i+14]++;
            a[i+15]++;
        }
        return new Object[]{ a };
    }

    // test14c: same body and IR rules as test14aB, with the induction variable
    // advancing by 5.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
                  IRNode.ADD_VB, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "false"})
    @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
                  IRNode.ADD_VB, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "true"})
    static Object[] test14cB(byte[] a) {
        // non-power-of-2 stride
        for (int i = 0; i < RANGE-20; i+=5) {
            a[i+0]++;
            a[i+1]++;
            a[i+2]++;
            a[i+3]++;
            a[i+4]++;
            a[i+5]++;
            a[i+6]++;
            a[i+7]++;
            a[i+8]++;
            a[i+9]++;
            a[i+10]++;
            a[i+11]++;
            a[i+12]++;
            a[i+13]++;
            a[i+14]++;
            a[i+15]++;
        }
        return new Object[]{ a };
    }

    // test15a: increments 16 consecutive bytes starting at index 53*i; the
    // induction variable itself advances by 1. No IR rule is attached.
    @Test
    // IR rules difficult because of modulo wrapping with offset after peeling.
    static Object[] test15aB(byte[] a) {
        // non-power-of-2 scale
        for (int i = 0; i < RANGE/64-20; i++) {
            a[53*i+0]++;
            a[53*i+1]++;
            a[53*i+2]++;
            a[53*i+3]++;
            a[53*i+4]++;
            a[53*i+5]++;
            a[53*i+6]++;
            a[53*i+7]++;
            a[53*i+8]++;
            a[53*i+9]++;
            a[53*i+10]++;
            a[53*i+11]++;
            a[53*i+12]++;
            a[53*i+13]++;
            a[53*i+14]++;
            a[53*i+15]++;
        }
        return new Object[]{ a };
    }

    // test15b: same as test15aB with index scale 25. No IR rule is attached.
    @Test
    // IR rules difficult because of modulo wrapping with offset after peeling.
    static Object[] test15bB(byte[] a) {
        // non-power-of-2 scale
        for (int i = 0; i < RANGE/64-20; i++) {
            a[25*i+0]++;
            a[25*i+1]++;
            a[25*i+2]++;
            a[25*i+3]++;
            a[25*i+4]++;
            a[25*i+5]++;
            a[25*i+6]++;
            a[25*i+7]++;
            a[25*i+8]++;
            a[25*i+9]++;
            a[25*i+10]++;
            a[25*i+11]++;
            a[25*i+12]++;
            a[25*i+13]++;
            a[25*i+14]++;
            a[25*i+15]++;
        }
        return new Object[]{ a };
    }

    // test15c: same as test15aB with index scale 19. No IR rule is attached.
    @Test
    // IR rules difficult because of modulo wrapping with offset after peeling.
    static Object[] test15cB(byte[] a) {
        // non-power-of-2 scale
        for (int i = 0; i < RANGE/64-20; i++) {
            a[19*i+0]++;
            a[19*i+1]++;
            a[19*i+2]++;
            a[19*i+3]++;
            a[19*i+4]++;
            a[19*i+5]++;
            a[19*i+6]++;
            a[19*i+7]++;
            a[19*i+8]++;
            a[19*i+9]++;
            a[19*i+10]++;
            a[19*i+11]++;
            a[19*i+12]++;
            a[19*i+13]++;
            a[19*i+14]++;
            a[19*i+15]++;
        }
        return new Object[]{ a };
    }

    // test16a: per iteration, increments 15 consecutive bytes a[2*i+0..2*i+14]
    // and 4 consecutive shorts b[2*i+0..2*i+3]. No IR rule is attached; only
    // the returned arrays are available for checking.
    @Test
    static Object[] test16a(byte[] a, short[] b) {
        // infinite loop issues
        for (int i = 0; i < RANGE/2-20; i++) {
            a[2*i+0]++;
            a[2*i+1]++;
            a[2*i+2]++;
            a[2*i+3]++;
            a[2*i+4]++;
            a[2*i+5]++;
            a[2*i+6]++;
            a[2*i+7]++;
            a[2*i+8]++;
            a[2*i+9]++;
            a[2*i+10]++;
            a[2*i+11]++;
            a[2*i+12]++;
            a[2*i+13]++;
            a[2*i+14]++;

            b[2*i+0]++;
            b[2*i+1]++;
            b[2*i+2]++;
            b[2*i+3]++;
        }
        return new Object[]{ a, b };
    }

    // test16b: the byte-only part of test16a: 15 consecutive byte increments at
    // a[2*i+0..2*i+14] per iteration. No IR rule is attached.
    @Test
    static Object[] test16b(byte[] a) {
        // infinite loop issues
        for (int i = 0; i < RANGE/2-20; i++) {
            a[2*i+0]++;
            a[2*i+1]++;
            a[2*i+2]++;
            a[2*i+3]++;
            a[2*i+4]++;
            a[2*i+5]++;
            a[2*i+6]++;
            a[2*i+7]++;
            a[2*i+8]++;
            a[2*i+9]++;
            a[2*i+10]++;
            a[2*i+11]++;
            a[2*i+12]++;
            a[2*i+13]++;
            a[2*i+14]++;
        }
        return new Object[]{ a };
    }

    // test17a: increments every long of the array through Unsafe unaligned
    // accessors, at byte address ARRAY_LONG_BASE_OFFSET + 8 * i.
    // IR rule: long vector load / add counts and vector store count must be > 0.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0",
                  IRNode.ADD_VL, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
    static Object[] test17a(long[] a) {
        // Unsafe: vectorizes with profiling (not xcomp)
        for (int i = 0; i < RANGE; i++) {
            int adr = UNSAFE.ARRAY_LONG_BASE_OFFSET + 8 * i;
            long v = UNSAFE.getLongUnaligned(a, adr);
            UNSAFE.putLongUnaligned(a, adr, v + 1);
        }
        return new Object[]{ a };
    }

    // test17b: like test17a, but every address is shifted by one extra byte
    // (+ 1), so each 8-byte access straddles two array elements. The loop stops
    // at RANGE-1 so the last access stays inside the array. No IR rule is attached.
    @Test
    // Difficult to write good IR rule. Modulo calculus overflow can create non-power-of-2 packs.
    static Object[] test17b(long[] a) {
        // Not alignable
        for (int i = 0; i < RANGE-1; i++) {
            int adr = UNSAFE.ARRAY_LONG_BASE_OFFSET + 8 * i + 1;
            long v = UNSAFE.getLongUnaligned(a, adr);
            UNSAFE.putLongUnaligned(a, adr, v + 1);
        }
        return new Object[]{ a };
    }

    // test17c: the induction variable advances by 4; each iteration increments
    // the two adjacent longs at adr + 0 and adr + 8 via Unsafe.
    // IR rule (only if MaxVectorSize >= 32): 2-element long vector load / add
    // counts and vector store count must be > 0.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_L, IRNode.VECTOR_SIZE_2, "> 0",
                  IRNode.ADD_VL, IRNode.VECTOR_SIZE_2, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIf = {"MaxVectorSize", ">=32"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
    static Object[] test17c(long[] a) {
        // Unsafe: aligned vectorizes
        for (int i = 0; i < RANGE-1; i+=4) {
            int adr = UNSAFE.ARRAY_LONG_BASE_OFFSET + 8 * i;
            long v0 = UNSAFE.getLongUnaligned(a, adr + 0);
            long v1 = UNSAFE.getLongUnaligned(a, adr + 8);
            UNSAFE.putLongUnaligned(a, adr + 0, v0 + 1);
            UNSAFE.putLongUnaligned(a, adr + 8, v1 + 1);
        }
        return new Object[]{ a };
    }

    // test17d: like test17c, but every address is shifted by one extra byte (+ 1).
    // First IR rule (AlignVector = false and MaxVectorSize >= 64): 2-element
    // long vector load / add counts and vector store count must be > 0.
    // Second IR rule (AlignVector = true): long vector node counts must be 0.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_L, IRNode.VECTOR_SIZE_2, "> 0",
                  IRNode.ADD_VL, IRNode.VECTOR_SIZE_2, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfCPUFeatureOr = {"avx512", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=64"})
    // Ensure vector width is large enough to fit 64 byte for longs:
    // The offsets are: 25, 33, 57, 65
    // In modulo 32:    25,  1, 25,  1  -> does not vectorize
    // In modulo 64:    25, 33, 57,  1  -> at least first pair vectorizes
    // This problem is because we compute modulo vector width in memory_alignment.
    @IR(counts = {IRNode.LOAD_VECTOR_L, "= 0",
                  IRNode.ADD_VL, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "true"})
    static Object[] test17d(long[] a) {
        // Not alignable
        for (int i = 0; i < RANGE-1; i+=4) {
            int adr = UNSAFE.ARRAY_LONG_BASE_OFFSET + 8 * i + 1;
            long v0 = UNSAFE.getLongUnaligned(a, adr + 0);
            long v1 = UNSAFE.getLongUnaligned(a, adr + 8);
            UNSAFE.putLongUnaligned(a, adr + 0, v0 + 1);
            UNSAFE.putLongUnaligned(a, adr + 8, v1 + 1);
        }
        return new Object[]{ a };
    }

    // test18a: the byte stores to a[0] and a[1] use constant indices that do not
    // depend on the loop variable; only the int store b[i] does.
    // No IR rule is attached.
    @Test
    static Object[] test18a(byte[] a, int[] b) {
        // scale = 0  -->  no iv
        for (int i = 0; i < RANGE; i++) {
            a[0] = 1;
            b[i] = 2;
            a[1] = 1;
        }
        return new Object[]{ a, b };
    }

    // test18b: same shape as test18a, with the constant byte stores moved to
    // a[1] and a[2]. No IR rule is attached.
    @Test
    static Object[] test18b(byte[] a, int[] b) {
        // scale = 0  -->  no iv
        for (int i = 0; i < RANGE; i++) {
            a[1] = 1;
            b[i] = 2;
            a[2] = 1;
        }
        return new Object[]{ a, b };
    }

    // test19: down-counting loop (i = 5000 .. 1) that copies b to a at index
    // RANGE_FINAL - i, so the array index increases while i decreases.
    // No IR rule is attached.
    @Test
    static Object[] test19(int[] a, int[] b) {
        for (int i = 5000; i > 0; i--) {
            a[RANGE_FINAL - i] = b[RANGE_FINAL - i];
        }
        return new Object[]{ a, b };
    }

    // test20: increments four consecutive bytes per iteration at index
    // 2*i + 30 + {0,1,2,3}; the loop starts at i = 1. No IR rule is attached.
    @Test
    static Object[] test20(byte[] a) {
        // Example where it is easy to pass alignment check,
        // but used to fail the alignment calculation
        for (int i = 1; i < RANGE/2-50; i++) {
            a[2*i+0+30]++;
            a[2*i+1+30]++;
            a[2*i+2+30]++;
            a[2*i+3+30]++;
        }
        return new Object[]{ a };
    }
}