1 /* 2 * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 */ 23 24 package compiler.loopopts.superword; 25 26 import compiler.lib.ir_framework.*; 27 import jdk.test.lib.Utils; 28 import jdk.test.whitebox.WhiteBox; 29 import jdk.internal.misc.Unsafe; 30 import java.lang.reflect.Array; 31 import java.util.Map; 32 import java.util.HashMap; 33 import java.util.Random; 34 import java.nio.ByteOrder; 35 36 /* 37 * @test id=NoAlignVector 38 * @bug 8310190 39 * @summary Test AlignVector with various loop init, stride, scale, invar, etc. 40 * @modules java.base/jdk.internal.misc 41 * @library /test/lib / 42 * @run driver compiler.loopopts.superword.TestAlignVector NoAlignVector 43 */ 44 45 /* 46 * @test id=AlignVector 47 * @bug 8310190 48 * @summary Test AlignVector with various loop init, stride, scale, invar, etc. 
49 * @modules java.base/jdk.internal.misc 50 * @library /test/lib / 51 * @run driver compiler.loopopts.superword.TestAlignVector AlignVector 52 */ 53 54 /* 55 * @test id=VerifyAlignVector 56 * @bug 8310190 57 * @summary Test AlignVector with various loop init, stride, scale, invar, etc. 58 * @modules java.base/jdk.internal.misc 59 * @library /test/lib / 60 * @run driver compiler.loopopts.superword.TestAlignVector VerifyAlignVector 61 */ 62 63 public class TestAlignVector { 64 static int RANGE = 1024*8; 65 static int RANGE_FINAL = 1024*8; 66 private static final Unsafe UNSAFE = Unsafe.getUnsafe(); 67 private static final Random RANDOM = Utils.getRandomInstance(); 68 69 // Inputs 70 byte[] aB; 71 byte[] bB; 72 byte mB = (byte)31; 73 short[] aS; 74 short[] bS; 75 short mS = (short)0xF0F0; 76 int[] aI; 77 int[] bI; 78 int mI = 0xF0F0F0F0; 79 long[] aL; 80 long[] bL; 81 long mL = 0xF0F0F0F0F0F0F0F0L; 82 83 // List of tests 84 Map<String,TestFunction> tests = new HashMap<String,TestFunction>(); 85 86 // List of gold, the results from the first run before compilation 87 Map<String,Object[]> golds = new HashMap<String,Object[]>(); 88 89 interface TestFunction { 90 Object[] run(); 91 } 92 93 public static void main(String[] args) { 94 TestFramework framework = new TestFramework(TestAlignVector.class); 95 framework.addFlags("--add-modules", "java.base", "--add-exports", "java.base/jdk.internal.misc=ALL-UNNAMED", 96 "-XX:+IgnoreUnrecognizedVMOptions", "-XX:LoopUnrollLimit=250"); 97 98 switch (args[0]) { 99 case "NoAlignVector" -> { framework.addFlags("-XX:-AlignVector"); } 100 case "AlignVector" -> { framework.addFlags("-XX:+AlignVector"); } 101 case "VerifyAlignVector" -> { framework.addFlags("-XX:+AlignVector", "-XX:+IgnoreUnrecognizedVMOptions", "-XX:+VerifyAlignVector"); } 102 default -> { throw new RuntimeException("Test argument not recognized: " + args[0]); } 103 } 104 framework.start(); 105 } 106 107 public TestAlignVector() { 108 // Generate input once 109 aB = 
generateB(); 110 bB = generateB(); 111 aS = generateS(); 112 bS = generateS(); 113 aI = generateI(); 114 bI = generateI(); 115 aL = generateL(); 116 bL = generateL(); 117 118 // Add all tests to list 119 tests.put("test0", () -> { return test0(aB.clone(), bB.clone(), mB); }); 120 tests.put("test1", () -> { return test1(aB.clone(), bB.clone(), mB); }); 121 tests.put("test2", () -> { return test2(aB.clone(), bB.clone(), mB); }); 122 tests.put("test3", () -> { return test3(aB.clone(), bB.clone(), mB); }); 123 tests.put("test4", () -> { return test4(aB.clone(), bB.clone(), mB); }); 124 tests.put("test5", () -> { return test5(aB.clone(), bB.clone(), mB, 0); }); 125 tests.put("test6", () -> { return test6(aB.clone(), bB.clone(), mB); }); 126 tests.put("test7", () -> { return test7(aS.clone(), bS.clone(), mS); }); 127 tests.put("test8", () -> { return test8(aB.clone(), bB.clone(), mB, 0); }); 128 tests.put("test8", () -> { return test8(aB.clone(), bB.clone(), mB, 1); }); 129 tests.put("test9", () -> { return test9(aB.clone(), bB.clone(), mB); }); 130 131 tests.put("test10a", () -> { return test10a(aB.clone(), bB.clone(), mB); }); 132 tests.put("test10b", () -> { return test10b(aB.clone(), bB.clone(), mB); }); 133 tests.put("test10c", () -> { return test10c(aS.clone(), bS.clone(), mS); }); 134 tests.put("test10d", () -> { return test10d(aS.clone(), bS.clone(), mS); }); 135 136 tests.put("test11aB", () -> { return test11aB(aB.clone(), bB.clone(), mB); }); 137 tests.put("test11aS", () -> { return test11aS(aS.clone(), bS.clone(), mS); }); 138 tests.put("test11aI", () -> { return test11aI(aI.clone(), bI.clone(), mI); }); 139 tests.put("test11aL", () -> { return test11aL(aL.clone(), bL.clone(), mL); }); 140 141 tests.put("test11bB", () -> { return test11bB(aB.clone(), bB.clone(), mB); }); 142 tests.put("test11bS", () -> { return test11bS(aS.clone(), bS.clone(), mS); }); 143 tests.put("test11bI", () -> { return test11bI(aI.clone(), bI.clone(), mI); }); 144 tests.put("test11bL", 
() -> { return test11bL(aL.clone(), bL.clone(), mL); }); 145 146 tests.put("test11cB", () -> { return test11cB(aB.clone(), bB.clone(), mB); }); 147 tests.put("test11cS", () -> { return test11cS(aS.clone(), bS.clone(), mS); }); 148 tests.put("test11cI", () -> { return test11cI(aI.clone(), bI.clone(), mI); }); 149 tests.put("test11cL", () -> { return test11cL(aL.clone(), bL.clone(), mL); }); 150 151 tests.put("test11dB", () -> { return test11dB(aB.clone(), bB.clone(), mB, 0); }); 152 tests.put("test11dS", () -> { return test11dS(aS.clone(), bS.clone(), mS, 0); }); 153 tests.put("test11dI", () -> { return test11dI(aI.clone(), bI.clone(), mI, 0); }); 154 tests.put("test11dL", () -> { return test11dL(aL.clone(), bL.clone(), mL, 0); }); 155 156 tests.put("test12", () -> { return test12(aB.clone(), bB.clone(), mB); }); 157 158 tests.put("test13aIL", () -> { return test13aIL(aI.clone(), aL.clone()); }); 159 tests.put("test13aIB", () -> { return test13aIB(aI.clone(), aB.clone()); }); 160 tests.put("test13aIS", () -> { return test13aIS(aI.clone(), aS.clone()); }); 161 tests.put("test13aBSIL", () -> { return test13aBSIL(aB.clone(), aS.clone(), aI.clone(), aL.clone()); }); 162 163 tests.put("test13bIL", () -> { return test13bIL(aI.clone(), aL.clone()); }); 164 tests.put("test13bIB", () -> { return test13bIB(aI.clone(), aB.clone()); }); 165 tests.put("test13bIS", () -> { return test13bIS(aI.clone(), aS.clone()); }); 166 tests.put("test13bBSIL", () -> { return test13bBSIL(aB.clone(), aS.clone(), aI.clone(), aL.clone()); }); 167 168 tests.put("test14aB", () -> { return test14aB(aB.clone()); }); 169 tests.put("test14bB", () -> { return test14bB(aB.clone()); }); 170 tests.put("test14cB", () -> { return test14cB(aB.clone()); }); 171 172 tests.put("test15aB", () -> { return test15aB(aB.clone()); }); 173 tests.put("test15bB", () -> { return test15bB(aB.clone()); }); 174 tests.put("test15cB", () -> { return test15cB(aB.clone()); }); 175 176 tests.put("test16a", () -> { return 
test16a(aB.clone(), aS.clone()); }); 177 tests.put("test16b", () -> { return test16b(aB.clone()); }); 178 179 tests.put("test17a", () -> { return test17a(aL.clone()); }); 180 tests.put("test17b", () -> { return test17b(aL.clone()); }); 181 tests.put("test17c", () -> { return test17c(aL.clone()); }); 182 tests.put("test17d", () -> { return test17d(aL.clone()); }); 183 184 tests.put("test18a", () -> { return test18a(aB.clone(), aI.clone()); }); 185 tests.put("test18b", () -> { return test18b(aB.clone(), aI.clone()); }); 186 187 tests.put("test19", () -> { return test19(aI.clone(), bI.clone()); }); 188 tests.put("test20", () -> { return test20(aB.clone()); }); 189 190 // Compute gold value for all test methods before compilation 191 for (Map.Entry<String,TestFunction> entry : tests.entrySet()) { 192 String name = entry.getKey(); 193 TestFunction test = entry.getValue(); 194 Object[] gold = test.run(); 195 golds.put(name, gold); 196 } 197 } 198 199 @Warmup(100) 200 @Run(test = {"test0", 201 "test1", 202 "test2", 203 "test3", 204 "test4", 205 "test5", 206 "test6", 207 "test7", 208 "test8", 209 "test9", 210 "test10a", 211 "test10b", 212 "test10c", 213 "test10d", 214 "test11aB", 215 "test11aS", 216 "test11aI", 217 "test11aL", 218 "test11bB", 219 "test11bS", 220 "test11bI", 221 "test11bL", 222 "test11cB", 223 "test11cS", 224 "test11cI", 225 "test11cL", 226 "test11dB", 227 "test11dS", 228 "test11dI", 229 "test11dL", 230 "test12", 231 "test13aIL", 232 "test13aIB", 233 "test13aIS", 234 "test13aBSIL", 235 "test13bIL", 236 "test13bIB", 237 "test13bIS", 238 "test13bBSIL", 239 "test14aB", 240 "test14bB", 241 "test14cB", 242 "test15aB", 243 "test15bB", 244 "test15cB", 245 "test16a", 246 "test16b", 247 "test17a", 248 "test17b", 249 "test17c", 250 "test17d", 251 "test18a", 252 "test18b", 253 "test19", 254 "test20"}) 255 public void runTests() { 256 for (Map.Entry<String,TestFunction> entry : tests.entrySet()) { 257 String name = entry.getKey(); 258 TestFunction test = 
entry.getValue(); 259 // Recall gold value from before compilation 260 Object[] gold = golds.get(name); 261 // Compute new result 262 Object[] result = test.run(); 263 // Compare gold and new result 264 verify(name, gold, result); 265 } 266 } 267 268 static byte[] generateB() { 269 byte[] a = new byte[RANGE]; 270 for (int i = 0; i < a.length; i++) { 271 a[i] = (byte)RANDOM.nextInt(); 272 } 273 return a; 274 } 275 276 static short[] generateS() { 277 short[] a = new short[RANGE]; 278 for (int i = 0; i < a.length; i++) { 279 a[i] = (short)RANDOM.nextInt(); 280 } 281 return a; 282 } 283 284 static int[] generateI() { 285 int[] a = new int[RANGE]; 286 for (int i = 0; i < a.length; i++) { 287 a[i] = RANDOM.nextInt(); 288 } 289 return a; 290 } 291 292 static long[] generateL() { 293 long[] a = new long[RANGE]; 294 for (int i = 0; i < a.length; i++) { 295 a[i] = RANDOM.nextLong(); 296 } 297 return a; 298 } 299 300 static void verify(String name, Object[] gold, Object[] result) { 301 if (gold.length != result.length) { 302 throw new RuntimeException("verify " + name + ": not the same number of outputs: gold.length = " + 303 gold.length + ", result.length = " + result.length); 304 } 305 for (int i = 0; i < gold.length; i++) { 306 Object g = gold[i]; 307 Object r = result[i]; 308 if (g.getClass() != r.getClass() || !g.getClass().isArray() || !r.getClass().isArray()) { 309 throw new RuntimeException("verify " + name + ": must both be array of same type:" + 310 " gold[" + i + "].getClass() = " + g.getClass().getSimpleName() + 311 " result[" + i + "].getClass() = " + r.getClass().getSimpleName()); 312 } 313 if (g == r) { 314 throw new RuntimeException("verify " + name + ": should be two separate arrays (with identical content):" + 315 " gold[" + i + "] == result[" + i + "]"); 316 } 317 if (Array.getLength(g) != Array.getLength(r)) { 318 throw new RuntimeException("verify " + name + ": arrays must have same length:" + 319 " gold[" + i + "].length = " + Array.getLength(g) + 320 " 
result[" + i + "].length = " + Array.getLength(r)); 321 } 322 Class c = g.getClass().getComponentType(); 323 if (c == byte.class) { 324 verifyB(name, i, (byte[])g, (byte[])r); 325 } else if (c == short.class) { 326 verifyS(name, i, (short[])g, (short[])r); 327 } else if (c == int.class) { 328 verifyI(name, i, (int[])g, (int[])r); 329 } else if (c == long.class) { 330 verifyL(name, i, (long[])g, (long[])r); 331 } else { 332 throw new RuntimeException("verify " + name + ": array type not supported for verify:" + 333 " gold[" + i + "].getClass() = " + g.getClass().getSimpleName() + 334 " result[" + i + "].getClass() = " + r.getClass().getSimpleName()); 335 } 336 } 337 } 338 339 static void verifyB(String name, int i, byte[] g, byte[] r) { 340 for (int j = 0; j < g.length; j++) { 341 if (g[j] != r[j]) { 342 throw new RuntimeException("verify " + name + ": arrays must have same content:" + 343 " gold[" + i + "][" + j + "] = " + g[j] + 344 " result[" + i + "][" + j + "] = " + r[j]); 345 } 346 } 347 } 348 349 static void verifyS(String name, int i, short[] g, short[] r) { 350 for (int j = 0; j < g.length; j++) { 351 if (g[j] != r[j]) { 352 throw new RuntimeException("verify " + name + ": arrays must have same content:" + 353 " gold[" + i + "][" + j + "] = " + g[j] + 354 " result[" + i + "][" + j + "] = " + r[j]); 355 } 356 } 357 } 358 359 static void verifyI(String name, int i, int[] g, int[] r) { 360 for (int j = 0; j < g.length; j++) { 361 if (g[j] != r[j]) { 362 throw new RuntimeException("verify " + name + ": arrays must have same content:" + 363 " gold[" + i + "][" + j + "] = " + g[j] + 364 " result[" + i + "][" + j + "] = " + r[j]); 365 } 366 } 367 } 368 369 static void verifyL(String name, int i, long[] g, long[] r) { 370 for (int j = 0; j < g.length; j++) { 371 if (g[j] != r[j]) { 372 throw new RuntimeException("verify " + name + ": arrays must have same content:" + 373 " gold[" + i + "][" + j + "] = " + g[j] + 374 " result[" + i + "][" + j + "] = " + r[j]); 375 } 
// test0: for each i stepping by 8, stores a[i+k] & mask into b[i+k] for k = 0..3;
// indices i+4..i+7 are not touched. Returns { a, b } for comparison between runs.
// IR rule (64-bit, sse4.1 or asimd, MaxVectorSize >= 8): requires LOAD_VECTOR_B and
// AND_VB nodes with VECTOR_SIZE_4 and at least one STORE_VECTOR. The rule carries no
// AlignVector condition.
@Test
@IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIf = {"MaxVectorSize", ">=8"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
static Object[] test0(byte[] a, byte[] b, byte mask) {
    for (int i = 0; i < RANGE; i+=8) {
        // Safe to vectorize with AlignVector
        b[i+0] = (byte)(a[i+0] & mask); // offset 0, align 0
        b[i+1] = (byte)(a[i+1] & mask);
        b[i+2] = (byte)(a[i+2] & mask);
        b[i+3] = (byte)(a[i+3] & mask);
    }
    return new Object[]{ a, b };
}

// test1: like test0, but all eight elements a[i+0..i+7] of each 8-element step are
// masked into b. Returns { a, b }.
// IR rule (64-bit, avx2 or asimd): requires at least one LOAD_VECTOR_B, AND_VB and
// STORE_VECTOR; no vector size and no VM flag condition is given.
@Test
@IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
              IRNode.AND_VB, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"})
static Object[] test1(byte[] a, byte[] b, byte mask) {
    for (int i = 0; i < RANGE; i+=8) {
        // Safe to vectorize with AlignVector
        b[i+0] = (byte)(a[i+0] & mask); // offset 0, align 0
        b[i+1] = (byte)(a[i+1] & mask);
        b[i+2] = (byte)(a[i+2] & mask);
        b[i+3] = (byte)(a[i+3] & mask);
        b[i+4] = (byte)(a[i+4] & mask);
        b[i+5] = (byte)(a[i+5] & mask);
        b[i+6] = (byte)(a[i+6] & mask);
        b[i+7] = (byte)(a[i+7] & mask);
    }
    return new Object[]{ a, b };
}
// test3: for each i stepping by 8, masks a[i+0] and the four consecutive elements
// a[i+3..i+6] into the same positions of b. Returns { a, b }.
// IR rules (64-bit, sse4.1 or asimd):
//  - AlignVector false and MaxVectorSize >= 8: requires LOAD_VECTOR_B / AND_VB with
//    VECTOR_SIZE_4 and at least one STORE_VECTOR.
//  - AlignVector true: requires that no LOAD_VECTOR_B, AND_VB or STORE_VECTOR is present.
@Test
@IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
@IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
              IRNode.AND_VB, "= 0",
              IRNode.STORE_VECTOR, "= 0"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
    applyIfPlatform = {"64-bit", "true"},
    applyIf = {"AlignVector", "true"})
static Object[] test3(byte[] a, byte[] b, byte mask) {
    for (int i = 0; i < RANGE; i+=8) {
        // Cannot align with AlignVector: 3 + x * 8 % 8 = 3

        // Problematic for AlignVector
        b[i+0] = (byte)(a[i+0] & mask); // best_memref, align 0

        b[i+3] = (byte)(a[i+3] & mask); // pack at offset 3 bytes
        b[i+4] = (byte)(a[i+4] & mask);
        b[i+5] = (byte)(a[i+5] & mask);
        b[i+6] = (byte)(a[i+6] & mask);
    }
    return new Object[]{ a, b };
}
// test5: same access pattern as test3 (element +0 and elements +3..+6 of each
// 8-element step), but every index is additionally shifted by the loop-invariant
// argument inv. Returns { a, b }.
// IR rules (64-bit, sse4.1 or asimd):
//  - AlignVector false and MaxVectorSize >= 8: requires LOAD_VECTOR_B / AND_VB with
//    VECTOR_SIZE_4 and at least one STORE_VECTOR.
//  - AlignVector true: requires that no LOAD_VECTOR_B, AND_VB or STORE_VECTOR is present.
@Test
@IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
@IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
              IRNode.AND_VB, "= 0",
              IRNode.STORE_VECTOR, "= 0"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
    applyIfPlatform = {"64-bit", "true"},
    applyIf = {"AlignVector", "true"})
static Object[] test5(byte[] a, byte[] b, byte mask, int inv) {
    for (int i = 0; i < RANGE; i+=8) {
        // Cannot align with AlignVector because of invariant
        b[i+inv+0] = (byte)(a[i+inv+0] & mask);

        b[i+inv+3] = (byte)(a[i+inv+3] & mask);
        b[i+inv+4] = (byte)(a[i+inv+4] & mask);
        b[i+inv+5] = (byte)(a[i+inv+5] & mask);
        b[i+inv+6] = (byte)(a[i+inv+6] & mask);
    }
    return new Object[]{ a, b };
}

// test6: the induction variable runs over 0, 2, 4, ... below RANGE/8 and is scaled
// by 4 in every index, so each iteration touches elements i*4+0 and i*4+3..i*4+6.
// Returns { a, b }.
// IR rules are the same pair as for test5: size-4 byte vectors expected when
// AlignVector is false (MaxVectorSize >= 8), no vector nodes when AlignVector is true.
@Test
@IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
@IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
              IRNode.AND_VB, "= 0",
              IRNode.STORE_VECTOR, "= 0"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
    applyIfPlatform = {"64-bit", "true"},
    applyIf = {"AlignVector", "true"})
static Object[] test6(byte[] a, byte[] b, byte mask) {
    for (int i = 0; i < RANGE/8; i+=2) {
        // Cannot align with AlignVector because offset is odd
        b[i*4+0] = (byte)(a[i*4+0] & mask);

        b[i*4+3] = (byte)(a[i*4+3] & mask);
        b[i*4+4] = (byte)(a[i*4+4] & mask);
        b[i*4+5] = (byte)(a[i*4+5] & mask);
        b[i*4+6] = (byte)(a[i*4+6] & mask);
    }
    return new Object[]{ a, b };
}
// test8: same access pattern as test3 (element +0 and elements +3..+6 of each
// 8-element step), but the loop starts at the argument init instead of a constant.
// Returns { a, b }.
// IR rules (64-bit, sse4.1 or asimd):
//  - AlignVector false and MaxVectorSize >= 8: requires LOAD_VECTOR_B / AND_VB with
//    VECTOR_SIZE_4 and at least one STORE_VECTOR.
//  - AlignVector true: requires that no LOAD_VECTOR_B, AND_VB or STORE_VECTOR is present.
@Test
@IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
@IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
              IRNode.AND_VB, "= 0",
              IRNode.STORE_VECTOR, "= 0"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
    applyIfPlatform = {"64-bit", "true"},
    applyIf = {"AlignVector", "true"})
static Object[] test8(byte[] a, byte[] b, byte mask, int init) {
    for (int i = init; i < RANGE; i+=8) {
        // Cannot align with AlignVector because of invariant (variable init becomes invar)
        b[i+0] = (byte)(a[i+0] & mask);

        b[i+3] = (byte)(a[i+3] & mask);
        b[i+4] = (byte)(a[i+4] & mask);
        b[i+5] = (byte)(a[i+5] & mask);
        b[i+6] = (byte)(a[i+6] & mask);
    }
    return new Object[]{ a, b };
}

// test9: same access pattern again, with the constant loop start 13 and the bound
// RANGE-8. With i = 13 the four-element run i+3..i+6 begins at index 16.
// Returns { a, b }.
// IR rule (64-bit, sse4.1 or asimd, MaxVectorSize >= 8): requires LOAD_VECTOR_B /
// AND_VB with VECTOR_SIZE_4 and at least one STORE_VECTOR; the rule has no
// AlignVector condition.
@Test
@IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIf = {"MaxVectorSize", ">=8"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
static Object[] test9(byte[] a, byte[] b, byte mask) {
    // known non-zero init value does not affect offset, but has implicit effect on iv
    for (int i = 13; i < RANGE-8; i+=8) {
        b[i+0] = (byte)(a[i+0] & mask);

        b[i+3] = (byte)(a[i+3] & mask);
        b[i+4] = (byte)(a[i+4] & mask);
        b[i+5] = (byte)(a[i+5] & mask);
        b[i+6] = (byte)(a[i+6] & mask);
    }
    return new Object[]{ a, b };
}
// test10b: starting at i = 13 and stepping by 8 while i < RANGE-8, masks the four
// consecutive elements a[i+0..i+3] into b. Returns { a, b }.
// IR rules (64-bit, sse4.1 or asimd):
//  - AlignVector false and MaxVectorSize >= 8: requires LOAD_VECTOR_B / AND_VB with
//    VECTOR_SIZE_4 and at least one STORE_VECTOR.
//  - AlignVector true: requires that no LOAD_VECTOR_B, AND_VB or STORE_VECTOR is present.
@Test
@IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"})
@IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
              IRNode.AND_VB, "= 0",
              IRNode.STORE_VECTOR, "= 0"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
    applyIfPlatform = {"64-bit", "true"},
    applyIf = {"AlignVector", "true"})
static Object[] test10b(byte[] a, byte[] b, byte mask) {
    // This is not alignable with pre-loop, because of odd init.
    // Seems not correctly handled.
    for (int i = 13; i < RANGE-8; i+=8) {
        b[i+0] = (byte)(a[i+0] & mask);
        b[i+1] = (byte)(a[i+1] & mask);
        b[i+2] = (byte)(a[i+2] & mask);
        b[i+3] = (byte)(a[i+3] & mask);
    }
    return new Object[]{ a, b };
}
// test10d: short-array kernel. Starting at i = 13 and stepping by 8 while
// i < RANGE-16, masks the four consecutive elements a[i+3..i+6] into b; the first
// touched index is 13 + 3 = 16. Returns { a, b }.
// IR rule (64-bit, avx2 or asimd, MaxVectorSize >= 16): requires LOAD_VECTOR_S /
// AND_VS with VECTOR_SIZE_4 and at least one STORE_VECTOR; the rule has no
// AlignVector condition.
@Test
@IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.AND_VS, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIf = {"MaxVectorSize", ">=16"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"})
static Object[] test10d(short[] a, short[] b, short mask) {
    for (int i = 13; i < RANGE-16; i+=8) {
        // init + offset -> aligned
        b[i+0+3] = (short)(a[i+0+3] & mask);
        b[i+1+3] = (short)(a[i+1+3] & mask);
        b[i+2+3] = (short)(a[i+2+3] & mask);
        b[i+3+3] = (short)(a[i+3+3] & mask);
    }
    return new Object[]{ a, b };
}

// test11aB: unit-stride byte kernel, b[i] = a[i] & mask for every i in [0, RANGE).
// Returns { a, b }.
// IR rule (64-bit, sse4.1 or asimd): requires at least one LOAD_VECTOR_B, AND_VB and
// STORE_VECTOR, with no VM flag condition.
@Test
@IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
              IRNode.AND_VB, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
static Object[] test11aB(byte[] a, byte[] b, byte mask) {
    for (int i = 0; i < RANGE; i++) {
        // always alignable
        b[i+0] = (byte)(a[i+0] & mask);
    }
    return new Object[]{ a, b };
}

// test11aS: short variant of test11aB; the IR rule requires LOAD_VECTOR_S and AND_VS
// instead of the byte nodes.
@Test
@IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
              IRNode.AND_VS, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
static Object[] test11aS(short[] a, short[] b, short mask) {
    for (int i = 0; i < RANGE; i++) {
        // always alignable
        b[i+0] = (short)(a[i+0] & mask);
    }
    return new Object[]{ a, b };
}
// test11aL: unit-stride long kernel, b[i] = a[i] & mask for every i in [0, RANGE).
// Returns { a, b }.
// IR rule (64-bit, sse4.1 or asimd): requires at least one LOAD_VECTOR_L, AND_VL and
// STORE_VECTOR, with no VM flag condition.
@Test
@IR(counts = {IRNode.LOAD_VECTOR_L, "> 0",
              IRNode.AND_VL, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
static Object[] test11aL(long[] a, long[] b, long mask) {
    for (int i = 0; i < RANGE; i++) {
        // always alignable
        b[i+0] = (long)(a[i+0] & mask);
    }
    return new Object[]{ a, b };
}

// test11bB: like test11aB but the loop starts at i = 1, so index 0 is never written.
// Same unconditional IR expectation on the byte vector nodes.
@Test
@IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
              IRNode.AND_VB, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
static Object[] test11bB(byte[] a, byte[] b, byte mask) {
    for (int i = 1; i < RANGE; i++) {
        // always alignable
        b[i+0] = (byte)(a[i+0] & mask);
    }
    return new Object[]{ a, b };
}

// test11bS: short variant of test11bB (loop starts at 1); expects LOAD_VECTOR_S / AND_VS.
@Test
@IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
              IRNode.AND_VS, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
static Object[] test11bS(short[] a, short[] b, short mask) {
    for (int i = 1; i < RANGE; i++) {
        // always alignable
        b[i+0] = (short)(a[i+0] & mask);
    }
    return new Object[]{ a, b };
}

// test11bI: int variant of test11bB (loop starts at 1); expects LOAD_VECTOR_I / AND_VI.
@Test
@IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
              IRNode.AND_VI, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
static Object[] test11bI(int[] a, int[] b, int mask) {
    for (int i = 1; i < RANGE; i++) {
        // always alignable
        b[i+0] = (int)(a[i+0] & mask);
    }
    return new Object[]{ a, b };
}
// test11cB: byte kernel whose load is shifted one element relative to its store:
// b[i] = a[i+1] & mask for i in [1, RANGE-1). Returns { a, b }.
// IR rules (64-bit, sse4.1 or asimd):
//  - AlignVector false: requires at least one LOAD_VECTOR_B, AND_VB and STORE_VECTOR.
//  - AlignVector true: requires that none of these nodes is present.
@Test
@IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
              IRNode.AND_VB, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
    applyIfPlatform = {"64-bit", "true"},
    applyIf = {"AlignVector", "false"})
@IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
              IRNode.AND_VB, "= 0",
              IRNode.STORE_VECTOR, "= 0"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
    applyIfPlatform = {"64-bit", "true"},
    applyIf = {"AlignVector", "true"})
static Object[] test11cB(byte[] a, byte[] b, byte mask) {
    for (int i = 1; i < RANGE-1; i++) {
        // 1 byte offset -> not alignable with AlignVector
        b[i+0] = (byte)(a[i+1] & mask);
    }
    return new Object[]{ a, b };
}

// test11cS: short variant of test11cB; the one-element shift between load and store
// corresponds to 2 bytes. Same pair of IR rules, on LOAD_VECTOR_S / AND_VS.
@Test
@IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
              IRNode.AND_VS, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
    applyIfPlatform = {"64-bit", "true"},
    applyIf = {"AlignVector", "false"})
@IR(counts = {IRNode.LOAD_VECTOR_S, "= 0",
              IRNode.AND_VS, "= 0",
              IRNode.STORE_VECTOR, "= 0"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
    applyIfPlatform = {"64-bit", "true"},
    applyIf = {"AlignVector", "true"})
static Object[] test11cS(short[] a, short[] b, short mask) {
    for (int i = 1; i < RANGE-1; i++) {
        // 2 byte offset -> not alignable with AlignVector
        b[i+0] = (short)(a[i+1] & mask);
    }
    return new Object[]{ a, b };
}
{"sse4.1", "true", "asimd", "true"}, 882 applyIfPlatform = {"64-bit", "true"}, 883 applyIf = {"AlignVector", "false"}) 884 @IR(counts = {IRNode.LOAD_VECTOR_I, "= 0", 885 IRNode.AND_VI, "= 0", 886 IRNode.STORE_VECTOR, "= 0"}, 887 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, 888 applyIfPlatform = {"64-bit", "true"}, 889 applyIf = {"AlignVector", "true"}) 890 static Object[] test11cI(int[] a, int[] b, int mask) { 891 for (int i = 1; i < RANGE-1; i++) { 892 // 4 byte offset -> not alignable with AlignVector 893 b[i+0] = (int)(a[i+1] & mask); 894 } 895 return new Object[]{ a, b }; 896 } 897 898 @Test 899 @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", 900 IRNode.AND_VL, "> 0", 901 IRNode.STORE_VECTOR, "> 0"}, 902 applyIfPlatform = {"64-bit", "true"}, 903 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) 904 static Object[] test11cL(long[] a, long[] b, long mask) { 905 for (int i = 1; i < RANGE-1; i++) { 906 // always alignable (8 byte offset) 907 b[i+0] = (long)(a[i+1] & mask); 908 } 909 return new Object[]{ a, b }; 910 } 911 912 @Test 913 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", 914 IRNode.AND_VB, "> 0", 915 IRNode.STORE_VECTOR, "> 0"}, 916 applyIfPlatform = {"64-bit", "true"}, 917 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) 918 static Object[] test11dB(byte[] a, byte[] b, byte mask, int invar) { 919 for (int i = 0; i < RANGE; i++) { 920 b[i+0+invar] = (byte)(a[i+0+invar] & mask); 921 } 922 return new Object[]{ a, b }; 923 } 924 925 @Test 926 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", 927 IRNode.AND_VS, "> 0", 928 IRNode.STORE_VECTOR, "> 0"}, 929 applyIfPlatform = {"64-bit", "true"}, 930 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) 931 static Object[] test11dS(short[] a, short[] b, short mask, int invar) { 932 for (int i = 0; i < RANGE; i++) { 933 b[i+0+invar] = (short)(a[i+0+invar] & mask); 934 } 935 return new Object[]{ a, b }; 936 } 937 938 @Test 939 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", 940 IRNode.AND_VI, "> 0", 
941 IRNode.STORE_VECTOR, "> 0"}, 942 applyIfPlatform = {"64-bit", "true"}, 943 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) 944 static Object[] test11dI(int[] a, int[] b, int mask, int invar) { 945 for (int i = 0; i < RANGE; i++) { 946 b[i+0+invar] = (int)(a[i+0+invar] & mask); 947 } 948 return new Object[]{ a, b }; 949 } 950 951 @Test 952 @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", 953 IRNode.AND_VL, "> 0", 954 IRNode.STORE_VECTOR, "> 0"}, 955 applyIfPlatform = {"64-bit", "true"}, 956 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) 957 static Object[] test11dL(long[] a, long[] b, long mask, int invar) { 958 for (int i = 0; i < RANGE; i++) { 959 b[i+0+invar] = (long)(a[i+0+invar] & mask); 960 } 961 return new Object[]{ a, b }; 962 } 963 964 @Test 965 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0", 966 IRNode.AND_VB, "= 0", 967 IRNode.STORE_VECTOR, "= 0"}, 968 applyIfPlatform = {"64-bit", "true"}, 969 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) 970 static Object[] test12(byte[] a, byte[] b, byte mask) { 971 for (int i = 0; i < RANGE/16; i++) { 972 // Currently does not vectorize at all 973 b[i*6 + 0 ] = (byte)(a[i*6 + 0 ] & mask); 974 b[i*6 + 1 ] = (byte)(a[i*6 + 1 ] & mask); 975 b[i*6 + 2 ] = (byte)(a[i*6 + 2 ] & mask); 976 b[i*6 + 3 ] = (byte)(a[i*6 + 3 ] & mask); 977 } 978 return new Object[]{ a, b }; 979 } 980 981 @Test 982 @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0", 983 IRNode.LOAD_VECTOR_L, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0", 984 IRNode.ADD_VI, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0", 985 IRNode.ADD_VL, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0", 986 IRNode.STORE_VECTOR, "> 0"}, 987 applyIfPlatform = {"64-bit", "true"}, 988 applyIfCPUFeatureOr = {"avx2", "true"}) 989 // require avx to ensure vectors are larger than what unrolling produces 990 static Object[] test13aIL(int[] a, long[] b) { 991 for (int i = 0; i < RANGE; i++) { 992 a[i]++; 993 
b[i]++; 994 } 995 return new Object[]{ a, b }; 996 } 997 998 @Test 999 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", 1000 IRNode.LOAD_VECTOR_I, "> 0", 1001 IRNode.ADD_VB, "> 0", 1002 IRNode.ADD_VI, "> 0", 1003 IRNode.STORE_VECTOR, "> 0"}, 1004 applyIfPlatform = {"64-bit", "true"}, 1005 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"}) 1006 static Object[] test13aIB(int[] a, byte[] b) { 1007 for (int i = 0; i < RANGE; i++) { 1008 a[i]++; 1009 b[i]++; 1010 } 1011 return new Object[]{ a, b }; 1012 } 1013 1014 @Test 1015 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", 1016 IRNode.LOAD_VECTOR_S, "> 0", 1017 IRNode.ADD_VI, "> 0", 1018 IRNode.ADD_VS, "> 0", 1019 IRNode.STORE_VECTOR, "> 0"}, 1020 applyIfPlatform = {"64-bit", "true"}, 1021 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"}) 1022 static Object[] test13aIS(int[] a, short[] b) { 1023 for (int i = 0; i < RANGE; i++) { 1024 a[i]++; 1025 b[i]++; 1026 } 1027 return new Object[]{ a, b }; 1028 } 1029 1030 @Test 1031 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", 1032 IRNode.LOAD_VECTOR_S, "> 0", 1033 IRNode.LOAD_VECTOR_I, "> 0", 1034 IRNode.LOAD_VECTOR_L, "> 0", 1035 IRNode.ADD_VB, "> 0", 1036 IRNode.ADD_VS, "> 0", 1037 IRNode.ADD_VI, "> 0", 1038 IRNode.ADD_VL, "> 0", 1039 IRNode.STORE_VECTOR, "> 0"}, 1040 applyIfPlatform = {"64-bit", "true"}, 1041 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"}) 1042 static Object[] test13aBSIL(byte[] a, short[] b, int[] c, long[] d) { 1043 for (int i = 0; i < RANGE; i++) { 1044 a[i]++; 1045 b[i]++; 1046 c[i]++; 1047 d[i]++; 1048 } 1049 return new Object[]{ a, b, c, d }; 1050 } 1051 1052 @Test 1053 @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0", 1054 IRNode.LOAD_VECTOR_L, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0", 1055 IRNode.ADD_VI, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0", 1056 IRNode.ADD_VL, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0", 1057 IRNode.STORE_VECTOR, "> 0"}, 1058 applyIfPlatform = 
{"64-bit", "true"}, 1059 applyIfCPUFeatureOr = {"avx2", "true"}) 1060 // require avx to ensure vectors are larger than what unrolling produces 1061 static Object[] test13bIL(int[] a, long[] b) { 1062 for (int i = 1; i < RANGE; i++) { 1063 a[i]++; 1064 b[i]++; 1065 } 1066 return new Object[]{ a, b }; 1067 } 1068 1069 @Test 1070 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", 1071 IRNode.LOAD_VECTOR_I, "> 0", 1072 IRNode.ADD_VB, "> 0", 1073 IRNode.ADD_VI, "> 0", 1074 IRNode.STORE_VECTOR, "> 0"}, 1075 applyIfPlatform = {"64-bit", "true"}, 1076 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"}) 1077 static Object[] test13bIB(int[] a, byte[] b) { 1078 for (int i = 1; i < RANGE; i++) { 1079 a[i]++; 1080 b[i]++; 1081 } 1082 return new Object[]{ a, b }; 1083 } 1084 1085 @Test 1086 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", 1087 IRNode.LOAD_VECTOR_S, "> 0", 1088 IRNode.ADD_VI, "> 0", 1089 IRNode.ADD_VS, "> 0", 1090 IRNode.STORE_VECTOR, "> 0"}, 1091 applyIfPlatform = {"64-bit", "true"}, 1092 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"}) 1093 static Object[] test13bIS(int[] a, short[] b) { 1094 for (int i = 1; i < RANGE; i++) { 1095 a[i]++; 1096 b[i]++; 1097 } 1098 return new Object[]{ a, b }; 1099 } 1100 1101 @Test 1102 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", 1103 IRNode.LOAD_VECTOR_S, "> 0", 1104 IRNode.LOAD_VECTOR_I, "> 0", 1105 IRNode.LOAD_VECTOR_L, "> 0", 1106 IRNode.ADD_VB, "> 0", 1107 IRNode.ADD_VS, "> 0", 1108 IRNode.ADD_VI, "> 0", 1109 IRNode.ADD_VL, "> 0", 1110 IRNode.STORE_VECTOR, "> 0"}, 1111 applyIfPlatform = {"64-bit", "true"}, 1112 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"}) 1113 static Object[] test13bBSIL(byte[] a, short[] b, int[] c, long[] d) { 1114 for (int i = 1; i < RANGE; i++) { 1115 a[i]++; 1116 b[i]++; 1117 c[i]++; 1118 d[i]++; 1119 } 1120 return new Object[]{ a, b, c, d }; 1121 } 1122 1123 @Test 1124 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", 1125 IRNode.ADD_VB, "> 0", 1126 IRNode.STORE_VECTOR, "> 0"}, 1127 
applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, 1128 applyIfPlatform = {"64-bit", "true"}, 1129 applyIf = {"AlignVector", "false"}) 1130 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0", 1131 IRNode.ADD_VB, "= 0", 1132 IRNode.STORE_VECTOR, "= 0"}, 1133 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, 1134 applyIfPlatform = {"64-bit", "true"}, 1135 applyIf = {"AlignVector", "true"}) 1136 static Object[] test14aB(byte[] a) { 1137 // non-power-of-2 stride 1138 for (int i = 0; i < RANGE-20; i+=9) { 1139 a[i+0]++; 1140 a[i+1]++; 1141 a[i+2]++; 1142 a[i+3]++; 1143 a[i+4]++; 1144 a[i+5]++; 1145 a[i+6]++; 1146 a[i+7]++; 1147 a[i+8]++; 1148 a[i+9]++; 1149 a[i+10]++; 1150 a[i+11]++; 1151 a[i+12]++; 1152 a[i+13]++; 1153 a[i+14]++; 1154 a[i+15]++; 1155 } 1156 return new Object[]{ a }; 1157 } 1158 1159 @Test 1160 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", 1161 IRNode.ADD_VB, "> 0", 1162 IRNode.STORE_VECTOR, "> 0"}, 1163 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, 1164 applyIfPlatform = {"64-bit", "true"}, 1165 applyIf = {"AlignVector", "false"}) 1166 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0", 1167 IRNode.ADD_VB, "= 0", 1168 IRNode.STORE_VECTOR, "= 0"}, 1169 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, 1170 applyIfPlatform = {"64-bit", "true"}, 1171 applyIf = {"AlignVector", "true"}) 1172 static Object[] test14bB(byte[] a) { 1173 // non-power-of-2 stride 1174 for (int i = 0; i < RANGE-20; i+=3) { 1175 a[i+0]++; 1176 a[i+1]++; 1177 a[i+2]++; 1178 a[i+3]++; 1179 a[i+4]++; 1180 a[i+5]++; 1181 a[i+6]++; 1182 a[i+7]++; 1183 a[i+8]++; 1184 a[i+9]++; 1185 a[i+10]++; 1186 a[i+11]++; 1187 a[i+12]++; 1188 a[i+13]++; 1189 a[i+14]++; 1190 a[i+15]++; 1191 } 1192 return new Object[]{ a }; 1193 } 1194 1195 @Test 1196 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", 1197 IRNode.ADD_VB, "> 0", 1198 IRNode.STORE_VECTOR, "> 0"}, 1199 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, 1200 applyIfPlatform = {"64-bit", "true"}, 1201 applyIf = 
{"AlignVector", "false"}) 1202 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0", 1203 IRNode.ADD_VB, "= 0", 1204 IRNode.STORE_VECTOR, "= 0"}, 1205 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, 1206 applyIfPlatform = {"64-bit", "true"}, 1207 applyIf = {"AlignVector", "true"}) 1208 static Object[] test14cB(byte[] a) { 1209 // non-power-of-2 stride 1210 for (int i = 0; i < RANGE-20; i+=5) { 1211 a[i+0]++; 1212 a[i+1]++; 1213 a[i+2]++; 1214 a[i+3]++; 1215 a[i+4]++; 1216 a[i+5]++; 1217 a[i+6]++; 1218 a[i+7]++; 1219 a[i+8]++; 1220 a[i+9]++; 1221 a[i+10]++; 1222 a[i+11]++; 1223 a[i+12]++; 1224 a[i+13]++; 1225 a[i+14]++; 1226 a[i+15]++; 1227 } 1228 return new Object[]{ a }; 1229 } 1230 1231 @Test 1232 // IR rules difficult because of modulo wrapping with offset after peeling. 1233 static Object[] test15aB(byte[] a) { 1234 // non-power-of-2 scale 1235 for (int i = 0; i < RANGE/64-20; i++) { 1236 a[53*i+0]++; 1237 a[53*i+1]++; 1238 a[53*i+2]++; 1239 a[53*i+3]++; 1240 a[53*i+4]++; 1241 a[53*i+5]++; 1242 a[53*i+6]++; 1243 a[53*i+7]++; 1244 a[53*i+8]++; 1245 a[53*i+9]++; 1246 a[53*i+10]++; 1247 a[53*i+11]++; 1248 a[53*i+12]++; 1249 a[53*i+13]++; 1250 a[53*i+14]++; 1251 a[53*i+15]++; 1252 } 1253 return new Object[]{ a }; 1254 } 1255 1256 @Test 1257 // IR rules difficult because of modulo wrapping with offset after peeling. 1258 static Object[] test15bB(byte[] a) { 1259 // non-power-of-2 scale 1260 for (int i = 0; i < RANGE/64-20; i++) { 1261 a[25*i+0]++; 1262 a[25*i+1]++; 1263 a[25*i+2]++; 1264 a[25*i+3]++; 1265 a[25*i+4]++; 1266 a[25*i+5]++; 1267 a[25*i+6]++; 1268 a[25*i+7]++; 1269 a[25*i+8]++; 1270 a[25*i+9]++; 1271 a[25*i+10]++; 1272 a[25*i+11]++; 1273 a[25*i+12]++; 1274 a[25*i+13]++; 1275 a[25*i+14]++; 1276 a[25*i+15]++; 1277 } 1278 return new Object[]{ a }; 1279 } 1280 1281 @Test 1282 // IR rules difficult because of modulo wrapping with offset after peeling. 
1283 static Object[] test15cB(byte[] a) { 1284 // non-power-of-2 scale 1285 for (int i = 0; i < RANGE/64-20; i++) { 1286 a[19*i+0]++; 1287 a[19*i+1]++; 1288 a[19*i+2]++; 1289 a[19*i+3]++; 1290 a[19*i+4]++; 1291 a[19*i+5]++; 1292 a[19*i+6]++; 1293 a[19*i+7]++; 1294 a[19*i+8]++; 1295 a[19*i+9]++; 1296 a[19*i+10]++; 1297 a[19*i+11]++; 1298 a[19*i+12]++; 1299 a[19*i+13]++; 1300 a[19*i+14]++; 1301 a[19*i+15]++; 1302 } 1303 return new Object[]{ a }; 1304 } 1305 1306 @Test 1307 static Object[] test16a(byte[] a, short[] b) { 1308 // infinite loop issues 1309 for (int i = 0; i < RANGE/2-20; i++) { 1310 a[2*i+0]++; 1311 a[2*i+1]++; 1312 a[2*i+2]++; 1313 a[2*i+3]++; 1314 a[2*i+4]++; 1315 a[2*i+5]++; 1316 a[2*i+6]++; 1317 a[2*i+7]++; 1318 a[2*i+8]++; 1319 a[2*i+9]++; 1320 a[2*i+10]++; 1321 a[2*i+11]++; 1322 a[2*i+12]++; 1323 a[2*i+13]++; 1324 a[2*i+14]++; 1325 1326 b[2*i+0]++; 1327 b[2*i+1]++; 1328 b[2*i+2]++; 1329 b[2*i+3]++; 1330 } 1331 return new Object[]{ a, b }; 1332 } 1333 1334 @Test 1335 static Object[] test16b(byte[] a) { 1336 // infinite loop issues 1337 for (int i = 0; i < RANGE/2-20; i++) { 1338 a[2*i+0]++; 1339 a[2*i+1]++; 1340 a[2*i+2]++; 1341 a[2*i+3]++; 1342 a[2*i+4]++; 1343 a[2*i+5]++; 1344 a[2*i+6]++; 1345 a[2*i+7]++; 1346 a[2*i+8]++; 1347 a[2*i+9]++; 1348 a[2*i+10]++; 1349 a[2*i+11]++; 1350 a[2*i+12]++; 1351 a[2*i+13]++; 1352 a[2*i+14]++; 1353 } 1354 return new Object[]{ a }; 1355 } 1356 1357 @Test 1358 @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", 1359 IRNode.ADD_VL, "> 0", 1360 IRNode.STORE_VECTOR, "> 0"}, 1361 applyIfPlatform = {"64-bit", "true"}, 1362 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) 1363 static Object[] test17a(long[] a) { 1364 // Unsafe: vectorizes with profiling (not xcomp) 1365 for (int i = 0; i < RANGE; i++) { 1366 int adr = UNSAFE.ARRAY_LONG_BASE_OFFSET + 8 * i; 1367 long v = UNSAFE.getLongUnaligned(a, adr); 1368 UNSAFE.putLongUnaligned(a, adr, v + 1); 1369 } 1370 return new Object[]{ a }; 1371 } 1372 1373 @Test 1374 // 
Difficult to write good IR rule. Modulo calculus overflow can create non-power-of-2 packs. 1375 static Object[] test17b(long[] a) { 1376 // Not alignable 1377 for (int i = 0; i < RANGE-1; i++) { 1378 int adr = UNSAFE.ARRAY_LONG_BASE_OFFSET + 8 * i + 1; 1379 long v = UNSAFE.getLongUnaligned(a, adr); 1380 UNSAFE.putLongUnaligned(a, adr, v + 1); 1381 } 1382 return new Object[]{ a }; 1383 } 1384 1385 @Test 1386 @IR(counts = {IRNode.LOAD_VECTOR_L, IRNode.VECTOR_SIZE_2, "> 0", 1387 IRNode.ADD_VL, IRNode.VECTOR_SIZE_2, "> 0", 1388 IRNode.STORE_VECTOR, "> 0"}, 1389 applyIf = {"MaxVectorSize", ">=32"}, 1390 applyIfPlatform = {"64-bit", "true"}, 1391 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) 1392 static Object[] test17c(long[] a) { 1393 // Unsafe: aligned vectorizes 1394 for (int i = 0; i < RANGE-1; i+=4) { 1395 int adr = UNSAFE.ARRAY_LONG_BASE_OFFSET + 8 * i; 1396 long v0 = UNSAFE.getLongUnaligned(a, adr + 0); 1397 long v1 = UNSAFE.getLongUnaligned(a, adr + 8); 1398 UNSAFE.putLongUnaligned(a, adr + 0, v0 + 1); 1399 UNSAFE.putLongUnaligned(a, adr + 8, v1 + 1); 1400 } 1401 return new Object[]{ a }; 1402 } 1403 1404 @Test 1405 @IR(counts = {IRNode.LOAD_VECTOR_L, IRNode.VECTOR_SIZE_2, "> 0", 1406 IRNode.ADD_VL, IRNode.VECTOR_SIZE_2, "> 0", 1407 IRNode.STORE_VECTOR, "> 0"}, 1408 applyIfCPUFeatureOr = {"avx512", "true", "asimd", "true"}, 1409 applyIfPlatform = {"64-bit", "true"}, 1410 applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=64"}) 1411 // Ensure vector width is large enough to fit 64 byte for longs: 1412 // The offsets are: 25, 33, 57, 65 1413 // In modulo 32: 25, 1, 25, 1 -> does not vectorize 1414 // In modulo 64: 25, 33, 57, 1 -> at least first pair vectorizes 1415 // This problem is because we compute modulo vector width in memory_alignment. 
1416 @IR(counts = {IRNode.LOAD_VECTOR_L, "= 0", 1417 IRNode.ADD_VL, "= 0", 1418 IRNode.STORE_VECTOR, "= 0"}, 1419 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, 1420 applyIfPlatform = {"64-bit", "true"}, 1421 applyIf = {"AlignVector", "true"}) 1422 static Object[] test17d(long[] a) { 1423 // Not alignable 1424 for (int i = 0; i < RANGE-1; i+=4) { 1425 int adr = UNSAFE.ARRAY_LONG_BASE_OFFSET + 8 * i + 1; 1426 long v0 = UNSAFE.getLongUnaligned(a, adr + 0); 1427 long v1 = UNSAFE.getLongUnaligned(a, adr + 8); 1428 UNSAFE.putLongUnaligned(a, adr + 0, v0 + 1); 1429 UNSAFE.putLongUnaligned(a, adr + 8, v1 + 1); 1430 } 1431 return new Object[]{ a }; 1432 } 1433 1434 @Test 1435 static Object[] test18a(byte[] a, int[] b) { 1436 // scale = 0 --> no iv 1437 for (int i = 0; i < RANGE; i++) { 1438 a[0] = 1; 1439 b[i] = 2; 1440 a[1] = 1; 1441 } 1442 return new Object[]{ a, b }; 1443 } 1444 1445 @Test 1446 static Object[] test18b(byte[] a, int[] b) { 1447 // scale = 0 --> no iv 1448 for (int i = 0; i < RANGE; i++) { 1449 a[1] = 1; 1450 b[i] = 2; 1451 a[2] = 1; 1452 } 1453 return new Object[]{ a, b }; 1454 } 1455 1456 @Test 1457 static Object[] test19(int[] a, int[] b) { 1458 for (int i = 5000; i > 0; i--) { 1459 a[RANGE_FINAL - i] = b[RANGE_FINAL - i]; 1460 } 1461 return new Object[]{ a, b }; 1462 } 1463 1464 @Test 1465 static Object[] test20(byte[] a) { 1466 // Example where it is easy to pass alignment check, 1467 // but used to fail the alignment calculation 1468 for (int i = 1; i < RANGE/2-50; i++) { 1469 a[2*i+0+30]++; 1470 a[2*i+1+30]++; 1471 a[2*i+2+30]++; 1472 a[2*i+3+30]++; 1473 } 1474 return new Object[]{ a }; 1475 } 1476 }