1 /* 2 * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 */ 23 24 package compiler.loopopts.superword; 25 26 import compiler.lib.ir_framework.*; 27 import jdk.test.lib.Utils; 28 import jdk.test.whitebox.WhiteBox; 29 import jdk.internal.misc.Unsafe; 30 import java.lang.reflect.Array; 31 import java.util.Map; 32 import java.util.HashMap; 33 import java.util.Random; 34 import java.nio.ByteOrder; 35 36 /* 37 * @test id=NoAlignVector 38 * @bug 8310190 39 * @summary Test AlignVector with various loop init, stride, scale, invar, etc. 40 * @modules java.base/jdk.internal.misc 41 * @library /test/lib / 42 * @requires vm.compiler2.enabled 43 * @run driver compiler.loopopts.superword.TestAlignVector NoAlignVector 44 */ 45 46 /* 47 * @test id=AlignVector 48 * @bug 8310190 49 * @summary Test AlignVector with various loop init, stride, scale, invar, etc. 
50 * @modules java.base/jdk.internal.misc 51 * @library /test/lib / 52 * @requires vm.compiler2.enabled 53 * @run driver compiler.loopopts.superword.TestAlignVector AlignVector 54 */ 55 56 /* 57 * @test id=VerifyAlignVector 58 * @bug 8310190 59 * @summary Test AlignVector with various loop init, stride, scale, invar, etc. 60 * @modules java.base/jdk.internal.misc 61 * @library /test/lib / 62 * @requires vm.compiler2.enabled 63 * @run driver compiler.loopopts.superword.TestAlignVector VerifyAlignVector 64 */ 65 66 public class TestAlignVector { 67 static int RANGE = 1024*8; 68 static int RANGE_FINAL = 1024*8; 69 private static final Unsafe UNSAFE = Unsafe.getUnsafe(); 70 private static final Random RANDOM = Utils.getRandomInstance(); 71 72 // Inputs 73 byte[] aB; 74 byte[] bB; 75 byte mB = (byte)31; 76 short[] aS; 77 short[] bS; 78 short mS = (short)0xF0F0; 79 int[] aI; 80 int[] bI; 81 int mI = 0xF0F0F0F0; 82 long[] aL; 83 long[] bL; 84 long mL = 0xF0F0F0F0F0F0F0F0L; 85 86 // List of tests 87 Map<String,TestFunction> tests = new HashMap<String,TestFunction>(); 88 89 // List of gold, the results from the first run before compilation 90 Map<String,Object[]> golds = new HashMap<String,Object[]>(); 91 92 interface TestFunction { 93 Object[] run(); 94 } 95 96 public static void main(String[] args) { 97 TestFramework framework = new TestFramework(TestAlignVector.class); 98 framework.addFlags("--add-modules", "java.base", "--add-exports", "java.base/jdk.internal.misc=ALL-UNNAMED", 99 "-XX:LoopUnrollLimit=250"); 100 101 switch (args[0]) { 102 case "NoAlignVector" -> { framework.addFlags("-XX:-AlignVector"); } 103 case "AlignVector" -> { framework.addFlags("-XX:+AlignVector"); } 104 case "VerifyAlignVector" -> { framework.addFlags("-XX:+AlignVector", "-XX:+IgnoreUnrecognizedVMOptions", "-XX:+VerifyAlignVector"); } 105 default -> { throw new RuntimeException("Test argument not recognized: " + args[0]); } 106 } 107 framework.start(); 108 } 109 110 public TestAlignVector() { 
111 // Generate input once 112 aB = generateB(); 113 bB = generateB(); 114 aS = generateS(); 115 bS = generateS(); 116 aI = generateI(); 117 bI = generateI(); 118 aL = generateL(); 119 bL = generateL(); 120 121 // Add all tests to list 122 tests.put("test0", () -> { return test0(aB.clone(), bB.clone(), mB); }); 123 tests.put("test1", () -> { return test1(aB.clone(), bB.clone(), mB); }); 124 tests.put("test2", () -> { return test2(aB.clone(), bB.clone(), mB); }); 125 tests.put("test3", () -> { return test3(aB.clone(), bB.clone(), mB); }); 126 tests.put("test4", () -> { return test4(aB.clone(), bB.clone(), mB); }); 127 tests.put("test5", () -> { return test5(aB.clone(), bB.clone(), mB, 0); }); 128 tests.put("test6", () -> { return test6(aB.clone(), bB.clone(), mB); }); 129 tests.put("test7", () -> { return test7(aS.clone(), bS.clone(), mS); }); 130 tests.put("test8", () -> { return test8(aB.clone(), bB.clone(), mB, 0); }); 131 tests.put("test8", () -> { return test8(aB.clone(), bB.clone(), mB, 1); }); 132 tests.put("test9", () -> { return test9(aB.clone(), bB.clone(), mB); }); 133 134 tests.put("test10a", () -> { return test10a(aB.clone(), bB.clone(), mB); }); 135 tests.put("test10b", () -> { return test10b(aB.clone(), bB.clone(), mB); }); 136 tests.put("test10c", () -> { return test10c(aS.clone(), bS.clone(), mS); }); 137 tests.put("test10d", () -> { return test10d(aS.clone(), bS.clone(), mS); }); 138 139 tests.put("test11aB", () -> { return test11aB(aB.clone(), bB.clone(), mB); }); 140 tests.put("test11aS", () -> { return test11aS(aS.clone(), bS.clone(), mS); }); 141 tests.put("test11aI", () -> { return test11aI(aI.clone(), bI.clone(), mI); }); 142 tests.put("test11aL", () -> { return test11aL(aL.clone(), bL.clone(), mL); }); 143 144 tests.put("test11bB", () -> { return test11bB(aB.clone(), bB.clone(), mB); }); 145 tests.put("test11bS", () -> { return test11bS(aS.clone(), bS.clone(), mS); }); 146 tests.put("test11bI", () -> { return test11bI(aI.clone(), bI.clone(), 
mI); }); 147 tests.put("test11bL", () -> { return test11bL(aL.clone(), bL.clone(), mL); }); 148 149 tests.put("test11cB", () -> { return test11cB(aB.clone(), bB.clone(), mB); }); 150 tests.put("test11cS", () -> { return test11cS(aS.clone(), bS.clone(), mS); }); 151 tests.put("test11cI", () -> { return test11cI(aI.clone(), bI.clone(), mI); }); 152 tests.put("test11cL", () -> { return test11cL(aL.clone(), bL.clone(), mL); }); 153 154 tests.put("test11dB", () -> { return test11dB(aB.clone(), bB.clone(), mB, 0); }); 155 tests.put("test11dS", () -> { return test11dS(aS.clone(), bS.clone(), mS, 0); }); 156 tests.put("test11dI", () -> { return test11dI(aI.clone(), bI.clone(), mI, 0); }); 157 tests.put("test11dL", () -> { return test11dL(aL.clone(), bL.clone(), mL, 0); }); 158 159 tests.put("test12", () -> { return test12(aB.clone(), bB.clone(), mB); }); 160 161 tests.put("test13aIL", () -> { return test13aIL(aI.clone(), aL.clone()); }); 162 tests.put("test13aIB", () -> { return test13aIB(aI.clone(), aB.clone()); }); 163 tests.put("test13aIS", () -> { return test13aIS(aI.clone(), aS.clone()); }); 164 tests.put("test13aBSIL", () -> { return test13aBSIL(aB.clone(), aS.clone(), aI.clone(), aL.clone()); }); 165 166 tests.put("test13bIL", () -> { return test13bIL(aI.clone(), aL.clone()); }); 167 tests.put("test13bIB", () -> { return test13bIB(aI.clone(), aB.clone()); }); 168 tests.put("test13bIS", () -> { return test13bIS(aI.clone(), aS.clone()); }); 169 tests.put("test13bBSIL", () -> { return test13bBSIL(aB.clone(), aS.clone(), aI.clone(), aL.clone()); }); 170 171 tests.put("test14aB", () -> { return test14aB(aB.clone()); }); 172 tests.put("test14bB", () -> { return test14bB(aB.clone()); }); 173 tests.put("test14cB", () -> { return test14cB(aB.clone()); }); 174 175 tests.put("test15aB", () -> { return test15aB(aB.clone()); }); 176 tests.put("test15bB", () -> { return test15bB(aB.clone()); }); 177 tests.put("test15cB", () -> { return test15cB(aB.clone()); }); 178 179 
tests.put("test16a", () -> { return test16a(aB.clone(), aS.clone()); }); 180 tests.put("test16b", () -> { return test16b(aB.clone()); }); 181 182 tests.put("test17a", () -> { return test17a(aL.clone()); }); 183 tests.put("test17b", () -> { return test17b(aL.clone()); }); 184 tests.put("test17c", () -> { return test17c(aL.clone()); }); 185 tests.put("test17d", () -> { return test17d(aL.clone()); }); 186 187 tests.put("test18a", () -> { return test18a(aB.clone(), aI.clone()); }); 188 tests.put("test18b", () -> { return test18b(aB.clone(), aI.clone()); }); 189 190 tests.put("test19", () -> { return test19(aI.clone(), bI.clone()); }); 191 tests.put("test20", () -> { return test20(aB.clone()); }); 192 193 // Compute gold value for all test methods before compilation 194 for (Map.Entry<String,TestFunction> entry : tests.entrySet()) { 195 String name = entry.getKey(); 196 TestFunction test = entry.getValue(); 197 Object[] gold = test.run(); 198 golds.put(name, gold); 199 } 200 } 201 202 @Warmup(100) 203 @Run(test = {"test0", 204 "test1", 205 "test2", 206 "test3", 207 "test4", 208 "test5", 209 "test6", 210 "test7", 211 "test8", 212 "test9", 213 "test10a", 214 "test10b", 215 "test10c", 216 "test10d", 217 "test11aB", 218 "test11aS", 219 "test11aI", 220 "test11aL", 221 "test11bB", 222 "test11bS", 223 "test11bI", 224 "test11bL", 225 "test11cB", 226 "test11cS", 227 "test11cI", 228 "test11cL", 229 "test11dB", 230 "test11dS", 231 "test11dI", 232 "test11dL", 233 "test12", 234 "test13aIL", 235 "test13aIB", 236 "test13aIS", 237 "test13aBSIL", 238 "test13bIL", 239 "test13bIB", 240 "test13bIS", 241 "test13bBSIL", 242 "test14aB", 243 "test14bB", 244 "test14cB", 245 "test15aB", 246 "test15bB", 247 "test15cB", 248 "test16a", 249 "test16b", 250 "test17a", 251 "test17b", 252 "test17c", 253 "test17d", 254 "test18a", 255 "test18b", 256 "test19", 257 "test20"}) 258 public void runTests() { 259 for (Map.Entry<String,TestFunction> entry : tests.entrySet()) { 260 String name = entry.getKey(); 
261 TestFunction test = entry.getValue(); 262 // Recall gold value from before compilation 263 Object[] gold = golds.get(name); 264 // Compute new result 265 Object[] result = test.run(); 266 // Compare gold and new result 267 verify(name, gold, result); 268 } 269 } 270 271 static byte[] generateB() { 272 byte[] a = new byte[RANGE]; 273 for (int i = 0; i < a.length; i++) { 274 a[i] = (byte)RANDOM.nextInt(); 275 } 276 return a; 277 } 278 279 static short[] generateS() { 280 short[] a = new short[RANGE]; 281 for (int i = 0; i < a.length; i++) { 282 a[i] = (short)RANDOM.nextInt(); 283 } 284 return a; 285 } 286 287 static int[] generateI() { 288 int[] a = new int[RANGE]; 289 for (int i = 0; i < a.length; i++) { 290 a[i] = RANDOM.nextInt(); 291 } 292 return a; 293 } 294 295 static long[] generateL() { 296 long[] a = new long[RANGE]; 297 for (int i = 0; i < a.length; i++) { 298 a[i] = RANDOM.nextLong(); 299 } 300 return a; 301 } 302 303 static void verify(String name, Object[] gold, Object[] result) { 304 if (gold.length != result.length) { 305 throw new RuntimeException("verify " + name + ": not the same number of outputs: gold.length = " + 306 gold.length + ", result.length = " + result.length); 307 } 308 for (int i = 0; i < gold.length; i++) { 309 Object g = gold[i]; 310 Object r = result[i]; 311 if (g.getClass() != r.getClass() || !g.getClass().isArray() || !r.getClass().isArray()) { 312 throw new RuntimeException("verify " + name + ": must both be array of same type:" + 313 " gold[" + i + "].getClass() = " + g.getClass().getSimpleName() + 314 " result[" + i + "].getClass() = " + r.getClass().getSimpleName()); 315 } 316 if (g == r) { 317 throw new RuntimeException("verify " + name + ": should be two separate arrays (with identical content):" + 318 " gold[" + i + "] == result[" + i + "]"); 319 } 320 if (Array.getLength(g) != Array.getLength(r)) { 321 throw new RuntimeException("verify " + name + ": arrays must have same length:" + 322 " gold[" + i + "].length = " + 
Array.getLength(g) + 323 " result[" + i + "].length = " + Array.getLength(r)); 324 } 325 Class c = g.getClass().getComponentType(); 326 if (c == byte.class) { 327 verifyB(name, i, (byte[])g, (byte[])r); 328 } else if (c == short.class) { 329 verifyS(name, i, (short[])g, (short[])r); 330 } else if (c == int.class) { 331 verifyI(name, i, (int[])g, (int[])r); 332 } else if (c == long.class) { 333 verifyL(name, i, (long[])g, (long[])r); 334 } else { 335 throw new RuntimeException("verify " + name + ": array type not supported for verify:" + 336 " gold[" + i + "].getClass() = " + g.getClass().getSimpleName() + 337 " result[" + i + "].getClass() = " + r.getClass().getSimpleName()); 338 } 339 } 340 } 341 342 static void verifyB(String name, int i, byte[] g, byte[] r) { 343 for (int j = 0; j < g.length; j++) { 344 if (g[j] != r[j]) { 345 throw new RuntimeException("verify " + name + ": arrays must have same content:" + 346 " gold[" + i + "][" + j + "] = " + g[j] + 347 " result[" + i + "][" + j + "] = " + r[j]); 348 } 349 } 350 } 351 352 static void verifyS(String name, int i, short[] g, short[] r) { 353 for (int j = 0; j < g.length; j++) { 354 if (g[j] != r[j]) { 355 throw new RuntimeException("verify " + name + ": arrays must have same content:" + 356 " gold[" + i + "][" + j + "] = " + g[j] + 357 " result[" + i + "][" + j + "] = " + r[j]); 358 } 359 } 360 } 361 362 static void verifyI(String name, int i, int[] g, int[] r) { 363 for (int j = 0; j < g.length; j++) { 364 if (g[j] != r[j]) { 365 throw new RuntimeException("verify " + name + ": arrays must have same content:" + 366 " gold[" + i + "][" + j + "] = " + g[j] + 367 " result[" + i + "][" + j + "] = " + r[j]); 368 } 369 } 370 } 371 372 static void verifyL(String name, int i, long[] g, long[] r) { 373 for (int j = 0; j < g.length; j++) { 374 if (g[j] != r[j]) { 375 throw new RuntimeException("verify " + name + ": arrays must have same content:" + 376 " gold[" + i + "][" + j + "] = " + g[j] + 377 " result[" + i + "][" 
+ j + "] = " + r[j]); 378 } 379 } 380 } 381 382 @Test 383 @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0", 384 IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0", 385 IRNode.STORE_VECTOR, "> 0"}, 386 applyIf = {"MaxVectorSize", ">=8"}, 387 applyIfPlatform = {"64-bit", "true"}, 388 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) 389 static Object[] test0(byte[] a, byte[] b, byte mask) { 390 for (int i = 0; i < RANGE; i+=8) { 391 // Safe to vectorize with AlignVector 392 b[i+0] = (byte)(a[i+0] & mask); // offset 0, align 0 393 b[i+1] = (byte)(a[i+1] & mask); 394 b[i+2] = (byte)(a[i+2] & mask); 395 b[i+3] = (byte)(a[i+3] & mask); 396 } 397 return new Object[]{ a, b }; 398 } 399 400 @Test 401 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", 402 IRNode.AND_VB, "> 0", 403 IRNode.STORE_VECTOR, "> 0"}, 404 applyIf = {"UseCompactObjectHeaders", "false"}, 405 applyIfPlatform = {"64-bit", "true"}, 406 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"}) 407 static Object[] test1(byte[] a, byte[] b, byte mask) { 408 for (int i = 0; i < RANGE; i+=8) { 409 // Safe to vectorize with AlignVector 410 b[i+0] = (byte)(a[i+0] & mask); // offset 0, align 0 411 b[i+1] = (byte)(a[i+1] & mask); 412 b[i+2] = (byte)(a[i+2] & mask); 413 b[i+3] = (byte)(a[i+3] & mask); 414 b[i+4] = (byte)(a[i+4] & mask); 415 b[i+5] = (byte)(a[i+5] & mask); 416 b[i+6] = (byte)(a[i+6] & mask); 417 b[i+7] = (byte)(a[i+7] & mask); 418 } 419 return new Object[]{ a, b }; 420 } 421 422 @Test 423 @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0", 424 IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0", 425 IRNode.STORE_VECTOR, "> 0"}, 426 applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"}, 427 applyIfPlatform = {"64-bit", "true"}, 428 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) 429 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0", 430 IRNode.AND_VB, "= 0", 431 IRNode.STORE_VECTOR, "= 0"}, 432 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, 433 applyIfPlatform = 
{"64-bit", "true"}, 434 applyIf = {"AlignVector", "true"}) 435 static Object[] test2(byte[] a, byte[] b, byte mask) { 436 for (int i = 0; i < RANGE; i+=8) { 437 // Cannot align with AlignVector: 3 + x * 8 % 8 = 3 438 b[i+3] = (byte)(a[i+3] & mask); // at alignment 3 439 b[i+4] = (byte)(a[i+4] & mask); 440 b[i+5] = (byte)(a[i+5] & mask); 441 b[i+6] = (byte)(a[i+6] & mask); 442 } 443 return new Object[]{ a, b }; 444 } 445 446 @Test 447 @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0", 448 IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0", 449 IRNode.STORE_VECTOR, "> 0"}, 450 applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"}, 451 applyIfPlatform = {"64-bit", "true"}, 452 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) 453 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0", 454 IRNode.AND_VB, "= 0", 455 IRNode.STORE_VECTOR, "= 0"}, 456 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, 457 applyIfPlatform = {"64-bit", "true"}, 458 applyIf = {"AlignVector", "true"}) 459 static Object[] test3(byte[] a, byte[] b, byte mask) { 460 for (int i = 0; i < RANGE; i+=8) { 461 // Cannot align with AlignVector: 3 + x * 8 % 8 = 3 462 463 // Problematic for AlignVector 464 b[i+0] = (byte)(a[i+0] & mask); // best_memref, align 0 465 466 b[i+3] = (byte)(a[i+3] & mask); // pack at offset 3 bytes 467 b[i+4] = (byte)(a[i+4] & mask); 468 b[i+5] = (byte)(a[i+5] & mask); 469 b[i+6] = (byte)(a[i+6] & mask); 470 } 471 return new Object[]{ a, b }; 472 } 473 474 @Test 475 @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0", 476 IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_8, "> 0", 477 IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0", 478 IRNode.AND_VB, IRNode.VECTOR_SIZE_8, "> 0", 479 IRNode.STORE_VECTOR, "> 0"}, 480 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, 481 applyIfPlatform = {"64-bit", "true"}, 482 applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=16"}) 483 @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0", 484 
IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_8, "= 0",// unaligned 485 IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0", 486 IRNode.AND_VB, IRNode.VECTOR_SIZE_8, "= 0",// unaligned 487 IRNode.STORE_VECTOR, "> 0"}, 488 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, 489 applyIfPlatform = {"64-bit", "true"}, 490 applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">=16"}) 491 static Object[] test4(byte[] a, byte[] b, byte mask) { 492 for (int i = 0; i < RANGE/16; i++) { 493 // Problematic for AlignVector 494 b[i*16 + 0 ] = (byte)(a[i*16 + 0 ] & mask); // 4 pack, 0 aligned 495 b[i*16 + 1 ] = (byte)(a[i*16 + 1 ] & mask); 496 b[i*16 + 2 ] = (byte)(a[i*16 + 2 ] & mask); 497 b[i*16 + 3 ] = (byte)(a[i*16 + 3 ] & mask); 498 499 b[i*16 + 5 ] = (byte)(a[i*16 + 5 ] & mask); // 8 pack, 5 aligned 500 b[i*16 + 6 ] = (byte)(a[i*16 + 6 ] & mask); 501 b[i*16 + 7 ] = (byte)(a[i*16 + 7 ] & mask); 502 b[i*16 + 8 ] = (byte)(a[i*16 + 8 ] & mask); 503 b[i*16 + 9 ] = (byte)(a[i*16 + 9 ] & mask); 504 b[i*16 + 10] = (byte)(a[i*16 + 10] & mask); 505 b[i*16 + 11] = (byte)(a[i*16 + 11] & mask); 506 b[i*16 + 12] = (byte)(a[i*16 + 12] & mask); 507 } 508 return new Object[]{ a, b }; 509 } 510 511 @Test 512 @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0", 513 IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0", 514 IRNode.STORE_VECTOR, "> 0"}, 515 applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"}, 516 applyIfPlatform = {"64-bit", "true"}, 517 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) 518 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0", 519 IRNode.AND_VB, "= 0", 520 IRNode.STORE_VECTOR, "= 0"}, 521 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, 522 applyIfPlatform = {"64-bit", "true"}, 523 applyIf = {"AlignVector", "true"}) 524 static Object[] test5(byte[] a, byte[] b, byte mask, int inv) { 525 for (int i = 0; i < RANGE; i+=8) { 526 // Cannot align with AlignVector because of invariant 527 b[i+inv+0] = (byte)(a[i+inv+0] & mask); 528 529 b[i+inv+3] = 
(byte)(a[i+inv+3] & mask); 530 b[i+inv+4] = (byte)(a[i+inv+4] & mask); 531 b[i+inv+5] = (byte)(a[i+inv+5] & mask); 532 b[i+inv+6] = (byte)(a[i+inv+6] & mask); 533 } 534 return new Object[]{ a, b }; 535 } 536 537 @Test 538 @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0", 539 IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0", 540 IRNode.STORE_VECTOR, "> 0"}, 541 applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"}, 542 applyIfPlatform = {"64-bit", "true"}, 543 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) 544 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0", 545 IRNode.AND_VB, "= 0", 546 IRNode.STORE_VECTOR, "= 0"}, 547 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, 548 applyIfPlatform = {"64-bit", "true"}, 549 applyIf = {"AlignVector", "true"}) 550 static Object[] test6(byte[] a, byte[] b, byte mask) { 551 for (int i = 0; i < RANGE/8; i+=2) { 552 // Cannot align with AlignVector because offset is odd 553 b[i*4+0] = (byte)(a[i*4+0] & mask); 554 555 b[i*4+3] = (byte)(a[i*4+3] & mask); 556 b[i*4+4] = (byte)(a[i*4+4] & mask); 557 b[i*4+5] = (byte)(a[i*4+5] & mask); 558 b[i*4+6] = (byte)(a[i*4+6] & mask); 559 } 560 return new Object[]{ a, b }; 561 } 562 563 @Test 564 @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0", 565 IRNode.AND_VS, IRNode.VECTOR_SIZE_4, "> 0", 566 IRNode.STORE_VECTOR, "> 0"}, 567 applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=16"}, 568 applyIfPlatform = {"64-bit", "true"}, 569 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"}) 570 @IR(counts = {IRNode.LOAD_VECTOR_S, "= 0", 571 IRNode.AND_VS, "= 0", 572 IRNode.STORE_VECTOR, "= 0"}, 573 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, 574 applyIfPlatform = {"64-bit", "true"}, 575 applyIf = {"AlignVector", "true"}) 576 static Object[] test7(short[] a, short[] b, short mask) { 577 for (int i = 0; i < RANGE/8; i+=2) { 578 // Cannot align with AlignVector because offset is odd 579 b[i*4+0] = (short)(a[i*4+0] & mask); 580 581 
b[i*4+3] = (short)(a[i*4+3] & mask); 582 b[i*4+4] = (short)(a[i*4+4] & mask); 583 b[i*4+5] = (short)(a[i*4+5] & mask); 584 b[i*4+6] = (short)(a[i*4+6] & mask); 585 } 586 return new Object[]{ a, b }; 587 } 588 589 @Test 590 @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0", 591 IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0", 592 IRNode.STORE_VECTOR, "> 0"}, 593 applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"}, 594 applyIfPlatform = {"64-bit", "true"}, 595 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) 596 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0", 597 IRNode.AND_VB, "= 0", 598 IRNode.STORE_VECTOR, "= 0"}, 599 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, 600 applyIfPlatform = {"64-bit", "true"}, 601 applyIf = {"AlignVector", "true"}) 602 static Object[] test8(byte[] a, byte[] b, byte mask, int init) { 603 for (int i = init; i < RANGE; i+=8) { 604 // Cannot align with AlignVector because of invariant (variable init becomes invar) 605 b[i+0] = (byte)(a[i+0] & mask); 606 607 b[i+3] = (byte)(a[i+3] & mask); 608 b[i+4] = (byte)(a[i+4] & mask); 609 b[i+5] = (byte)(a[i+5] & mask); 610 b[i+6] = (byte)(a[i+6] & mask); 611 } 612 return new Object[]{ a, b }; 613 } 614 615 @Test 616 @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0", 617 IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0", 618 IRNode.STORE_VECTOR, "> 0"}, 619 applyIf = {"MaxVectorSize", ">=8"}, 620 applyIfPlatform = {"64-bit", "true"}, 621 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) 622 static Object[] test9(byte[] a, byte[] b, byte mask) { 623 // known non-zero init value does not affect offset, but has implicit effect on iv 624 for (int i = 13; i < RANGE-8; i+=8) { 625 b[i+0] = (byte)(a[i+0] & mask); 626 627 b[i+3] = (byte)(a[i+3] & mask); 628 b[i+4] = (byte)(a[i+4] & mask); 629 b[i+5] = (byte)(a[i+5] & mask); 630 b[i+6] = (byte)(a[i+6] & mask); 631 } 632 return new Object[]{ a, b }; 633 } 634 635 @Test 636 @IR(counts = {IRNode.LOAD_VECTOR_B, 
IRNode.VECTOR_SIZE_4, "> 0", 637 IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0", 638 IRNode.STORE_VECTOR, "> 0"}, 639 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, 640 applyIfPlatform = {"64-bit", "true"}, 641 applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"}) 642 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0", 643 IRNode.AND_VB, "= 0", 644 IRNode.STORE_VECTOR, "= 0"}, 645 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, 646 applyIfPlatform = {"64-bit", "true"}, 647 applyIf = {"AlignVector", "true"}) 648 static Object[] test10a(byte[] a, byte[] b, byte mask) { 649 // This is not alignable with pre-loop, because of odd init. 650 for (int i = 3; i < RANGE-8; i+=8) { 651 b[i+0] = (byte)(a[i+0] & mask); 652 b[i+1] = (byte)(a[i+1] & mask); 653 b[i+2] = (byte)(a[i+2] & mask); 654 b[i+3] = (byte)(a[i+3] & mask); 655 } 656 return new Object[]{ a, b }; 657 } 658 659 @Test 660 @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0", 661 IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0", 662 IRNode.STORE_VECTOR, "> 0"}, 663 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, 664 applyIfPlatform = {"64-bit", "true"}, 665 applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"}) 666 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0", 667 IRNode.AND_VB, "= 0", 668 IRNode.STORE_VECTOR, "= 0"}, 669 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, 670 applyIfPlatform = {"64-bit", "true"}, 671 applyIf = {"AlignVector", "true"}) 672 static Object[] test10b(byte[] a, byte[] b, byte mask) { 673 // This is not alignable with pre-loop, because of odd init. 674 // Seems not correctly handled. 
675 for (int i = 13; i < RANGE-8; i+=8) { 676 b[i+0] = (byte)(a[i+0] & mask); 677 b[i+1] = (byte)(a[i+1] & mask); 678 b[i+2] = (byte)(a[i+2] & mask); 679 b[i+3] = (byte)(a[i+3] & mask); 680 } 681 return new Object[]{ a, b }; 682 } 683 684 @Test 685 @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0", 686 IRNode.AND_VS, IRNode.VECTOR_SIZE_4, "> 0", 687 IRNode.STORE_VECTOR, "> 0"}, 688 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, 689 applyIfPlatform = {"64-bit", "true"}, 690 applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=16"}) 691 @IR(counts = {IRNode.LOAD_VECTOR_S, "= 0", 692 IRNode.AND_VS, "= 0", 693 IRNode.STORE_VECTOR, "= 0"}, 694 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, 695 applyIfPlatform = {"64-bit", "true"}, 696 applyIf = {"AlignVector", "true"}) 697 static Object[] test10c(short[] a, short[] b, short mask) { 698 // This is not alignable with pre-loop, because of odd init. 699 // Seems not correctly handled with MaxVectorSize >= 32. 
700 for (int i = 13; i < RANGE-8; i+=8) { 701 b[i+0] = (short)(a[i+0] & mask); 702 b[i+1] = (short)(a[i+1] & mask); 703 b[i+2] = (short)(a[i+2] & mask); 704 b[i+3] = (short)(a[i+3] & mask); 705 } 706 return new Object[]{ a, b }; 707 } 708 709 @Test 710 @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0", 711 IRNode.AND_VS, IRNode.VECTOR_SIZE_4, "> 0", 712 IRNode.STORE_VECTOR, "> 0"}, 713 applyIfAnd = {"MaxVectorSize", ">=16", "UseCompactObjectHeaders", "false"}, 714 applyIfPlatform = {"64-bit", "true"}, 715 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"}) 716 static Object[] test10d(short[] a, short[] b, short mask) { 717 for (int i = 13; i < RANGE-16; i+=8) { 718 // init + offset -> aligned 719 b[i+0+3] = (short)(a[i+0+3] & mask); 720 b[i+1+3] = (short)(a[i+1+3] & mask); 721 b[i+2+3] = (short)(a[i+2+3] & mask); 722 b[i+3+3] = (short)(a[i+3+3] & mask); 723 } 724 return new Object[]{ a, b }; 725 } 726 727 @Test 728 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", 729 IRNode.AND_VB, "> 0", 730 IRNode.STORE_VECTOR, "> 0"}, 731 applyIfPlatform = {"64-bit", "true"}, 732 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) 733 static Object[] test11aB(byte[] a, byte[] b, byte mask) { 734 for (int i = 0; i < RANGE; i++) { 735 // always alignable 736 b[i+0] = (byte)(a[i+0] & mask); 737 } 738 return new Object[]{ a, b }; 739 } 740 741 @Test 742 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", 743 IRNode.AND_VS, "> 0", 744 IRNode.STORE_VECTOR, "> 0"}, 745 applyIfPlatform = {"64-bit", "true"}, 746 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) 747 static Object[] test11aS(short[] a, short[] b, short mask) { 748 for (int i = 0; i < RANGE; i++) { 749 // always alignable 750 b[i+0] = (short)(a[i+0] & mask); 751 } 752 return new Object[]{ a, b }; 753 } 754 755 @Test 756 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", 757 IRNode.AND_VI, "> 0", 758 IRNode.STORE_VECTOR, "> 0"}, 759 applyIfPlatform = {"64-bit", "true"}, 760 applyIfCPUFeatureOr = {"sse4.1", 
"true", "asimd", "true"}) 761 static Object[] test11aI(int[] a, int[] b, int mask) { 762 for (int i = 0; i < RANGE; i++) { 763 // always alignable 764 b[i+0] = (int)(a[i+0] & mask); 765 } 766 return new Object[]{ a, b }; 767 } 768 769 @Test 770 @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", 771 IRNode.AND_VL, "> 0", 772 IRNode.STORE_VECTOR, "> 0"}, 773 applyIfPlatform = {"64-bit", "true"}, 774 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) 775 static Object[] test11aL(long[] a, long[] b, long mask) { 776 for (int i = 0; i < RANGE; i++) { 777 // always alignable 778 b[i+0] = (long)(a[i+0] & mask); 779 } 780 return new Object[]{ a, b }; 781 } 782 783 @Test 784 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", 785 IRNode.AND_VB, "> 0", 786 IRNode.STORE_VECTOR, "> 0"}, 787 applyIfPlatform = {"64-bit", "true"}, 788 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) 789 static Object[] test11bB(byte[] a, byte[] b, byte mask) { 790 for (int i = 1; i < RANGE; i++) { 791 // always alignable 792 b[i+0] = (byte)(a[i+0] & mask); 793 } 794 return new Object[]{ a, b }; 795 } 796 797 @Test 798 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", 799 IRNode.AND_VS, "> 0", 800 IRNode.STORE_VECTOR, "> 0"}, 801 applyIfPlatform = {"64-bit", "true"}, 802 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) 803 static Object[] test11bS(short[] a, short[] b, short mask) { 804 for (int i = 1; i < RANGE; i++) { 805 // always alignable 806 b[i+0] = (short)(a[i+0] & mask); 807 } 808 return new Object[]{ a, b }; 809 } 810 811 @Test 812 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", 813 IRNode.AND_VI, "> 0", 814 IRNode.STORE_VECTOR, "> 0"}, 815 applyIfPlatform = {"64-bit", "true"}, 816 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) 817 static Object[] test11bI(int[] a, int[] b, int mask) { 818 for (int i = 1; i < RANGE; i++) { 819 // always alignable 820 b[i+0] = (int)(a[i+0] & mask); 821 } 822 return new Object[]{ a, b }; 823 } 824 825 @Test 826 @IR(counts = 
{IRNode.LOAD_VECTOR_L, "> 0",
                  IRNode.AND_VL, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
    static Object[] test11bL(long[] a, long[] b, long mask) {
        for (int i = 1; i < RANGE; i++) {
            // always alignable
            b[i+0] = (long)(a[i+0] & mask);
        }
        return new Object[]{ a, b };
    }

    // test11c*: b[i] = a[i+1] & mask for i in [1, RANGE-1), i.e. the load index is one
    // element ahead of the store index.
    // Byte variant. First IR rule (AlignVector=false) expects vector load/and/store nodes,
    // second IR rule (AlignVector=true) expects none.
    // Returns the two arrays so the caller can inspect them.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
                  IRNode.AND_VB, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "false"})
    @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
                  IRNode.AND_VB, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "true"})
    static Object[] test11cB(byte[] a, byte[] b, byte mask) {
        for (int i = 1; i < RANGE-1; i++) {
            // 1 byte offset -> not alignable with AlignVector
            b[i+0] = (byte)(a[i+1] & mask);
        }
        return new Object[]{ a, b };
    }

    // Short variant of test11c: same access pattern and the same pair of IR rules
    // (vector nodes expected only when AlignVector=false).
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
                  IRNode.AND_VS, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "false"})
    @IR(counts = {IRNode.LOAD_VECTOR_S, "= 0",
                  IRNode.AND_VS, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "true"})
    static Object[] test11cS(short[] a, short[] b, short mask) {
        for (int i = 1; i < RANGE-1; i++) {
            // 2 byte offset -> not alignable with AlignVector
            b[i+0] = (short)(a[i+1] & mask);
        }
        return new Object[]{ a, b };
    }

    // Int variant of test11c: same access pattern and the same pair of IR rules
    // (vector nodes expected only when AlignVector=false).
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
                  IRNode.AND_VI, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "false"})
    @IR(counts = {IRNode.LOAD_VECTOR_I, "= 0",
                  IRNode.AND_VI, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "true"})
    static Object[] test11cI(int[] a, int[] b, int mask) {
        for (int i = 1; i < RANGE-1; i++) {
            // 4 byte offset -> not alignable with AlignVector
            b[i+0] = (int)(a[i+1] & mask);
        }
        return new Object[]{ a, b };
    }

    // Long variant of test11c. Unlike the B/S/I variants there is a single IR rule with
    // no AlignVector condition: vector nodes are expected in either mode.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0",
                  IRNode.AND_VL, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
    static Object[] test11cL(long[] a, long[] b, long mask) {
        for (int i = 1; i < RANGE-1; i++) {
            // always alignable (8 byte offset)
            b[i+0] = (long)(a[i+1] & mask);
        }
        return new Object[]{ a, b };
    }

    // test11d*: b[i+invar] = a[i+invar] & mask for i in [0, RANGE); load and store use the
    // same index, shifted by the loop-invariant argument invar.
    // NOTE(review): the accesses reach index RANGE-1+invar; the array lengths and the invar
    // values passed by callers are not visible here -- confirm they keep this in bounds.
    // Byte variant. The single IR rule expects vector load/and/store nodes.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
                  IRNode.AND_VB, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
    static Object[] test11dB(byte[] a, byte[] b, byte mask, int invar) {
        for (int i = 0; i < RANGE; i++) {
            b[i+0+invar] = (byte)(a[i+0+invar] & mask);
        }
        return new Object[]{ a, b };
    }

    // Short variant of test11d (same index pattern, same kind of IR rule).
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
                  IRNode.AND_VS, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
    static Object[] test11dS(short[] a, short[] b, short mask, int invar) {
        for (int i = 0; i < RANGE; i++) {
            b[i+0+invar] = (short)(a[i+0+invar] & mask);
        }
        return new Object[]{ a, b };
    }

    // Int variant of test11d (same index pattern, same kind of IR rule).
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
                  IRNode.AND_VI, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
    static Object[] test11dI(int[] a, int[] b, int mask, int invar) {
        for (int i = 0; i < RANGE; i++) {
            b[i+0+invar] = (int)(a[i+0+invar] & mask);
        }
        return new Object[]{ a, b };
    }

    // Long variant of test11d (same index pattern, same kind of IR rule).
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0",
                  IRNode.AND_VL, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
    static Object[] test11dL(long[] a, long[] b, long mask, int invar) {
        for (int i = 0; i < RANGE; i++) {
            b[i+0+invar] = (long)(a[i+0+invar] & mask);
        }
        return new Object[]{ a, b };
    }

    // Masks four consecutive bytes starting at index 6*i, for i in [0, RANGE/16); the
    // groups are 6 apart, so two bytes in each group of six are left untouched.
    // The IR rule asserts that NO vector load/and/store nodes are produced.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
                  IRNode.AND_VB, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
    static Object[] test12(byte[] a, byte[] b, byte mask) {
        for (int i = 0; i < RANGE/16; i++) {
            // Currently does not vectorize at all
            b[i*6 + 0 ] = (byte)(a[i*6 + 0 ] & mask);
            b[i*6 + 1 ] = (byte)(a[i*6 + 1 ] & mask);
            b[i*6 + 2 ] = (byte)(a[i*6 + 2 ] & mask);
            b[i*6 + 3 ] = (byte)(a[i*6 + 3 ] & mask);
        }
        return new Object[]{ a, b };
    }

    // test13a*: increments element i of arrays with different element types in the same
    // loop, starting at i = 0.
    // int + long variant. The IR rule checks int and long vector nodes with the vector
    // size given as "min(max_int, max_long)".
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
                  IRNode.LOAD_VECTOR_L, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
                  IRNode.ADD_VI,        IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
                  IRNode.ADD_VL,        IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"avx2", "true"})
    // require avx2 (see applyIfCPUFeatureOr above) to ensure vectors are larger than what
    // unrolling produces
    static Object[] test13aIL(int[] a, long[] b) {
        for (int i = 0; i < RANGE; i++) {
            a[i]++;
            b[i]++;
        }
        return new Object[]{ a, b };
    }

    // int + byte variant of test13a. The IR rule only applies when
    // UseCompactObjectHeaders is false.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
                  IRNode.LOAD_VECTOR_I, "> 0",
                  IRNode.ADD_VB, "> 0",
                  IRNode.ADD_VI, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIf = {"UseCompactObjectHeaders", "false"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"})
    static Object[] test13aIB(int[] a, byte[] b) {
        for (int i = 0; i < RANGE; i++) {
            a[i]++;
            b[i]++;
        }
        return new Object[]{ a, b };
    }

    // int + short variant of test13a. The IR rule only applies when
    // UseCompactObjectHeaders is false.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
                  IRNode.LOAD_VECTOR_S, "> 0",
                  IRNode.ADD_VI, "> 0",
                  IRNode.ADD_VS, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIf = {"UseCompactObjectHeaders", "false"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"})
    static Object[] test13aIS(int[] a, short[] b) {
        for (int i = 0; i < RANGE; i++) {
            a[i]++;
            b[i]++;
        }
        return new Object[]{ a, b };
    }

    // byte + short + int + long variant of test13a: all four arrays are incremented at
    // index i in the same loop. The IR rule only applies when UseCompactObjectHeaders is
    // false and expects vector load/add nodes for every one of the four types.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
                  IRNode.LOAD_VECTOR_S, "> 0",
                  IRNode.LOAD_VECTOR_I, "> 0",
                  IRNode.LOAD_VECTOR_L, "> 0",
                  IRNode.ADD_VB, "> 0",
                  IRNode.ADD_VS, "> 0",
                  IRNode.ADD_VI, "> 0",
                  IRNode.ADD_VL, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIf = {"UseCompactObjectHeaders", "false"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"})
    static Object[] test13aBSIL(byte[] a, short[] b, int[] c, long[] d) {
        for (int i = 0; i < RANGE; i++) {
            a[i]++;
            b[i]++;
            c[i]++;
            d[i]++;
        }
        return new Object[]{ a, b, c, d };
    }

    // test13b*: identical to the test13a* family except that the loop starts at i = 1,
    // so element 0 of every array is left untouched.
    // int + long variant, same IR rule as test13aIL.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
                  IRNode.LOAD_VECTOR_L, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
                  IRNode.ADD_VI,        IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
                  IRNode.ADD_VL,        IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"avx2", "true"})
    // require avx2 (see applyIfCPUFeatureOr above) to ensure vectors are larger than what
    // unrolling produces
    static Object[] test13bIL(int[] a, long[] b) {
        for (int i = 1; i < RANGE; i++) {
            a[i]++;
            b[i]++;
        }
        return new Object[]{ a, b };
    }

    // int + byte variant of test13b (loop starts at 1), same IR rule as test13aIB.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
                  IRNode.LOAD_VECTOR_I, "> 0",
                  IRNode.ADD_VB, "> 0",
                  IRNode.ADD_VI, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIf = {"UseCompactObjectHeaders", "false"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"})
    static Object[] test13bIB(int[] a, byte[] b) {
        for (int i = 1; i < RANGE; i++) {
            a[i]++;
            b[i]++;
        }
        return new Object[]{ a, b };
    }

    // int + short variant of test13b (loop starts at 1), same IR rule as test13aIS.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
                  IRNode.LOAD_VECTOR_S, "> 0",
                  IRNode.ADD_VI, "> 0",
                  IRNode.ADD_VS, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIf = {"UseCompactObjectHeaders", "false"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"})
    static Object[] test13bIS(int[] a, short[] b) {
        for (int i = 1; i < RANGE; i++) {
            a[i]++;
            b[i]++;
        }
        return new Object[]{ a, b };
    }

    // byte + short + int + long variant of test13b (loop starts at 1), same IR rule as
    // test13aBSIL.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
                  IRNode.LOAD_VECTOR_S, "> 0",
                  IRNode.LOAD_VECTOR_I, "> 0",
                  IRNode.LOAD_VECTOR_L, "> 0",
                  IRNode.ADD_VB, "> 0",
                  IRNode.ADD_VS, "> 0",
                  IRNode.ADD_VI, "> 0",
                  IRNode.ADD_VL, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIf = {"UseCompactObjectHeaders", "false"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"})
    static Object[] test13bBSIL(byte[] a, short[] b, int[] c, long[] d) {
        for (int i = 1; i < RANGE; i++) {
            a[i]++;
            b[i]++;
            c[i]++;
            d[i]++;
        }
        return new Object[]{ a, b, c, d };
    }

    // test14*: each iteration increments the 16 bytes a[i+0..i+15] while i advances by a
    // stride smaller than 16, so successive iterations touch overlapping index ranges.
    // Stride 9. First IR rule (AlignVector=false) expects vector nodes, second
    // (AlignVector=true) expects none.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
                  IRNode.ADD_VB, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "false"})
    @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
                  IRNode.ADD_VB, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "true"})
    static Object[] test14aB(byte[] a) {
        // non-power-of-2 stride
        for (int i = 0; i < RANGE-20; i+=9) {
            a[i+0]++;
            a[i+1]++;
            a[i+2]++;
            a[i+3]++;
            a[i+4]++;
            a[i+5]++;
            a[i+6]++;
            a[i+7]++;
            a[i+8]++;
            a[i+9]++;
            a[i+10]++;
            a[i+11]++;
            a[i+12]++;
            a[i+13]++;
            a[i+14]++;
            a[i+15]++;
        }
        return new Object[]{ a };
    }

    // Same as test14aB but with stride 3; same pair of IR rules.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
                  IRNode.ADD_VB, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "false"})
    @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
                  IRNode.ADD_VB, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "true"})
    static Object[] test14bB(byte[] a) {
        // non-power-of-2 stride
        for (int i = 0; i < RANGE-20; i+=3) {
            a[i+0]++;
            a[i+1]++;
            a[i+2]++;
            a[i+3]++;
            a[i+4]++;
            a[i+5]++;
            a[i+6]++;
            a[i+7]++;
            a[i+8]++;
            a[i+9]++;
            a[i+10]++;
            a[i+11]++;
            a[i+12]++;
            a[i+13]++;
            a[i+14]++;
            a[i+15]++;
        }
        return new Object[]{ a };
    }

    // Same as test14aB but with stride 5; same pair of IR rules.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
                  IRNode.ADD_VB, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "false"})
    @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
                  IRNode.ADD_VB, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "true"})
    static Object[] test14cB(byte[] a) {
        // non-power-of-2 stride
        for (int i = 0; i < RANGE-20; i+=5) {
            a[i+0]++;
            a[i+1]++;
            a[i+2]++;
            a[i+3]++;
            a[i+4]++;
            a[i+5]++;
            a[i+6]++;
            a[i+7]++;
            a[i+8]++;
            a[i+9]++;
            a[i+10]++;
            a[i+11]++;
            a[i+12]++;
            a[i+13]++;
            a[i+14]++;
            a[i+15]++;
        }
        return new Object[]{ a };
    }

    // test15*: unit loop stride, but the index is scaled: each iteration increments the
    // 16 bytes a[scale*i+0 .. scale*i+15]. No IR rule is attached; the method only
    // returns the array.
    // Scale 53.
    @Test
    // IR rules difficult because of modulo wrapping with offset after peeling.
    static Object[] test15aB(byte[] a) {
        // non-power-of-2 scale
        for (int i = 0; i < RANGE/64-20; i++) {
            a[53*i+0]++;
            a[53*i+1]++;
            a[53*i+2]++;
            a[53*i+3]++;
            a[53*i+4]++;
            a[53*i+5]++;
            a[53*i+6]++;
            a[53*i+7]++;
            a[53*i+8]++;
            a[53*i+9]++;
            a[53*i+10]++;
            a[53*i+11]++;
            a[53*i+12]++;
            a[53*i+13]++;
            a[53*i+14]++;
            a[53*i+15]++;
        }
        return new Object[]{ a };
    }

    // Same as test15aB but with scale 25 (consecutive 16-byte groups therefore do not
    // overlap). No IR rule.
    @Test
    // IR rules difficult because of modulo wrapping with offset after peeling.
    static Object[] test15bB(byte[] a) {
        // non-power-of-2 scale
        for (int i = 0; i < RANGE/64-20; i++) {
            a[25*i+0]++;
            a[25*i+1]++;
            a[25*i+2]++;
            a[25*i+3]++;
            a[25*i+4]++;
            a[25*i+5]++;
            a[25*i+6]++;
            a[25*i+7]++;
            a[25*i+8]++;
            a[25*i+9]++;
            a[25*i+10]++;
            a[25*i+11]++;
            a[25*i+12]++;
            a[25*i+13]++;
            a[25*i+14]++;
            a[25*i+15]++;
        }
        return new Object[]{ a };
    }

    // Same as test15aB but with scale 19. No IR rule.
    @Test
    // IR rules difficult because of modulo wrapping with offset after peeling.
    static Object[] test15cB(byte[] a) {
        // non-power-of-2 scale
        for (int i = 0; i < RANGE/64-20; i++) {
            a[19*i+0]++;
            a[19*i+1]++;
            a[19*i+2]++;
            a[19*i+3]++;
            a[19*i+4]++;
            a[19*i+5]++;
            a[19*i+6]++;
            a[19*i+7]++;
            a[19*i+8]++;
            a[19*i+9]++;
            a[19*i+10]++;
            a[19*i+11]++;
            a[19*i+12]++;
            a[19*i+13]++;
            a[19*i+14]++;
            a[19*i+15]++;
        }
        return new Object[]{ a };
    }

    // Per iteration: increments the 15 bytes a[2*i+0 .. 2*i+14] and the 4 shorts
    // b[2*i+0 .. 2*i+3]. The index advances by 2 per iteration, so ranges of successive
    // iterations overlap. No IR rule is attached.
    // NOTE(review): "infinite loop issues" presumably refers to a past compiler problem
    // this shape reproduced -- not verifiable from this file section.
    @Test
    static Object[] test16a(byte[] a, short[] b) {
        // infinite loop issues
        for (int i = 0; i < RANGE/2-20; i++) {
            a[2*i+0]++;
            a[2*i+1]++;
            a[2*i+2]++;
            a[2*i+3]++;
            a[2*i+4]++;
            a[2*i+5]++;
            a[2*i+6]++;
            a[2*i+7]++;
            a[2*i+8]++;
            a[2*i+9]++;
            a[2*i+10]++;
            a[2*i+11]++;
            a[2*i+12]++;
            a[2*i+13]++;
            a[2*i+14]++;

            b[2*i+0]++;
            b[2*i+1]++;
            b[2*i+2]++;
            b[2*i+3]++;
        }
        return new Object[]{ a, b };
    }

    // Byte-only version of test16a: increments a[2*i+0 .. 2*i+14] per iteration.
    // No IR rule is attached.
    @Test
    static Object[] test16b(byte[] a) {
        // infinite loop issues
        for (int i = 0; i < RANGE/2-20; i++) {
            a[2*i+0]++;
            a[2*i+1]++;
            a[2*i+2]++;
            a[2*i+3]++;
            a[2*i+4]++;
            a[2*i+5]++;
            a[2*i+6]++;
            a[2*i+7]++;
            a[2*i+8]++;
            a[2*i+9]++;
            a[2*i+10]++;
            a[2*i+11]++;
            a[2*i+12]++;
            a[2*i+13]++;
            a[2*i+14]++;
        }
        return new Object[]{ a };
    }

    // test17*: long increments performed through Unsafe get/putLongUnaligned at an
    // explicitly computed byte offset instead of through array indexing.
    // Offset is ARRAY_LONG_BASE_OFFSET + 8*i, i.e. exactly element i. The IR rule expects
    // vector load/add/store nodes.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0",
                  IRNode.ADD_VL, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
    static Object[] test17a(long[] a) {
        // Unsafe: vectorizes with profiling (not xcomp)
        for (int i = 0; i < RANGE; i++) {
            int adr = UNSAFE.ARRAY_LONG_BASE_OFFSET + 8 * i;
            long v = UNSAFE.getLongUnaligned(a, adr);
            UNSAFE.putLongUnaligned(a, adr, v + 1);
        }
        return new Object[]{ a };
    }

    // Like test17a but the offset is shifted by one extra byte (+ 1), so every 8-byte
    // access starts one byte past an element boundary. The loop stops at RANGE-1.
    // No IR rule is attached.
    @Test
    // Difficult to write good IR rule. Modulo calculus overflow can create non-power-of-2 packs.
    static Object[] test17b(long[] a) {
        // Not alignable
        for (int i = 0; i < RANGE-1; i++) {
            int adr = UNSAFE.ARRAY_LONG_BASE_OFFSET + 8 * i + 1;
            long v = UNSAFE.getLongUnaligned(a, adr);
            UNSAFE.putLongUnaligned(a, adr, v + 1);
        }
        return new Object[]{ a };
    }

    // Hand-unrolled pair: with i advancing by 4, each iteration increments the two longs
    // at byte offsets adr+0 and adr+8 (elements i and i+1); elements i+2 and i+3 are not
    // touched. The IR rule applies when MaxVectorSize >= 32 and expects 2-element long
    // vectors (VECTOR_SIZE_2).
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_L, IRNode.VECTOR_SIZE_2, "> 0",
                  IRNode.ADD_VL,        IRNode.VECTOR_SIZE_2, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIf = {"MaxVectorSize", ">=32"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
    static Object[] test17c(long[] a) {
        // Unsafe: aligned vectorizes
        for (int i = 0; i < RANGE-1; i+=4) {
            int adr = UNSAFE.ARRAY_LONG_BASE_OFFSET + 8 * i;
            long v0 = UNSAFE.getLongUnaligned(a, adr + 0);
            long v1 = UNSAFE.getLongUnaligned(a, adr + 8);
            UNSAFE.putLongUnaligned(a, adr + 0, v0 + 1);
            UNSAFE.putLongUnaligned(a, adr + 8, v1 + 1);
        }
        return new Object[]{ a };
    }

    // Same shape as test17c but with the extra + 1 byte in the offset, as in test17b.
    // First IR rule: AlignVector=false and MaxVectorSize >= 64 (avx512 or asimd) expects
    // 2-element long vectors. Second IR rule: AlignVector=true expects no vector nodes.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_L, IRNode.VECTOR_SIZE_2, "> 0",
                  IRNode.ADD_VL,        IRNode.VECTOR_SIZE_2, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfCPUFeatureOr = {"avx512", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=64"})
    // Ensure vector width is large enough to fit 64 byte for longs:
    // The offsets are: 25, 33, 57, 65
    // In modulo 32:    25,  1, 25,  1  -> does not vectorize
    // In modulo 64:    25, 33, 57,  1  -> at least first pair vectorizes
    // This problem is because we compute modulo vector width in memory_alignment.
    @IR(counts = {IRNode.LOAD_VECTOR_L, "= 0",
                  IRNode.ADD_VL, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "true"})
    static Object[] test17d(long[] a) {
        // Not alignable
        for (int i = 0; i < RANGE-1; i+=4) {
            int adr = UNSAFE.ARRAY_LONG_BASE_OFFSET + 8 * i + 1;
            long v0 = UNSAFE.getLongUnaligned(a, adr + 0);
            long v1 = UNSAFE.getLongUnaligned(a, adr + 8);
            UNSAFE.putLongUnaligned(a, adr + 0, v0 + 1);
            UNSAFE.putLongUnaligned(a, adr + 8, v1 + 1);
        }
        return new Object[]{ a };
    }

    // Mixes stores at constant indices (a[0], a[1]; independent of i) with a store that
    // follows the loop variable (b[i]). No IR rule is attached.
    @Test
    static Object[] test18a(byte[] a, int[] b) {
        // scale = 0  -->  no iv
        for (int i = 0; i < RANGE; i++) {
            a[0] = 1;
            b[i] = 2;
            a[1] = 1;
        }
        return new Object[]{ a, b };
    }

    // Same as test18a but the constant-index stores go to a[1] and a[2].
    // No IR rule is attached.
    @Test
    static Object[] test18b(byte[] a, int[] b) {
        // scale = 0  -->  no iv
        for (int i = 0; i < RANGE; i++) {
            a[1] = 1;
            b[i] = 2;
            a[2] = 1;
        }
        return new Object[]{ a, b };
    }

    // Copy with a down-counting loop variable: i runs from 5000 down to 1 and the index
    // is RANGE_FINAL - i, so elements RANGE_FINAL-5000 .. RANGE_FINAL-1 of b are copied
    // into a in ascending index order. No IR rule is attached.
    @Test
    static Object[] test19(int[] a, int[] b) {
        for (int i = 5000; i > 0; i--) {
            a[RANGE_FINAL - i] = b[RANGE_FINAL - i];
        }
        return new Object[]{ a, b };
    }

    // Increments the 4 bytes a[2*i+30 .. 2*i+33] for i in [1, RANGE/2-50): index scale 2,
    // constant offset 30, loop starting at 1. No IR rule is attached.
    @Test
    static Object[] test20(byte[] a) {
        // Example where it is easy to pass alignment check,
        // but used to fail the alignment calculation
        for (int i = 1; i < RANGE/2-50; i++) {
            a[2*i+0+30]++;
            a[2*i+1+30]++;
            a[2*i+2+30]++;
            a[2*i+3+30]++;
        }
        return new Object[]{ a };
    }
1486 }