1 /* 2 * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 */ 23 24 package compiler.loopopts.superword; 25 26 import compiler.lib.ir_framework.*; 27 import jdk.test.lib.Utils; 28 import jdk.test.whitebox.WhiteBox; 29 import jdk.internal.misc.Unsafe; 30 import java.lang.reflect.Array; 31 import java.util.Map; 32 import java.util.HashMap; 33 import java.util.Random; 34 import java.nio.ByteOrder; 35 36 /* 37 * @test id=NoAlignVector 38 * @bug 8310190 39 * @summary Test AlignVector with various loop init, stride, scale, invar, etc. 40 * @modules java.base/jdk.internal.misc 41 * @library /test/lib / 42 * @run driver compiler.loopopts.superword.TestAlignVector NoAlignVector 43 */ 44 45 /* 46 * @test id=AlignVector 47 * @bug 8310190 48 * @summary Test AlignVector with various loop init, stride, scale, invar, etc. 
49 * @modules java.base/jdk.internal.misc 50 * @library /test/lib / 51 * @run driver compiler.loopopts.superword.TestAlignVector AlignVector 52 */ 53 54 /* 55 * @test id=VerifyAlignVector 56 * @bug 8310190 57 * @summary Test AlignVector with various loop init, stride, scale, invar, etc. 58 * @modules java.base/jdk.internal.misc 59 * @library /test/lib / 60 * @run driver compiler.loopopts.superword.TestAlignVector VerifyAlignVector 61 */ 62 63 public class TestAlignVector { 64 static int RANGE = 1024*8; 65 static int RANGE_FINAL = 1024*8; 66 private static final Unsafe UNSAFE = Unsafe.getUnsafe(); 67 private static final Random RANDOM = Utils.getRandomInstance(); 68 69 // Inputs 70 byte[] aB; 71 byte[] bB; 72 byte mB = (byte)31; 73 short[] aS; 74 short[] bS; 75 short mS = (short)0xF0F0; 76 int[] aI; 77 int[] bI; 78 int mI = 0xF0F0F0F0; 79 long[] aL; 80 long[] bL; 81 long mL = 0xF0F0F0F0F0F0F0F0L; 82 83 // List of tests 84 Map<String,TestFunction> tests = new HashMap<String,TestFunction>(); 85 86 // List of gold, the results from the first run before compilation 87 Map<String,Object[]> golds = new HashMap<String,Object[]>(); 88 89 interface TestFunction { 90 Object[] run(); 91 } 92 93 public static void main(String[] args) { 94 TestFramework framework = new TestFramework(TestAlignVector.class); 95 framework.addFlags("--add-modules", "java.base", "--add-exports", "java.base/jdk.internal.misc=ALL-UNNAMED", 96 "-XX:+IgnoreUnrecognizedVMOptions", "-XX:LoopUnrollLimit=250"); 97 98 switch (args[0]) { 99 case "NoAlignVector" -> { framework.addFlags("-XX:-AlignVector"); } 100 case "AlignVector" -> { framework.addFlags("-XX:+AlignVector"); } 101 case "VerifyAlignVector" -> { framework.addFlags("-XX:+AlignVector", "-XX:+IgnoreUnrecognizedVMOptions", "-XX:+VerifyAlignVector"); } 102 default -> { throw new RuntimeException("Test argument not recognized: " + args[0]); } 103 } 104 framework.start(); 105 } 106 107 public TestAlignVector() { 108 // Generate input once 109 aB = 
generateB(); 110 bB = generateB(); 111 aS = generateS(); 112 bS = generateS(); 113 aI = generateI(); 114 bI = generateI(); 115 aL = generateL(); 116 bL = generateL(); 117 118 // Add all tests to list 119 tests.put("test0", () -> { return test0(aB.clone(), bB.clone(), mB); }); 120 tests.put("test1", () -> { return test1(aB.clone(), bB.clone(), mB); }); 121 tests.put("test2", () -> { return test2(aB.clone(), bB.clone(), mB); }); 122 tests.put("test3", () -> { return test3(aB.clone(), bB.clone(), mB); }); 123 tests.put("test4", () -> { return test4(aB.clone(), bB.clone(), mB); }); 124 tests.put("test5", () -> { return test5(aB.clone(), bB.clone(), mB, 0); }); 125 tests.put("test6", () -> { return test6(aB.clone(), bB.clone(), mB); }); 126 tests.put("test7", () -> { return test7(aS.clone(), bS.clone(), mS); }); 127 tests.put("test8", () -> { return test8(aB.clone(), bB.clone(), mB, 0); }); 128 tests.put("test8", () -> { return test8(aB.clone(), bB.clone(), mB, 1); }); 129 tests.put("test9", () -> { return test9(aB.clone(), bB.clone(), mB); }); 130 131 tests.put("test10a", () -> { return test10a(aB.clone(), bB.clone(), mB); }); 132 tests.put("test10b", () -> { return test10b(aB.clone(), bB.clone(), mB); }); 133 tests.put("test10c", () -> { return test10c(aS.clone(), bS.clone(), mS); }); 134 tests.put("test10d", () -> { return test10d(aS.clone(), bS.clone(), mS); }); 135 136 tests.put("test11aB", () -> { return test11aB(aB.clone(), bB.clone(), mB); }); 137 tests.put("test11aS", () -> { return test11aS(aS.clone(), bS.clone(), mS); }); 138 tests.put("test11aI", () -> { return test11aI(aI.clone(), bI.clone(), mI); }); 139 tests.put("test11aL", () -> { return test11aL(aL.clone(), bL.clone(), mL); }); 140 141 tests.put("test11bB", () -> { return test11bB(aB.clone(), bB.clone(), mB); }); 142 tests.put("test11bS", () -> { return test11bS(aS.clone(), bS.clone(), mS); }); 143 tests.put("test11bI", () -> { return test11bI(aI.clone(), bI.clone(), mI); }); 144 tests.put("test11bL", 
() -> { return test11bL(aL.clone(), bL.clone(), mL); }); 145 146 tests.put("test11cB", () -> { return test11cB(aB.clone(), bB.clone(), mB); }); 147 tests.put("test11cS", () -> { return test11cS(aS.clone(), bS.clone(), mS); }); 148 tests.put("test11cI", () -> { return test11cI(aI.clone(), bI.clone(), mI); }); 149 tests.put("test11cL", () -> { return test11cL(aL.clone(), bL.clone(), mL); }); 150 151 tests.put("test11dB", () -> { return test11dB(aB.clone(), bB.clone(), mB, 0); }); 152 tests.put("test11dS", () -> { return test11dS(aS.clone(), bS.clone(), mS, 0); }); 153 tests.put("test11dI", () -> { return test11dI(aI.clone(), bI.clone(), mI, 0); }); 154 tests.put("test11dL", () -> { return test11dL(aL.clone(), bL.clone(), mL, 0); }); 155 156 tests.put("test12", () -> { return test12(aB.clone(), bB.clone(), mB); }); 157 158 tests.put("test13aIL", () -> { return test13aIL(aI.clone(), aL.clone()); }); 159 tests.put("test13aIB", () -> { return test13aIB(aI.clone(), aB.clone()); }); 160 tests.put("test13aIS", () -> { return test13aIS(aI.clone(), aS.clone()); }); 161 tests.put("test13aBSIL", () -> { return test13aBSIL(aB.clone(), aS.clone(), aI.clone(), aL.clone()); }); 162 163 tests.put("test13bIL", () -> { return test13bIL(aI.clone(), aL.clone()); }); 164 tests.put("test13bIB", () -> { return test13bIB(aI.clone(), aB.clone()); }); 165 tests.put("test13bIS", () -> { return test13bIS(aI.clone(), aS.clone()); }); 166 tests.put("test13bBSIL", () -> { return test13bBSIL(aB.clone(), aS.clone(), aI.clone(), aL.clone()); }); 167 168 tests.put("test14aB", () -> { return test14aB(aB.clone()); }); 169 tests.put("test14bB", () -> { return test14bB(aB.clone()); }); 170 tests.put("test14cB", () -> { return test14cB(aB.clone()); }); 171 172 tests.put("test15aB", () -> { return test15aB(aB.clone()); }); 173 tests.put("test15bB", () -> { return test15bB(aB.clone()); }); 174 tests.put("test15cB", () -> { return test15cB(aB.clone()); }); 175 176 tests.put("test16a", () -> { return 
test16a(aB.clone(), aS.clone()); }); 177 tests.put("test16b", () -> { return test16b(aB.clone()); }); 178 179 tests.put("test17a", () -> { return test17a(aL.clone()); }); 180 tests.put("test17b", () -> { return test17b(aL.clone()); }); 181 tests.put("test17c", () -> { return test17c(aL.clone()); }); 182 tests.put("test17d", () -> { return test17d(aL.clone()); }); 183 184 tests.put("test18a", () -> { return test18a(aB.clone(), aI.clone()); }); 185 tests.put("test18b", () -> { return test18b(aB.clone(), aI.clone()); }); 186 187 tests.put("test19", () -> { return test19(aI.clone(), bI.clone()); }); 188 tests.put("test20", () -> { return test20(aB.clone()); }); 189 190 // Compute gold value for all test methods before compilation 191 for (Map.Entry<String,TestFunction> entry : tests.entrySet()) { 192 String name = entry.getKey(); 193 TestFunction test = entry.getValue(); 194 Object[] gold = test.run(); 195 golds.put(name, gold); 196 } 197 } 198 199 @Warmup(100) 200 @Run(test = {"test0", 201 "test1", 202 "test2", 203 "test3", 204 "test4", 205 "test5", 206 "test6", 207 "test7", 208 "test8", 209 "test9", 210 "test10a", 211 "test10b", 212 "test10c", 213 "test10d", 214 "test11aB", 215 "test11aS", 216 "test11aI", 217 "test11aL", 218 "test11bB", 219 "test11bS", 220 "test11bI", 221 "test11bL", 222 "test11cB", 223 "test11cS", 224 "test11cI", 225 "test11cL", 226 "test11dB", 227 "test11dS", 228 "test11dI", 229 "test11dL", 230 "test12", 231 "test13aIL", 232 "test13aIB", 233 "test13aIS", 234 "test13aBSIL", 235 "test13bIL", 236 "test13bIB", 237 "test13bIS", 238 "test13bBSIL", 239 "test14aB", 240 "test14bB", 241 "test14cB", 242 "test15aB", 243 "test15bB", 244 "test15cB", 245 "test16a", 246 "test16b", 247 "test17a", 248 "test17b", 249 "test17c", 250 "test17d", 251 "test18a", 252 "test18b", 253 "test19", 254 "test20"}) 255 public void runTests() { 256 for (Map.Entry<String,TestFunction> entry : tests.entrySet()) { 257 String name = entry.getKey(); 258 TestFunction test = 
entry.getValue(); 259 // Recall gold value from before compilation 260 Object[] gold = golds.get(name); 261 // Compute new result 262 Object[] result = test.run(); 263 // Compare gold and new result 264 verify(name, gold, result); 265 } 266 } 267 268 static byte[] generateB() { 269 byte[] a = new byte[RANGE]; 270 for (int i = 0; i < a.length; i++) { 271 a[i] = (byte)RANDOM.nextInt(); 272 } 273 return a; 274 } 275 276 static short[] generateS() { 277 short[] a = new short[RANGE]; 278 for (int i = 0; i < a.length; i++) { 279 a[i] = (short)RANDOM.nextInt(); 280 } 281 return a; 282 } 283 284 static int[] generateI() { 285 int[] a = new int[RANGE]; 286 for (int i = 0; i < a.length; i++) { 287 a[i] = RANDOM.nextInt(); 288 } 289 return a; 290 } 291 292 static long[] generateL() { 293 long[] a = new long[RANGE]; 294 for (int i = 0; i < a.length; i++) { 295 a[i] = RANDOM.nextLong(); 296 } 297 return a; 298 } 299 300 static void verify(String name, Object[] gold, Object[] result) { 301 if (gold.length != result.length) { 302 throw new RuntimeException("verify " + name + ": not the same number of outputs: gold.length = " + 303 gold.length + ", result.length = " + result.length); 304 } 305 for (int i = 0; i < gold.length; i++) { 306 Object g = gold[i]; 307 Object r = result[i]; 308 if (g.getClass() != r.getClass() || !g.getClass().isArray() || !r.getClass().isArray()) { 309 throw new RuntimeException("verify " + name + ": must both be array of same type:" + 310 " gold[" + i + "].getClass() = " + g.getClass().getSimpleName() + 311 " result[" + i + "].getClass() = " + r.getClass().getSimpleName()); 312 } 313 if (g == r) { 314 throw new RuntimeException("verify " + name + ": should be two separate arrays (with identical content):" + 315 " gold[" + i + "] == result[" + i + "]"); 316 } 317 if (Array.getLength(g) != Array.getLength(r)) { 318 throw new RuntimeException("verify " + name + ": arrays must have same length:" + 319 " gold[" + i + "].length = " + Array.getLength(g) + 320 " 
result[" + i + "].length = " + Array.getLength(r)); 321 } 322 Class c = g.getClass().getComponentType(); 323 if (c == byte.class) { 324 verifyB(name, i, (byte[])g, (byte[])r); 325 } else if (c == short.class) { 326 verifyS(name, i, (short[])g, (short[])r); 327 } else if (c == int.class) { 328 verifyI(name, i, (int[])g, (int[])r); 329 } else if (c == long.class) { 330 verifyL(name, i, (long[])g, (long[])r); 331 } else { 332 throw new RuntimeException("verify " + name + ": array type not supported for verify:" + 333 " gold[" + i + "].getClass() = " + g.getClass().getSimpleName() + 334 " result[" + i + "].getClass() = " + r.getClass().getSimpleName()); 335 } 336 } 337 } 338 339 static void verifyB(String name, int i, byte[] g, byte[] r) { 340 for (int j = 0; j < g.length; j++) { 341 if (g[j] != r[j]) { 342 throw new RuntimeException("verify " + name + ": arrays must have same content:" + 343 " gold[" + i + "][" + j + "] = " + g[j] + 344 " result[" + i + "][" + j + "] = " + r[j]); 345 } 346 } 347 } 348 349 static void verifyS(String name, int i, short[] g, short[] r) { 350 for (int j = 0; j < g.length; j++) { 351 if (g[j] != r[j]) { 352 throw new RuntimeException("verify " + name + ": arrays must have same content:" + 353 " gold[" + i + "][" + j + "] = " + g[j] + 354 " result[" + i + "][" + j + "] = " + r[j]); 355 } 356 } 357 } 358 359 static void verifyI(String name, int i, int[] g, int[] r) { 360 for (int j = 0; j < g.length; j++) { 361 if (g[j] != r[j]) { 362 throw new RuntimeException("verify " + name + ": arrays must have same content:" + 363 " gold[" + i + "][" + j + "] = " + g[j] + 364 " result[" + i + "][" + j + "] = " + r[j]); 365 } 366 } 367 } 368 369 static void verifyL(String name, int i, long[] g, long[] r) { 370 for (int j = 0; j < g.length; j++) { 371 if (g[j] != r[j]) { 372 throw new RuntimeException("verify " + name + ": arrays must have same content:" + 373 " gold[" + i + "][" + j + "] = " + g[j] + 374 " result[" + i + "][" + j + "] = " + r[j]); 375 } 
}
    }

    // Masks bytes 0..3 of every 8-byte stride of a into b and returns both arrays.
    // Expects 4-byte vectors when MaxVectorSize >= 8, regardless of AlignVector.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIf = {"MaxVectorSize", ">=8"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
    static Object[] test0(byte[] a, byte[] b, byte mask) {
        for (int i = 0; i < RANGE; i+=8) {
            // Safe to vectorize with AlignVector
            b[i+0] = (byte)(a[i+0] & mask); // offset 0, align 0
            b[i+1] = (byte)(a[i+1] & mask);
            b[i+2] = (byte)(a[i+2] & mask);
            b[i+3] = (byte)(a[i+3] & mask);
        }
        return new Object[]{ a, b };
    }

    // Masks all 8 bytes of every 8-byte stride of a into b and returns both arrays.
    // The IR rule is only checked when UseCompactObjectHeaders is false.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
                  IRNode.AND_VB, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIf = {"UseCompactObjectHeaders", "false"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"})
    static Object[] test1(byte[] a, byte[] b, byte mask) {
        for (int i = 0; i < RANGE; i+=8) {
            // Safe to vectorize with AlignVector
            b[i+0] = (byte)(a[i+0] & mask); // offset 0, align 0
            b[i+1] = (byte)(a[i+1] & mask);
            b[i+2] = (byte)(a[i+2] & mask);
            b[i+3] = (byte)(a[i+3] & mask);
            b[i+4] = (byte)(a[i+4] & mask);
            b[i+5] = (byte)(a[i+5] & mask);
            b[i+6] = (byte)(a[i+6] & mask);
            b[i+7] = (byte)(a[i+7] & mask);
        }
        return new Object[]{ a, b };
    }

    // Masks bytes 3..6 of every 8-byte stride. Expects 4-byte vectors without AlignVector
    // and no vector nodes at all with AlignVector.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
    @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
                  IRNode.AND_VB, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "true"})
    static Object[] test2(byte[] a, byte[] b, byte mask) {
        for (int i = 0; i < RANGE; i+=8) {
            // Cannot align with AlignVector: 3 + x * 8 % 8 = 3
            b[i+3] = (byte)(a[i+3] & mask); // at alignment 3
            b[i+4] = (byte)(a[i+4] & mask);
            b[i+5] = (byte)(a[i+5] & mask);
            b[i+6] = (byte)(a[i+6] & mask);
        }
        return new Object[]{ a, b };
    }

    // Like test2, with an additional scalar access at offset 0 in the same iteration.
    // Same IR expectations as test2.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
    @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
                  IRNode.AND_VB, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "true"})
    static Object[] test3(byte[] a, byte[] b, byte mask) {
        for (int i = 0; i < RANGE; i+=8) {
            // Cannot align with AlignVector: 3 + x * 8 % 8 = 3

            // Problematic for AlignVector
            b[i+0] = (byte)(a[i+0] & mask); // best_memref, align 0

            b[i+3] = (byte)(a[i+3] & mask); // pack at offset 3 bytes
            b[i+4] = (byte)(a[i+4] & mask);
            b[i+5] = (byte)(a[i+5] & mask);
            b[i+6] = (byte)(a[i+6] & mask);
        }
        return new Object[]{ a, b };
    }

    // Per 16-byte stride: a group of 4 accesses at offset 0 and a group of 8 at offset 5.
    // Without AlignVector both 4- and 8-byte vectors are expected; with AlignVector only
    // the 4-byte vectors are expected (8-byte vector count must be zero).
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_8, "> 0",
                  IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.AND_VB, IRNode.VECTOR_SIZE_8, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=16"})
    @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_8, "= 0",// unaligned
                  IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.AND_VB, IRNode.VECTOR_SIZE_8, "= 0",// unaligned
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">=16"})
    static Object[] test4(byte[] a, byte[] b, byte mask) {
        for (int i = 0; i < RANGE/16; i++) {
            // Problematic for AlignVector
            b[i*16 + 0 ] = (byte)(a[i*16 + 0 ] & mask); // 4 pack, 0 aligned
            b[i*16 + 1 ] = (byte)(a[i*16 + 1 ] & mask);
            b[i*16 + 2 ] = (byte)(a[i*16 + 2 ] & mask);
            b[i*16 + 3 ] = (byte)(a[i*16 + 3 ] & mask);

            b[i*16 + 5 ] = (byte)(a[i*16 + 5 ] & mask); // 8 pack, 5 aligned
            b[i*16 + 6 ] = (byte)(a[i*16 + 6 ] & mask);
            b[i*16 + 7 ] = (byte)(a[i*16 + 7 ] & mask);
            b[i*16 + 8 ] = (byte)(a[i*16 + 8 ] & mask);
            b[i*16 + 9 ] = (byte)(a[i*16 + 9 ] & mask);
            b[i*16 + 10] = (byte)(a[i*16 + 10] & mask);
            b[i*16 + 11] = (byte)(a[i*16 + 11] & mask);
            b[i*16 + 12] = (byte)(a[i*16 + 12] & mask);
        }
        return new Object[]{ a, b };
    }

    // Same access pattern as test3, but every index is additionally shifted by the
    // loop-invariant inv. Same IR expectations as test2.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
    @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
                  IRNode.AND_VB, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "true"})
    static Object[] test5(byte[] a, byte[] b, byte mask, int inv) {
        for (int i = 0; i < RANGE; i+=8) {
            // Cannot align with AlignVector because of invariant
            b[i+inv+0] = (byte)(a[i+inv+0] & mask);

            b[i+inv+3] = (byte)(a[i+inv+3] & mask);
b[i+inv+4] = (byte)(a[i+inv+4] & mask);
            b[i+inv+5] = (byte)(a[i+inv+5] & mask);
            b[i+inv+6] = (byte)(a[i+inv+6] & mask);
        }
        return new Object[]{ a, b };
    }

    // Byte accesses at i*4+0 and i*4+3..6 with i stepping by 2.
    // Expects 4-byte vectors without AlignVector and no vector nodes with AlignVector.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
    @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
                  IRNode.AND_VB, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "true"})
    static Object[] test6(byte[] a, byte[] b, byte mask) {
        for (int i = 0; i < RANGE/8; i+=2) {
            // Cannot align with AlignVector because offset is odd
            b[i*4+0] = (byte)(a[i*4+0] & mask);

            b[i*4+3] = (byte)(a[i*4+3] & mask);
            b[i*4+4] = (byte)(a[i*4+4] & mask);
            b[i*4+5] = (byte)(a[i*4+5] & mask);
            b[i*4+6] = (byte)(a[i*4+6] & mask);
        }
        return new Object[]{ a, b };
    }

    // Short variant of test6. The positive rule requires MaxVectorSize >= 16.
    // NOTE(review): the positive rule is gated on avx2 while the negative rule is gated
    // on sse4.1 -- confirm that this asymmetry is intended.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.AND_VS, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=16"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"})
    @IR(counts = {IRNode.LOAD_VECTOR_S, "= 0",
                  IRNode.AND_VS, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "true"})
    static Object[] test7(short[] a, short[] b, short mask) {
        for (int i = 0; i < RANGE/8; i+=2) {
            // Cannot align with AlignVector because offset is odd
            b[i*4+0] = (short)(a[i*4+0] & mask);

            b[i*4+3] = (short)(a[i*4+3] & mask);
            b[i*4+4] = (short)(a[i*4+4] & mask);
            b[i*4+5] = (short)(a[i*4+5] & mask);
            b[i*4+6] = (short)(a[i*4+6] & mask);
        }
        return new Object[]{ a, b };
    }

    // Same access pattern as test3, but the loop starts at the caller-supplied init.
    // Expects 4-byte vectors without AlignVector and no vector nodes with AlignVector.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
    @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
                  IRNode.AND_VB, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "true"})
    static Object[] test8(byte[] a, byte[] b, byte mask, int init) {
        for (int i = init; i < RANGE; i+=8) {
            // Cannot align with AlignVector because of invariant (variable init becomes invar)
            b[i+0] = (byte)(a[i+0] & mask);

            b[i+3] = (byte)(a[i+3] & mask);
            b[i+4] = (byte)(a[i+4] & mask);
            b[i+5] = (byte)(a[i+5] & mask);
            b[i+6] = (byte)(a[i+6] & mask);
        }
        return new Object[]{ a, b };
    }

    // Same access pattern as test3 with a constant init of 13.
    // Expects 4-byte vectors whenever MaxVectorSize >= 8, regardless of AlignVector.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIf = {"MaxVectorSize", ">=8"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
    static Object[] test9(byte[] a, byte[] b, byte mask) {
        // known non-zero init value does not affect offset, but has implicit effect on iv
        for (int i = 13; i < RANGE-8; i+=8) {
            b[i+0] = (byte)(a[i+0] & mask);

            b[i+3] = (byte)(a[i+3] & mask);
            b[i+4] = (byte)(a[i+4] & mask);
            b[i+5] = (byte)(a[i+5] & mask);
            b[i+6] = (byte)(a[i+6] & mask);
        }
        return new Object[]{ a, b };
    }

    // Four consecutive byte accesses per 8-byte stride, constant init 3.
    // Expects 4-byte vectors without AlignVector and no vector nodes with AlignVector.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"})
    @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
                  IRNode.AND_VB, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "true"})
    static Object[] test10a(byte[] a, byte[] b, byte mask) {
        // This is not alignable with pre-loop, because of odd init.
        for (int i = 3; i < RANGE-8; i+=8) {
            b[i+0] = (byte)(a[i+0] & mask);
            b[i+1] = (byte)(a[i+1] & mask);
            b[i+2] = (byte)(a[i+2] & mask);
            b[i+3] = (byte)(a[i+3] & mask);
        }
        return new Object[]{ a, b };
    }

    // Same as test10a with constant init 13.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"})
    @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
                  IRNode.AND_VB, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "true"})
    static Object[] test10b(byte[] a, byte[] b, byte mask) {
        // This is not alignable with pre-loop, because of odd init.
        // Seems not correctly handled.
        for (int i = 13; i < RANGE-8; i+=8) {
            b[i+0] = (byte)(a[i+0] & mask);
            b[i+1] = (byte)(a[i+1] & mask);
            b[i+2] = (byte)(a[i+2] & mask);
            b[i+3] = (byte)(a[i+3] & mask);
        }
        return new Object[]{ a, b };
    }

    // Short variant of test10b. The positive rule requires MaxVectorSize >= 16.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.AND_VS, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=16"})
    @IR(counts = {IRNode.LOAD_VECTOR_S, "= 0",
                  IRNode.AND_VS, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "true"})
    static Object[] test10c(short[] a, short[] b, short mask) {
        // This is not alignable with pre-loop, because of odd init.
        // Seems not correctly handled with MaxVectorSize >= 32.
for (int i = 13; i < RANGE-8; i+=8) {
            b[i+0] = (short)(a[i+0] & mask);
            b[i+1] = (short)(a[i+1] & mask);
            b[i+2] = (short)(a[i+2] & mask);
            b[i+3] = (short)(a[i+3] & mask);
        }
        return new Object[]{ a, b };
    }

    // Short accesses at i+3..i+6 with constant init 13 and stride 8.
    // The IR rule is only checked when MaxVectorSize >= 16 and UseCompactObjectHeaders is false.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.AND_VS, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfAnd = {"MaxVectorSize", ">=16", "UseCompactObjectHeaders", "false"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"})
    static Object[] test10d(short[] a, short[] b, short mask) {
        for (int i = 13; i < RANGE-16; i+=8) {
            // init + offset -> aligned
            b[i+0+3] = (short)(a[i+0+3] & mask);
            b[i+1+3] = (short)(a[i+1+3] & mask);
            b[i+2+3] = (short)(a[i+2+3] & mask);
            b[i+3+3] = (short)(a[i+3+3] & mask);
        }
        return new Object[]{ a, b };
    }

    // test11a*: unit-stride loop from 0, one element per iteration, same index for load
    // and store. Vectorization is expected regardless of AlignVector.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
                  IRNode.AND_VB, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
    static Object[] test11aB(byte[] a, byte[] b, byte mask) {
        for (int i = 0; i < RANGE; i++) {
            // always alignable
            b[i+0] = (byte)(a[i+0] & mask);
        }
        return new Object[]{ a, b };
    }

    // Short variant of test11aB.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
                  IRNode.AND_VS, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
    static Object[] test11aS(short[] a, short[] b, short mask) {
        for (int i = 0; i < RANGE; i++) {
            // always alignable
            b[i+0] = (short)(a[i+0] & mask);
        }
        return new Object[]{ a, b };
    }

    // Int variant of test11aB.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
                  IRNode.AND_VI, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
    static Object[] test11aI(int[] a, int[] b, int mask) {
        for (int i = 0; i < RANGE; i++) {
            // always alignable
            b[i+0] = (int)(a[i+0] & mask);
        }
        return new Object[]{ a, b };
    }

    // Long variant of test11aB.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0",
                  IRNode.AND_VL, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
    static Object[] test11aL(long[] a, long[] b, long mask) {
        for (int i = 0; i < RANGE; i++) {
            // always alignable
            b[i+0] = (long)(a[i+0] & mask);
        }
        return new Object[]{ a, b };
    }

    // test11b*: same as test11a*, but the loop starts at 1.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
                  IRNode.AND_VB, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
    static Object[] test11bB(byte[] a, byte[] b, byte mask) {
        for (int i = 1; i < RANGE; i++) {
            // always alignable
            b[i+0] = (byte)(a[i+0] & mask);
        }
        return new Object[]{ a, b };
    }

    // Short variant of test11bB.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
                  IRNode.AND_VS, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
    static Object[] test11bS(short[] a, short[] b, short mask) {
        for (int i = 1; i < RANGE; i++) {
            // always alignable
            b[i+0] = (short)(a[i+0] & mask);
        }
        return new Object[]{ a, b };
    }

    // Int variant of test11bB.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
                  IRNode.AND_VI, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
    static Object[] test11bI(int[] a, int[] b, int mask) {
        for (int i = 1; i < RANGE; i++) {
            // always alignable
            b[i+0] = (int)(a[i+0] & mask);
        }
        return new Object[]{ a, b };
    }

    // Long variant of test11bB.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0",
                  IRNode.AND_VL, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
    static Object[] test11bL(long[] a, long[] b, long mask) {
        for (int i = 1; i < RANGE; i++) {
            // always alignable
            b[i+0] = (long)(a[i+0] & mask);
        }
        return new Object[]{ a, b };
    }

    // test11c*: the load index is one element ahead of the store index.
    // For byte, short and int: vectors are expected without AlignVector and no vector
    // nodes with AlignVector.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
                  IRNode.AND_VB, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "false"})
    @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
                  IRNode.AND_VB, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "true"})
    static Object[] test11cB(byte[] a, byte[] b, byte mask) {
        for (int i = 1; i < RANGE-1; i++) {
            // 1 byte offset -> not alignable with AlignVector
            b[i+0] = (byte)(a[i+1] & mask);
        }
        return new Object[]{ a, b };
    }

    // Short variant of test11cB.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
                  IRNode.AND_VS, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "false"})
    @IR(counts = {IRNode.LOAD_VECTOR_S, "= 0",
                  IRNode.AND_VS, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "true"})
    static Object[] test11cS(short[] a, short[] b, short mask) {
        for (int i = 1; i < RANGE-1; i++) {
            // 2 byte offset -> not alignable with AlignVector
            b[i+0] = (short)(a[i+1] & mask);
        }
        return new Object[]{ a, b };
    }

    // Int variant of test11cB.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
                  IRNode.AND_VI, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "false"})
    @IR(counts = {IRNode.LOAD_VECTOR_I, "= 0",
                  IRNode.AND_VI, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "true"})
    static Object[] test11cI(int[] a, int[] b, int mask) {
        for (int i = 1; i < RANGE-1; i++) {
            // 4 byte offset -> not alignable with AlignVector
            b[i+0] = (int)(a[i+1] & mask);
        }
        return new Object[]{ a, b };
    }

    // Long variant of test11cB; here vectors are expected regardless of AlignVector.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0",
                  IRNode.AND_VL, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
    static Object[] test11cL(long[] a, long[] b, long mask) {
        for (int i = 1; i < RANGE-1; i++) {
            // always alignable (8 byte offset)
            b[i+0] = (long)(a[i+1] & mask);
        }
        return new Object[]{ a, b };
    }

    // test11d*: unit-stride loop whose load and store index are both shifted by the
    // loop-invariant invar. Vectors are expected regardless of AlignVector.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
                  IRNode.AND_VB, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
    static Object[] test11dB(byte[] a, byte[] b, byte mask, int invar) {
        for (int i = 0; i < RANGE; i++) {
            b[i+0+invar] = (byte)(a[i+0+invar] & mask);
        }
        return new Object[]{ a, b };
    }

    // Short variant of test11dB.
    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
                  IRNode.AND_VS, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
    static Object[] test11dS(short[] a, short[] b, short mask, int invar) {
        for (int i = 0; i < RANGE; i++) {
            b[i+0+invar] = (short)(a[i+0+invar] & mask);
        }
        return new Object[]{ a, b };
    }

    @Test
    @IR(counts =
{IRNode.LOAD_VECTOR_I, "> 0", 941 IRNode.AND_VI, "> 0", 942 IRNode.STORE_VECTOR, "> 0"}, 943 applyIfPlatform = {"64-bit", "true"}, 944 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) 945 static Object[] test11dI(int[] a, int[] b, int mask, int invar) { 946 for (int i = 0; i < RANGE; i++) { 947 b[i+0+invar] = (int)(a[i+0+invar] & mask); 948 } 949 return new Object[]{ a, b }; 950 } 951 952 @Test 953 @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", 954 IRNode.AND_VL, "> 0", 955 IRNode.STORE_VECTOR, "> 0"}, 956 applyIfPlatform = {"64-bit", "true"}, 957 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) 958 static Object[] test11dL(long[] a, long[] b, long mask, int invar) { 959 for (int i = 0; i < RANGE; i++) { 960 b[i+0+invar] = (long)(a[i+0+invar] & mask); 961 } 962 return new Object[]{ a, b }; 963 } 964 965 @Test 966 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0", 967 IRNode.AND_VB, "= 0", 968 IRNode.STORE_VECTOR, "= 0"}, 969 applyIfPlatform = {"64-bit", "true"}, 970 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) 971 static Object[] test12(byte[] a, byte[] b, byte mask) { 972 for (int i = 0; i < RANGE/16; i++) { 973 // Currently does not vectorize at all 974 b[i*6 + 0 ] = (byte)(a[i*6 + 0 ] & mask); 975 b[i*6 + 1 ] = (byte)(a[i*6 + 1 ] & mask); 976 b[i*6 + 2 ] = (byte)(a[i*6 + 2 ] & mask); 977 b[i*6 + 3 ] = (byte)(a[i*6 + 3 ] & mask); 978 } 979 return new Object[]{ a, b }; 980 } 981 982 @Test 983 @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0", 984 IRNode.LOAD_VECTOR_L, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0", 985 IRNode.ADD_VI, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0", 986 IRNode.ADD_VL, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0", 987 IRNode.STORE_VECTOR, "> 0"}, 988 applyIfPlatform = {"64-bit", "true"}, 989 applyIfCPUFeatureOr = {"avx2", "true"}) 990 // require avx to ensure vectors are larger than what unrolling produces 991 static Object[] test13aIL(int[] a, long[] b) 
{ 992 for (int i = 0; i < RANGE; i++) { 993 a[i]++; 994 b[i]++; 995 } 996 return new Object[]{ a, b }; 997 } 998 999 @Test 1000 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", 1001 IRNode.LOAD_VECTOR_I, "> 0", 1002 IRNode.ADD_VB, "> 0", 1003 IRNode.ADD_VI, "> 0", 1004 IRNode.STORE_VECTOR, "> 0"}, 1005 applyIf = {"UseCompactObjectHeaders", "false"}, 1006 applyIfPlatform = {"64-bit", "true"}, 1007 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"}) 1008 static Object[] test13aIB(int[] a, byte[] b) { 1009 for (int i = 0; i < RANGE; i++) { 1010 a[i]++; 1011 b[i]++; 1012 } 1013 return new Object[]{ a, b }; 1014 } 1015 1016 @Test 1017 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", 1018 IRNode.LOAD_VECTOR_S, "> 0", 1019 IRNode.ADD_VI, "> 0", 1020 IRNode.ADD_VS, "> 0", 1021 IRNode.STORE_VECTOR, "> 0"}, 1022 applyIf = {"UseCompactObjectHeaders", "false"}, 1023 applyIfPlatform = {"64-bit", "true"}, 1024 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"}) 1025 static Object[] test13aIS(int[] a, short[] b) { 1026 for (int i = 0; i < RANGE; i++) { 1027 a[i]++; 1028 b[i]++; 1029 } 1030 return new Object[]{ a, b }; 1031 } 1032 1033 @Test 1034 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", 1035 IRNode.LOAD_VECTOR_S, "> 0", 1036 IRNode.LOAD_VECTOR_I, "> 0", 1037 IRNode.LOAD_VECTOR_L, "> 0", 1038 IRNode.ADD_VB, "> 0", 1039 IRNode.ADD_VS, "> 0", 1040 IRNode.ADD_VI, "> 0", 1041 IRNode.ADD_VL, "> 0", 1042 IRNode.STORE_VECTOR, "> 0"}, 1043 applyIf = {"UseCompactObjectHeaders", "false"}, 1044 applyIfPlatform = {"64-bit", "true"}, 1045 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"}) 1046 static Object[] test13aBSIL(byte[] a, short[] b, int[] c, long[] d) { 1047 for (int i = 0; i < RANGE; i++) { 1048 a[i]++; 1049 b[i]++; 1050 c[i]++; 1051 d[i]++; 1052 } 1053 return new Object[]{ a, b, c, d }; 1054 } 1055 1056 @Test 1057 @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0", 1058 IRNode.LOAD_VECTOR_L, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 
0", 1059 IRNode.ADD_VI, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0", 1060 IRNode.ADD_VL, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0", 1061 IRNode.STORE_VECTOR, "> 0"}, 1062 applyIfPlatform = {"64-bit", "true"}, 1063 applyIfCPUFeatureOr = {"avx2", "true"}) 1064 // require avx to ensure vectors are larger than what unrolling produces 1065 static Object[] test13bIL(int[] a, long[] b) { 1066 for (int i = 1; i < RANGE; i++) { 1067 a[i]++; 1068 b[i]++; 1069 } 1070 return new Object[]{ a, b }; 1071 } 1072 1073 @Test 1074 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", 1075 IRNode.LOAD_VECTOR_I, "> 0", 1076 IRNode.ADD_VB, "> 0", 1077 IRNode.ADD_VI, "> 0", 1078 IRNode.STORE_VECTOR, "> 0"}, 1079 applyIf = {"UseCompactObjectHeaders", "false"}, 1080 applyIfPlatform = {"64-bit", "true"}, 1081 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"}) 1082 static Object[] test13bIB(int[] a, byte[] b) { 1083 for (int i = 1; i < RANGE; i++) { 1084 a[i]++; 1085 b[i]++; 1086 } 1087 return new Object[]{ a, b }; 1088 } 1089 1090 @Test 1091 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", 1092 IRNode.LOAD_VECTOR_S, "> 0", 1093 IRNode.ADD_VI, "> 0", 1094 IRNode.ADD_VS, "> 0", 1095 IRNode.STORE_VECTOR, "> 0"}, 1096 applyIf = {"UseCompactObjectHeaders", "false"}, 1097 applyIfPlatform = {"64-bit", "true"}, 1098 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"}) 1099 static Object[] test13bIS(int[] a, short[] b) { 1100 for (int i = 1; i < RANGE; i++) { 1101 a[i]++; 1102 b[i]++; 1103 } 1104 return new Object[]{ a, b }; 1105 } 1106 1107 @Test 1108 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", 1109 IRNode.LOAD_VECTOR_S, "> 0", 1110 IRNode.LOAD_VECTOR_I, "> 0", 1111 IRNode.LOAD_VECTOR_L, "> 0", 1112 IRNode.ADD_VB, "> 0", 1113 IRNode.ADD_VS, "> 0", 1114 IRNode.ADD_VI, "> 0", 1115 IRNode.ADD_VL, "> 0", 1116 IRNode.STORE_VECTOR, "> 0"}, 1117 applyIf = {"UseCompactObjectHeaders", "false"}, 1118 applyIfPlatform = {"64-bit", "true"}, 1119 applyIfCPUFeatureOr = {"avx2", "true", "asimd", 
"true"}) 1120 static Object[] test13bBSIL(byte[] a, short[] b, int[] c, long[] d) { 1121 for (int i = 1; i < RANGE; i++) { 1122 a[i]++; 1123 b[i]++; 1124 c[i]++; 1125 d[i]++; 1126 } 1127 return new Object[]{ a, b, c, d }; 1128 } 1129 1130 @Test 1131 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", 1132 IRNode.ADD_VB, "> 0", 1133 IRNode.STORE_VECTOR, "> 0"}, 1134 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, 1135 applyIfPlatform = {"64-bit", "true"}, 1136 applyIf = {"AlignVector", "false"}) 1137 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0", 1138 IRNode.ADD_VB, "= 0", 1139 IRNode.STORE_VECTOR, "= 0"}, 1140 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, 1141 applyIfPlatform = {"64-bit", "true"}, 1142 applyIf = {"AlignVector", "true"}) 1143 static Object[] test14aB(byte[] a) { 1144 // non-power-of-2 stride 1145 for (int i = 0; i < RANGE-20; i+=9) { 1146 a[i+0]++; 1147 a[i+1]++; 1148 a[i+2]++; 1149 a[i+3]++; 1150 a[i+4]++; 1151 a[i+5]++; 1152 a[i+6]++; 1153 a[i+7]++; 1154 a[i+8]++; 1155 a[i+9]++; 1156 a[i+10]++; 1157 a[i+11]++; 1158 a[i+12]++; 1159 a[i+13]++; 1160 a[i+14]++; 1161 a[i+15]++; 1162 } 1163 return new Object[]{ a }; 1164 } 1165 1166 @Test 1167 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", 1168 IRNode.ADD_VB, "> 0", 1169 IRNode.STORE_VECTOR, "> 0"}, 1170 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, 1171 applyIfPlatform = {"64-bit", "true"}, 1172 applyIf = {"AlignVector", "false"}) 1173 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0", 1174 IRNode.ADD_VB, "= 0", 1175 IRNode.STORE_VECTOR, "= 0"}, 1176 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, 1177 applyIfPlatform = {"64-bit", "true"}, 1178 applyIf = {"AlignVector", "true"}) 1179 static Object[] test14bB(byte[] a) { 1180 // non-power-of-2 stride 1181 for (int i = 0; i < RANGE-20; i+=3) { 1182 a[i+0]++; 1183 a[i+1]++; 1184 a[i+2]++; 1185 a[i+3]++; 1186 a[i+4]++; 1187 a[i+5]++; 1188 a[i+6]++; 1189 a[i+7]++; 1190 a[i+8]++; 1191 a[i+9]++; 1192 a[i+10]++; 1193 a[i+11]++; 1194 
a[i+12]++; 1195 a[i+13]++; 1196 a[i+14]++; 1197 a[i+15]++; 1198 } 1199 return new Object[]{ a }; 1200 } 1201 1202 @Test 1203 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", 1204 IRNode.ADD_VB, "> 0", 1205 IRNode.STORE_VECTOR, "> 0"}, 1206 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, 1207 applyIfPlatform = {"64-bit", "true"}, 1208 applyIf = {"AlignVector", "false"}) 1209 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0", 1210 IRNode.ADD_VB, "= 0", 1211 IRNode.STORE_VECTOR, "= 0"}, 1212 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, 1213 applyIfPlatform = {"64-bit", "true"}, 1214 applyIf = {"AlignVector", "true"}) 1215 static Object[] test14cB(byte[] a) { 1216 // non-power-of-2 stride 1217 for (int i = 0; i < RANGE-20; i+=5) { 1218 a[i+0]++; 1219 a[i+1]++; 1220 a[i+2]++; 1221 a[i+3]++; 1222 a[i+4]++; 1223 a[i+5]++; 1224 a[i+6]++; 1225 a[i+7]++; 1226 a[i+8]++; 1227 a[i+9]++; 1228 a[i+10]++; 1229 a[i+11]++; 1230 a[i+12]++; 1231 a[i+13]++; 1232 a[i+14]++; 1233 a[i+15]++; 1234 } 1235 return new Object[]{ a }; 1236 } 1237 1238 @Test 1239 // IR rules difficult because of modulo wrapping with offset after peeling. 1240 static Object[] test15aB(byte[] a) { 1241 // non-power-of-2 scale 1242 for (int i = 0; i < RANGE/64-20; i++) { 1243 a[53*i+0]++; 1244 a[53*i+1]++; 1245 a[53*i+2]++; 1246 a[53*i+3]++; 1247 a[53*i+4]++; 1248 a[53*i+5]++; 1249 a[53*i+6]++; 1250 a[53*i+7]++; 1251 a[53*i+8]++; 1252 a[53*i+9]++; 1253 a[53*i+10]++; 1254 a[53*i+11]++; 1255 a[53*i+12]++; 1256 a[53*i+13]++; 1257 a[53*i+14]++; 1258 a[53*i+15]++; 1259 } 1260 return new Object[]{ a }; 1261 } 1262 1263 @Test 1264 // IR rules difficult because of modulo wrapping with offset after peeling. 
1265 static Object[] test15bB(byte[] a) { 1266 // non-power-of-2 scale 1267 for (int i = 0; i < RANGE/64-20; i++) { 1268 a[25*i+0]++; 1269 a[25*i+1]++; 1270 a[25*i+2]++; 1271 a[25*i+3]++; 1272 a[25*i+4]++; 1273 a[25*i+5]++; 1274 a[25*i+6]++; 1275 a[25*i+7]++; 1276 a[25*i+8]++; 1277 a[25*i+9]++; 1278 a[25*i+10]++; 1279 a[25*i+11]++; 1280 a[25*i+12]++; 1281 a[25*i+13]++; 1282 a[25*i+14]++; 1283 a[25*i+15]++; 1284 } 1285 return new Object[]{ a }; 1286 } 1287 1288 @Test 1289 // IR rules difficult because of modulo wrapping with offset after peeling. 1290 static Object[] test15cB(byte[] a) { 1291 // non-power-of-2 scale 1292 for (int i = 0; i < RANGE/64-20; i++) { 1293 a[19*i+0]++; 1294 a[19*i+1]++; 1295 a[19*i+2]++; 1296 a[19*i+3]++; 1297 a[19*i+4]++; 1298 a[19*i+5]++; 1299 a[19*i+6]++; 1300 a[19*i+7]++; 1301 a[19*i+8]++; 1302 a[19*i+9]++; 1303 a[19*i+10]++; 1304 a[19*i+11]++; 1305 a[19*i+12]++; 1306 a[19*i+13]++; 1307 a[19*i+14]++; 1308 a[19*i+15]++; 1309 } 1310 return new Object[]{ a }; 1311 } 1312 1313 @Test 1314 static Object[] test16a(byte[] a, short[] b) { 1315 // infinite loop issues 1316 for (int i = 0; i < RANGE/2-20; i++) { 1317 a[2*i+0]++; 1318 a[2*i+1]++; 1319 a[2*i+2]++; 1320 a[2*i+3]++; 1321 a[2*i+4]++; 1322 a[2*i+5]++; 1323 a[2*i+6]++; 1324 a[2*i+7]++; 1325 a[2*i+8]++; 1326 a[2*i+9]++; 1327 a[2*i+10]++; 1328 a[2*i+11]++; 1329 a[2*i+12]++; 1330 a[2*i+13]++; 1331 a[2*i+14]++; 1332 1333 b[2*i+0]++; 1334 b[2*i+1]++; 1335 b[2*i+2]++; 1336 b[2*i+3]++; 1337 } 1338 return new Object[]{ a, b }; 1339 } 1340 1341 @Test 1342 static Object[] test16b(byte[] a) { 1343 // infinite loop issues 1344 for (int i = 0; i < RANGE/2-20; i++) { 1345 a[2*i+0]++; 1346 a[2*i+1]++; 1347 a[2*i+2]++; 1348 a[2*i+3]++; 1349 a[2*i+4]++; 1350 a[2*i+5]++; 1351 a[2*i+6]++; 1352 a[2*i+7]++; 1353 a[2*i+8]++; 1354 a[2*i+9]++; 1355 a[2*i+10]++; 1356 a[2*i+11]++; 1357 a[2*i+12]++; 1358 a[2*i+13]++; 1359 a[2*i+14]++; 1360 } 1361 return new Object[]{ a }; 1362 } 1363 1364 @Test 1365 @IR(counts = 
{IRNode.LOAD_VECTOR_L, "> 0", 1366 IRNode.ADD_VL, "> 0", 1367 IRNode.STORE_VECTOR, "> 0"}, 1368 applyIfPlatform = {"64-bit", "true"}, 1369 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) 1370 static Object[] test17a(long[] a) { 1371 // Unsafe: vectorizes with profiling (not xcomp) 1372 for (int i = 0; i < RANGE; i++) { 1373 int adr = UNSAFE.ARRAY_LONG_BASE_OFFSET + 8 * i; 1374 long v = UNSAFE.getLongUnaligned(a, adr); 1375 UNSAFE.putLongUnaligned(a, adr, v + 1); 1376 } 1377 return new Object[]{ a }; 1378 } 1379 1380 @Test 1381 // Difficult to write good IR rule. Modulo calculus overflow can create non-power-of-2 packs. 1382 static Object[] test17b(long[] a) { 1383 // Not alignable 1384 for (int i = 0; i < RANGE-1; i++) { 1385 int adr = UNSAFE.ARRAY_LONG_BASE_OFFSET + 8 * i + 1; 1386 long v = UNSAFE.getLongUnaligned(a, adr); 1387 UNSAFE.putLongUnaligned(a, adr, v + 1); 1388 } 1389 return new Object[]{ a }; 1390 } 1391 1392 @Test 1393 @IR(counts = {IRNode.LOAD_VECTOR_L, IRNode.VECTOR_SIZE_2, "> 0", 1394 IRNode.ADD_VL, IRNode.VECTOR_SIZE_2, "> 0", 1395 IRNode.STORE_VECTOR, "> 0"}, 1396 applyIf = {"MaxVectorSize", ">=32"}, 1397 applyIfPlatform = {"64-bit", "true"}, 1398 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) 1399 static Object[] test17c(long[] a) { 1400 // Unsafe: aligned vectorizes 1401 for (int i = 0; i < RANGE-1; i+=4) { 1402 int adr = UNSAFE.ARRAY_LONG_BASE_OFFSET + 8 * i; 1403 long v0 = UNSAFE.getLongUnaligned(a, adr + 0); 1404 long v1 = UNSAFE.getLongUnaligned(a, adr + 8); 1405 UNSAFE.putLongUnaligned(a, adr + 0, v0 + 1); 1406 UNSAFE.putLongUnaligned(a, adr + 8, v1 + 1); 1407 } 1408 return new Object[]{ a }; 1409 } 1410 1411 @Test 1412 @IR(counts = {IRNode.LOAD_VECTOR_L, IRNode.VECTOR_SIZE_2, "> 0", 1413 IRNode.ADD_VL, IRNode.VECTOR_SIZE_2, "> 0", 1414 IRNode.STORE_VECTOR, "> 0"}, 1415 applyIfCPUFeatureOr = {"avx512", "true", "asimd", "true"}, 1416 applyIfPlatform = {"64-bit", "true"}, 1417 applyIfAnd = {"AlignVector", "false", 
"MaxVectorSize", ">=64"}) 1418 // Ensure vector width is large enough to fit 64 byte for longs: 1419 // The offsets are: 25, 33, 57, 65 1420 // In modulo 32: 25, 1, 25, 1 -> does not vectorize 1421 // In modulo 64: 25, 33, 57, 1 -> at least first pair vectorizes 1422 // This problem is because we compute modulo vector width in memory_alignment. 1423 @IR(counts = {IRNode.LOAD_VECTOR_L, "= 0", 1424 IRNode.ADD_VL, "= 0", 1425 IRNode.STORE_VECTOR, "= 0"}, 1426 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, 1427 applyIfPlatform = {"64-bit", "true"}, 1428 applyIf = {"AlignVector", "true"}) 1429 static Object[] test17d(long[] a) { 1430 // Not alignable 1431 for (int i = 0; i < RANGE-1; i+=4) { 1432 int adr = UNSAFE.ARRAY_LONG_BASE_OFFSET + 8 * i + 1; 1433 long v0 = UNSAFE.getLongUnaligned(a, adr + 0); 1434 long v1 = UNSAFE.getLongUnaligned(a, adr + 8); 1435 UNSAFE.putLongUnaligned(a, adr + 0, v0 + 1); 1436 UNSAFE.putLongUnaligned(a, adr + 8, v1 + 1); 1437 } 1438 return new Object[]{ a }; 1439 } 1440 1441 @Test 1442 static Object[] test18a(byte[] a, int[] b) { 1443 // scale = 0 --> no iv 1444 for (int i = 0; i < RANGE; i++) { 1445 a[0] = 1; 1446 b[i] = 2; 1447 a[1] = 1; 1448 } 1449 return new Object[]{ a, b }; 1450 } 1451 1452 @Test 1453 static Object[] test18b(byte[] a, int[] b) { 1454 // scale = 0 --> no iv 1455 for (int i = 0; i < RANGE; i++) { 1456 a[1] = 1; 1457 b[i] = 2; 1458 a[2] = 1; 1459 } 1460 return new Object[]{ a, b }; 1461 } 1462 1463 @Test 1464 static Object[] test19(int[] a, int[] b) { 1465 for (int i = 5000; i > 0; i--) { 1466 a[RANGE_FINAL - i] = b[RANGE_FINAL - i]; 1467 } 1468 return new Object[]{ a, b }; 1469 } 1470 1471 @Test 1472 static Object[] test20(byte[] a) { 1473 // Example where it is easy to pass alignment check, 1474 // but used to fail the alignment calculation 1475 for (int i = 1; i < RANGE/2-50; i++) { 1476 a[2*i+0+30]++; 1477 a[2*i+1+30]++; 1478 a[2*i+2+30]++; 1479 a[2*i+3+30]++; 1480 } 1481 return new Object[]{ a }; 1482 } 
1483 }