1 /* 2 * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 */ 23 24 package compiler.loopopts.superword; 25 26 import compiler.lib.ir_framework.*; 27 import jdk.test.lib.Utils; 28 import jdk.test.whitebox.WhiteBox; 29 import jdk.internal.misc.Unsafe; 30 import java.lang.reflect.Array; 31 import java.util.Map; 32 import java.util.HashMap; 33 import java.util.Random; 34 import java.nio.ByteOrder; 35 36 /* 37 * @test id=NoAlignVector 38 * @bug 8310190 39 * @summary Test AlignVector with various loop init, stride, scale, invar, etc. 40 * @modules java.base/jdk.internal.misc 41 * @library /test/lib / 42 * @run driver compiler.loopopts.superword.TestAlignVector NoAlignVector 43 */ 44 45 /* 46 * @test id=AlignVector 47 * @bug 8310190 48 * @summary Test AlignVector with various loop init, stride, scale, invar, etc. 
 * @modules java.base/jdk.internal.misc
 * @library /test/lib /
 * @run driver compiler.loopopts.superword.TestAlignVector AlignVector
 */

/*
 * @test id=VerifyAlignVector
 * @bug 8310190
 * @summary Test AlignVector with various loop init, stride, scale, invar, etc.
 * @modules java.base/jdk.internal.misc
 * @library /test/lib /
 * @run driver compiler.loopopts.superword.TestAlignVector VerifyAlignVector
 */

/*
 * @test id=NoAlignVector-COH
 * @bug 8310190
 * @summary Test AlignVector with various loop init, stride, scale, invar, etc.
 * @modules java.base/jdk.internal.misc
 * @library /test/lib /
 * @run driver compiler.loopopts.superword.TestAlignVector NoAlignVector-COH
 */

/*
 * @test id=VerifyAlignVector-COH
 * @bug 8310190
 * @summary Test AlignVector with various loop init, stride, scale, invar, etc.
 * @modules java.base/jdk.internal.misc
 * @library /test/lib /
 * @run driver compiler.loopopts.superword.TestAlignVector VerifyAlignVector-COH
 */

// IR-framework test: each testN method is compiled and its generated IR is matched
// against the @IR rules; results are also compared against "gold" values computed
// in the interpreter before compilation (see constructor and runTests()).
public class TestAlignVector {
    static int RANGE = 1024*8;       // length of every generated input array
    static int RANGE_FINAL = 1024*8; // NOTE(review): named FINAL but not declared final -- confirm whether later (unseen) code relies on it being a compile-time constant
    private static final Unsafe UNSAFE = Unsafe.getUnsafe();
    private static final Random RANDOM = Utils.getRandomInstance();

    // Inputs: an "a"/"b" array pair plus a mask constant per element type
    byte[] aB;
    byte[] bB;
    byte mB = (byte)31;
    short[] aS;
    short[] bS;
    short mS = (short)0xF0F0;
    int[] aI;
    int[] bI;
    int mI = 0xF0F0F0F0;
    long[] aL;
    long[] bL;
    long mL = 0xF0F0F0F0F0F0F0F0L;

    // List of tests, keyed by test name
    Map<String,TestFunction> tests = new HashMap<String,TestFunction>();

    // List of gold, the results from the first run before compilation
    Map<String,Object[]> golds = new HashMap<String,Object[]>();

    // A test returns the arrays it produced so they can be compared against gold.
    interface TestFunction {
        Object[] run();
    }

    // Entry point: selects VM flags according to the run id passed by jtreg
    // (see the @run driver lines above) and starts the IR framework.
    public static void main(String[] args) {
        TestFramework framework = new TestFramework(TestAlignVector.class);
        framework.addFlags("--add-modules", "java.base", "--add-exports", "java.base/jdk.internal.misc=ALL-UNNAMED",
                           "-XX:+IgnoreUnrecognizedVMOptions", "-XX:LoopUnrollLimit=250");

        switch (args[0]) {
            case "NoAlignVector"         -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:-UseCompactObjectHeaders", "-XX:-AlignVector"); }
            case "AlignVector"           -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:-UseCompactObjectHeaders", "-XX:+AlignVector"); }
            case "VerifyAlignVector"     -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:-UseCompactObjectHeaders", "-XX:+AlignVector", "-XX:+IgnoreUnrecognizedVMOptions", "-XX:+VerifyAlignVector"); }
            case "NoAlignVector-COH"     -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:+UseCompactObjectHeaders", "-XX:-AlignVector"); }
            case "VerifyAlignVector-COH" -> { framework.addFlags("-XX:+UnlockExperimentalVMOptions", "-XX:+UseCompactObjectHeaders", "-XX:+AlignVector", "-XX:+IgnoreUnrecognizedVMOptions", "-XX:+VerifyAlignVector"); }
            default -> { throw new RuntimeException("Test argument not recognized: " + args[0]); }
        }
        framework.start();
    }

    // Constructor: generates the inputs once, registers every test closure, and
    // records the "gold" results before the JIT has a chance to compile anything.
    public TestAlignVector() {
        // Generate input once
        aB = generateB();
        bB = generateB();
        aS = generateS();
        bS = generateS();
        aI = generateI();
        bI = generateI();
        aL = generateL();
        bL = generateL();

        // Add all tests to list
        tests.put("test0", () -> { return test0(aB.clone(), bB.clone(), mB); });
        tests.put("test1a", () -> { return test1a(aB.clone(), bB.clone(), mB); });
        tests.put("test1b", () -> { return test1b(aB.clone(), bB.clone(), mB); });
        tests.put("test2", () -> { return test2(aB.clone(), bB.clone(), mB); });
        tests.put("test3", () -> { return test3(aB.clone(), bB.clone(), mB); });
        tests.put("test4", () -> { return test4(aB.clone(), bB.clone(), mB); });
        tests.put("test5", () -> { return test5(aB.clone(), bB.clone(), mB, 0); });
        tests.put("test6", () -> { return test6(aB.clone(), bB.clone(), mB); });
tests.put("test7", () -> { return test7(aS.clone(), bS.clone(), mS); }); 148 tests.put("test8", () -> { return test8(aB.clone(), bB.clone(), mB, 0); }); 149 tests.put("test8", () -> { return test8(aB.clone(), bB.clone(), mB, 1); }); 150 tests.put("test9", () -> { return test9(aB.clone(), bB.clone(), mB); }); 151 152 tests.put("test10a", () -> { return test10a(aB.clone(), bB.clone(), mB); }); 153 tests.put("test10b", () -> { return test10b(aB.clone(), bB.clone(), mB); }); 154 tests.put("test10c", () -> { return test10c(aS.clone(), bS.clone(), mS); }); 155 tests.put("test10d", () -> { return test10d(aS.clone(), bS.clone(), mS); }); 156 tests.put("test10e", () -> { return test10e(aS.clone(), bS.clone(), mS); }); 157 158 tests.put("test11aB", () -> { return test11aB(aB.clone(), bB.clone(), mB); }); 159 tests.put("test11aS", () -> { return test11aS(aS.clone(), bS.clone(), mS); }); 160 tests.put("test11aI", () -> { return test11aI(aI.clone(), bI.clone(), mI); }); 161 tests.put("test11aL", () -> { return test11aL(aL.clone(), bL.clone(), mL); }); 162 163 tests.put("test11bB", () -> { return test11bB(aB.clone(), bB.clone(), mB); }); 164 tests.put("test11bS", () -> { return test11bS(aS.clone(), bS.clone(), mS); }); 165 tests.put("test11bI", () -> { return test11bI(aI.clone(), bI.clone(), mI); }); 166 tests.put("test11bL", () -> { return test11bL(aL.clone(), bL.clone(), mL); }); 167 168 tests.put("test11cB", () -> { return test11cB(aB.clone(), bB.clone(), mB); }); 169 tests.put("test11cS", () -> { return test11cS(aS.clone(), bS.clone(), mS); }); 170 tests.put("test11cI", () -> { return test11cI(aI.clone(), bI.clone(), mI); }); 171 tests.put("test11cL", () -> { return test11cL(aL.clone(), bL.clone(), mL); }); 172 173 tests.put("test11dB", () -> { return test11dB(aB.clone(), bB.clone(), mB, 0); }); 174 tests.put("test11dS", () -> { return test11dS(aS.clone(), bS.clone(), mS, 0); }); 175 tests.put("test11dI", () -> { return test11dI(aI.clone(), bI.clone(), mI, 0); }); 176 
tests.put("test11dL", () -> { return test11dL(aL.clone(), bL.clone(), mL, 0); }); 177 178 tests.put("test12", () -> { return test12(aB.clone(), bB.clone(), mB); }); 179 180 tests.put("test13aIL", () -> { return test13aIL(aI.clone(), aL.clone()); }); 181 tests.put("test13aIB", () -> { return test13aIB(aI.clone(), aB.clone()); }); 182 tests.put("test13aIS", () -> { return test13aIS(aI.clone(), aS.clone()); }); 183 tests.put("test13aBSIL", () -> { return test13aBSIL(aB.clone(), aS.clone(), aI.clone(), aL.clone()); }); 184 185 tests.put("test13bIL", () -> { return test13bIL(aI.clone(), aL.clone()); }); 186 tests.put("test13bIB", () -> { return test13bIB(aI.clone(), aB.clone()); }); 187 tests.put("test13bIS", () -> { return test13bIS(aI.clone(), aS.clone()); }); 188 tests.put("test13bBSIL", () -> { return test13bBSIL(aB.clone(), aS.clone(), aI.clone(), aL.clone()); }); 189 190 tests.put("test14aB", () -> { return test14aB(aB.clone()); }); 191 tests.put("test14bB", () -> { return test14bB(aB.clone()); }); 192 tests.put("test14cB", () -> { return test14cB(aB.clone()); }); 193 tests.put("test14dB", () -> { return test14dB(aB.clone()); }); 194 tests.put("test14eB", () -> { return test14eB(aB.clone()); }); 195 tests.put("test14fB", () -> { return test14fB(aB.clone()); }); 196 197 tests.put("test15aB", () -> { return test15aB(aB.clone()); }); 198 tests.put("test15bB", () -> { return test15bB(aB.clone()); }); 199 tests.put("test15cB", () -> { return test15cB(aB.clone()); }); 200 201 tests.put("test16a", () -> { return test16a(aB.clone(), aS.clone()); }); 202 tests.put("test16b", () -> { return test16b(aB.clone()); }); 203 204 tests.put("test17a", () -> { return test17a(aL.clone()); }); 205 tests.put("test17b", () -> { return test17b(aL.clone()); }); 206 tests.put("test17c", () -> { return test17c(aL.clone()); }); 207 tests.put("test17d", () -> { return test17d(aL.clone()); }); 208 209 tests.put("test18a", () -> { return test18a(aB.clone(), aI.clone()); }); 210 
        tests.put("test18b", () -> { return test18b(aB.clone(), aI.clone()); });

        tests.put("test19", () -> { return test19(aI.clone(), bI.clone()); });
        tests.put("test20", () -> { return test20(aB.clone()); });

        // Compute gold value for all test methods before compilation
        for (Map.Entry<String,TestFunction> entry : tests.entrySet()) {
            String name = entry.getKey();
            TestFunction test = entry.getValue();
            Object[] gold = test.run();
            golds.put(name, gold);
        }
    }

    // Invoked repeatedly by the IR framework; after warmup the test methods are
    // C2-compiled and their outputs must still match the interpreter gold values.
    @Warmup(100)
    @Run(test = {"test0",
                 "test1a",
                 "test1b",
                 "test2",
                 "test3",
                 "test4",
                 "test5",
                 "test6",
                 "test7",
                 "test8",
                 "test9",
                 "test10a",
                 "test10b",
                 "test10c",
                 "test10d",
                 "test10e",
                 "test11aB",
                 "test11aS",
                 "test11aI",
                 "test11aL",
                 "test11bB",
                 "test11bS",
                 "test11bI",
                 "test11bL",
                 "test11cB",
                 "test11cS",
                 "test11cI",
                 "test11cL",
                 "test11dB",
                 "test11dS",
                 "test11dI",
                 "test11dL",
                 "test12",
                 "test13aIL",
                 "test13aIB",
                 "test13aIS",
                 "test13aBSIL",
                 "test13bIL",
                 "test13bIB",
                 "test13bIS",
                 "test13bBSIL",
                 "test14aB",
                 "test14bB",
                 "test14cB",
                 "test14dB",
                 "test14eB",
                 "test14fB",
                 "test15aB",
                 "test15bB",
                 "test15cB",
                 "test16a",
                 "test16b",
                 "test17a",
                 "test17b",
                 "test17c",
                 "test17d",
                 "test18a",
                 "test18b",
                 "test19",
                 "test20"})
    public void runTests() {
        for (Map.Entry<String,TestFunction> entry : tests.entrySet()) {
            String name = entry.getKey();
            TestFunction test = entry.getValue();
            // Recall gold value from before compilation
            Object[] gold = golds.get(name);
            // Compute new result
            Object[] result = test.run();
            // Compare gold and new result
            verify(name, gold, result);
        }
    }

    // Fills a fresh byte[RANGE] with random values.
    static byte[] generateB() {
        byte[] a = new byte[RANGE];
        for (int i = 0; i < a.length; i++) {
            a[i] = (byte)RANDOM.nextInt();
302 } 303 return a; 304 } 305 306 static short[] generateS() { 307 short[] a = new short[RANGE]; 308 for (int i = 0; i < a.length; i++) { 309 a[i] = (short)RANDOM.nextInt(); 310 } 311 return a; 312 } 313 314 static int[] generateI() { 315 int[] a = new int[RANGE]; 316 for (int i = 0; i < a.length; i++) { 317 a[i] = RANDOM.nextInt(); 318 } 319 return a; 320 } 321 322 static long[] generateL() { 323 long[] a = new long[RANGE]; 324 for (int i = 0; i < a.length; i++) { 325 a[i] = RANDOM.nextLong(); 326 } 327 return a; 328 } 329 330 static void verify(String name, Object[] gold, Object[] result) { 331 if (gold.length != result.length) { 332 throw new RuntimeException("verify " + name + ": not the same number of outputs: gold.length = " + 333 gold.length + ", result.length = " + result.length); 334 } 335 for (int i = 0; i < gold.length; i++) { 336 Object g = gold[i]; 337 Object r = result[i]; 338 if (g.getClass() != r.getClass() || !g.getClass().isArray() || !r.getClass().isArray()) { 339 throw new RuntimeException("verify " + name + ": must both be array of same type:" + 340 " gold[" + i + "].getClass() = " + g.getClass().getSimpleName() + 341 " result[" + i + "].getClass() = " + r.getClass().getSimpleName()); 342 } 343 if (g == r) { 344 throw new RuntimeException("verify " + name + ": should be two separate arrays (with identical content):" + 345 " gold[" + i + "] == result[" + i + "]"); 346 } 347 if (Array.getLength(g) != Array.getLength(r)) { 348 throw new RuntimeException("verify " + name + ": arrays must have same length:" + 349 " gold[" + i + "].length = " + Array.getLength(g) + 350 " result[" + i + "].length = " + Array.getLength(r)); 351 } 352 Class c = g.getClass().getComponentType(); 353 if (c == byte.class) { 354 verifyB(name, i, (byte[])g, (byte[])r); 355 } else if (c == short.class) { 356 verifyS(name, i, (short[])g, (short[])r); 357 } else if (c == int.class) { 358 verifyI(name, i, (int[])g, (int[])r); 359 } else if (c == long.class) { 360 verifyL(name, i, 
                        (long[])g, (long[])r);
            } else {
                throw new RuntimeException("verify " + name + ": array type not supported for verify:" +
                                           " gold[" + i + "].getClass() = " + g.getClass().getSimpleName() +
                                           " result[" + i + "].getClass() = " + r.getClass().getSimpleName());
            }
        }
    }

    // Element-wise comparison for byte arrays; i is the output index, used only in
    // the failure message.
    static void verifyB(String name, int i, byte[] g, byte[] r) {
        for (int j = 0; j < g.length; j++) {
            if (g[j] != r[j]) {
                throw new RuntimeException("verify " + name + ": arrays must have same content:" +
                                           " gold[" + i + "][" + j + "] = " + g[j] +
                                           " result[" + i + "][" + j + "] = " + r[j]);
            }
        }
    }

    // Element-wise comparison for short arrays (same contract as verifyB).
    static void verifyS(String name, int i, short[] g, short[] r) {
        for (int j = 0; j < g.length; j++) {
            if (g[j] != r[j]) {
                throw new RuntimeException("verify " + name + ": arrays must have same content:" +
                                           " gold[" + i + "][" + j + "] = " + g[j] +
                                           " result[" + i + "][" + j + "] = " + r[j]);
            }
        }
    }

    // Element-wise comparison for int arrays (same contract as verifyB).
    static void verifyI(String name, int i, int[] g, int[] r) {
        for (int j = 0; j < g.length; j++) {
            if (g[j] != r[j]) {
                throw new RuntimeException("verify " + name + ": arrays must have same content:" +
                                           " gold[" + i + "][" + j + "] = " + g[j] +
                                           " result[" + i + "][" + j + "] = " + r[j]);
            }
        }
    }

    // Element-wise comparison for long arrays (same contract as verifyB).
    static void verifyL(String name, int i, long[] g, long[] r) {
        for (int j = 0; j < g.length; j++) {
            if (g[j] != r[j]) {
                throw new RuntimeException("verify " + name + ": arrays must have same content:" +
                                           " gold[" + i + "][" + j + "] = " + g[j] +
                                           " result[" + i + "][" + j + "] = " + r[j]);
            }
        }
    }

    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIf = {"MaxVectorSize", ">=8"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
    static Object[] test0(byte[] a, byte[] b, byte mask) {
        for (int i = 0; i < RANGE; i+=8) {
            // Safe to vectorize with AlignVector
            b[i+0] = (byte)(a[i+0] & mask); // offset 0, align 0
            b[i+1] = (byte)(a[i+1] & mask);
            b[i+2] = (byte)(a[i+2] & mask);
            b[i+3] = (byte)(a[i+3] & mask);
        }
        return new Object[]{ a, b };
    }

    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
                  IRNode.AND_VB, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"},
        // UNSAFE.ARRAY_BYTE_BASE_OFFSET = 16, but with compact object headers UNSAFE.ARRAY_BYTE_BASE_OFFSET=12.
        // If AlignVector=true, we need the offset to be 8-byte aligned, else the vectors are filtered out.
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"})
    static Object[] test1a(byte[] a, byte[] b, byte mask) {
        // Contiguous 8-byte chunk per iteration, starting at offset 0.
        for (int i = 0; i < RANGE; i+=8) {
            b[i+0] = (byte)(a[i+0] & mask); // adr = base + UNSAFE.ARRAY_BYTE_BASE_OFFSET + 0 + iter*8
            b[i+1] = (byte)(a[i+1] & mask);
            b[i+2] = (byte)(a[i+2] & mask);
            b[i+3] = (byte)(a[i+3] & mask);
            b[i+4] = (byte)(a[i+4] & mask);
            b[i+5] = (byte)(a[i+5] & mask);
            b[i+6] = (byte)(a[i+6] & mask);
            b[i+7] = (byte)(a[i+7] & mask);
        }
        return new Object[]{ a, b };
    }

    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
                  IRNode.AND_VB, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfOr = {"UseCompactObjectHeaders", "true", "AlignVector", "false"},
        // UNSAFE.ARRAY_BYTE_BASE_OFFSET = 16, but with compact object headers UNSAFE.ARRAY_BYTE_BASE_OFFSET=12.
        // If AlignVector=true, we need the offset to be 8-byte aligned, else the vectors are filtered out.
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"})
    static Object[] test1b(byte[] a, byte[] b, byte mask) {
        // Same as test1a but the loop starts at init 4, shifting the base offset.
        for (int i = 4; i < RANGE-8; i+=8) {
            b[i+0] = (byte)(a[i+0] & mask); // adr = base + UNSAFE.ARRAY_BYTE_BASE_OFFSET + 4 + iter*8
            b[i+1] = (byte)(a[i+1] & mask);
            b[i+2] = (byte)(a[i+2] & mask);
            b[i+3] = (byte)(a[i+3] & mask);
            b[i+4] = (byte)(a[i+4] & mask);
            b[i+5] = (byte)(a[i+5] & mask);
            b[i+6] = (byte)(a[i+6] & mask);
            b[i+7] = (byte)(a[i+7] & mask);
        }
        return new Object[]{ a, b };
    }

    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
    @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
                  IRNode.AND_VB, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "true"})
    static Object[] test2(byte[] a, byte[] b, byte mask) {
        for (int i = 0; i < RANGE; i+=8) {
            // Cannot align with AlignVector: 3 + x * 8 % 8 = 3
            b[i+3] = (byte)(a[i+3] & mask); // at alignment 3
            b[i+4] = (byte)(a[i+4] & mask);
            b[i+5] = (byte)(a[i+5] & mask);
            b[i+6] = (byte)(a[i+6] & mask);
        }
        return new Object[]{ a, b };
    }

    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
    @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
                  IRNode.AND_VB, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "true"})
    static Object[] test3(byte[] a, byte[] b, byte mask) {
        for (int i = 0; i < RANGE; i+=8) {
            // Cannot align with AlignVector: 3 + x * 8 % 8 = 3

            // Problematic for AlignVector
            b[i+0] = (byte)(a[i+0] & mask); // best_memref, align 0

            b[i+3] = (byte)(a[i+3] & mask); // pack at offset 3 bytes
            b[i+4] = (byte)(a[i+4] & mask);
            b[i+5] = (byte)(a[i+5] & mask);
            b[i+6] = (byte)(a[i+6] & mask);
        }
        return new Object[]{ a, b };
    }

    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_8, "> 0",
                  IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.AND_VB, IRNode.VECTOR_SIZE_8, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=16"})
    @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_8, "= 0",// unaligned
                  IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.AND_VB, IRNode.VECTOR_SIZE_8, "= 0",// unaligned
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">=16"})
    static Object[] test4(byte[] a, byte[] b, byte mask) {
        for (int i = 0; i < RANGE/16; i++) {
            // Problematic for AlignVector
            b[i*16 + 0 ] = (byte)(a[i*16 + 0 ] & mask); // 4 pack, 0 aligned
            b[i*16 + 1 ] = (byte)(a[i*16 + 1 ] & mask);
            b[i*16 + 2 ] = (byte)(a[i*16 + 2 ] & mask);
            b[i*16 + 3 ] = (byte)(a[i*16 + 3 ] & mask);

            b[i*16 + 5 ] = (byte)(a[i*16 + 5 ] & mask); // 8 pack, 5 aligned
            b[i*16 + 6 ] = (byte)(a[i*16 + 6 ] & mask);
            b[i*16 + 7 ] = (byte)(a[i*16 + 7 ] & mask);
            b[i*16 + 8 ] = (byte)(a[i*16 + 8 ] & mask);
            b[i*16 + 9 ] = (byte)(a[i*16 + 9 ] & mask);
            b[i*16 + 10] = (byte)(a[i*16 + 10] & mask);
            b[i*16 + 11] = (byte)(a[i*16 + 11] & mask);
            b[i*16 + 12] = (byte)(a[i*16 + 12] & mask);
        }
        return new Object[]{ a, b };
    }

    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
    @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
                  IRNode.AND_VB, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "true"})
    static Object[] test5(byte[] a, byte[] b, byte mask, int inv) {
        for (int i = 0; i < RANGE; i+=8) {
            // Cannot align with AlignVector because of invariant
            b[i+inv+0] = (byte)(a[i+inv+0] & mask);

            b[i+inv+3] = (byte)(a[i+inv+3] & mask);
            b[i+inv+4] = (byte)(a[i+inv+4] & mask);
            b[i+inv+5] = (byte)(a[i+inv+5] & mask);
            b[i+inv+6] = (byte)(a[i+inv+6] & mask);
        }
        return new Object[]{ a, b };
    }

    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
    @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
                  IRNode.AND_VB, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "true"})
    static Object[] test6(byte[] a, byte[] b, byte mask) {
        for (int i = 0;
             i < RANGE/8; i+=2) {
            // Cannot align with AlignVector because offset is odd
            b[i*4+0] = (byte)(a[i*4+0] & mask);

            b[i*4+3] = (byte)(a[i*4+3] & mask);
            b[i*4+4] = (byte)(a[i*4+4] & mask);
            b[i*4+5] = (byte)(a[i*4+5] & mask);
            b[i*4+6] = (byte)(a[i*4+6] & mask);
        }
        return new Object[]{ a, b };
    }

    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.AND_VS, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=16"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"})
    @IR(counts = {IRNode.LOAD_VECTOR_S, "= 0",
                  IRNode.AND_VS, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "true"})
    static Object[] test7(short[] a, short[] b, short mask) {
        // Short variant of test6: same odd-offset pattern, element size 2.
        for (int i = 0; i < RANGE/8; i+=2) {
            // Cannot align with AlignVector because offset is odd
            b[i*4+0] = (short)(a[i*4+0] & mask);

            b[i*4+3] = (short)(a[i*4+3] & mask);
            b[i*4+4] = (short)(a[i*4+4] & mask);
            b[i*4+5] = (short)(a[i*4+5] & mask);
            b[i*4+6] = (short)(a[i*4+6] & mask);
        }
        return new Object[]{ a, b };
    }

    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
    @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
                  IRNode.AND_VB, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "true"})
    static Object[] test8(byte[] a, byte[] b, byte mask, int init) {
        for (int i = init; i < RANGE; i+=8) {
            // Cannot align with AlignVector because of invariant (variable init becomes invar)
            b[i+0] = (byte)(a[i+0] & mask);

            b[i+3] = (byte)(a[i+3] & mask);
            b[i+4] = (byte)(a[i+4] & mask);
            b[i+5] = (byte)(a[i+5] & mask);
            b[i+6] = (byte)(a[i+6] & mask);
        }
        return new Object[]{ a, b };
    }

    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIf = {"MaxVectorSize", ">=8"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
    static Object[] test9(byte[] a, byte[] b, byte mask) {
        // known non-zero init value does not affect offset, but has implicit effect on iv
        for (int i = 13; i < RANGE-8; i+=8) {
            b[i+0] = (byte)(a[i+0] & mask);

            b[i+3] = (byte)(a[i+3] & mask);
            b[i+4] = (byte)(a[i+4] & mask);
            b[i+5] = (byte)(a[i+5] & mask);
            b[i+6] = (byte)(a[i+6] & mask);
        }
        return new Object[]{ a, b };
    }

    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"})
    @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
                  IRNode.AND_VB, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "true"})
    static Object[] test10a(byte[] a, byte[] b, byte mask) {
        // This is not alignable with pre-loop, because of odd init.
        for (int i = 3; i < RANGE-8; i+=8) {
            b[i+0] = (byte)(a[i+0] & mask);
            b[i+1] = (byte)(a[i+1] & mask);
            b[i+2] = (byte)(a[i+2] & mask);
            b[i+3] = (byte)(a[i+3] & mask);
        }
        return new Object[]{ a, b };
    }

    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.AND_VB, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=8"})
    @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0",
                  IRNode.AND_VB, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "true"})
    static Object[] test10b(byte[] a, byte[] b, byte mask) {
        // This is not alignable with pre-loop, because of odd init.
        // Seems not correctly handled.
        for (int i = 13; i < RANGE-8; i+=8) {
            b[i+0] = (byte)(a[i+0] & mask);
            b[i+1] = (byte)(a[i+1] & mask);
            b[i+2] = (byte)(a[i+2] & mask);
            b[i+3] = (byte)(a[i+3] & mask);
        }
        return new Object[]{ a, b };
    }

    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.AND_VS, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">=16"})
    @IR(counts = {IRNode.LOAD_VECTOR_S, "= 0",
                  IRNode.AND_VS, "= 0",
                  IRNode.STORE_VECTOR, "= 0"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "true"})
    static Object[] test10c(short[] a, short[] b, short mask) {
        // This is not alignable with pre-loop, because of odd init.
        // Seems not correctly handled with MaxVectorSize >= 32.
        for (int i = 13; i < RANGE-8; i+=8) {
            b[i+0] = (short)(a[i+0] & mask);
            b[i+1] = (short)(a[i+1] & mask);
            b[i+2] = (short)(a[i+2] & mask);
            b[i+3] = (short)(a[i+3] & mask);
        }
        return new Object[]{ a, b };
    }

    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.AND_VS, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfAnd = {"MaxVectorSize", ">=16", "UseCompactObjectHeaders", "false"},
        // UNSAFE.ARRAY_BYTE_BASE_OFFSET = 16, but with compact object headers UNSAFE.ARRAY_BYTE_BASE_OFFSET=12.
        // If AlignVector=true, we need the offset to be 8-byte aligned, else the vectors are filtered out.
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"})
    static Object[] test10d(short[] a, short[] b, short mask) {
        for (int i = 13; i < RANGE-16; i+=8) {
            // adr = base + UNSAFE.ARRAY_SHORT_BASE_OFFSET + 2*(3 + 13) + iter*16
            b[i+0+3] = (short)(a[i+0+3] & mask);
            b[i+1+3] = (short)(a[i+1+3] & mask);
            b[i+2+3] = (short)(a[i+2+3] & mask);
            b[i+3+3] = (short)(a[i+3+3] & mask);
        }
        return new Object[]{ a, b };
    }

    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.AND_VS, IRNode.VECTOR_SIZE_4, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfAnd = {"MaxVectorSize", ">=16", "UseCompactObjectHeaders", "true"},
        // UNSAFE.ARRAY_BYTE_BASE_OFFSET = 16, but with compact object headers UNSAFE.ARRAY_BYTE_BASE_OFFSET=12.
        // If AlignVector=true, we need the offset to be 8-byte aligned, else the vectors are filtered out.
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"})
    static Object[] test10e(short[] a, short[] b, short mask) {
        // Compact-object-headers companion to test10d: init 11 instead of 13.
        for (int i = 11; i < RANGE-16; i+=8) {
            // adr = base + UNSAFE.ARRAY_SHORT_BASE_OFFSET + 2*(3 + 11) + iter*16
            b[i+0+3] = (short)(a[i+0+3] & mask);
            b[i+1+3] = (short)(a[i+1+3] & mask);
            b[i+2+3] = (short)(a[i+2+3] & mask);
            b[i+3+3] = (short)(a[i+3+3] & mask);
        }
        return new Object[]{ a, b };
    }

    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
                  IRNode.AND_VB, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
    static Object[] test11aB(byte[] a, byte[] b, byte mask) {
        for (int i = 0; i < RANGE; i++) {
            // always alignable
            b[i+0] = (byte)(a[i+0] & mask);
        }
        return new Object[]{ a, b };
    }

    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
                  IRNode.AND_VS, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
    static Object[] test11aS(short[] a, short[] b, short mask) {
        for (int i = 0; i < RANGE; i++) {
            // always alignable
            b[i+0] = (short)(a[i+0] & mask);
        }
        return new Object[]{ a, b };
    }

    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
                  IRNode.AND_VI, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
    static Object[] test11aI(int[] a, int[] b, int mask) {
        for (int i = 0; i < RANGE; i++) {
            // always alignable
            b[i+0] = (int)(a[i+0] & mask);
        }
        return new Object[]{ a, b };
    }

    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0",
                  IRNode.AND_VL, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
    static Object[] test11aL(long[] a, long[] b, long mask) {
        for (int i = 0; i < RANGE; i++) {
            // always alignable
            b[i+0] = (long)(a[i+0] & mask);
        }
        return new Object[]{ a, b };
    }

    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
                  IRNode.AND_VB, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
    static Object[] test11bB(byte[] a, byte[] b, byte mask) {
        for (int i = 1; i < RANGE; i++) {
            // always alignable
            b[i+0] = (byte)(a[i+0] & mask);
        }
        return new Object[]{ a, b };
    }

    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
                  IRNode.AND_VS, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
    static Object[] test11bS(short[] a, short[] b, short mask) {
        for (int i = 1; i < RANGE; i++) {
            // always alignable
            b[i+0] = (short)(a[i+0] & mask);
        }
        return new Object[]{ a, b };
    }

    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
                  IRNode.AND_VI, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
    static Object[] test11bI(int[] a, int[] b, int mask) {
        for (int i = 1; i < RANGE; i++) {
            // always alignable
            b[i+0] = (int)(a[i+0] & mask);
        }
        return new Object[]{ a, b };
    }

    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0",
                  IRNode.AND_VL, "> 0",
                  IRNode.STORE_VECTOR, "> 0"},
        applyIfPlatform = {"64-bit", "true"},
        applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"})
    static Object[] test11bL(long[] a, long[] b, long mask) {
        for (int i = 1; i < RANGE; i++) {
            // always alignable
            b[i+0] = (long)(a[i+0] & mask);
        }
        return new Object[]{ a, b };
    }

    @Test
    @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0",
IRNode.AND_VB, "> 0", 915 IRNode.STORE_VECTOR, "> 0"}, 916 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, 917 applyIfPlatform = {"64-bit", "true"}, 918 applyIf = {"AlignVector", "false"}) 919 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0", 920 IRNode.AND_VB, "= 0", 921 IRNode.STORE_VECTOR, "= 0"}, 922 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, 923 applyIfPlatform = {"64-bit", "true"}, 924 applyIf = {"AlignVector", "true"}) 925 static Object[] test11cB(byte[] a, byte[] b, byte mask) { 926 for (int i = 1; i < RANGE-1; i++) { 927 // 1 byte offset -> not alignable with AlignVector 928 b[i+0] = (byte)(a[i+1] & mask); 929 } 930 return new Object[]{ a, b }; 931 } 932 933 @Test 934 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", 935 IRNode.AND_VS, "> 0", 936 IRNode.STORE_VECTOR, "> 0"}, 937 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, 938 applyIfPlatform = {"64-bit", "true"}, 939 applyIf = {"AlignVector", "false"}) 940 @IR(counts = {IRNode.LOAD_VECTOR_S, "= 0", 941 IRNode.AND_VS, "= 0", 942 IRNode.STORE_VECTOR, "= 0"}, 943 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, 944 applyIfPlatform = {"64-bit", "true"}, 945 applyIf = {"AlignVector", "true"}) 946 static Object[] test11cS(short[] a, short[] b, short mask) { 947 for (int i = 1; i < RANGE-1; i++) { 948 // 2 byte offset -> not alignable with AlignVector 949 b[i+0] = (short)(a[i+1] & mask); 950 } 951 return new Object[]{ a, b }; 952 } 953 954 @Test 955 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", 956 IRNode.AND_VI, "> 0", 957 IRNode.STORE_VECTOR, "> 0"}, 958 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, 959 applyIfPlatform = {"64-bit", "true"}, 960 applyIf = {"AlignVector", "false"}) 961 @IR(counts = {IRNode.LOAD_VECTOR_I, "= 0", 962 IRNode.AND_VI, "= 0", 963 IRNode.STORE_VECTOR, "= 0"}, 964 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, 965 applyIfPlatform = {"64-bit", "true"}, 966 applyIf = {"AlignVector", "true"}) 967 static Object[] test11cI(int[] a, 
int[] b, int mask) { 968 for (int i = 1; i < RANGE-1; i++) { 969 // 4 byte offset -> not alignable with AlignVector 970 b[i+0] = (int)(a[i+1] & mask); 971 } 972 return new Object[]{ a, b }; 973 } 974 975 @Test 976 @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", 977 IRNode.AND_VL, "> 0", 978 IRNode.STORE_VECTOR, "> 0"}, 979 applyIfPlatform = {"64-bit", "true"}, 980 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) 981 static Object[] test11cL(long[] a, long[] b, long mask) { 982 for (int i = 1; i < RANGE-1; i++) { 983 // always alignable (8 byte offset) 984 b[i+0] = (long)(a[i+1] & mask); 985 } 986 return new Object[]{ a, b }; 987 } 988 989 @Test 990 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", 991 IRNode.AND_VB, "> 0", 992 IRNode.STORE_VECTOR, "> 0"}, 993 applyIfPlatform = {"64-bit", "true"}, 994 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) 995 static Object[] test11dB(byte[] a, byte[] b, byte mask, int invar) { 996 for (int i = 0; i < RANGE; i++) { 997 b[i+0+invar] = (byte)(a[i+0+invar] & mask); 998 } 999 return new Object[]{ a, b }; 1000 } 1001 1002 @Test 1003 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", 1004 IRNode.AND_VS, "> 0", 1005 IRNode.STORE_VECTOR, "> 0"}, 1006 applyIfPlatform = {"64-bit", "true"}, 1007 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) 1008 static Object[] test11dS(short[] a, short[] b, short mask, int invar) { 1009 for (int i = 0; i < RANGE; i++) { 1010 b[i+0+invar] = (short)(a[i+0+invar] & mask); 1011 } 1012 return new Object[]{ a, b }; 1013 } 1014 1015 @Test 1016 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", 1017 IRNode.AND_VI, "> 0", 1018 IRNode.STORE_VECTOR, "> 0"}, 1019 applyIfPlatform = {"64-bit", "true"}, 1020 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) 1021 static Object[] test11dI(int[] a, int[] b, int mask, int invar) { 1022 for (int i = 0; i < RANGE; i++) { 1023 b[i+0+invar] = (int)(a[i+0+invar] & mask); 1024 } 1025 return new Object[]{ a, b }; 1026 } 1027 1028 @Test 1029 @IR(counts = 
{IRNode.LOAD_VECTOR_L, "> 0", 1030 IRNode.AND_VL, "> 0", 1031 IRNode.STORE_VECTOR, "> 0"}, 1032 applyIfPlatform = {"64-bit", "true"}, 1033 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) 1034 static Object[] test11dL(long[] a, long[] b, long mask, int invar) { 1035 for (int i = 0; i < RANGE; i++) { 1036 b[i+0+invar] = (long)(a[i+0+invar] & mask); 1037 } 1038 return new Object[]{ a, b }; 1039 } 1040 1041 @Test 1042 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0", 1043 IRNode.AND_VB, "= 0", 1044 IRNode.STORE_VECTOR, "= 0"}, 1045 applyIfPlatform = {"64-bit", "true"}, 1046 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) 1047 static Object[] test12(byte[] a, byte[] b, byte mask) { 1048 for (int i = 0; i < RANGE/16; i++) { 1049 // Currently does not vectorize at all 1050 b[i*6 + 0 ] = (byte)(a[i*6 + 0 ] & mask); 1051 b[i*6 + 1 ] = (byte)(a[i*6 + 1 ] & mask); 1052 b[i*6 + 2 ] = (byte)(a[i*6 + 2 ] & mask); 1053 b[i*6 + 3 ] = (byte)(a[i*6 + 3 ] & mask); 1054 } 1055 return new Object[]{ a, b }; 1056 } 1057 1058 @Test 1059 @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0", 1060 IRNode.LOAD_VECTOR_L, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0", 1061 IRNode.ADD_VI, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0", 1062 IRNode.ADD_VL, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0", 1063 IRNode.STORE_VECTOR, "> 0"}, 1064 applyIfPlatform = {"64-bit", "true"}, 1065 applyIfCPUFeatureOr = {"avx2", "true"}) 1066 // require avx to ensure vectors are larger than what unrolling produces 1067 static Object[] test13aIL(int[] a, long[] b) { 1068 for (int i = 0; i < RANGE; i++) { 1069 a[i]++; 1070 b[i]++; 1071 } 1072 return new Object[]{ a, b }; 1073 } 1074 1075 @Test 1076 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", 1077 IRNode.LOAD_VECTOR_I, "> 0", 1078 IRNode.ADD_VB, "> 0", 1079 IRNode.ADD_VI, "> 0", 1080 IRNode.STORE_VECTOR, "> 0"}, 1081 applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"}, 1082 
applyIfPlatform = {"64-bit", "true"}, 1083 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"}) 1084 static Object[] test13aIB(int[] a, byte[] b) { 1085 for (int i = 0; i < RANGE; i++) { 1086 // adr = base + UNSAFE.ARRAY_BYTE_BASE_OFFSET + 1*iter 1087 // = 16 (or 12 if UseCompactObjectHeaders=true) 1088 a[i]++; 1089 // adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 4*iter 1090 // = 16 (or 12 if UseCompactObjectHeaders=true) 1091 b[i]++; 1092 // For AlignVector, all adr must be 8-byte aligned. Let's see for which iteration this can hold: 1093 // If UseCompactObjectHeaders=false: 1094 // a: 0, 8, 16, 24, 32, ... 1095 // b: 0, 2, 4, 6, 8, ... 1096 // -> Ok, aligns every 8th iteration. 1097 // If UseCompactObjectHeaders=true: 1098 // a: 4, 12, 20, 28, 36, ... 1099 // b: 1, 3, 5, 7, 9, ... 1100 // -> we can never align both vectors! 1101 } 1102 return new Object[]{ a, b }; 1103 } 1104 1105 @Test 1106 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", 1107 IRNode.LOAD_VECTOR_S, "> 0", 1108 IRNode.ADD_VI, "> 0", 1109 IRNode.ADD_VS, "> 0", 1110 IRNode.STORE_VECTOR, "> 0"}, 1111 applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"}, 1112 applyIfPlatform = {"64-bit", "true"}, 1113 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"}) 1114 static Object[] test13aIS(int[] a, short[] b) { 1115 for (int i = 0; i < RANGE; i++) { 1116 // adr = base + UNSAFE.ARRAY_BYTE_BASE_OFFSET + 4*iter 1117 // = 16 (or 12 if UseCompactObjectHeaders=true) 1118 a[i]++; 1119 // adr = base + UNSAFE.ARRAY_SHORT_BASE_OFFSET + 2*iter 1120 // = 16 (or 12 if UseCompactObjectHeaders=true) 1121 b[i]++; 1122 // For AlignVector, all adr must be 8-byte aligned. Let's see for which iteration this can hold: 1123 // If UseCompactObjectHeaders=false: 1124 // a: iter % 2 == 0 1125 // b: iter % 4 == 0 1126 // -> Ok, aligns every 4th iteration. 1127 // If UseCompactObjectHeaders=true: 1128 // a: iter % 2 = 1 1129 // b: iter % 4 = 2 1130 // -> we can never align both vectors! 
1131 } 1132 return new Object[]{ a, b }; 1133 } 1134 1135 @Test 1136 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", 1137 IRNode.LOAD_VECTOR_S, "> 0", 1138 IRNode.LOAD_VECTOR_I, "> 0", 1139 IRNode.LOAD_VECTOR_L, "> 0", 1140 IRNode.ADD_VB, "> 0", 1141 IRNode.ADD_VS, "> 0", 1142 IRNode.ADD_VI, "> 0", 1143 IRNode.ADD_VL, "> 0", 1144 IRNode.STORE_VECTOR, "> 0"}, 1145 applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"}, 1146 applyIfPlatform = {"64-bit", "true"}, 1147 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"}) 1148 static Object[] test13aBSIL(byte[] a, short[] b, int[] c, long[] d) { 1149 for (int i = 0; i < RANGE; i++) { 1150 // adr = base + UNSAFE.ARRAY_BYTE_BASE_OFFSET + 1*iter 1151 // = 16 (or 12 if UseCompactObjectHeaders=true) 1152 a[i]++; 1153 // adr = base + UNSAFE.ARRAY_SHORT_BASE_OFFSET + 2*iter 1154 // = 16 (or 12 if UseCompactObjectHeaders=true) 1155 b[i]++; 1156 // adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 4*iter 1157 // = 16 (or 12 if UseCompactObjectHeaders=true) 1158 c[i]++; 1159 // adr = base + UNSAFE.ARRAY_LONG_BASE_OFFSET + 8*iter 1160 // = 16 (always) 1161 d[i]++; 1162 // If AlignVector and UseCompactObjectHeaders, and we want all adr 8-byte aligned: 1163 // a: iter % 8 = 4 1164 // c: iter % 2 = 1 1165 // -> can never align both vectors! 
1166 } 1167 return new Object[]{ a, b, c, d }; 1168 } 1169 1170 @Test 1171 @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0", 1172 IRNode.LOAD_VECTOR_L, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0", 1173 IRNode.ADD_VI, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0", 1174 IRNode.ADD_VL, IRNode.VECTOR_SIZE + "min(max_int, max_long)", "> 0", 1175 IRNode.STORE_VECTOR, "> 0"}, 1176 applyIfPlatform = {"64-bit", "true"}, 1177 applyIfCPUFeatureOr = {"avx2", "true"}) 1178 // require avx to ensure vectors are larger than what unrolling produces 1179 static Object[] test13bIL(int[] a, long[] b) { 1180 for (int i = 1; i < RANGE; i++) { 1181 a[i]++; 1182 b[i]++; 1183 } 1184 return new Object[]{ a, b }; 1185 } 1186 1187 @Test 1188 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", 1189 IRNode.LOAD_VECTOR_I, "> 0", 1190 IRNode.ADD_VB, "> 0", 1191 IRNode.ADD_VI, "> 0", 1192 IRNode.STORE_VECTOR, "> 0"}, 1193 applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"}, 1194 applyIfPlatform = {"64-bit", "true"}, 1195 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"}) 1196 static Object[] test13bIB(int[] a, byte[] b) { 1197 for (int i = 1; i < RANGE; i++) { 1198 // adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 4 + 4*iter 1199 // = 16 (or 12 if UseCompactObjectHeaders=true) 1200 a[i]++; 1201 // adr = base + UNSAFE.ARRAY_BYTE_BASE_OFFSET + 1 + 1*iter 1202 // = 16 (or 12 if UseCompactObjectHeaders=true) 1203 b[i]++; 1204 // If AlignVector and UseCompactObjectHeaders, and we want all adr 8-byte aligned: 1205 // a: iter % 2 = 0 1206 // b: iter % 8 = 3 1207 // -> can never align both vectors! 
1208 } 1209 return new Object[]{ a, b }; 1210 } 1211 1212 @Test 1213 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", 1214 IRNode.LOAD_VECTOR_S, "> 0", 1215 IRNode.ADD_VI, "> 0", 1216 IRNode.ADD_VS, "> 0", 1217 IRNode.STORE_VECTOR, "> 0"}, 1218 applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"}, 1219 applyIfPlatform = {"64-bit", "true"}, 1220 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"}) 1221 static Object[] test13bIS(int[] a, short[] b) { 1222 for (int i = 1; i < RANGE; i++) { 1223 // adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 4 + 4*iter 1224 // = 16 (or 12 if UseCompactObjectHeaders=true) 1225 a[i]++; 1226 // adr = base + UNSAFE.ARRAY_SHORT_BASE_OFFSET + 2 + 2*iter 1227 // = 16 (or 12 if UseCompactObjectHeaders=true) 1228 b[i]++; 1229 // If AlignVector and UseCompactObjectHeaders, and we want all adr 8-byte aligned: 1230 // a: iter % 2 = 0 1231 // b: iter % 4 = 1 1232 // -> can never align both vectors! 1233 } 1234 return new Object[]{ a, b }; 1235 } 1236 1237 @Test 1238 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", 1239 IRNode.LOAD_VECTOR_S, "> 0", 1240 IRNode.LOAD_VECTOR_I, "> 0", 1241 IRNode.LOAD_VECTOR_L, "> 0", 1242 IRNode.ADD_VB, "> 0", 1243 IRNode.ADD_VS, "> 0", 1244 IRNode.ADD_VI, "> 0", 1245 IRNode.ADD_VL, "> 0", 1246 IRNode.STORE_VECTOR, "> 0"}, 1247 applyIfOr = {"UseCompactObjectHeaders", "false", "AlignVector", "false"}, 1248 applyIfPlatform = {"64-bit", "true"}, 1249 applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true"}) 1250 static Object[] test13bBSIL(byte[] a, short[] b, int[] c, long[] d) { 1251 for (int i = 1; i < RANGE; i++) { 1252 // adr = base + UNSAFE.ARRAY_BYTE_BASE_OFFSET + 1 + 1*iter 1253 // = 16 (or 12 if UseCompactObjectHeaders=true) 1254 a[i]++; 1255 // adr = base + UNSAFE.ARRAY_SHORT_BASE_OFFSET + 2 + 2*iter 1256 // = 16 (or 12 if UseCompactObjectHeaders=true) 1257 b[i]++; 1258 // adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 4 + 4*iter 1259 // = 16 (or 12 if UseCompactObjectHeaders=true) 1260 c[i]++; 1261 // 
adr = base + UNSAFE.ARRAY_LONG_BASE_OFFSET + 8 + 8*iter 1262 // = 16 (always) 1263 d[i]++; 1264 // If AlignVector and UseCompactObjectHeaders, and we want all adr 8-byte aligned: 1265 // a: iter % 8 = 3 1266 // c: iter % 2 = 0 1267 // -> can never align both vectors! 1268 } 1269 return new Object[]{ a, b, c, d }; 1270 } 1271 1272 @Test 1273 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0", 1274 IRNode.ADD_VB, "= 0", 1275 IRNode.STORE_VECTOR, "= 0"}, 1276 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, 1277 applyIfPlatform = {"64-bit", "true"}, 1278 applyIf = {"AlignVector", "false"}) 1279 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0", 1280 IRNode.ADD_VB, "= 0", 1281 IRNode.STORE_VECTOR, "= 0"}, 1282 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, 1283 applyIfPlatform = {"64-bit", "true"}, 1284 applyIf = {"AlignVector", "true"}) 1285 static Object[] test14aB(byte[] a) { 1286 // non-power-of-2 stride 1287 for (int i = 0; i < RANGE-20; i+=9) { 1288 // Since the stride is shorter than the vector length, there will be always 1289 // partial overlap of loads with previous stores, this leads to failure in 1290 // store-to-load-forwarding -> vectorization not profitable. 
1291 a[i+0]++; 1292 a[i+1]++; 1293 a[i+2]++; 1294 a[i+3]++; 1295 a[i+4]++; 1296 a[i+5]++; 1297 a[i+6]++; 1298 a[i+7]++; 1299 a[i+8]++; 1300 a[i+9]++; 1301 a[i+10]++; 1302 a[i+11]++; 1303 a[i+12]++; 1304 a[i+13]++; 1305 a[i+14]++; 1306 a[i+15]++; 1307 } 1308 return new Object[]{ a }; 1309 } 1310 1311 @Test 1312 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0", 1313 IRNode.ADD_VB, "= 0", 1314 IRNode.STORE_VECTOR, "= 0"}, 1315 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, 1316 applyIfPlatform = {"64-bit", "true"}, 1317 applyIf = {"AlignVector", "false"}) 1318 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0", 1319 IRNode.ADD_VB, "= 0", 1320 IRNode.STORE_VECTOR, "= 0"}, 1321 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, 1322 applyIfPlatform = {"64-bit", "true"}, 1323 applyIf = {"AlignVector", "true"}) 1324 static Object[] test14bB(byte[] a) { 1325 // non-power-of-2 stride 1326 for (int i = 0; i < RANGE-20; i+=3) { 1327 // Since the stride is shorter than the vector length, there will be always 1328 // partial overlap of loads with previous stores, this leads to failure in 1329 // store-to-load-forwarding -> vectorization not profitable. 
1330 a[i+0]++; 1331 a[i+1]++; 1332 a[i+2]++; 1333 a[i+3]++; 1334 a[i+4]++; 1335 a[i+5]++; 1336 a[i+6]++; 1337 a[i+7]++; 1338 a[i+8]++; 1339 a[i+9]++; 1340 a[i+10]++; 1341 a[i+11]++; 1342 a[i+12]++; 1343 a[i+13]++; 1344 a[i+14]++; 1345 a[i+15]++; 1346 } 1347 return new Object[]{ a }; 1348 } 1349 1350 @Test 1351 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0", 1352 IRNode.ADD_VB, "= 0", 1353 IRNode.STORE_VECTOR, "= 0"}, 1354 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, 1355 applyIfPlatform = {"64-bit", "true"}, 1356 applyIf = {"AlignVector", "false"}) 1357 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0", 1358 IRNode.ADD_VB, "= 0", 1359 IRNode.STORE_VECTOR, "= 0"}, 1360 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, 1361 applyIfPlatform = {"64-bit", "true"}, 1362 applyIf = {"AlignVector", "true"}) 1363 static Object[] test14cB(byte[] a) { 1364 // non-power-of-2 stride 1365 for (int i = 0; i < RANGE-20; i+=5) { 1366 // Since the stride is shorter than the vector length, there will be always 1367 // partial overlap of loads with previous stores, this leads to failure in 1368 // store-to-load-forwarding -> vectorization not profitable. 
1369 a[i+0]++; 1370 a[i+1]++; 1371 a[i+2]++; 1372 a[i+3]++; 1373 a[i+4]++; 1374 a[i+5]++; 1375 a[i+6]++; 1376 a[i+7]++; 1377 a[i+8]++; 1378 a[i+9]++; 1379 a[i+10]++; 1380 a[i+11]++; 1381 a[i+12]++; 1382 a[i+13]++; 1383 a[i+14]++; 1384 a[i+15]++; 1385 } 1386 return new Object[]{ a }; 1387 } 1388 1389 @Test 1390 @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE + "min(max_byte, 8)", "> 0", 1391 IRNode.ADD_VB, IRNode.VECTOR_SIZE + "min(max_byte, 8)", "> 0", 1392 IRNode.STORE_VECTOR, "> 0"}, 1393 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, 1394 applyIfPlatform = {"64-bit", "true"}, 1395 applyIf = {"AlignVector", "false"}) 1396 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0", 1397 IRNode.ADD_VB, "= 0", 1398 IRNode.STORE_VECTOR, "= 0"}, 1399 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, 1400 applyIfPlatform = {"64-bit", "true"}, 1401 applyIf = {"AlignVector", "true"}) 1402 static Object[] test14dB(byte[] a) { 1403 // non-power-of-2 stride 1404 for (int i = 0; i < RANGE-20; i+=9) { 1405 a[i+0]++; 1406 a[i+1]++; 1407 a[i+2]++; 1408 a[i+3]++; 1409 a[i+4]++; 1410 a[i+5]++; 1411 a[i+6]++; 1412 a[i+7]++; 1413 } 1414 return new Object[]{ a }; 1415 } 1416 1417 @Test 1418 @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE + "min(max_byte, 8)", "> 0", 1419 IRNode.ADD_VB, IRNode.VECTOR_SIZE + "min(max_byte, 8)", "> 0", 1420 IRNode.STORE_VECTOR, "> 0"}, 1421 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, 1422 applyIfPlatform = {"64-bit", "true"}, 1423 applyIf = {"AlignVector", "false"}) 1424 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0", 1425 IRNode.ADD_VB, "= 0", 1426 IRNode.STORE_VECTOR, "= 0"}, 1427 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, 1428 applyIfPlatform = {"64-bit", "true"}, 1429 applyIf = {"AlignVector", "true"}) 1430 static Object[] test14eB(byte[] a) { 1431 // non-power-of-2 stride 1432 for (int i = 0; i < RANGE-32; i+=11) { 1433 a[i+0]++; 1434 a[i+1]++; 1435 a[i+2]++; 1436 a[i+3]++; 1437 a[i+4]++; 1438 a[i+5]++; 
1439 a[i+6]++; 1440 a[i+7]++; 1441 } 1442 return new Object[]{ a }; 1443 } 1444 1445 @Test 1446 @IR(counts = {IRNode.LOAD_VECTOR_B, IRNode.VECTOR_SIZE + "min(max_byte, 8)", "> 0", 1447 IRNode.ADD_VB, IRNode.VECTOR_SIZE + "min(max_byte, 8)", "> 0", 1448 IRNode.STORE_VECTOR, "> 0"}, 1449 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, 1450 applyIfPlatform = {"64-bit", "true"}, 1451 applyIf = {"AlignVector", "false"}) 1452 @IR(counts = {IRNode.LOAD_VECTOR_B, "= 0", 1453 IRNode.ADD_VB, "= 0", 1454 IRNode.STORE_VECTOR, "= 0"}, 1455 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, 1456 applyIfPlatform = {"64-bit", "true"}, 1457 applyIf = {"AlignVector", "true"}) 1458 static Object[] test14fB(byte[] a) { 1459 // non-power-of-2 stride 1460 for (int i = 0; i < RANGE-40; i+=12) { 1461 a[i+0]++; 1462 a[i+1]++; 1463 a[i+2]++; 1464 a[i+3]++; 1465 a[i+4]++; 1466 a[i+5]++; 1467 a[i+6]++; 1468 a[i+7]++; 1469 } 1470 return new Object[]{ a }; 1471 } 1472 1473 @Test 1474 // IR rules difficult because of modulo wrapping with offset after peeling. 1475 static Object[] test15aB(byte[] a) { 1476 // non-power-of-2 scale 1477 for (int i = 0; i < RANGE/64-20; i++) { 1478 a[53*i+0]++; 1479 a[53*i+1]++; 1480 a[53*i+2]++; 1481 a[53*i+3]++; 1482 a[53*i+4]++; 1483 a[53*i+5]++; 1484 a[53*i+6]++; 1485 a[53*i+7]++; 1486 a[53*i+8]++; 1487 a[53*i+9]++; 1488 a[53*i+10]++; 1489 a[53*i+11]++; 1490 a[53*i+12]++; 1491 a[53*i+13]++; 1492 a[53*i+14]++; 1493 a[53*i+15]++; 1494 } 1495 return new Object[]{ a }; 1496 } 1497 1498 @Test 1499 // IR rules difficult because of modulo wrapping with offset after peeling. 
1500 static Object[] test15bB(byte[] a) { 1501 // non-power-of-2 scale 1502 for (int i = 0; i < RANGE/64-20; i++) { 1503 a[25*i+0]++; 1504 a[25*i+1]++; 1505 a[25*i+2]++; 1506 a[25*i+3]++; 1507 a[25*i+4]++; 1508 a[25*i+5]++; 1509 a[25*i+6]++; 1510 a[25*i+7]++; 1511 a[25*i+8]++; 1512 a[25*i+9]++; 1513 a[25*i+10]++; 1514 a[25*i+11]++; 1515 a[25*i+12]++; 1516 a[25*i+13]++; 1517 a[25*i+14]++; 1518 a[25*i+15]++; 1519 } 1520 return new Object[]{ a }; 1521 } 1522 1523 @Test 1524 // IR rules difficult because of modulo wrapping with offset after peeling. 1525 static Object[] test15cB(byte[] a) { 1526 // non-power-of-2 scale 1527 for (int i = 0; i < RANGE/64-20; i++) { 1528 a[19*i+0]++; 1529 a[19*i+1]++; 1530 a[19*i+2]++; 1531 a[19*i+3]++; 1532 a[19*i+4]++; 1533 a[19*i+5]++; 1534 a[19*i+6]++; 1535 a[19*i+7]++; 1536 a[19*i+8]++; 1537 a[19*i+9]++; 1538 a[19*i+10]++; 1539 a[19*i+11]++; 1540 a[19*i+12]++; 1541 a[19*i+13]++; 1542 a[19*i+14]++; 1543 a[19*i+15]++; 1544 } 1545 return new Object[]{ a }; 1546 } 1547 1548 @Test 1549 static Object[] test16a(byte[] a, short[] b) { 1550 // infinite loop issues 1551 for (int i = 0; i < RANGE/2-20; i++) { 1552 a[2*i+0]++; 1553 a[2*i+1]++; 1554 a[2*i+2]++; 1555 a[2*i+3]++; 1556 a[2*i+4]++; 1557 a[2*i+5]++; 1558 a[2*i+6]++; 1559 a[2*i+7]++; 1560 a[2*i+8]++; 1561 a[2*i+9]++; 1562 a[2*i+10]++; 1563 a[2*i+11]++; 1564 a[2*i+12]++; 1565 a[2*i+13]++; 1566 a[2*i+14]++; 1567 1568 b[2*i+0]++; 1569 b[2*i+1]++; 1570 b[2*i+2]++; 1571 b[2*i+3]++; 1572 } 1573 return new Object[]{ a, b }; 1574 } 1575 1576 @Test 1577 static Object[] test16b(byte[] a) { 1578 // infinite loop issues 1579 for (int i = 0; i < RANGE/2-20; i++) { 1580 a[2*i+0]++; 1581 a[2*i+1]++; 1582 a[2*i+2]++; 1583 a[2*i+3]++; 1584 a[2*i+4]++; 1585 a[2*i+5]++; 1586 a[2*i+6]++; 1587 a[2*i+7]++; 1588 a[2*i+8]++; 1589 a[2*i+9]++; 1590 a[2*i+10]++; 1591 a[2*i+11]++; 1592 a[2*i+12]++; 1593 a[2*i+13]++; 1594 a[2*i+14]++; 1595 } 1596 return new Object[]{ a }; 1597 } 1598 1599 @Test 1600 @IR(counts = 
{IRNode.LOAD_VECTOR_L, "> 0", 1601 IRNode.ADD_VL, "> 0", 1602 IRNode.STORE_VECTOR, "> 0"}, 1603 applyIfPlatform = {"64-bit", "true"}, 1604 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) 1605 static Object[] test17a(long[] a) { 1606 // Unsafe: vectorizes with profiling (not xcomp) 1607 for (int i = 0; i < RANGE; i++) { 1608 long adr = UNSAFE.ARRAY_LONG_BASE_OFFSET + 8L * i; 1609 long v = UNSAFE.getLongUnaligned(a, adr); 1610 UNSAFE.putLongUnaligned(a, adr, v + 1); 1611 } 1612 return new Object[]{ a }; 1613 } 1614 1615 @Test 1616 // Difficult to write good IR rule. Modulo calculus overflow can create non-power-of-2 packs. 1617 static Object[] test17b(long[] a) { 1618 // Not alignable 1619 for (int i = 0; i < RANGE-1; i++) { 1620 long adr = UNSAFE.ARRAY_LONG_BASE_OFFSET + 8L * i + 1; 1621 long v = UNSAFE.getLongUnaligned(a, adr); 1622 UNSAFE.putLongUnaligned(a, adr, v + 1); 1623 } 1624 return new Object[]{ a }; 1625 } 1626 1627 @Test 1628 @IR(counts = {IRNode.LOAD_VECTOR_L, IRNode.VECTOR_SIZE_2, "> 0", 1629 IRNode.ADD_VL, IRNode.VECTOR_SIZE_2, "> 0", 1630 IRNode.STORE_VECTOR, "> 0"}, 1631 applyIf = {"MaxVectorSize", ">=32"}, 1632 applyIfPlatform = {"64-bit", "true"}, 1633 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}) 1634 static Object[] test17c(long[] a) { 1635 // Unsafe: aligned vectorizes 1636 for (int i = 0; i < RANGE-1; i+=4) { 1637 long adr = UNSAFE.ARRAY_LONG_BASE_OFFSET + 8L * i; 1638 long v0 = UNSAFE.getLongUnaligned(a, adr + 0); 1639 long v1 = UNSAFE.getLongUnaligned(a, adr + 8); 1640 UNSAFE.putLongUnaligned(a, adr + 0, v0 + 1); 1641 UNSAFE.putLongUnaligned(a, adr + 8, v1 + 1); 1642 } 1643 return new Object[]{ a }; 1644 } 1645 1646 @Test 1647 @IR(counts = {IRNode.LOAD_VECTOR_L, IRNode.VECTOR_SIZE_2, "> 0", 1648 IRNode.ADD_VL, IRNode.VECTOR_SIZE_2, "> 0", 1649 IRNode.STORE_VECTOR, "> 0"}, 1650 applyIfCPUFeatureOr = {"avx512", "true", "asimd", "true"}, 1651 applyIfPlatform = {"64-bit", "true"}, 1652 applyIfAnd = {"AlignVector", 
"false", "MaxVectorSize", ">=64"}) 1653 // Ensure vector width is large enough to fit 64 byte for longs: 1654 // The offsets are: 25, 33, 57, 65 1655 // In modulo 32: 25, 1, 25, 1 -> does not vectorize 1656 // In modulo 64: 25, 33, 57, 1 -> at least first pair vectorizes 1657 // This problem is because we compute modulo vector width in memory_alignment. 1658 @IR(counts = {IRNode.LOAD_VECTOR_L, "= 0", 1659 IRNode.ADD_VL, "= 0", 1660 IRNode.STORE_VECTOR, "= 0"}, 1661 applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true"}, 1662 applyIfPlatform = {"64-bit", "true"}, 1663 applyIf = {"AlignVector", "true"}) 1664 static Object[] test17d(long[] a) { 1665 // Not alignable 1666 for (int i = 0; i < RANGE-1; i+=4) { 1667 long adr = UNSAFE.ARRAY_LONG_BASE_OFFSET + 8L * i + 1; 1668 long v0 = UNSAFE.getLongUnaligned(a, adr + 0); 1669 long v1 = UNSAFE.getLongUnaligned(a, adr + 8); 1670 UNSAFE.putLongUnaligned(a, adr + 0, v0 + 1); 1671 UNSAFE.putLongUnaligned(a, adr + 8, v1 + 1); 1672 } 1673 return new Object[]{ a }; 1674 } 1675 1676 @Test 1677 static Object[] test18a(byte[] a, int[] b) { 1678 // scale = 0 --> no iv 1679 for (int i = 0; i < RANGE; i++) { 1680 a[0] = 1; 1681 b[i] = 2; 1682 a[1] = 1; 1683 } 1684 return new Object[]{ a, b }; 1685 } 1686 1687 @Test 1688 static Object[] test18b(byte[] a, int[] b) { 1689 // scale = 0 --> no iv 1690 for (int i = 0; i < RANGE; i++) { 1691 a[1] = 1; 1692 b[i] = 2; 1693 a[2] = 1; 1694 } 1695 return new Object[]{ a, b }; 1696 } 1697 1698 @Test 1699 static Object[] test19(int[] a, int[] b) { 1700 for (int i = 5000; i > 0; i--) { 1701 a[RANGE_FINAL - i] = b[RANGE_FINAL - i]; 1702 } 1703 return new Object[]{ a, b }; 1704 } 1705 1706 @Test 1707 static Object[] test20(byte[] a) { 1708 // Example where it is easy to pass alignment check, 1709 // but used to fail the alignment calculation 1710 for (int i = 1; i < RANGE/2-50; i++) { 1711 a[2*i+0+30]++; 1712 a[2*i+1+30]++; 1713 a[2*i+2+30]++; 1714 a[2*i+3+30]++; 1715 } 1716 return new Object[]{ a 
}; 1717 } 1718 }