1 /* 2 * Copyright (c) 2022, 2023, Arm Limited. All rights reserved. 3 * Copyright (c) 2023, 2024, Oracle and/or its affiliates. All rights reserved. 4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 5 * 6 * This code is free software; you can redistribute it and/or modify it 7 * under the terms of the GNU General Public License version 2 only, as 8 * published by the Free Software Foundation. 9 * 10 * This code is distributed in the hope that it will be useful, but WITHOUT 11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 13 * version 2 for more details (a copy is included in the LICENSE file that 14 * accompanied this code). 15 * 16 * You should have received a copy of the GNU General Public License version 17 * 2 along with this work; if not, write to the Free Software Foundation, 18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 19 * 20 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 21 * or visit www.oracle.com if you need additional information or have any 22 * questions. 23 */ 24 25 /* 26 * @test 27 * @summary Vectorization test on combined operations 28 * @library /test/lib / 29 * 30 * @build jdk.test.whitebox.WhiteBox 31 * compiler.vectorization.runner.VectorizationTestRunner 32 * 33 * @requires vm.compiler2.enabled 34 * 35 * @run driver jdk.test.lib.helpers.ClassFileInstaller jdk.test.whitebox.WhiteBox 36 * 37 * @run main/othervm -Xbootclasspath/a:. 38 * -XX:+UnlockDiagnosticVMOptions 39 * -XX:+WhiteBoxAPI 40 * compiler.vectorization.runner.LoopCombinedOpTest nCOH_nAV 41 * 42 * @run main/othervm -Xbootclasspath/a:. 43 * -XX:+UnlockDiagnosticVMOptions 44 * -XX:+WhiteBoxAPI 45 * compiler.vectorization.runner.LoopCombinedOpTest nCOH_yAV 46 * 47 * @run main/othervm -Xbootclasspath/a:. 48 * -XX:+UnlockDiagnosticVMOptions 49 * -XX:+WhiteBoxAPI 50 * compiler.vectorization.runner.LoopCombinedOpTest yCOH_nAV 51 * 52 * @run main/othervm -Xbootclasspath/a:. 53 * -XX:+UnlockDiagnosticVMOptions 54 * -XX:+WhiteBoxAPI 55 * compiler.vectorization.runner.LoopCombinedOpTest yCOH_yAV 56 */ 57 58 package compiler.vectorization.runner; 59 60 import compiler.lib.ir_framework.*; 61 62 import java.util.Random; 63 64 public class LoopCombinedOpTest extends VectorizationTestRunner { 65 66 // We must pass the flags directly to the test-VM, and not the driver vm in the @run above. 67 @Override 68 protected String[] testVMFlags(String[] args) { 69 return switch (args[0]) { 70 case "nCOH_nAV" -> new String[]{"-XX:+UnlockExperimentalVMOptions", "-XX:-UseCompactObjectHeaders", "-XX:-AlignVector"}; 71 case "nCOH_yAV" -> new String[]{"-XX:+UnlockExperimentalVMOptions", "-XX:-UseCompactObjectHeaders", "-XX:+AlignVector"}; 72 case "yCOH_nAV" -> new String[]{"-XX:+UnlockExperimentalVMOptions", "-XX:+UseCompactObjectHeaders", "-XX:-AlignVector"}; 73 case "yCOH_yAV" -> new String[]{"-XX:+UnlockExperimentalVMOptions", "-XX:+UseCompactObjectHeaders", "-XX:+AlignVector"}; 74 default -> { throw new RuntimeException("Test argument not recognized: " + args[0]); } 75 }; 76 } 77 78 private static final int SIZE = 543; 79 80 private int[] a; 81 private int[] b; 82 private int[] c; 83 private int[] d; 84 private long[] l1; 85 private long[] l2; 86 private short[] s1; 87 private short[] s2; 88 private int intInv; 89 90 public LoopCombinedOpTest() { 91 a = new int[SIZE]; 92 b = new int[SIZE]; 93 c = new int[SIZE]; 94 d = new int[SIZE]; 95 l1 = new long[SIZE]; 96 l2 = new long[SIZE]; 97 s1 = new short[SIZE]; 98 s2 = new short[SIZE]; 99 for (int i = 0; i < SIZE; i++) { 100 a[i] = -654321 * i; 101 b[i] = 123456 * i; 102 c[i] = -998877 * i; 103 d[i] = 778899 * i; 104 l1[i] = 5000000000L * i; 105 l2[i] = -600000000L * i; 106 s1[i] = (short) (3 * i); 107 s2[i] = (short) (-2 * i); 108 } 109 Random ran = new Random(999); 110 intInv = ran.nextInt(); 111 } 112 113 @Test 114 @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"}, 115 counts = {IRNode.STORE_VECTOR, ">0", 116 IRNode.LOAD_VECTOR_I, "> 0"}) 117 public int[] opWithConstant() { 118 int[] res = new int[SIZE]; 119 for (int i = 0; i < SIZE; i++) { 120 res[i] = a[i] + 1234567890; 121 } 122 return res; 123 } 124 125 @Test 126 @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse4.1", "true"}, 127 counts = {IRNode.STORE_VECTOR, ">0", 128 IRNode.LOAD_VECTOR_I, "> 0"}) 129 public int[] opWithLoopInvariant() { 130 int[] res = new int[SIZE]; 131 for (int i = 0; i < SIZE; i++) { 132 res[i] = b[i] * intInv; 133 } 134 return res; 135 } 136 137 @Test 138 @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse4.1", "true"}, 139 counts = {IRNode.STORE_VECTOR, ">0", 140 IRNode.LOAD_VECTOR_I, "> 0"}) 141 public int[] opWithConstantAndLoopInvariant() { 142 int[] res = new int[SIZE]; 143 for (int i = 0; i < SIZE; i++) { 144 res[i] = c[i] * (intInv & 0xfff); 145 } 146 return res; 147 } 148 149 @Test 150 @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"}, 151 counts = {IRNode.STORE_VECTOR, ">0", 152 IRNode.LOAD_VECTOR_I, "> 0"}) 153 public int[] multipleOps() { 154 int[] res = new int[SIZE]; 155 for (int i = 0; i < SIZE; i++) { 156 res[i] = a[i] & b[i] + c[i] & d[i]; 157 } 158 return res; 159 } 160 161 @Test 162 @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse4.1", "true"}, 163 counts = {IRNode.STORE_VECTOR, ">0", 164 IRNode.LOAD_VECTOR_I, "> 0"}) 165 public int[] multipleOpsWithMultipleConstants() { 166 int[] res = new int[SIZE]; 167 for (int i = 0; i < SIZE; i++) { 168 res[i] = a[i] * 12345678 + 87654321 + b[i] & 0xffff - c[i] * d[i] * 2; 169 } 170 return res; 171 } 172 173 @Test 174 @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse4.1", "true"}, 175 counts = {IRNode.STORE_VECTOR, ">0", 176 IRNode.LOAD_VECTOR_I, "> 0"}) 177 // With sse2, the MulI does not vectorize. This means we have vectorized stores 178 // to res1, but scalar loads from res1. The store-to-load-forwarding failure 179 // detection catches this and rejects vectorization. 180 public int[] multipleStores() { 181 int[] res1 = new int[SIZE]; 182 int[] res2 = new int[SIZE]; 183 int[] res3 = new int[SIZE]; 184 for (int i = 0; i < SIZE; i++) { 185 res1[i] = a[i] & b[i]; 186 res2[i] = c[i] | d[i]; 187 res3[i] = res1[i] * res2[i]; 188 } 189 return res3; 190 } 191 192 @Test 193 @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse4.1", "true"}, 194 counts = {IRNode.STORE_VECTOR, ">0", 195 IRNode.LOAD_VECTOR_I, "> 0"}) 196 public int[] multipleStoresWithCommonSubExpression() { 197 int[] res1 = new int[SIZE]; 198 int[] res2 = new int[SIZE]; 199 int[] res3 = new int[SIZE]; 200 for (int i = 0; i < SIZE; i++) { 201 res1[i] = a[i] * b[i]; 202 res2[i] = c[i] * d[i]; 203 res3[i] = res1[i] + res2[i]; 204 } 205 return res3; 206 } 207 208 @Test 209 @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"}, 210 applyIfOr = { "UseCompactObjectHeaders", "false", "AlignVector", "false"}, 211 counts = {IRNode.STORE_VECTOR, ">0", 212 IRNode.LOAD_VECTOR_S, "> 0", 213 IRNode.LOAD_VECTOR_I, "> 0"}) 214 public int[] multipleOpsWith2DifferentTypes() { 215 short[] res1 = new short[SIZE]; 216 int[] res2 = new int[SIZE]; 217 for (int i = 0; i < SIZE; i++) { 218 res1[i] = (short) (s1[i] + s2[i]); 219 res2[i] = a[i] + b[i]; 220 // We have a mix of int and short loads/stores. 221 // With UseCompactObjectHeaders and AlignVector, 222 // we must 8-byte align all vector loads/stores. 223 // 224 // int: 225 // adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 4*iter 226 // = 16 (or 12 if UseCompactObjectHeaders=true) 227 // If UseCompactObjectHeaders=false: iter % 2 = 0 228 // If UseCompactObjectHeaders=true: iter % 2 = 1 229 // 230 // byte: 231 // adr = base + UNSAFE.ARRAY_BYTE_BASE_OFFSET + 1*iter 232 // = 16 (or 12 if UseCompactObjectHeaders=true) 233 // If UseCompactObjectHeaders=false: iter % 8 = 0 234 // If UseCompactObjectHeaders=true: iter % 8 = 4 235 // 236 // -> we cannot align both if UseCompactObjectHeaders=true. 237 } 238 return res2; 239 } 240 241 @Test 242 @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"}, 243 applyIfOr = { "UseCompactObjectHeaders", "false", "AlignVector", "false"}, 244 counts = {IRNode.STORE_VECTOR, ">0", 245 IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_ANY, "> 0", 246 IRNode.LOAD_VECTOR_L, "> 0"}) 247 public long[] multipleOpsWith3DifferentTypes() { 248 short[] res1 = new short[SIZE]; 249 int[] res2 = new int[SIZE]; 250 long[] res3 = new long[SIZE]; 251 for (int i = 0; i < SIZE; i++) { 252 res1[i] = (short) (s1[i] + s2[i]); 253 res2[i] = a[i] + b[i]; 254 res3[i] = l1[i] + l2[i]; 255 // We have a mix of int and short loads/stores. 256 // With UseCompactObjectHeaders and AlignVector, 257 // we must 8-byte align all vector loads/stores. 258 // 259 // int: 260 // adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 4*iter 261 // = 16 (or 12 if UseCompactObjectHeaders=true) 262 // If UseCompactObjectHeaders=false: iter % 2 = 0 263 // If UseCompactObjectHeaders=true: iter % 2 = 1 264 // 265 // byte: 266 // adr = base + UNSAFE.ARRAY_BYTE_BASE_OFFSET + 1*iter 267 // = 16 (or 12 if UseCompactObjectHeaders=true) 268 // If UseCompactObjectHeaders=false: iter % 8 = 0 269 // If UseCompactObjectHeaders=true: iter % 8 = 4 270 // 271 // -> we cannot align both if UseCompactObjectHeaders=true. 272 } 273 return res3; 274 } 275 276 @Test 277 @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"}, 278 counts = {IRNode.STORE_VECTOR, ">0", 279 IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_ANY, "> 0", 280 IRNode.LOAD_VECTOR_L, "> 0"}) 281 public long[] multipleOpsWith2NonAdjacentTypes() { 282 short[] res1 = new short[SIZE]; 283 long[] res2 = new long[SIZE]; 284 for (int i = 0; i < SIZE; i++) { 285 res1[i] = (short) (s1[i] + s2[i]); 286 res2[i] = l1[i] + l2[i]; 287 } 288 return res2; 289 } 290 291 @Test 292 @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"}, 293 applyIfOr = { "UseCompactObjectHeaders", "false", "AlignVector", "false"}, 294 counts = {IRNode.STORE_VECTOR, ">0", 295 IRNode.LOAD_VECTOR_S, "> 0", 296 IRNode.LOAD_VECTOR_I, "> 0"}) 297 public int[] multipleOpsWith2DifferentTypesAndConstant() { 298 short[] res1 = new short[SIZE]; 299 int[] res2 = new int[SIZE]; 300 for (int i = 0; i < SIZE; i++) { 301 res1[i] = (short) (s1[i] + s2[i]); 302 res2[i] = a[i] + 88888888;; 303 // We have a mix of int and short loads/stores. 304 // With UseCompactObjectHeaders and AlignVector, 305 // we must 8-byte align all vector loads/stores. 306 // 307 // int: 308 // adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 4*iter 309 // = 16 (or 12 if UseCompactObjectHeaders=true) 310 // If UseCompactObjectHeaders=false: iter % 2 = 0 311 // If UseCompactObjectHeaders=true: iter % 2 = 1 312 // 313 // byte: 314 // adr = base + UNSAFE.ARRAY_BYTE_BASE_OFFSET + 1*iter 315 // = 16 (or 12 if UseCompactObjectHeaders=true) 316 // If UseCompactObjectHeaders=false: iter % 8 = 0 317 // If UseCompactObjectHeaders=true: iter % 8 = 4 318 // 319 // -> we cannot align both if UseCompactObjectHeaders=true. 320 } 321 return res2; 322 } 323 324 @Test 325 @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse4.1", "true"}, 326 applyIfOr = { "UseCompactObjectHeaders", "false", "AlignVector", "false"}, 327 counts = {IRNode.STORE_VECTOR, ">0", 328 IRNode.LOAD_VECTOR_S, "> 0", 329 IRNode.LOAD_VECTOR_I, "> 0"}) 330 public int[] multipleOpsWith2DifferentTypesAndInvariant() { 331 short[] res1 = new short[SIZE]; 332 int[] res2 = new int[SIZE]; 333 for (int i = 0; i < SIZE; i++) { 334 res1[i] = (short) (s1[i] + s2[i]); 335 res2[i] = a[i] * intInv; 336 // We have a mix of int and short loads/stores. 337 // With UseCompactObjectHeaders and AlignVector, 338 // we must 8-byte align all vector loads/stores. 339 // 340 // int: 341 // adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 4*iter 342 // = 16 (or 12 if UseCompactObjectHeaders=true) 343 // If UseCompactObjectHeaders=false: iter % 2 = 0 344 // If UseCompactObjectHeaders=true: iter % 2 = 1 345 // 346 // byte: 347 // adr = base + UNSAFE.ARRAY_BYTE_BASE_OFFSET + 1*iter 348 // = 16 (or 12 if UseCompactObjectHeaders=true) 349 // If UseCompactObjectHeaders=false: iter % 8 = 0 350 // If UseCompactObjectHeaders=true: iter % 8 = 4 351 // 352 // -> we cannot align both if UseCompactObjectHeaders=true. 353 } 354 return res2; 355 } 356 357 @Test 358 @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse4.1", "true"}, 359 applyIfOr = { "UseCompactObjectHeaders", "false", "AlignVector", "false"}, 360 counts = {IRNode.STORE_VECTOR, ">0", 361 IRNode.LOAD_VECTOR_S, "> 0", 362 IRNode.LOAD_VECTOR_I, "> 0"}) 363 public int[] multipleOpsWith2DifferentTypesAndComplexExpression() { 364 short[] res1 = new short[SIZE]; 365 int[] res2 = new int[SIZE]; 366 for (int i = 0; i < SIZE; i++) { 367 res1[i] = (short) (s1[i] + s2[i]); 368 res2[i] = a[i] * (b[i] + intInv * c[i] & 0xfffffa); 369 // same argument as in multipleOpsWith2DifferentTypesAndInvariant. 370 } 371 return res2; 372 } 373 374 @Test 375 @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse3", "true"}, 376 applyIfOr = { "UseCompactObjectHeaders", "false", "AlignVector", "false"}, 377 counts = {IRNode.STORE_VECTOR, ">0", 378 IRNode.LOAD_VECTOR_S, "> 0", 379 IRNode.LOAD_VECTOR_I, "> 0"}) 380 public int[] multipleOpsWith2DifferentTypesAndSharedOp() { 381 int i = 0, sum = 0; 382 int[] res1 = new int[SIZE]; 383 short[] res2 = new short[SIZE]; 384 while (++i < SIZE) { 385 sum += (res1[i]--); 386 res2[i]++; 387 // We have a mix of int and short loads/stores. 388 // With UseCompactObjectHeaders and AlignVector, 389 // we must 8-byte align all vector loads/stores. 390 // 391 // int: 392 // adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 4*iter 393 // = 16 (or 12 if UseCompactObjectHeaders=true) 394 // If UseCompactObjectHeaders=false: iter % 2 = 0 395 // If UseCompactObjectHeaders=true: iter % 2 = 1 396 // 397 // byte: 398 // adr = base + UNSAFE.ARRAY_BYTE_BASE_OFFSET + 1*iter 399 // = 16 (or 12 if UseCompactObjectHeaders=true) 400 // If UseCompactObjectHeaders=false: iter % 8 = 0 401 // If UseCompactObjectHeaders=true: iter % 8 = 4 402 // 403 // -> we cannot align both if UseCompactObjectHeaders=true. 404 } 405 return res1; 406 } 407 408 @Test 409 // POPULATE_INDEX seems to mess with vectorization, see JDK-8332878. 410 public int[] fillIndexPlusStride() { 411 int[] res = new int[SIZE]; 412 for (int i = 0; i < SIZE; i++) { 413 res[i] = i + 1; 414 } 415 return res; 416 } 417 418 @Test 419 // POPULATE_INDEX seems to mess with vectorization, see JDK-8332878. 420 public int[] addArrayWithIndex() { 421 int[] res = new int[SIZE]; 422 for (int i = 0; i < SIZE; i++) { 423 res[i] = a[i] + i; 424 } 425 return res; 426 } 427 428 @Test 429 // POPULATE_INDEX seems to mess with vectorization, see JDK-8332878. 430 public short[] multiplyAddShortIndex() { 431 short[] res = new short[SIZE]; 432 for (int i = 0; i < SIZE; i++) { 433 res[i] = (short) (i * i + i); 434 } 435 return res; 436 } 437 438 @Test 439 // POPULATE_INDEX seems to mess with vectorization, see JDK-8332878. 440 public int[] multiplyBySumOfIndexAndInvariant() { 441 int[] res = new int[SIZE]; 442 for (int i = 0; i < SIZE; i++) { 443 res[i] = a[i] * (i + 10 + intInv); 444 } 445 return res; 446 } 447 448 @Test 449 @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse4.1", "true"}, 450 applyIfOr = { "UseCompactObjectHeaders", "false", "AlignVector", "false"}, 451 counts = {IRNode.STORE_VECTOR, ">0"}) 452 public int[] manuallyUnrolledStride2() { 453 int[] res = new int[SIZE]; 454 for (int i = 0; i < SIZE - 1; i += 2) { 455 res[i] = a[i] * b[i]; 456 res[i + 1] = a[i + 1] * b[i + 1]; 457 // Hand-unrolling can mess with alignment! 458 // 459 // With UseCompactObjectHeaders and AlignVector, 460 // we must 8-byte align all vector loads/stores. 461 // 462 // adr = base + UNSAFE.ARRAY_INT_BASE_OFFSET + 8*iter 463 // = 16 (or 12 if UseCompactObjectHeaders=true) 464 // If UseCompactObjectHeaders=false: 16 divisible by 8 -> vectorize 465 // If UseCompactObjectHeaders=true: 12 not divisibly by 8 -> not vectorize 466 } 467 return res; 468 } 469 470 @Test 471 @IR(applyIfCPUFeatureOr = {"asimd", "true", "sse4.1", "true"}, 472 counts = {IRNode.STORE_VECTOR, ">0", 473 IRNode.LOAD_VECTOR_I, "> 0"}) 474 public int partialVectorizableLoop() { 475 int[] res = new int[SIZE]; 476 int k = 9; 477 for (int i = 0; i < SIZE / 2; i++) { 478 res[i] = a[i] * b[i]; 479 k = 3 * k + 1; 480 } 481 return k; 482 } 483 }