< prev index next >

test/hotspot/jtreg/compiler/loopopts/superword/TestSplitPacks.java

Print this page

 283             }
 284         }
 285     }
 286 
 287     static void verifyL(String name, int i, long[] g, long[] r) {
 288         for (int j = 0; j < g.length; j++) {
 289             if (g[j] != r[j]) {
 290                 throw new RuntimeException("verify " + name + ": arrays must have same content:" +
 291                                            " gold[" + i + "][" + j + "] = " + g[j] +
 292                                            " result[" + i + "][" + j + "] = " + r[j]);
 293             }
 294         }
 295     }
 296 
 297     @Test
 298     @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
 299                   IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
 300                   IRNode.AND_VI,        IRNode.VECTOR_SIZE_2, "> 0",
 301                   IRNode.AND_VI,        IRNode.VECTOR_SIZE_4, "> 0",
 302                   IRNode.STORE_VECTOR, "> 0"},
 303         applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"},
 304         applyIfPlatform = {"64-bit", "true"},
 305         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 306     @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
 307                   IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
 308                   IRNode.AND_VI,        IRNode.VECTOR_SIZE_2, "> 0",
 309                   IRNode.AND_VI,        IRNode.VECTOR_SIZE_4, "> 0",
 310                   IRNode.STORE_VECTOR, "> 0"},
 311         applyIfAnd = {"MaxVectorSize", ">=32", "UseCompactObjectHeaders", "false"},
 312         applyIfPlatform = {"64-bit", "true"},
 313         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 314     // Load and store are already split
 315     //
 316     //  0 1 - - 4 5 6 7
 317     //  | |     | | | |
 318     //  0 1 - - 4 5 6 7
         //
         // Diagram: top row = indices loaded from a, bottom row = indices stored to b.
         // Per stride of 8 ints, a[i+0..1] and a[i+4..7] are ANDed with mask and stored
         // to the same indices of b; indices i+2 and i+3 are neither read nor written.
         // Both @IR rules expect int load vectors and AndVI vectors of size 2 and size 4.
         // Returns { a, b }.
 319     static Object[] test0(int[] a, int[] b, int mask) {
 320         for (int i = 0; i < RANGE; i+=8) {
 321             int b0 = a[i+0] & mask;
 322             int b1 = a[i+1] & mask;
 323
 324             int b4 = a[i+4] & mask;
 325             int b5 = a[i+5] & mask;
 326             int b6 = a[i+6] & mask;
 327             int b7 = a[i+7] & mask;
 328
 329             b[i+0] = b0;
 330             b[i+1] = b1;
 331
 332             b[i+4] = b4;
 333             b[i+5] = b5;
 334             b[i+6] = b6;
 335             b[i+7] = b7;
 336             // With AlignVector, we need 8-byte alignment of vector loads/stores.
 337             // UseCompactObjectHeaders=false                 UseCompactObjectHeaders=true
 338             // adr = base + 16 + 32*i  ->  always            adr = base + 12 + 32*i  ->  never
 339             // -> vectorize                                  -> no vectorization
 340         }
 341         return new Object[]{ a, b };
 342     }
 343 
 344     @Test
 345     @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
 346                   IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
 347                   IRNode.ADD_VI,        IRNode.VECTOR_SIZE_4, "> 0",
 348                   IRNode.MUL_VI,        IRNode.VECTOR_SIZE_2, "> 0",
 349                   IRNode.STORE_VECTOR, "> 0"},
 350         applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"},
 351         applyIfPlatform = {"64-bit", "true"},
 352         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 353     @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
 354                   IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
 355                   IRNode.ADD_VI,        IRNode.VECTOR_SIZE_4, "> 0",
 356                   IRNode.MUL_VI,        IRNode.VECTOR_SIZE_2, "> 0",
 357                   IRNode.STORE_VECTOR, "> 0"},
 358         applyIfAnd = {"MaxVectorSize", ">=32", "UseCompactObjectHeaders", "false"},
 359         applyIfPlatform = {"64-bit", "true"},
 360         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 361     // Adjacent Load and Store, but split by Add/Mul
         //
         // Per stride of 8 ints, indices i+0..i+5 are loaded and stored contiguously, but
         // the first four lanes use Add and the last two use Mul. The @IR rules expect a
         // 4-element AddVI and a 2-element MulVI. Indices i+6 and i+7 are untouched.
         // Returns { a, b }.
 362     static Object[] test1a(int[] a, int[] b, int mask) {
 363         for (int i = 0; i < RANGE; i+=8) {
 364             b[i+0] = a[i+0] + mask; // Add
 365             b[i+1] = a[i+1] + mask;
 366             b[i+2] = a[i+2] + mask;
 367             b[i+3] = a[i+3] + mask;
 368
 369             b[i+4] = a[i+4] * mask; // Mul
 370             b[i+5] = a[i+5] * mask;
 371             // With AlignVector, we need 8-byte alignment of vector loads/stores.
 372             // UseCompactObjectHeaders=false                 UseCompactObjectHeaders=true
 373             // adr = base + 16 + 32*i  ->  always            adr = base + 12 + 32*i  ->  never
 374             // -> vectorize                                  -> no vectorization
 375         }
 376         return new Object[]{ a, b };
 377     }
 378 
 379     @Test
 380     @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
 381                   IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
 382                   IRNode.ADD_VI,        IRNode.VECTOR_SIZE_2, "> 0",
 383                   IRNode.MUL_VI,        IRNode.VECTOR_SIZE_4, "> 0",
 384                   IRNode.STORE_VECTOR, "> 0"},
 385         applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"},
 386         applyIfPlatform = {"64-bit", "true"},
 387         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 388     @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
 389                   IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
 390                   IRNode.ADD_VI,        IRNode.VECTOR_SIZE_2, "> 0",
 391                   IRNode.MUL_VI,        IRNode.VECTOR_SIZE_4, "> 0",
 392                   IRNode.STORE_VECTOR, "> 0"},
 393         applyIfAnd = {"MaxVectorSize", ">=32", "UseCompactObjectHeaders", "false"},
 394         applyIfPlatform = {"64-bit", "true"},
 395         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 396     // Adjacent Load and Store, but split by Add/Mul
         //
         // Mirror of test1a: per stride of 8 ints, indices i+0..i+5 are contiguous, the
         // first four lanes use Mul and the last two use Add. The @IR rules expect a
         // 4-element MulVI and a 2-element AddVI. Indices i+6 and i+7 are untouched.
         // Returns { a, b }.
 397     static Object[] test1b(int[] a, int[] b, int mask) {
 398         for (int i = 0; i < RANGE; i+=8) {
 399             b[i+0] = a[i+0] * mask; // Mul
 400             b[i+1] = a[i+1] * mask;
 401             b[i+2] = a[i+2] * mask;
 402             b[i+3] = a[i+3] * mask;
 403
 404             b[i+4] = a[i+4] + mask; // Add
 405             b[i+5] = a[i+5] + mask;
 406             // With AlignVector, we need 8-byte alignment of vector loads/stores.
 407             // UseCompactObjectHeaders=false                 UseCompactObjectHeaders=true
 408             // adr = base + 16 + 32*i  ->  always            adr = base + 12 + 32*i  ->  never
 409             // -> vectorize                                  -> no vectorization
 410         }
 411         return new Object[]{ a, b };
 412     }
 413 
 414     @Test
 415     @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
 416                   IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
 417                   IRNode.ADD_VI,        IRNode.VECTOR_SIZE_2, "> 0",
 418                   IRNode.MUL_VI,        IRNode.VECTOR_SIZE_4, "> 0",
 419                   IRNode.STORE_VECTOR, "> 0"},
 420         applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"},
 421         applyIfPlatform = {"64-bit", "true"},
 422         applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
 423     @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
 424                   IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
 425                   IRNode.ADD_VI,        IRNode.VECTOR_SIZE_2, "> 0",
 426                   IRNode.MUL_VI,        IRNode.VECTOR_SIZE_4, "> 0",
 427                   IRNode.STORE_VECTOR, "> 0"},
 428         applyIfAnd = {"MaxVectorSize", ">=32", "UseCompactObjectHeaders", "false"},
 429         applyIfPlatform = {"64-bit", "true"},
 430         applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
 431     // Adjacent Load and Store, but split by Add/Mul
         //
         // Per stride of 8 ints, indices i+0..i+5 are contiguous; the first two lanes use
         // Add and the following four use Mul. The @IR rules expect a 2-element AddVI
         // and a 4-element MulVI. Indices i+6 and i+7 are untouched.
         // Unlike test1a/test1b, the x86 CPU feature required by these rules is avx2.
         // Returns { a, b }.
 432     static Object[] test1c(int[] a, int[] b, int mask) {
 433         for (int i = 0; i < RANGE; i+=8) {
 434             b[i+0] = a[i+0] + mask; // Add
 435             b[i+1] = a[i+1] + mask;
 436
 437             b[i+2] = a[i+2] * mask; // Mul
 438             b[i+3] = a[i+3] * mask;
 439             b[i+4] = a[i+4] * mask;
 440             b[i+5] = a[i+5] * mask;
 441             // With AlignVector, we need 8-byte alignment of vector loads/stores.
 442             // UseCompactObjectHeaders=false                 UseCompactObjectHeaders=true
 443             // adr = base + 16 + 32*i  ->  always            adr = base + 12 + 32*i  ->  never
 444             // -> vectorize                                  -> no vectorization
 445         }
 446         return new Object[]{ a, b };
 447     }
 448 
 449     @Test
 450     @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
 451                   IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
 452                   IRNode.ADD_VI,        IRNode.VECTOR_SIZE_4, "> 0",
 453                   IRNode.MUL_VI,        IRNode.VECTOR_SIZE_2, "> 0",
 454                   IRNode.STORE_VECTOR, "> 0"},
 455         applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"},
 456         applyIfPlatform = {"64-bit", "true"},
 457         applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
 458     @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
 459                   IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
 460                   IRNode.ADD_VI,        IRNode.VECTOR_SIZE_4, "> 0",
 461                   IRNode.MUL_VI,        IRNode.VECTOR_SIZE_2, "> 0",
 462                   IRNode.STORE_VECTOR, "> 0"},
 463         applyIfAnd = {"MaxVectorSize", ">=32", "UseCompactObjectHeaders", "false"},
 464         applyIfPlatform = {"64-bit", "true"},
 465         applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
 466     // Adjacent Load and Store, but split by Add/Mul
         //
         // Mirror of test1c: per stride of 8 ints, indices i+0..i+5 are contiguous; the
         // first two lanes use Mul and the following four use Add. The @IR rules expect
         // a 2-element MulVI and a 4-element AddVI. Indices i+6 and i+7 are untouched.
         // Returns { a, b }.
 467     static Object[] test1d(int[] a, int[] b, int mask) {
 468         for (int i = 0; i < RANGE; i+=8) {
 469             b[i+0] = a[i+0] * mask; // Mul
 470             b[i+1] = a[i+1] * mask;
 471
 472             b[i+2] = a[i+2] + mask; // Add
 473             b[i+3] = a[i+3] + mask;
 474             b[i+4] = a[i+4] + mask;
 475             b[i+5] = a[i+5] + mask;
 476             // With AlignVector, we need 8-byte alignment of vector loads/stores.
 477             // UseCompactObjectHeaders=false                 UseCompactObjectHeaders=true
 478             // adr = base + 16 + 32*i  ->  always            adr = base + 12 + 32*i  ->  never
 479             // -> vectorize                                  -> no vectorization
 480         }
 481         return new Object[]{ a, b };
 482     }
 483 
 484     @Test
 485     @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
 486                   IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
 487                   IRNode.AND_VI,        IRNode.VECTOR_SIZE_2, "> 0",
 488                   IRNode.AND_VI,        IRNode.VECTOR_SIZE_4, "> 0",
 489                   IRNode.STORE_VECTOR, "> 0"},
 490         applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"},
 491         applyIfPlatform = {"64-bit", "true"},
 492         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 493     @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
 494                   IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
 495                   IRNode.AND_VI,        IRNode.VECTOR_SIZE_2, "> 0",
 496                   IRNode.AND_VI,        IRNode.VECTOR_SIZE_4, "> 0",
 497                   IRNode.STORE_VECTOR, "> 0"},
 498         applyIfAnd = {"MaxVectorSize", ">=32", "UseCompactObjectHeaders", "false"},
 499         applyIfPlatform = {"64-bit", "true"},
 500         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 501     // Split the load
 502     //
 503     //  0 1 2 3 4 5 - -
 504     //  | |  \ \ \ \
 505     //  | |   \ \ \ \
 506     //  | |    \ \ \ \
 507     //  0 1 - - 4 5 6 7
 508     //
         // Diagram: top row = indices loaded from a, bottom row = indices stored to b.
         // Six adjacent loads a[i+0..5] are ANDed with mask; the first two results go to
         // b[i+0..1] and the remaining four to b[i+4..7], so b[i+2] and b[i+3] are never
         // written. The @IR rules expect load/AndVI vectors of size 2 and of size 4.
         // Returns { a, b }.
 509     static Object[] test2a(int[] a, int[] b, int mask) {
 510         for (int i = 0; i < RANGE; i+=8) {
 511             int b0 = a[i+0] & mask;
 512             int b1 = a[i+1] & mask;
 513             int b2 = a[i+2] & mask;
 514             int b3 = a[i+3] & mask;
 515             int b4 = a[i+4] & mask;
 516             int b5 = a[i+5] & mask;
 517
 518             b[i+0] = b0;
 519             b[i+1] = b1;
 520
 521             b[i+4] = b2;
 522             b[i+5] = b3;
 523             b[i+6] = b4;
 524             b[i+7] = b5;
 525             // With AlignVector, we need 8-byte alignment of vector loads/stores.
 526             // UseCompactObjectHeaders=false                 UseCompactObjectHeaders=true
 527             // adr = base + 16 + 32*i  ->  always            adr = base + 12 + 32*i  ->  never
 528             // -> vectorize                                  -> no vectorization
 529         }
 530         return new Object[]{ a, b };
 531     }
 532 
 533     @Test
 534     @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
 535                   IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
 536                   IRNode.AND_VI,        IRNode.VECTOR_SIZE_2, "> 0",
 537                   IRNode.AND_VI,        IRNode.VECTOR_SIZE_4, "> 0",
 538                   IRNode.STORE_VECTOR, "> 0"},
 539         applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"},
 540         applyIfPlatform = {"64-bit", "true"},
 541         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 542     @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
 543                   IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
 544                   IRNode.AND_VI,        IRNode.VECTOR_SIZE_2, "> 0",
 545                   IRNode.AND_VI,        IRNode.VECTOR_SIZE_4, "> 0",
 546                   IRNode.STORE_VECTOR, "> 0"},
 547         applyIfAnd = {"MaxVectorSize", ">=32", "UseCompactObjectHeaders", "false"},
 548         applyIfPlatform = {"64-bit", "true"},
 549         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 550     // Split the load
 551     //
 552     //  0 1 2 3 4 5 - -
 553     //  | | | |  \ \
 554     //  | | | |   \ \
 555     //  | | | |    \ \
 556     //  0 1 2 3 - - 6 7
 557     //
         // Diagram: top row = indices loaded from a, bottom row = indices stored to b.
         // Six adjacent loads a[i+0..5] are ANDed with mask; the first four results go to
         // b[i+0..3] and the last two to b[i+6..7], so b[i+4] and b[i+5] are never
         // written. The @IR rules expect load/AndVI vectors of size 2 and of size 4.
         // Returns { a, b }.
 558     static Object[] test2b(int[] a, int[] b, int mask) {
 559         for (int i = 0; i < RANGE; i+=8) {
 560             int b0 = a[i+0] & mask;
 561             int b1 = a[i+1] & mask;
 562             int b2 = a[i+2] & mask;
 563             int b3 = a[i+3] & mask;
 564             int b4 = a[i+4] & mask;
 565             int b5 = a[i+5] & mask;
 566
 567             b[i+0] = b0;
 568             b[i+1] = b1;
 569             b[i+2] = b2;
 570             b[i+3] = b3;
 571
 572             b[i+6] = b4;
 573             b[i+7] = b5;
 574             // With AlignVector, we need 8-byte alignment of vector loads/stores.
 575             // UseCompactObjectHeaders=false                 UseCompactObjectHeaders=true
 576             // adr = base + 16 + 32*i  ->  always            adr = base + 12 + 32*i  ->  never
 577             // -> vectorize                                  -> no vectorization
 578         }
 579         return new Object[]{ a, b };
 580     }
 581 
 582     @Test
 583     @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
 584                   IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
 585                   IRNode.AND_VI,        IRNode.VECTOR_SIZE_2, "> 0",
 586                   IRNode.AND_VI,        IRNode.VECTOR_SIZE_4, "> 0",
 587                   IRNode.STORE_VECTOR, "> 0"},
 588         applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"},
 589         applyIfPlatform = {"64-bit", "true"},
 590         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 591     @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
 592                   IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
 593                   IRNode.AND_VI,        IRNode.VECTOR_SIZE_2, "> 0",
 594                   IRNode.AND_VI,        IRNode.VECTOR_SIZE_4, "> 0",
 595                   IRNode.STORE_VECTOR, "> 0"},
 596         applyIfAnd = {"MaxVectorSize", ">=32", "UseCompactObjectHeaders", "false"},
 597         applyIfPlatform = {"64-bit", "true"},
 598         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 599     // Split the store (the load is already split, the stores are adjacent)
 600     //
 601     //  0 1 - - 4 5 6 7
 602     //  | |    / / / /
 603     //  | |   / / / /
 604     //  | |  / / / /
 605     //  0 1 2 3 4 5 - -
 606     //
         // Diagram: top row = indices loaded from a, bottom row = indices stored to b.
         // a[i+0..1] and a[i+4..7] are ANDed with mask (a[i+2], a[i+3] are not read) and
         // the six results are stored contiguously to b[i+0..5]; b[i+6] and b[i+7] are
         // never written. The @IR rules expect load/AndVI vectors of size 2 and size 4.
         // Returns { a, b }.
 607     static Object[] test2c(int[] a, int[] b, int mask) {
 608         for (int i = 0; i < RANGE; i+=8) {
 609             int b0 = a[i+0] & mask;
 610             int b1 = a[i+1] & mask;
 611
 612             int b4 = a[i+4] & mask;
 613             int b5 = a[i+5] & mask;
 614             int b6 = a[i+6] & mask;
 615             int b7 = a[i+7] & mask;
 616
 617             b[i+0] = b0;
 618             b[i+1] = b1;
 619             b[i+2] = b4;
 620             b[i+3] = b5;
 621             b[i+4] = b6;
 622             b[i+5] = b7;
 623             // With AlignVector, we need 8-byte alignment of vector loads/stores.
 624             // UseCompactObjectHeaders=false                 UseCompactObjectHeaders=true
 625             // adr = base + 16 + 32*i  ->  always            adr = base + 12 + 32*i  ->  never
 626             // -> vectorize                                  -> no vectorization
 627         }
 628         return new Object[]{ a, b };
 629     }
 630 
 631     @Test
 632     @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
 633                   IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
 634                   IRNode.AND_VI,        IRNode.VECTOR_SIZE_2, "> 0",
 635                   IRNode.AND_VI,        IRNode.VECTOR_SIZE_4, "> 0",
 636                   IRNode.STORE_VECTOR, "> 0"},
 637         applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"},
 638         applyIfPlatform = {"64-bit", "true"},
 639         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 640     @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
 641                   IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
 642                   IRNode.AND_VI,        IRNode.VECTOR_SIZE_2, "> 0",
 643                   IRNode.AND_VI,        IRNode.VECTOR_SIZE_4, "> 0",
 644                   IRNode.STORE_VECTOR, "> 0"},
 645         applyIfAnd = {"MaxVectorSize", ">=32", "UseCompactObjectHeaders", "false"},
 646         applyIfPlatform = {"64-bit", "true"},
 647         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 648     // Split the store (the load is already split, the stores are adjacent)
 649     //
 650     //  0 1 2 3 - - 6 7
 651     //  | | | |    / /
 652     //  | | | |   / /
 653     //  | | | |  / /
 654     //  0 1 2 3 4 5 - -
 655     //
         // Diagram: top row = indices loaded from a, bottom row = indices stored to b.
         // a[i+0..3] and a[i+6..7] are ANDed with mask (a[i+4], a[i+5] are not read) and
         // the six results are stored contiguously to b[i+0..5]; b[i+6] and b[i+7] are
         // never written. The @IR rules expect load/AndVI vectors of size 2 and size 4.
         // Returns { a, b }.
 656     static Object[] test2d(int[] a, int[] b, int mask) {
 657         for (int i = 0; i < RANGE; i+=8) {
 658             int b0 = a[i+0] & mask;
 659             int b1 = a[i+1] & mask;
 660             int b2 = a[i+2] & mask;
 661             int b3 = a[i+3] & mask;
 662
 663             int b6 = a[i+6] & mask;
 664             int b7 = a[i+7] & mask;
 665
 666             b[i+0] = b0;
 667             b[i+1] = b1;
 668             b[i+2] = b2;
 669             b[i+3] = b3;
 670             b[i+4] = b6;
 671             b[i+5] = b7;
 672             // With AlignVector, we need 8-byte alignment of vector loads/stores.
 673             // UseCompactObjectHeaders=false                 UseCompactObjectHeaders=true
 674             // adr = base + 16 + 32*i  ->  always            adr = base + 12 + 32*i  ->  never
 675             // -> vectorize                                  -> no vectorization
 676         }
 677         return new Object[]{ a, b };
 678     }
 679 
 680     @Test
 681     @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
 682                   IRNode.STORE_VECTOR, "> 0"},
 683         applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"},
 684         applyIfPlatform = {"64-bit", "true"},
 685         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 686     @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
 687                   IRNode.STORE_VECTOR, "> 0"},
 688         applyIfAnd = {"MaxVectorSize", ">=32", "UseCompactObjectHeaders", "false"},
 689         applyIfPlatform = {"64-bit", "true"},
 690         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 691     // 0 1 2 3 4 5 6 7 -
 692     // | | | | | | | |
 693     // | + + + | | | |
 694     // |       | | | |
 695     // |     v | | | | v
 696     // |     | | | | | |
 697     // 1 - - 3 4 5 6 7 8
         //
         // Per stride of 16 shorts: a[i+0..7] are loaded; a0 is copied to b[i+0],
         // a1 + a2 + a3 are accumulated into the scalar sum, a4..a7 are copied to
         // b[i+4..7], and val is stored to b[i+3] and b[i+8] on either side of that copy.
         // The @IR rules expect a 4-element short load vector and at least one store vector.
         // Returns { a, b, { sum } }.
 698     static Object[] test3a(short[] a, short[] b, short val) {
 699         int sum = 0;
 700         for (int i = 0; i < RANGE; i+=16) {
 701             short a0 = a[i+0]; // required for alignment / offsets, technical limitation.
 702
 703             short a1 = a[i+1]; // adjacent to 4-pack, but need to be split off
 704             short a2 = a[i+2];
 705             short a3 = a[i+3];
 706
 707             short a4 = a[i+4]; // 4-pack
 708             short a5 = a[i+5];
 709             short a6 = a[i+6];
 710             short a7 = a[i+7];
 711
 712
 713             b[i+0] = a0; // required for alignment / offsets, technical limitation.
 714
 715             sum += a1 + a2 + a3; // not packed
 716
 717             b[i+3] = val; // adjacent to 4-pack but needs to be split off
 718
 719             b[i+4] = a4; // 4-pack
 720             b[i+5] = a5;
 721             b[i+6] = a6;
 722             b[i+7] = a7;
 723
 724             b[i+8] = val; // adjacent to 4-pack but needs to be split off
 725
 726             // With AlignVector, we need 8-byte alignment of vector loads/stores.
 727             // UseCompactObjectHeaders=false                 UseCompactObjectHeaders=true
 728             // adr = base + 16 + 8 + 32*i  ->  always        adr = base + 12 + 8 + 32*i  ->  never
 729             // -> vectorize                                  -> no vectorization
 730         }
 731         return new Object[]{ a, b, new int[]{ sum } };
 732     }
 733 
 734     @Test
 735     @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_2, "> 0",
 736                   IRNode.STORE_VECTOR, "> 0",
 737                   ".*multiversion.*", "= 0"},
 738         phase = CompilePhase.PRINT_IDEAL,
 739         applyIf = {"UseAutoVectorizationSpeculativeAliasingChecks", "false"},
 740         applyIfPlatform = {"64-bit", "true"},
 741         applyIfCPUFeatureOr = {"sse4.1", "true"})
 742     // Cyclic dependency with distance 2 -> split into 2-packs
 743     @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
 744                   IRNode.STORE_VECTOR, "> 0",
 745                   ".*multiversion.*", "= 0"},
 746         phase = CompilePhase.PRINT_IDEAL,
 747         applyIfAnd = {"UseAutoVectorizationSpeculativeAliasingChecks", "true", "AlignVector", "false"},
 748         applyIfPlatform = {"64-bit", "true"},
 749         applyIfCPUFeatureOr = {"sse4.1", "true"})

1109 
1110             b[i+ 8] = (short)(a[i+ 8] + val); // 4-pack
1111             b[i+ 9] = (short)(a[i+ 9] + val);
1112             b[i+10] = (short)(a[i+10] + val);
1113             b[i+11] = (short)(a[i+11] + val);
1114 
1115             b[i+12] = (short)(a[i+12] + val); // 2-pack
1116             b[i+13] = (short)(a[i+13] + val);
1117 
1118             b[i+14] = (short)(a[i+14] + val);
1119         }
1120         return new Object[]{ a, b };
1121     }
1122 
1123     @Test
1124     @IR(counts = {IRNode.LOAD_VECTOR_I,   IRNode.VECTOR_SIZE_4, "> 0",
1125                   IRNode.MUL_VI,          IRNode.VECTOR_SIZE_4, "> 0",
1126                   IRNode.AND_VI,          IRNode.VECTOR_SIZE_4, "> 0",
1127                   IRNode.ADD_VI,          IRNode.VECTOR_SIZE_4, "> 0", // reduction moved out of loop
1128                   IRNode.ADD_REDUCTION_V,                       "> 0"},
1129         applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"},
1130         applyIfPlatform = {"64-bit", "true"},
1131         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
1132     @IR(counts = {IRNode.LOAD_VECTOR_I,   IRNode.VECTOR_SIZE_4, "> 0",
1133                   IRNode.MUL_VI,          IRNode.VECTOR_SIZE_4, "> 0",
1134                   IRNode.AND_VI,          IRNode.VECTOR_SIZE_4, "> 0",
1135                   IRNode.ADD_VI,          IRNode.VECTOR_SIZE_4, "> 0", // reduction moved out of loop
1136                   IRNode.ADD_REDUCTION_V,                       "> 0"},
1137         applyIfAnd = {"MaxVectorSize", ">=32", "UseCompactObjectHeaders", "false"},
1138         applyIfPlatform = {"64-bit", "true"},
1139         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
1140     // Split packs including reductions
         //
         // Per stride of 8 ints, the scalar s accumulates a[k] * b[k] for k = i+0..i+3
         // and a[k] & b[k] for k = i+4..i+7; neither array is written.
         // The @IR rules expect 4-element load, MulVI, AndVI and AddVI vectors plus an
         // add-reduction node.
         // Returns { a, b, { s } }.
1141     static Object[] test6a(int[] a, int[] b) {
1142         int s = 0;
1143         for (int i = 0; i < RANGE; i+=8) {
1144             s += a[i+0] * b[i+0];
1145             s += a[i+1] * b[i+1];
1146             s += a[i+2] * b[i+2];
1147             s += a[i+3] * b[i+3];
1148
1149             s += a[i+4] & b[i+4];
1150             s += a[i+5] & b[i+5];
1151             s += a[i+6] & b[i+6];
1152             s += a[i+7] & b[i+7];
1153             // With AlignVector, we need 8-byte alignment of vector loads/stores.
1154             // UseCompactObjectHeaders=false                 UseCompactObjectHeaders=true
1155             // adr = base + 16 + 32*i  ->  always            adr = base + 12 + 32*i  ->  never
1156             // -> vectorize                                  -> no vectorization
1157         }
1158         return new Object[]{ a, b, new int[]{ s } };
1159     }
1160 
1161     @Test
1162     @IR(counts = {IRNode.LOAD_VECTOR_I,  "> 0",
1163                   IRNode.MUL_VI,         "> 0",
1164                   IRNode.POPULATE_INDEX, "> 0"},
1165         applyIfPlatform = {"64-bit", "true"},
1166         applyIfCPUFeatureOr = {"avx2", "true", "sve", "true", "rvv", "true"})
1167     // Index Populate:
1168     // There can be an issue when all the (iv + 1), (iv + 2), ...
1169     // get packed, but not (iv). Then we have a pack that is one element
1170     // too short, and we start splitting everything in a bad way.
         //
         // The loop multiplies every b[i] by its own index i and stores the product to
         // a[i], for i in [0, RANGE). The @IR rule expects an int load vector, a MulVI
         // and a PopulateIndex node, without constraining vector sizes.
         // Returns { a, b }.
1171     static Object[] test7a(int[] a, int[] b) {
1172         for (int i = 0; i < RANGE; i++) {
1173             a[i] = b[i] * i;
1174         }
1175         return new Object[]{ a, b };
1176     }

 283             }
 284         }
 285     }
 286 
 287     static void verifyL(String name, int i, long[] g, long[] r) {
 288         for (int j = 0; j < g.length; j++) {
 289             if (g[j] != r[j]) {
 290                 throw new RuntimeException("verify " + name + ": arrays must have same content:" +
 291                                            " gold[" + i + "][" + j + "] = " + g[j] +
 292                                            " result[" + i + "][" + j + "] = " + r[j]);
 293             }
 294         }
 295     }
 296 
 297     @Test
 298     @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
 299                   IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
 300                   IRNode.AND_VI,        IRNode.VECTOR_SIZE_2, "> 0",
 301                   IRNode.AND_VI,        IRNode.VECTOR_SIZE_4, "> 0",
 302                   IRNode.STORE_VECTOR, "> 0"},
 303         applyIf = {"MaxVectorSize", ">=32"},








 304         applyIfPlatform = {"64-bit", "true"},
 305         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 306     // Load and store are already split
 307     //
 308     //  0 1 - - 4 5 6 7
 309     //  | |     | | | |
 310     //  0 1 - - 4 5 6 7
         //
         // Diagram: top row = indices loaded from a, bottom row = indices stored to b.
         // Per stride of 8 ints, a[i+0..1] and a[i+4..7] are ANDed with mask and stored
         // to the same indices of b; indices i+2 and i+3 are neither read nor written.
         // The @IR rule expects int load vectors and AndVI vectors of size 2 and size 4.
         // Returns { a, b }.
 311     static Object[] test0(int[] a, int[] b, int mask) {
 312         for (int i = 0; i < RANGE; i+=8) {
 313             int b0 = a[i+0] & mask;
 314             int b1 = a[i+1] & mask;
 315
 316             int b4 = a[i+4] & mask;
 317             int b5 = a[i+5] & mask;
 318             int b6 = a[i+6] & mask;
 319             int b7 = a[i+7] & mask;
 320
 321             b[i+0] = b0;
 322             b[i+1] = b1;
 323
 324             b[i+4] = b4;
 325             b[i+5] = b5;
 326             b[i+6] = b6;
 327             b[i+7] = b7;




 328         }
 329         return new Object[]{ a, b };
 330     }
 331 
 332     @Test
 333     @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
 334                   IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
 335                   IRNode.ADD_VI,        IRNode.VECTOR_SIZE_4, "> 0",
 336                   IRNode.MUL_VI,        IRNode.VECTOR_SIZE_2, "> 0",
 337                   IRNode.STORE_VECTOR, "> 0"},
 338         applyIf = {"MaxVectorSize", ">=32"},








 339         applyIfPlatform = {"64-bit", "true"},
 340         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 341     // Adjacent Load and Store, but split by Add/Mul
         //
         // Per stride of 8 ints, indices i+0..i+5 are loaded and stored contiguously, but
         // the first four lanes use Add and the last two use Mul. The @IR rule expects a
         // 4-element AddVI and a 2-element MulVI. Indices i+6 and i+7 are untouched.
         // Returns { a, b }.
 342     static Object[] test1a(int[] a, int[] b, int mask) {
 343         for (int i = 0; i < RANGE; i+=8) {
 344             b[i+0] = a[i+0] + mask; // Add
 345             b[i+1] = a[i+1] + mask;
 346             b[i+2] = a[i+2] + mask;
 347             b[i+3] = a[i+3] + mask;
 348
 349             b[i+4] = a[i+4] * mask; // Mul
 350             b[i+5] = a[i+5] * mask;




 351         }
 352         return new Object[]{ a, b };
 353     }
 354 
 355     @Test
 356     @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
 357                   IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
 358                   IRNode.ADD_VI,        IRNode.VECTOR_SIZE_2, "> 0",
 359                   IRNode.MUL_VI,        IRNode.VECTOR_SIZE_4, "> 0",
 360                   IRNode.STORE_VECTOR, "> 0"},
 361         applyIf = {"MaxVectorSize", ">=32"},








 362         applyIfPlatform = {"64-bit", "true"},
 363         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 364     // Adjacent Load and Store, but split by Add/Mul
         //
         // Mirror of test1a: per stride of 8 ints, indices i+0..i+5 are contiguous, the
         // first four lanes use Mul and the last two use Add. The @IR rule expects a
         // 4-element MulVI and a 2-element AddVI. Indices i+6 and i+7 are untouched.
         // Returns { a, b }.
 365     static Object[] test1b(int[] a, int[] b, int mask) {
 366         for (int i = 0; i < RANGE; i+=8) {
 367             b[i+0] = a[i+0] * mask; // Mul
 368             b[i+1] = a[i+1] * mask;
 369             b[i+2] = a[i+2] * mask;
 370             b[i+3] = a[i+3] * mask;
 371
 372             b[i+4] = a[i+4] + mask; // Add
 373             b[i+5] = a[i+5] + mask;




 374         }
 375         return new Object[]{ a, b };
 376     }
 377 
 378     @Test
 379     @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
 380                   IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
 381                   IRNode.ADD_VI,        IRNode.VECTOR_SIZE_2, "> 0",
 382                   IRNode.MUL_VI,        IRNode.VECTOR_SIZE_4, "> 0",
 383                   IRNode.STORE_VECTOR, "> 0"},
 384         applyIf = {"MaxVectorSize", ">=32"},








 385         applyIfPlatform = {"64-bit", "true"},
 386         applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
 387     // Adjacent Load and Store, but split by Add/Mul
         //
         // Per stride of 8 ints, indices i+0..i+5 are contiguous; the first two lanes use
         // Add and the following four use Mul. The @IR rule expects a 2-element AddVI
         // and a 4-element MulVI. Indices i+6 and i+7 are untouched.
         // Unlike test1a/test1b, the x86 CPU feature required by this rule is avx2.
         // Returns { a, b }.
 388     static Object[] test1c(int[] a, int[] b, int mask) {
 389         for (int i = 0; i < RANGE; i+=8) {
 390             b[i+0] = a[i+0] + mask; // Add
 391             b[i+1] = a[i+1] + mask;
 392
 393             b[i+2] = a[i+2] * mask; // Mul
 394             b[i+3] = a[i+3] * mask;
 395             b[i+4] = a[i+4] * mask;
 396             b[i+5] = a[i+5] * mask;




 397         }
 398         return new Object[]{ a, b };
 399     }
 400 
 401     @Test
 402     @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
 403                   IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
 404                   IRNode.ADD_VI,        IRNode.VECTOR_SIZE_4, "> 0",
 405                   IRNode.MUL_VI,        IRNode.VECTOR_SIZE_2, "> 0",
 406                   IRNode.STORE_VECTOR, "> 0"},
 407         applyIf = {"MaxVectorSize", ">=32"},








 408         applyIfPlatform = {"64-bit", "true"},
 409         applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
 410     // Adjacent Load and Store, but split by Add/Mul
         //
         // Per iteration (stride 8) the six adjacent elements a[i+0..i+5] are read and
         // written to the same offsets of b: the first two go through a multiply, the
         // following four through an add. Offsets i+6 and i+7 are not touched.
         // The IR rule above asks for a size-2 MUL_VI, a size-4 ADD_VI and int vector
         // loads of both sizes.
         //
         // Returns { a, b }, presumably so the caller can compare them with a reference run.
 411     static Object[] test1d(int[] a, int[] b, int mask) {
 412         for (int i = 0; i < RANGE; i+=8) {
 413             b[i+0] = a[i+0] * mask; // Mul
 414             b[i+1] = a[i+1] * mask;
 415 
 416             b[i+2] = a[i+2] + mask; // Add
 417             b[i+3] = a[i+3] + mask;
 418             b[i+4] = a[i+4] + mask;
 419             b[i+5] = a[i+5] + mask;




 420         }
 421         return new Object[]{ a, b };
 422     }
 423 
 424     @Test
 425     @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
 426                   IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
 427                   IRNode.AND_VI,        IRNode.VECTOR_SIZE_2, "> 0",
 428                   IRNode.AND_VI,        IRNode.VECTOR_SIZE_4, "> 0",
 429                   IRNode.STORE_VECTOR, "> 0"},
 430         applyIf = {"MaxVectorSize", ">=32"},








 431         applyIfPlatform = {"64-bit", "true"},
 432         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 433     // Split the load
 434     //
 435     //  0 1 2 3 4 5 - -
 436     //  | |  \ \ \ \
 437     //  | |   \ \ \ \
 438     //  | |    \ \ \ \
 439     //  0 1 - - 4 5 6 7
 440     //
         // Top row: offsets loaded from a; bottom row: offsets stored to b.
         // The six loads a[i+0..i+5] are adjacent, but their (masked) values are stored
         // as a group of two (b[i+0..i+1]) and a group of four (b[i+4..i+7]), leaving
         // b[i+2] and b[i+3] unwritten. The IR rule above asks for size-2 and size-4
         // vectors for both the int loads and the AND.
         //
         // Returns { a, b }, presumably so the caller can compare them with a reference run.
 441     static Object[] test2a(int[] a, int[] b, int mask) {
 442         for (int i = 0; i < RANGE; i+=8) {
 443             int b0 = a[i+0] & mask;
 444             int b1 = a[i+1] & mask;
 445             int b2 = a[i+2] & mask;
 446             int b3 = a[i+3] & mask;
 447             int b4 = a[i+4] & mask;
 448             int b5 = a[i+5] & mask;
 449 
 450             b[i+0] = b0;
 451             b[i+1] = b1;
 452 
                 // Values loaded from offsets 2..5 land two slots higher, at offsets 4..7.
 453             b[i+4] = b2;
 454             b[i+5] = b3;
 455             b[i+6] = b4;
 456             b[i+7] = b5;




 457         }
 458         return new Object[]{ a, b };
 459     }
 460 
 461     @Test
 462     @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
 463                   IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
 464                   IRNode.AND_VI,        IRNode.VECTOR_SIZE_2, "> 0",
 465                   IRNode.AND_VI,        IRNode.VECTOR_SIZE_4, "> 0",
 466                   IRNode.STORE_VECTOR, "> 0"},
 467         applyIf = {"MaxVectorSize", ">=32"},








 468         applyIfPlatform = {"64-bit", "true"},
 469         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 470     // Split the load
 471     //
 472     //  0 1 2 3 4 5 - -
 473     //  | | | |  \ \
 474     //  | | | |   \ \
 475     //  | | | |    \ \
 476     //  0 1 2 3 - - 6 7
 477     //
         // Top row: offsets loaded from a; bottom row: offsets stored to b.
         // The six loads a[i+0..i+5] are adjacent, but their (masked) values are stored
         // as a group of four (b[i+0..i+3]) and a group of two (b[i+6..i+7]), leaving
         // b[i+4] and b[i+5] unwritten. The IR rule above asks for size-2 and size-4
         // vectors for both the int loads and the AND.
         //
         // Returns { a, b }, presumably so the caller can compare them with a reference run.
 478     static Object[] test2b(int[] a, int[] b, int mask) {
 479         for (int i = 0; i < RANGE; i+=8) {
 480             int b0 = a[i+0] & mask;
 481             int b1 = a[i+1] & mask;
 482             int b2 = a[i+2] & mask;
 483             int b3 = a[i+3] & mask;
 484             int b4 = a[i+4] & mask;
 485             int b5 = a[i+5] & mask;
 486 
 487             b[i+0] = b0;
 488             b[i+1] = b1;
 489             b[i+2] = b2;
 490             b[i+3] = b3;
 491 
                 // Values loaded from offsets 4..5 land two slots higher, at offsets 6..7.
 492             b[i+6] = b4;
 493             b[i+7] = b5;




 494         }
 495         return new Object[]{ a, b };
 496     }
 497 
 498     @Test
 499     @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
 500                   IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
 501                   IRNode.AND_VI,        IRNode.VECTOR_SIZE_2, "> 0",
 502                   IRNode.AND_VI,        IRNode.VECTOR_SIZE_4, "> 0",
 503                   IRNode.STORE_VECTOR, "> 0"},
 504         applyIf = {"MaxVectorSize", ">=32"},








 505         applyIfPlatform = {"64-bit", "true"},
 506         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 507     // Split the store
 508     //
 509     //  0 1 - - 4 5 6 7
 510     //  | |    / / / /
 511     //  | |   / / / /
 512     //  | |  / / / /
 513     //  0 1 2 3 4 5 - -
 514     //
         // Top row: offsets loaded from a; bottom row: offsets stored to b.
         // Here the loads already come in two groups (a[i+0..i+1] and a[i+4..i+7], with
         // a[i+2] and a[i+3] never read), while the six stores b[i+0..i+5] are adjacent.
         // The IR rule above asks for size-2 and size-4 vectors for both the int loads
         // and the AND.
         //
         // Returns { a, b }, presumably so the caller can compare them with a reference run.
 515     static Object[] test2c(int[] a, int[] b, int mask) {
 516         for (int i = 0; i < RANGE; i+=8) {
 517             int b0 = a[i+0] & mask;
 518             int b1 = a[i+1] & mask;
 519 
 520             int b4 = a[i+4] & mask;
 521             int b5 = a[i+5] & mask;
 522             int b6 = a[i+6] & mask;
 523             int b7 = a[i+7] & mask;
 524 
 525             b[i+0] = b0;
 526             b[i+1] = b1;
                 // Values loaded from offsets 4..7 land two slots lower, at offsets 2..5.
 527             b[i+2] = b4;
 528             b[i+3] = b5;
 529             b[i+4] = b6;
 530             b[i+5] = b7;




 531         }
 532         return new Object[]{ a, b };
 533     }
 534 
 535     @Test
 536     @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
 537                   IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
 538                   IRNode.AND_VI,        IRNode.VECTOR_SIZE_2, "> 0",
 539                   IRNode.AND_VI,        IRNode.VECTOR_SIZE_4, "> 0",
 540                   IRNode.STORE_VECTOR, "> 0"},
 541         applyIf = {"MaxVectorSize", ">=32"},








 542         applyIfPlatform = {"64-bit", "true"},
 543         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 544     // Split the store
 545     //
 546     //  0 1 2 3 - - 6 7
 547     //  | | | |    / /
 548     //  | | | |   / /
 549     //  | | | |  / /
 550     //  0 1 2 3 4 5 - -
 551     //
         // Top row: offsets loaded from a; bottom row: offsets stored to b.
         // Here the loads already come in two groups (a[i+0..i+3] and a[i+6..i+7], with
         // a[i+4] and a[i+5] never read), while the six stores b[i+0..i+5] are adjacent.
         // The IR rule above asks for size-2 and size-4 vectors for both the int loads
         // and the AND.
         //
         // Returns { a, b }, presumably so the caller can compare them with a reference run.
 552     static Object[] test2d(int[] a, int[] b, int mask) {
 553         for (int i = 0; i < RANGE; i+=8) {
 554             int b0 = a[i+0] & mask;
 555             int b1 = a[i+1] & mask;
 556             int b2 = a[i+2] & mask;
 557             int b3 = a[i+3] & mask;
 558 
 559             int b6 = a[i+6] & mask;
 560             int b7 = a[i+7] & mask;
 561 
 562             b[i+0] = b0;
 563             b[i+1] = b1;
 564             b[i+2] = b2;
 565             b[i+3] = b3;
                 // Values loaded from offsets 6..7 land two slots lower, at offsets 4..5.
 566             b[i+4] = b6;
 567             b[i+5] = b7;




 568         }
 569         return new Object[]{ a, b };
 570     }
 571 
 572     @Test
 573     @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
 574                   IRNode.STORE_VECTOR, "> 0"},
 575         applyIf = {"MaxVectorSize", ">=32"},





 576         applyIfPlatform = {"64-bit", "true"},
 577         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
         // A 4-pack of short loads/stores with extra adjacent accesses on both sides
         // that must be split off.
         //
         // Top row: offsets loaded from a; bottom row: offsets stored to b.
         // '+' marks loaded values that are only added into sum; 'v' marks stores of val.
 578     // 0 1 2 3 4 5 6 7 -
 579     // | | | | | | | |
 580     // | + + + | | | |
 581     // |       | | | |
 582     // |     v | | | | v
 583     // |     | | | | | |
 584     // 0 - - 3 4 5 6 7 8
         //
         // The loop advances by 16 shorts per iteration and only touches offsets 0..8.
         // The IR rule above asks for a size-4 short vector load and a vector store.
         //
         // Returns { a, b, { sum } }; sum accumulates a[i+1] + a[i+2] + a[i+3] over all
         // iterations.
 585     static Object[] test3a(short[] a, short[] b, short val) {
 586         int sum = 0;
 587         for (int i = 0; i < RANGE; i+=16) {
 588             short a0 = a[i+0]; // required for alignment / offsets, technical limitation.
 589 
 590             short a1 = a[i+1]; // adjacent to 4-pack, but need to be split off
 591             short a2 = a[i+2];
 592             short a3 = a[i+3];
 593 
 594             short a4 = a[i+4]; // 4-pack
 595             short a5 = a[i+5];
 596             short a6 = a[i+6];
 597             short a7 = a[i+7];
 598 
 599 
 600             b[i+0] = a0; // required for alignment / offsets, technical limitation.
 601 
 602             sum += a1 + a2 + a3; // not packed
 603 
 604             b[i+3] = val; // adjacent to 4-pack but needs to be split off
 605 
 606             b[i+4] = a4; // 4-pack
 607             b[i+5] = a5;
 608             b[i+6] = a6;
 609             b[i+7] = a7;
 610 
 611             b[i+8] = val; // adjacent to 4-pack but needs to be split off





 612         }
 613         return new Object[]{ a, b, new int[]{ sum } };
 614     }
 615 
 616     @Test
 617     @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_2, "> 0",
 618                   IRNode.STORE_VECTOR, "> 0",
 619                   ".*multiversion.*", "= 0"},
 620         phase = CompilePhase.PRINT_IDEAL,
 621         applyIf = {"UseAutoVectorizationSpeculativeAliasingChecks", "false"},
 622         applyIfPlatform = {"64-bit", "true"},
 623         applyIfCPUFeatureOr = {"sse4.1", "true"})
 624     // Cyclic dependency with distance 2 -> split into 2-packs
 625     @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
 626                   IRNode.STORE_VECTOR, "> 0",
 627                   ".*multiversion.*", "= 0"},
 628         phase = CompilePhase.PRINT_IDEAL,
 629         applyIfAnd = {"UseAutoVectorizationSpeculativeAliasingChecks", "true", "AlignVector", "false"},
 630         applyIfPlatform = {"64-bit", "true"},
 631         applyIfCPUFeatureOr = {"sse4.1", "true"})

 991 
 992             b[i+ 8] = (short)(a[i+ 8] + val); // 4-pack
 993             b[i+ 9] = (short)(a[i+ 9] + val);
 994             b[i+10] = (short)(a[i+10] + val);
 995             b[i+11] = (short)(a[i+11] + val);
 996 
 997             b[i+12] = (short)(a[i+12] + val); // 2-pack
 998             b[i+13] = (short)(a[i+13] + val);
 999 
1000             b[i+14] = (short)(a[i+14] + val);
1001         }
1002         return new Object[]{ a, b };
1003     }
1004 
1005     @Test
1006     @IR(counts = {IRNode.LOAD_VECTOR_I,   IRNode.VECTOR_SIZE_4, "> 0",
1007                   IRNode.MUL_VI,          IRNode.VECTOR_SIZE_4, "> 0",
1008                   IRNode.AND_VI,          IRNode.VECTOR_SIZE_4, "> 0",
1009                   IRNode.ADD_VI,          IRNode.VECTOR_SIZE_4, "> 0", // reduction moved out of loop
1010                   IRNode.ADD_REDUCTION_V,                       "> 0"},
1011         applyIf = {"MaxVectorSize", ">=32"},








1012         applyIfPlatform = {"64-bit", "true"},
1013         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
1014     // Split packs including reductions
         //
         // Per iteration (stride 8) eight terms are added into the single accumulator s:
         // a[k] * b[k] for offsets 0..3 and a[k] & b[k] for offsets 4..7. The IR rule
         // above asks for size-4 int vector loads, size-4 MUL_VI / AND_VI / ADD_VI, and
         // an add-reduction node.
         //
         // Returns { a, b, { s } } where s is the final accumulator value.
1015     static Object[] test6a(int[] a, int[] b) {
1016         int s = 0;
1017         for (int i = 0; i < RANGE; i+=8) {
                 // First half of the stride: products.
1018             s += a[i+0] * b[i+0];
1019             s += a[i+1] * b[i+1];
1020             s += a[i+2] * b[i+2];
1021             s += a[i+3] * b[i+3];
1022 
                 // Second half of the stride: bitwise ANDs.
1023             s += a[i+4] & b[i+4];
1024             s += a[i+5] & b[i+5];
1025             s += a[i+6] & b[i+6];
1026             s += a[i+7] & b[i+7];




1027         }
1028         return new Object[]{ a, b, new int[]{ s } };
1029     }
1030 
1031     @Test
1032     @IR(counts = {IRNode.LOAD_VECTOR_I,  "> 0",
1033                   IRNode.MUL_VI,         "> 0",
1034                   IRNode.POPULATE_INDEX, "> 0"},
1035         applyIfPlatform = {"64-bit", "true"},
1036         applyIfCPUFeatureOr = {"avx2", "true", "sve", "true", "rvv", "true"})
1037     // Index Populate:
1038     // There can be an issue when all the (iv + 1), (iv + 2), ...
1039     // get packed, but not (iv). Then we have a pack that is one element
1040     // too short, and we start splitting everything in a bad way.
         //
         // The loop multiplies each b[i] by its own index i and stores the result in
         // a[i]. The IR rule above asks for an int vector load, a MUL_VI and a
         // POPULATE_INDEX node.
         //
         // Returns { a, b }, presumably so the caller can compare them with a reference run.
1041     static Object[] test7a(int[] a, int[] b) {
1042         for (int i = 0; i < RANGE; i++) {
1043             a[i] = b[i] * i;
1044         }
1045         return new Object[]{ a, b };
1046     }
< prev index next >