< prev index next >

test/hotspot/jtreg/compiler/loopopts/superword/TestSplitPacks.java

Print this page

 284             }
 285         }
 286     }
 287 
 288     static void verifyL(String name, int i, long[] g, long[] r) {
             // Element-wise comparison of the gold array g against the result array r.
             // Throws a RuntimeException at the first index j where they differ; name and i
             // are only used to build the message.
             // The loop is bounded by g.length only, so r is indexed up to g.length - 1.
 289         for (int j = 0; j < g.length; j++) {
 290             if (g[j] != r[j]) {
 291                 throw new RuntimeException("verify " + name + ": arrays must have same content:" +
 292                                            " gold[" + i + "][" + j + "] = " + g[j] +
 293                                            " result[" + i + "][" + j + "] = " + r[j]);
 294             }
 295         }
 296     }
 297 
 298     @Test
 299     @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
 300                   IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
 301                   IRNode.AND_VI,        IRNode.VECTOR_SIZE_2, "> 0",
 302                   IRNode.AND_VI,        IRNode.VECTOR_SIZE_4, "> 0",
 303                   IRNode.STORE_VECTOR, "> 0"},
 304         applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"},
 305         applyIfPlatform = {"64-bit", "true"},
 306         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 307     @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
 308                   IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
 309                   IRNode.AND_VI,        IRNode.VECTOR_SIZE_2, "> 0",
 310                   IRNode.AND_VI,        IRNode.VECTOR_SIZE_4, "> 0",
 311                   IRNode.STORE_VECTOR, "> 0"},
 312         applyIfAnd = {"MaxVectorSize", ">=32", "UseCompactObjectHeaders", "false"},
 313         applyIfPlatform = {"64-bit", "true"},
 314         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 315     // Load and store are already split
 316     //
 317     //  0 1 - - 4 5 6 7
 318     //  | |     | | | |
 319     //  0 1 - - 4 5 6 7
         //
         // Top row: indices read from a, bottom row: indices written to b, per group of
         // 8 ints; "-" marks a position that is not accessed. Positions 2 and 3 are
         // skipped on both sides, leaving a run of 2 and a run of 4 adjacent accesses.
         // Both @IR rules check the same counts; one is applied when AlignVector is false,
         // the other when UseCompactObjectHeaders is false (each also needs MaxVectorSize >= 32).
 320     static Object[] test0(int[] a, int[] b, int mask) {
 321         for (int i = 0; i < RANGE; i+=8) {
 322             int b0 = a[i+0] & mask;
 323             int b1 = a[i+1] & mask;
 324 
 325             int b4 = a[i+4] & mask;
 326             int b5 = a[i+5] & mask;
 327             int b6 = a[i+6] & mask;
 328             int b7 = a[i+7] & mask;
 329 
 330             b[i+0] = b0;
 331             b[i+1] = b1;
 332 
 333             b[i+4] = b4;
 334             b[i+5] = b5;
 335             b[i+6] = b6;
 336             b[i+7] = b7;
 337             // With AlignVector, we need 8-byte alignment of vector loads/stores.
 338             // UseCompactObjectHeaders=false                 UseCompactObjectHeaders=true
 339             // adr = base + 16 + 32*i  ->  always            adr = base + 12 + 32*i  ->  never
 340             // -> vectorize                                  -> no vectorization
 341         }
             // Both arrays are returned (b has been modified in place).
 342         return new Object[]{ a, b };
 343     }
 344 
 345     @Test
 346     @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
 347                   IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
 348                   IRNode.ADD_VI,        IRNode.VECTOR_SIZE_4, "> 0",
 349                   IRNode.MUL_VI,        IRNode.VECTOR_SIZE_2, "> 0",
 350                   IRNode.STORE_VECTOR, "> 0"},
 351         applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"},
 352         applyIfPlatform = {"64-bit", "true"},
 353         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 354     @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
 355                   IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
 356                   IRNode.ADD_VI,        IRNode.VECTOR_SIZE_4, "> 0",
 357                   IRNode.MUL_VI,        IRNode.VECTOR_SIZE_2, "> 0",
 358                   IRNode.STORE_VECTOR, "> 0"},
 359         applyIfAnd = {"MaxVectorSize", ">=32", "UseCompactObjectHeaders", "false"},
 360         applyIfPlatform = {"64-bit", "true"},
 361         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 362     // Adjacent Load and Store, but split by Add/Mul
         // Positions 0..3 of each group of 8 use Add, positions 4..5 use Mul, and
         // positions 6..7 are not touched. The rules expect ADD_VI with VECTOR_SIZE_4
         // and MUL_VI with VECTOR_SIZE_2.
 363     static Object[] test1a(int[] a, int[] b, int mask) {
 364         for (int i = 0; i < RANGE; i+=8) {
 365             b[i+0] = a[i+0] + mask; // Add
 366             b[i+1] = a[i+1] + mask;
 367             b[i+2] = a[i+2] + mask;
 368             b[i+3] = a[i+3] + mask;
 369 
 370             b[i+4] = a[i+4] * mask; // Mul
 371             b[i+5] = a[i+5] * mask;
 372             // With AlignVector, we need 8-byte alignment of vector loads/stores.
 373             // UseCompactObjectHeaders=false                 UseCompactObjectHeaders=true
 374             // adr = base + 16 + 32*i  ->  always            adr = base + 12 + 32*i  ->  never
 375             // -> vectorize                                  -> no vectorization
 376         }
 377         return new Object[]{ a, b };
 378     }
 379 
 380     @Test
 381     @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
 382                   IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
 383                   IRNode.ADD_VI,        IRNode.VECTOR_SIZE_2, "> 0",
 384                   IRNode.MUL_VI,        IRNode.VECTOR_SIZE_4, "> 0",
 385                   IRNode.STORE_VECTOR, "> 0"},
 386         applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"},
 387         applyIfPlatform = {"64-bit", "true"},
 388         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 389     @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
 390                   IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
 391                   IRNode.ADD_VI,        IRNode.VECTOR_SIZE_2, "> 0",
 392                   IRNode.MUL_VI,        IRNode.VECTOR_SIZE_4, "> 0",
 393                   IRNode.STORE_VECTOR, "> 0"},
 394         applyIfAnd = {"MaxVectorSize", ">=32", "UseCompactObjectHeaders", "false"},
 395         applyIfPlatform = {"64-bit", "true"},
 396         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 397     // Adjacent Load and Store, but split by Add/Mul
         // Positions 0..3 of each group of 8 use Mul, positions 4..5 use Add, and
         // positions 6..7 are not touched. The rules expect MUL_VI with VECTOR_SIZE_4
         // and ADD_VI with VECTOR_SIZE_2.
 398     static Object[] test1b(int[] a, int[] b, int mask) {
 399         for (int i = 0; i < RANGE; i+=8) {
 400             b[i+0] = a[i+0] * mask; // Mul
 401             b[i+1] = a[i+1] * mask;
 402             b[i+2] = a[i+2] * mask;
 403             b[i+3] = a[i+3] * mask;
 404 
 405             b[i+4] = a[i+4] + mask; // Add
 406             b[i+5] = a[i+5] + mask;
 407             // With AlignVector, we need 8-byte alignment of vector loads/stores.
 408             // UseCompactObjectHeaders=false                 UseCompactObjectHeaders=true
 409             // adr = base + 16 + 32*i  ->  always            adr = base + 12 + 32*i  ->  never
 410             // -> vectorize                                  -> no vectorization
 411         }
 412         return new Object[]{ a, b };
 413     }
 414 
 415     @Test
 416     @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
 417                   IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
 418                   IRNode.ADD_VI,        IRNode.VECTOR_SIZE_2, "> 0",
 419                   IRNode.MUL_VI,        IRNode.VECTOR_SIZE_4, "> 0",
 420                   IRNode.STORE_VECTOR, "> 0"},
 421         applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"},
 422         applyIfPlatform = {"64-bit", "true"},
 423         applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
 424     @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
 425                   IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
 426                   IRNode.ADD_VI,        IRNode.VECTOR_SIZE_2, "> 0",
 427                   IRNode.MUL_VI,        IRNode.VECTOR_SIZE_4, "> 0",
 428                   IRNode.STORE_VECTOR, "> 0"},
 429         applyIfAnd = {"MaxVectorSize", ">=32", "UseCompactObjectHeaders", "false"},
 430         applyIfPlatform = {"64-bit", "true"},
 431         applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
 432     // Adjacent Load and Store, but split by Add/Mul
         // Positions 0..1 of each group of 8 use Add, positions 2..5 use Mul, and
         // positions 6..7 are not touched. The rules expect ADD_VI with VECTOR_SIZE_2
         // and MUL_VI with VECTOR_SIZE_4. Note the x86 feature listed here is avx2.
 433     static Object[] test1c(int[] a, int[] b, int mask) {
 434         for (int i = 0; i < RANGE; i+=8) {
 435             b[i+0] = a[i+0] + mask; // Add
 436             b[i+1] = a[i+1] + mask;
 437 
 438             b[i+2] = a[i+2] * mask; // Mul
 439             b[i+3] = a[i+3] * mask;
 440             b[i+4] = a[i+4] * mask;
 441             b[i+5] = a[i+5] * mask;
 442             // With AlignVector, we need 8-byte alignment of vector loads/stores.
 443             // UseCompactObjectHeaders=false                 UseCompactObjectHeaders=true
 444             // adr = base + 16 + 32*i  ->  always            adr = base + 12 + 32*i  ->  never
 445             // -> vectorize                                  -> no vectorization
 446         }
 447         return new Object[]{ a, b };
 448     }
 449 
 450     @Test
 451     @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
 452                   IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
 453                   IRNode.ADD_VI,        IRNode.VECTOR_SIZE_4, "> 0",
 454                   IRNode.MUL_VI,        IRNode.VECTOR_SIZE_2, "> 0",
 455                   IRNode.STORE_VECTOR, "> 0"},
 456         applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"},
 457         applyIfPlatform = {"64-bit", "true"},
 458         applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
 459     @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
 460                   IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
 461                   IRNode.ADD_VI,        IRNode.VECTOR_SIZE_4, "> 0",
 462                   IRNode.MUL_VI,        IRNode.VECTOR_SIZE_2, "> 0",
 463                   IRNode.STORE_VECTOR, "> 0"},
 464         applyIfAnd = {"MaxVectorSize", ">=32", "UseCompactObjectHeaders", "false"},
 465         applyIfPlatform = {"64-bit", "true"},
 466         applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
 467     // Adjacent Load and Store, but split by Add/Mul
         // Positions 0..1 of each group of 8 use Mul, positions 2..5 use Add, and
         // positions 6..7 are not touched. The rules expect MUL_VI with VECTOR_SIZE_2
         // and ADD_VI with VECTOR_SIZE_4. Note the x86 feature listed here is avx2.
 468     static Object[] test1d(int[] a, int[] b, int mask) {
 469         for (int i = 0; i < RANGE; i+=8) {
 470             b[i+0] = a[i+0] * mask; // Mul
 471             b[i+1] = a[i+1] * mask;
 472 
 473             b[i+2] = a[i+2] + mask; // Add
 474             b[i+3] = a[i+3] + mask;
 475             b[i+4] = a[i+4] + mask;
 476             b[i+5] = a[i+5] + mask;
 477             // With AlignVector, we need 8-byte alignment of vector loads/stores.
 478             // UseCompactObjectHeaders=false                 UseCompactObjectHeaders=true
 479             // adr = base + 16 + 32*i  ->  always            adr = base + 12 + 32*i  ->  never
 480             // -> vectorize                                  -> no vectorization
 481         }
 482         return new Object[]{ a, b };
 483     }
 484 
 485     @Test
 486     @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
 487                   IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
 488                   IRNode.AND_VI,        IRNode.VECTOR_SIZE_2, "> 0",
 489                   IRNode.AND_VI,        IRNode.VECTOR_SIZE_4, "> 0",
 490                   IRNode.STORE_VECTOR, "> 0"},
 491         applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"},
 492         applyIfPlatform = {"64-bit", "true"},
 493         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 494     @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
 495                   IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
 496                   IRNode.AND_VI,        IRNode.VECTOR_SIZE_2, "> 0",
 497                   IRNode.AND_VI,        IRNode.VECTOR_SIZE_4, "> 0",
 498                   IRNode.STORE_VECTOR, "> 0"},
 499         applyIfAnd = {"MaxVectorSize", ">=32", "UseCompactObjectHeaders", "false"},
 500         applyIfPlatform = {"64-bit", "true"},
 501         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 502     // Split the load
 503     //
 504     //  0 1 2 3 4 5 - -
 505     //  | |  \ \ \ \
 506     //  | |   \ \ \ \
 507     //  | |    \ \ \ \
 508     //  0 1 - - 4 5 6 7
 509     //
         // Top row: indices read from a, bottom row: indices written to b. The six loads
         // are adjacent, but the stores form a run of 2 (0..1) and a run of 4 (4..7):
         // the values loaded from positions 2..5 are stored two positions further right.
 510     static Object[] test2a(int[] a, int[] b, int mask) {
 511         for (int i = 0; i < RANGE; i+=8) {
 512             int b0 = a[i+0] & mask;
 513             int b1 = a[i+1] & mask;
 514             int b2 = a[i+2] & mask;
 515             int b3 = a[i+3] & mask;
 516             int b4 = a[i+4] & mask;
 517             int b5 = a[i+5] & mask;
 518 
 519             b[i+0] = b0;
 520             b[i+1] = b1;
 521 
 522             b[i+4] = b2;
 523             b[i+5] = b3;
 524             b[i+6] = b4;
 525             b[i+7] = b5;
 526             // With AlignVector, we need 8-byte alignment of vector loads/stores.
 527             // UseCompactObjectHeaders=false                 UseCompactObjectHeaders=true
 528             // adr = base + 16 + 32*i  ->  always            adr = base + 12 + 32*i  ->  never
 529             // -> vectorize                                  -> no vectorization
 530         }
 531         return new Object[]{ a, b };
 532     }
 533 
 534     @Test
 535     @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
 536                   IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
 537                   IRNode.AND_VI,        IRNode.VECTOR_SIZE_2, "> 0",
 538                   IRNode.AND_VI,        IRNode.VECTOR_SIZE_4, "> 0",
 539                   IRNode.STORE_VECTOR, "> 0"},
 540         applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"},
 541         applyIfPlatform = {"64-bit", "true"},
 542         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 543     @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
 544                   IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
 545                   IRNode.AND_VI,        IRNode.VECTOR_SIZE_2, "> 0",
 546                   IRNode.AND_VI,        IRNode.VECTOR_SIZE_4, "> 0",
 547                   IRNode.STORE_VECTOR, "> 0"},
 548         applyIfAnd = {"MaxVectorSize", ">=32", "UseCompactObjectHeaders", "false"},
 549         applyIfPlatform = {"64-bit", "true"},
 550         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 551     // Split the load
 552     //
 553     //  0 1 2 3 4 5 - -
 554     //  | | | |  \ \
 555     //  | | | |   \ \
 556     //  | | | |    \ \
 557     //  0 1 2 3 - - 6 7
 558     //
         // Top row: indices read from a, bottom row: indices written to b. The six loads
         // are adjacent, but the stores form a run of 4 (0..3) and a run of 2 (6..7):
         // the values loaded from positions 4..5 are stored two positions further right.
 559     static Object[] test2b(int[] a, int[] b, int mask) {
 560         for (int i = 0; i < RANGE; i+=8) {
 561             int b0 = a[i+0] & mask;
 562             int b1 = a[i+1] & mask;
 563             int b2 = a[i+2] & mask;
 564             int b3 = a[i+3] & mask;
 565             int b4 = a[i+4] & mask;
 566             int b5 = a[i+5] & mask;
 567 
 568             b[i+0] = b0;
 569             b[i+1] = b1;
 570             b[i+2] = b2;
 571             b[i+3] = b3;
 572 
 573             b[i+6] = b4;
 574             b[i+7] = b5;
 575             // With AlignVector, we need 8-byte alignment of vector loads/stores.
 576             // UseCompactObjectHeaders=false                 UseCompactObjectHeaders=true
 577             // adr = base + 16 + 32*i  ->  always            adr = base + 12 + 32*i  ->  never
 578             // -> vectorize                                  -> no vectorization
 579         }
 580         return new Object[]{ a, b };
 581     }
 582 
 583     @Test
 584     @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
 585                   IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
 586                   IRNode.AND_VI,        IRNode.VECTOR_SIZE_2, "> 0",
 587                   IRNode.AND_VI,        IRNode.VECTOR_SIZE_4, "> 0",
 588                   IRNode.STORE_VECTOR, "> 0"},
 589         applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"},
 590         applyIfPlatform = {"64-bit", "true"},
 591         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 592     @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
 593                   IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
 594                   IRNode.AND_VI,        IRNode.VECTOR_SIZE_2, "> 0",
 595                   IRNode.AND_VI,        IRNode.VECTOR_SIZE_4, "> 0",
 596                   IRNode.STORE_VECTOR, "> 0"},
 597         applyIfAnd = {"MaxVectorSize", ">=32", "UseCompactObjectHeaders", "false"},
 598         applyIfPlatform = {"64-bit", "true"},
 599         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 600     // Split the store
 601     //
 602     //  0 1 - - 4 5 6 7
 603     //  | |    / / / /
 604     //  | |   / / / /
 605     //  | |  / / / /
 606     //  0 1 2 3 4 5 - -
 607     //
         // Top row: indices read from a, bottom row: indices written to b. Here the loads
         // already have a gap (0..1 and 4..7), while the six stores (0..5) are adjacent:
         // the values loaded from positions 4..7 are stored two positions further left.
 608     static Object[] test2c(int[] a, int[] b, int mask) {
 609         for (int i = 0; i < RANGE; i+=8) {
 610             int b0 = a[i+0] & mask;
 611             int b1 = a[i+1] & mask;
 612 
 613             int b4 = a[i+4] & mask;
 614             int b5 = a[i+5] & mask;
 615             int b6 = a[i+6] & mask;
 616             int b7 = a[i+7] & mask;
 617 
 618             b[i+0] = b0;
 619             b[i+1] = b1;
 620             b[i+2] = b4;
 621             b[i+3] = b5;
 622             b[i+4] = b6;
 623             b[i+5] = b7;
 624             // With AlignVector, we need 8-byte alignment of vector loads/stores.
 625             // UseCompactObjectHeaders=false                 UseCompactObjectHeaders=true
 626             // adr = base + 16 + 32*i  ->  always            adr = base + 12 + 32*i  ->  never
 627             // -> vectorize                                  -> no vectorization
 628         }
 629         return new Object[]{ a, b };
 630     }
 631 
 632     @Test
 633     @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
 634                   IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
 635                   IRNode.AND_VI,        IRNode.VECTOR_SIZE_2, "> 0",
 636                   IRNode.AND_VI,        IRNode.VECTOR_SIZE_4, "> 0",
 637                   IRNode.STORE_VECTOR, "> 0"},
 638         applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"},
 639         applyIfPlatform = {"64-bit", "true"},
 640         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 641     @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
 642                   IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
 643                   IRNode.AND_VI,        IRNode.VECTOR_SIZE_2, "> 0",
 644                   IRNode.AND_VI,        IRNode.VECTOR_SIZE_4, "> 0",
 645                   IRNode.STORE_VECTOR, "> 0"},
 646         applyIfAnd = {"MaxVectorSize", ">=32", "UseCompactObjectHeaders", "false"},
 647         applyIfPlatform = {"64-bit", "true"},
 648         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 649     // Split the store
 650     //
 651     //  0 1 2 3 - - 6 7
 652     //  | | | |    / /
 653     //  | | | |   / /
 654     //  | | | |  / /
 655     //  0 1 2 3 4 5 - -
 656     //
         // Top row: indices read from a, bottom row: indices written to b. Here the loads
         // already have a gap (0..3 and 6..7), while the six stores (0..5) are adjacent:
         // the values loaded from positions 6..7 are stored two positions further left.
 657     static Object[] test2d(int[] a, int[] b, int mask) {
 658         for (int i = 0; i < RANGE; i+=8) {
 659             int b0 = a[i+0] & mask;
 660             int b1 = a[i+1] & mask;
 661             int b2 = a[i+2] & mask;
 662             int b3 = a[i+3] & mask;
 663 
 664             int b6 = a[i+6] & mask;
 665             int b7 = a[i+7] & mask;
 666 
 667             b[i+0] = b0;
 668             b[i+1] = b1;
 669             b[i+2] = b2;
 670             b[i+3] = b3;
 671             b[i+4] = b6;
 672             b[i+5] = b7;
 673             // With AlignVector, we need 8-byte alignment of vector loads/stores.
 674             // UseCompactObjectHeaders=false                 UseCompactObjectHeaders=true
 675             // adr = base + 16 + 32*i  ->  always            adr = base + 12 + 32*i  ->  never
 676             // -> vectorize                                  -> no vectorization
 677         }
 678         return new Object[]{ a, b };
 679     }
 680 
 681     @Test
 682     @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
 683                   IRNode.STORE_VECTOR, "> 0"},
 684         applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"},
 685         applyIfPlatform = {"64-bit", "true"},
 686         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 687     @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
 688                   IRNode.STORE_VECTOR, "> 0"},
 689         applyIfAnd = {"MaxVectorSize", ">=32", "UseCompactObjectHeaders", "false"},
 690         applyIfPlatform = {"64-bit", "true"},
 691         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
         // Top row: indices read from a, bottom row: indices written to b, per group of
         // 16 shorts. "+" marks loads that only feed the sum, "v" marks stores of val.
 692     // 0 1 2 3 4 5 6 7 -
 693     // | | | | | | | |
 694     // | + + + | | | |
 695     // |       | | | |
 696     // |     v | | | | v
 697     // |     | | | | | |
 698     // 0 - - 3 4 5 6 7 8
         //
         // a0 and a4..a7 are copied to the same positions of b, a1..a3 are only added
         // to sum, and b[i+3] / b[i+8] receive val. The rules expect a 4-element short
         // load vector (LOAD_VECTOR_S with VECTOR_SIZE_4) and at least one STORE_VECTOR.
 699     static Object[] test3a(short[] a, short[] b, short val) {
 700         int sum = 0;
 701         for (int i = 0; i < RANGE; i+=16) {
 702             short a0 = a[i+0]; // required for alignment / offsets, technical limitation.
 703 
 704             short a1 = a[i+1]; // adjacent to 4-pack, but need to be split off
 705             short a2 = a[i+2];
 706             short a3 = a[i+3];
 707 
 708             short a4 = a[i+4]; // 4-pack
 709             short a5 = a[i+5];
 710             short a6 = a[i+6];
 711             short a7 = a[i+7];
 712 
 713 
 714             b[i+0] = a0; // required for alignment / offsets, technical limitation.
 715 
 716             sum += a1 + a2 + a3; // not packed
 717 
 718             b[i+3] = val; // adjacent to 4-pack but needs to be split off
 719 
 720             b[i+4] = a4; // 4-pack
 721             b[i+5] = a5;
 722             b[i+6] = a6;
 723             b[i+7] = a7;
 724 
 725             b[i+8] = val; // adjacent to 4-pack but needs to be split off
 726 
 727             // With AlignVector, we need 8-byte alignment of vector loads/stores.
 728             // UseCompactObjectHeaders=false                 UseCompactObjectHeaders=true
 729             // adr = base + 16 + 8 + 32*i  ->  always        adr = base + 12 + 8 + 32*i  ->  never
 730             // -> vectorize                                  -> no vectorization
 731         }
             // sum is wrapped in a one-element int[] so it is returned alongside the arrays.
 732         return new Object[]{ a, b, new int[]{ sum } };
 733     }
 734 
 735     @Test
 736     @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_2, "> 0",
 737                   IRNode.STORE_VECTOR, "> 0",
 738                   ".*multiversion.*", "= 0"},
 739         phase = CompilePhase.PRINT_IDEAL,
 740         applyIf = {"UseAutoVectorizationSpeculativeAliasingChecks", "false"},
 741         applyIfPlatform = {"64-bit", "true"},
 742         applyIfCPUFeatureOr = {"sse4.1", "true"})
 743     // Cyclic dependency with distance 2 -> split into 2-packs
 744     @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
 745                   IRNode.STORE_VECTOR, "> 0",
 746                   ".*multiversion.*", "= 0"},
 747         phase = CompilePhase.PRINT_IDEAL,
 748         applyIfAnd = {"UseAutoVectorizationSpeculativeAliasingChecks", "true", "AlignVector", "false"},
 749         applyIfPlatform = {"64-bit", "true"},
 750         applyIfCPUFeatureOr = {"sse4.1", "true"})

1110 
1111             b[i+ 8] = (short)(a[i+ 8] + val); // 4-pack
1112             b[i+ 9] = (short)(a[i+ 9] + val);
1113             b[i+10] = (short)(a[i+10] + val);
1114             b[i+11] = (short)(a[i+11] + val);
1115 
1116             b[i+12] = (short)(a[i+12] + val); // 2-pack
1117             b[i+13] = (short)(a[i+13] + val);
1118 
1119             b[i+14] = (short)(a[i+14] + val);
1120         }
1121         return new Object[]{ a, b };
1122     }
1123 
1124     @Test
1125     @IR(counts = {IRNode.LOAD_VECTOR_I,   IRNode.VECTOR_SIZE_4, "> 0",
1126                   IRNode.MUL_VI,          IRNode.VECTOR_SIZE_4, "> 0",
1127                   IRNode.AND_VI,          IRNode.VECTOR_SIZE_4, "> 0",
1128                   IRNode.ADD_VI,          IRNode.VECTOR_SIZE_4, "> 0", // reduction moved out of loop
1129                   IRNode.ADD_REDUCTION_V,                       "> 0"},
1130         applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"},
1131         applyIfPlatform = {"64-bit", "true"},
1132         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
1133     @IR(counts = {IRNode.LOAD_VECTOR_I,   IRNode.VECTOR_SIZE_4, "> 0",
1134                   IRNode.MUL_VI,          IRNode.VECTOR_SIZE_4, "> 0",
1135                   IRNode.AND_VI,          IRNode.VECTOR_SIZE_4, "> 0",
1136                   IRNode.ADD_VI,          IRNode.VECTOR_SIZE_4, "> 0", // reduction moved out of loop
1137                   IRNode.ADD_REDUCTION_V,                       "> 0"},
1138         applyIfAnd = {"MaxVectorSize", ">=32", "UseCompactObjectHeaders", "false"},
1139         applyIfPlatform = {"64-bit", "true"},
1140         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
1141     // Split packs including reductions
         // All eight statements accumulate into s: positions 0..3 add the product
         // a[..] * b[..], positions 4..7 add a[..] & b[..]. The rules expect 4-element
         // MUL_VI, AND_VI and ADD_VI nodes plus at least one ADD_REDUCTION_V.
1142     static Object[] test6a(int[] a, int[] b) {
1143         int s = 0;
1144         for (int i = 0; i < RANGE; i+=8) {
1145             s += a[i+0] * b[i+0];
1146             s += a[i+1] * b[i+1];
1147             s += a[i+2] * b[i+2];
1148             s += a[i+3] * b[i+3];
1149 
1150             s += a[i+4] & b[i+4];
1151             s += a[i+5] & b[i+5];
1152             s += a[i+6] & b[i+6];
1153             s += a[i+7] & b[i+7];
1154             // With AlignVector, we need 8-byte alignment of vector loads/stores.
1155             // UseCompactObjectHeaders=false                 UseCompactObjectHeaders=true
1156             // adr = base + 16 + 32*i  ->  always            adr = base + 12 + 32*i  ->  never
1157             // -> vectorize                                  -> no vectorization
1158         }
             // s is wrapped in a one-element int[] so it is returned alongside the arrays.
1159         return new Object[]{ a, b, new int[]{ s } };
1160     }
1161 
1162     @Test
1163     @IR(counts = {IRNode.LOAD_VECTOR_I,  "> 0",
1164                   IRNode.MUL_VI,         "> 0",
1165                   IRNode.POPULATE_INDEX, "> 0"},
1166         applyIfPlatform = {"64-bit", "true"},
1167         applyIfCPUFeatureOr = {"avx2", "true", "sve", "true", "rvv", "true"})
1168     // Index Populate:
1169     // There can be an issue when all the (iv + 1), (iv + 2), ...
1170     // get packed, but not (iv). Then we have a pack that is one element
1171     // too short, and we start splitting everything in a bad way.
         //
         // The loop multiplies each b[i] by its own index i (stride 1, unlike the
         // stride-8 tests in this file). The rule has no VM-flag condition; it only
         // checks that POPULATE_INDEX appears together with LOAD_VECTOR_I and MUL_VI.
1172     static Object[] test7a(int[] a, int[] b) {
1173         for (int i = 0; i < RANGE; i++) {
1174             a[i] = b[i] * i;
1175         }
1176         return new Object[]{ a, b };
1177     }

 284             }
 285         }
 286     }
 287 
 288     static void verifyL(String name, int i, long[] g, long[] r) {
             // Element-wise comparison of the gold array g against the result array r.
             // Throws a RuntimeException at the first index j where they differ; name and i
             // are only used to build the message.
             // The loop is bounded by g.length only, so r is indexed up to g.length - 1.
 289         for (int j = 0; j < g.length; j++) {
 290             if (g[j] != r[j]) {
 291                 throw new RuntimeException("verify " + name + ": arrays must have same content:" +
 292                                            " gold[" + i + "][" + j + "] = " + g[j] +
 293                                            " result[" + i + "][" + j + "] = " + r[j]);
 294             }
 295         }
 296     }
 297 
 298     @Test
 299     @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
 300                   IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
 301                   IRNode.AND_VI,        IRNode.VECTOR_SIZE_2, "> 0",
 302                   IRNode.AND_VI,        IRNode.VECTOR_SIZE_4, "> 0",
 303                   IRNode.STORE_VECTOR, "> 0"},
 304         applyIf = {"MaxVectorSize", ">=32"},








 305         applyIfPlatform = {"64-bit", "true"},
 306         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 307     // Load and store are already split
 308     //
 309     //  0 1 - - 4 5 6 7
 310     //  | |     | | | |
 311     //  0 1 - - 4 5 6 7
         //
         // Top row: indices read from a, bottom row: indices written to b, per group of
         // 8 ints; "-" marks a position that is not accessed. Positions 2 and 3 are
         // skipped on both sides, leaving a run of 2 and a run of 4 adjacent accesses.
         // The single @IR rule's only VM-flag condition is MaxVectorSize >= 32.
 312     static Object[] test0(int[] a, int[] b, int mask) {
 313         for (int i = 0; i < RANGE; i+=8) {
 314             int b0 = a[i+0] & mask;
 315             int b1 = a[i+1] & mask;
 316 
 317             int b4 = a[i+4] & mask;
 318             int b5 = a[i+5] & mask;
 319             int b6 = a[i+6] & mask;
 320             int b7 = a[i+7] & mask;
 321 
 322             b[i+0] = b0;
 323             b[i+1] = b1;
 324 
 325             b[i+4] = b4;
 326             b[i+5] = b5;
 327             b[i+6] = b6;
 328             b[i+7] = b7;




 329         }
 330         return new Object[]{ a, b };
 331     }
 332 
 333     @Test
 334     @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
 335                   IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
 336                   IRNode.ADD_VI,        IRNode.VECTOR_SIZE_4, "> 0",
 337                   IRNode.MUL_VI,        IRNode.VECTOR_SIZE_2, "> 0",
 338                   IRNode.STORE_VECTOR, "> 0"},
 339         applyIf = {"MaxVectorSize", ">=32"},








 340         applyIfPlatform = {"64-bit", "true"},
 341         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 342     // Adjacent Load and Store, but split by Add/Mul
         // Positions 0..3 of each group of 8 use Add, positions 4..5 use Mul, and
         // positions 6..7 are not touched. The rule expects ADD_VI with VECTOR_SIZE_4
         // and MUL_VI with VECTOR_SIZE_2.
 343     static Object[] test1a(int[] a, int[] b, int mask) {
 344         for (int i = 0; i < RANGE; i+=8) {
 345             b[i+0] = a[i+0] + mask; // Add
 346             b[i+1] = a[i+1] + mask;
 347             b[i+2] = a[i+2] + mask;
 348             b[i+3] = a[i+3] + mask;
 349 
 350             b[i+4] = a[i+4] * mask; // Mul
 351             b[i+5] = a[i+5] * mask;




 352         }
 353         return new Object[]{ a, b };
 354     }
 355 
 356     @Test
 357     @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
 358                   IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
 359                   IRNode.ADD_VI,        IRNode.VECTOR_SIZE_2, "> 0",
 360                   IRNode.MUL_VI,        IRNode.VECTOR_SIZE_4, "> 0",
 361                   IRNode.STORE_VECTOR, "> 0"},
 362         applyIf = {"MaxVectorSize", ">=32"},








 363         applyIfPlatform = {"64-bit", "true"},
 364         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 365     // Adjacent Load and Store, but split by Add/Mul
         // Positions 0..3 of each group of 8 use Mul, positions 4..5 use Add, and
         // positions 6..7 are not touched. The rule expects MUL_VI with VECTOR_SIZE_4
         // and ADD_VI with VECTOR_SIZE_2.
 366     static Object[] test1b(int[] a, int[] b, int mask) {
 367         for (int i = 0; i < RANGE; i+=8) {
 368             b[i+0] = a[i+0] * mask; // Mul
 369             b[i+1] = a[i+1] * mask;
 370             b[i+2] = a[i+2] * mask;
 371             b[i+3] = a[i+3] * mask;
 372 
 373             b[i+4] = a[i+4] + mask; // Add
 374             b[i+5] = a[i+5] + mask;




 375         }
 376         return new Object[]{ a, b };
 377     }
 378 
 379     @Test
 380     @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
 381                   IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
 382                   IRNode.ADD_VI,        IRNode.VECTOR_SIZE_2, "> 0",
 383                   IRNode.MUL_VI,        IRNode.VECTOR_SIZE_4, "> 0",
 384                   IRNode.STORE_VECTOR, "> 0"},
 385         applyIf = {"MaxVectorSize", ">=32"},








 386         applyIfPlatform = {"64-bit", "true"},
 387         applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
 388     // Adjacent Load and Store, but split by Add/Mul
         // Positions 0..1 of each group of 8 use Add, positions 2..5 use Mul, and
         // positions 6..7 are not touched. The rule expects ADD_VI with VECTOR_SIZE_2
         // and MUL_VI with VECTOR_SIZE_4. Note the x86 feature listed here is avx2.
 389     static Object[] test1c(int[] a, int[] b, int mask) {
 390         for (int i = 0; i < RANGE; i+=8) {
 391             b[i+0] = a[i+0] + mask; // Add
 392             b[i+1] = a[i+1] + mask;
 393 
 394             b[i+2] = a[i+2] * mask; // Mul
 395             b[i+3] = a[i+3] * mask;
 396             b[i+4] = a[i+4] * mask;
 397             b[i+5] = a[i+5] * mask;




 398         }
 399         return new Object[]{ a, b };
 400     }
 401 
 402     @Test
 403     @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
 404                   IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
 405                   IRNode.ADD_VI,        IRNode.VECTOR_SIZE_4, "> 0",
 406                   IRNode.MUL_VI,        IRNode.VECTOR_SIZE_2, "> 0",
 407                   IRNode.STORE_VECTOR, "> 0"},
 408         applyIf = {"MaxVectorSize", ">=32"},








 409         applyIfPlatform = {"64-bit", "true"},
 410         applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
 411     // Adjacent Load and Store, but split by Add/Mul
         //
         // Each iteration (stride 8) writes b[i+0..1] = a[i+0..1] * mask and
         // b[i+2..5] = a[i+2..5] + mask; b[i+6] and b[i+7] are not written.
         // Same layout as test1c with the two operations swapped: the @IR rule
         // expects a 2-element MulVI and a 4-element AddVI together with 2- and
         // 4-element int load vectors.
         // Returns the two arrays so the caller can inspect them.
 412     static Object[] test1d(int[] a, int[] b, int mask) {
 413         for (int i = 0; i < RANGE; i+=8) {
 414             b[i+0] = a[i+0] * mask; // Mul
 415             b[i+1] = a[i+1] * mask;
 416 
 417             b[i+2] = a[i+2] + mask; // Add
 418             b[i+3] = a[i+3] + mask;
 419             b[i+4] = a[i+4] + mask;
 420             b[i+5] = a[i+5] + mask;




 421         }
 422         return new Object[]{ a, b };
 423     }
 424 
 425     @Test
 426     @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
 427                   IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
 428                   IRNode.AND_VI,        IRNode.VECTOR_SIZE_2, "> 0",
 429                   IRNode.AND_VI,        IRNode.VECTOR_SIZE_4, "> 0",
 430                   IRNode.STORE_VECTOR, "> 0"},
 431         applyIf = {"MaxVectorSize", ">=32"},








 432         applyIfPlatform = {"64-bit", "true"},
 433         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 434     // Split the load
 435     //
 436     //  0 1 2 3 4 5 - -
 437     //  | |  \ \ \ \
 438     //  | |   \ \ \ \
 439     //  | |    \ \ \ \
 440     //  0 1 - - 4 5 6 7
 441     //
         // Top row: offsets loaded from a; bottom row: offsets stored to b.
         // Six adjacent elements a[i+0..5] are loaded and masked. The first two
         // results go to b[i+0..1], the remaining four to b[i+4..7]; b[i+2] and
         // b[i+3] are not written. The @IR rule expects both 2- and 4-element
         // int load and AndVI vectors.
         // Returns the two arrays so the caller can inspect them.
 442     static Object[] test2a(int[] a, int[] b, int mask) {
 443         for (int i = 0; i < RANGE; i+=8) {
 444             int b0 = a[i+0] & mask;
 445             int b1 = a[i+1] & mask;
 446             int b2 = a[i+2] & mask;
 447             int b3 = a[i+3] & mask;
 448             int b4 = a[i+4] & mask;
 449             int b5 = a[i+5] & mask;
 450 
 451             b[i+0] = b0;
 452             b[i+1] = b1;
 453 
 454             b[i+4] = b2;
 455             b[i+5] = b3;
 456             b[i+6] = b4;
 457             b[i+7] = b5;




 458         }
 459         return new Object[]{ a, b };
 460     }
 461 
 462     @Test
 463     @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
 464                   IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
 465                   IRNode.AND_VI,        IRNode.VECTOR_SIZE_2, "> 0",
 466                   IRNode.AND_VI,        IRNode.VECTOR_SIZE_4, "> 0",
 467                   IRNode.STORE_VECTOR, "> 0"},
 468         applyIf = {"MaxVectorSize", ">=32"},








 469         applyIfPlatform = {"64-bit", "true"},
 470         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 471     // Split the load
 472     //
 473     //  0 1 2 3 4 5 - -
 474     //  | | | |  \ \
 475     //  | | | |   \ \
 476     //  | | | |    \ \
 477     //  0 1 2 3 - - 6 7
 478     //
         // Top row: offsets loaded from a; bottom row: offsets stored to b.
         // Six adjacent elements a[i+0..5] are loaded and masked. The first four
         // results go to b[i+0..3], the last two to b[i+6..7]; b[i+4] and b[i+5]
         // are not written. The @IR rule expects both 2- and 4-element int load
         // and AndVI vectors.
         // Returns the two arrays so the caller can inspect them.
 479     static Object[] test2b(int[] a, int[] b, int mask) {
 480         for (int i = 0; i < RANGE; i+=8) {
 481             int b0 = a[i+0] & mask;
 482             int b1 = a[i+1] & mask;
 483             int b2 = a[i+2] & mask;
 484             int b3 = a[i+3] & mask;
 485             int b4 = a[i+4] & mask;
 486             int b5 = a[i+5] & mask;
 487 
 488             b[i+0] = b0;
 489             b[i+1] = b1;
 490             b[i+2] = b2;
 491             b[i+3] = b3;
 492 
 493             b[i+6] = b4;
 494             b[i+7] = b5;




 495         }
 496         return new Object[]{ a, b };
 497     }
 498 
 499     @Test
 500     @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
 501                   IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
 502                   IRNode.AND_VI,        IRNode.VECTOR_SIZE_2, "> 0",
 503                   IRNode.AND_VI,        IRNode.VECTOR_SIZE_4, "> 0",
 504                   IRNode.STORE_VECTOR, "> 0"},
 505         applyIf = {"MaxVectorSize", ">=32"},








 506         applyIfPlatform = {"64-bit", "true"},
 507         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 508     // Split the load
         // NOTE(review): in this test the loads are already non-adjacent (0-1 and
         // 4-7) while the stores b[i+0..5] are adjacent, so presumably it is the
         // store side that has to be split here -- confirm and reword if so.
 509     //
 510     //  0 1 - - 4 5 6 7
 511     //  | |    / / / /
 512     //  | |   / / / /
 513     //  | |  / / / /
 514     //  0 1 2 3 4 5 - -
 515     //
         // Top row: offsets loaded from a; bottom row: offsets stored to b.
         // a[i+0..1] and a[i+4..7] are loaded and masked (a[i+2] and a[i+3] are
         // not read); the six results are stored contiguously to b[i+0..5], and
         // b[i+6], b[i+7] are not written. The @IR rule expects both 2- and
         // 4-element int load and AndVI vectors.
         // Returns the two arrays so the caller can inspect them.
 516     static Object[] test2c(int[] a, int[] b, int mask) {
 517         for (int i = 0; i < RANGE; i+=8) {
 518             int b0 = a[i+0] & mask;
 519             int b1 = a[i+1] & mask;
 520 
 521             int b4 = a[i+4] & mask;
 522             int b5 = a[i+5] & mask;
 523             int b6 = a[i+6] & mask;
 524             int b7 = a[i+7] & mask;
 525 
 526             b[i+0] = b0;
 527             b[i+1] = b1;
 528             b[i+2] = b4;
 529             b[i+3] = b5;
 530             b[i+4] = b6;
 531             b[i+5] = b7;




 532         }
 533         return new Object[]{ a, b };
 534     }
 535 
 536     @Test
 537     @IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
 538                   IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
 539                   IRNode.AND_VI,        IRNode.VECTOR_SIZE_2, "> 0",
 540                   IRNode.AND_VI,        IRNode.VECTOR_SIZE_4, "> 0",
 541                   IRNode.STORE_VECTOR, "> 0"},
 542         applyIf = {"MaxVectorSize", ">=32"},








 543         applyIfPlatform = {"64-bit", "true"},
 544         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
 545     // Split the load
         // NOTE(review): in this test the loads are already non-adjacent (0-3 and
         // 6-7) while the stores b[i+0..5] are adjacent, so presumably it is the
         // store side that has to be split here -- confirm and reword if so.
 546     //
 547     //  0 1 2 3 - - 6 7
 548     //  | | | |    / /
 549     //  | | | |   / /
 550     //  | | | |  / /
 551     //  0 1 2 3 4 5 - -
 552     //
         // Top row: offsets loaded from a; bottom row: offsets stored to b.
         // a[i+0..3] and a[i+6..7] are loaded and masked (a[i+4] and a[i+5] are
         // not read); the six results are stored contiguously to b[i+0..5], and
         // b[i+6], b[i+7] are not written. The @IR rule expects both 2- and
         // 4-element int load and AndVI vectors.
         // Returns the two arrays so the caller can inspect them.
 553     static Object[] test2d(int[] a, int[] b, int mask) {
 554         for (int i = 0; i < RANGE; i+=8) {
 555             int b0 = a[i+0] & mask;
 556             int b1 = a[i+1] & mask;
 557             int b2 = a[i+2] & mask;
 558             int b3 = a[i+3] & mask;
 559 
 560             int b6 = a[i+6] & mask;
 561             int b7 = a[i+7] & mask;
 562 
 563             b[i+0] = b0;
 564             b[i+1] = b1;
 565             b[i+2] = b2;
 566             b[i+3] = b3;
 567             b[i+4] = b6;
 568             b[i+5] = b7;




 569         }
 570         return new Object[]{ a, b };
 571     }
 572 
 573     @Test
 574     @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
 575                   IRNode.STORE_VECTOR, "> 0"},
 576         applyIf = {"MaxVectorSize", ">=32"},





 577         applyIfPlatform = {"64-bit", "true"},
 578         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
         // Per iteration (stride 16 shorts): a[i+0..7] are loaded. a[i+0] and
         // a[i+4..7] are copied to b at the same offsets, a[i+1..3] only feed the
         // scalar sum, and b[i+3] and b[i+8] are set to val. The @IR rule expects
         // a 4-element short load vector and at least one vector store.
         //
         // Top row: offsets loaded from a; bottom row: offsets stored to b.
         // "+" marks a loaded value that is added to sum, "v" marks a store of val.
         //
 579     // 0 1 2 3 4 5 6 7 -
 580     // | | | | | | | |
 581     // | + + + | | | |
 582     // |       | | | |
 583     // |     v | | | | v
 584     // |     | | | | | |
 585     // 0 - - 3 4 5 6 7 8
         //
         // Returns a, b and the accumulated sum wrapped in a one-element int[].
 586     static Object[] test3a(short[] a, short[] b, short val) {
 587         int sum = 0;
 588         for (int i = 0; i < RANGE; i+=16) {
 589             short a0 = a[i+0]; // required for alignment / offsets, technical limitation.
 590 
 591             short a1 = a[i+1]; // adjacent to 4-pack, but need to be split off
 592             short a2 = a[i+2];
 593             short a3 = a[i+3];
 594 
 595             short a4 = a[i+4]; // 4-pack
 596             short a5 = a[i+5];
 597             short a6 = a[i+6];
 598             short a7 = a[i+7];
 599 
 600 
 601             b[i+0] = a0; // required for alignment / offsets, technical limitation.
 602 
 603             sum += a1 + a2 + a3; // not packed
 604 
 605             b[i+3] = val; // adjacent to 4-pack but needs to be split off
 606 
 607             b[i+4] = a4; // 4-pack
 608             b[i+5] = a5;
 609             b[i+6] = a6;
 610             b[i+7] = a7;
 611 
 612             b[i+8] = val; // adjacent to 4-pack but needs to be split off





 613         }
 614         return new Object[]{ a, b, new int[]{ sum } };
 615     }
 616 
 617     @Test
 618     @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_2, "> 0",
 619                   IRNode.STORE_VECTOR, "> 0",
 620                   ".*multiversion.*", "= 0"},
 621         phase = CompilePhase.PRINT_IDEAL,
 622         applyIf = {"UseAutoVectorizationSpeculativeAliasingChecks", "false"},
 623         applyIfPlatform = {"64-bit", "true"},
 624         applyIfCPUFeatureOr = {"sse4.1", "true"})
 625     // Cyclic dependency with distance 2 -> split into 2-packs
 626     @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
 627                   IRNode.STORE_VECTOR, "> 0",
 628                   ".*multiversion.*", "= 0"},
 629         phase = CompilePhase.PRINT_IDEAL,
 630         applyIfAnd = {"UseAutoVectorizationSpeculativeAliasingChecks", "true", "AlignVector", "false"},
 631         applyIfPlatform = {"64-bit", "true"},
 632         applyIfCPUFeatureOr = {"sse4.1", "true"})

 992 
 993             b[i+ 8] = (short)(a[i+ 8] + val); // 4-pack
 994             b[i+ 9] = (short)(a[i+ 9] + val);
 995             b[i+10] = (short)(a[i+10] + val);
 996             b[i+11] = (short)(a[i+11] + val);
 997 
 998             b[i+12] = (short)(a[i+12] + val); // 2-pack
 999             b[i+13] = (short)(a[i+13] + val);
1000 
1001             b[i+14] = (short)(a[i+14] + val);
1002         }
1003         return new Object[]{ a, b };
1004     }
1005 
1006     @Test
1007     @IR(counts = {IRNode.LOAD_VECTOR_I,   IRNode.VECTOR_SIZE_4, "> 0",
1008                   IRNode.MUL_VI,          IRNode.VECTOR_SIZE_4, "> 0",
1009                   IRNode.AND_VI,          IRNode.VECTOR_SIZE_4, "> 0",
1010                   IRNode.ADD_VI,          IRNode.VECTOR_SIZE_4, "> 0", // reduction moved out of loop
1011                   IRNode.ADD_REDUCTION_V,                       "> 0"},
1012         applyIf = {"MaxVectorSize", ">=32"},








1013         applyIfPlatform = {"64-bit", "true"},
1014         applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
1015     // Split packs including reductions
         //
         // Each iteration (stride 8) adds eight terms to the single accumulator s:
         // the products a[i+k] * b[i+k] for k = 0..3 and the bitwise ANDs
         // a[i+k] & b[i+k] for k = 4..7. The @IR rule expects 4-element int
         // load, MulVI, AndVI and AddVI vectors plus an add-reduction node.
         // Neither array is written. Returns a, b and s wrapped in a one-element
         // int[].
1016     static Object[] test6a(int[] a, int[] b) {
1017         int s = 0;
1018         for (int i = 0; i < RANGE; i+=8) {
1019             s += a[i+0] * b[i+0];
1020             s += a[i+1] * b[i+1];
1021             s += a[i+2] * b[i+2];
1022             s += a[i+3] * b[i+3];
1023 
1024             s += a[i+4] & b[i+4];
1025             s += a[i+5] & b[i+5];
1026             s += a[i+6] & b[i+6];
1027             s += a[i+7] & b[i+7];




1028         }
1029         return new Object[]{ a, b, new int[]{ s } };
1030     }
1031 
1032     @Test
1033     @IR(counts = {IRNode.LOAD_VECTOR_I,  "> 0",
1034                   IRNode.MUL_VI,         "> 0",
1035                   IRNode.POPULATE_INDEX, "> 0"},
1036         applyIfPlatform = {"64-bit", "true"},
1037         applyIfCPUFeatureOr = {"avx2", "true", "sve", "true", "rvv", "true"})
1038     // Index Populate:
1039     // There can be an issue when all the (iv + 1), (iv + 2), ...
1040     // get packed, but not (iv). Then we have a pack that is one element
1041     // too short, and we start splitting everything in a bad way.
         //
         // Unlike the hand-unrolled tests above, this is a plain stride-1 loop:
         // every a[i] is overwritten with b[i] multiplied by the loop index i.
         // The @IR rule expects vector int loads, a vector multiply and a
         // populate-index node, without constraining the vector size.
         // Returns the two arrays so the caller can inspect them.
1042     static Object[] test7a(int[] a, int[] b) {
1043         for (int i = 0; i < RANGE; i++) {
1044             a[i] = b[i] * i;
1045         }
1046         return new Object[]{ a, b };
1047     }
< prev index next >