284 }
285 }
286 }
287
288 static void verifyL(String name, int i, long[] g, long[] r) {
289 for (int j = 0; j < g.length; j++) {
290 if (g[j] != r[j]) {
291 throw new RuntimeException("verify " + name + ": arrays must have same content:" +
292 " gold[" + i + "][" + j + "] = " + g[j] +
293 " result[" + i + "][" + j + "] = " + r[j]);
294 }
295 }
296 }
297
// Split-pack test: the scalar pattern already comes "pre-split" into a 2-pack
// {0,1} and a 4-pack {4,5,6,7}; the IR rules expect LoadVectorI, AndVI and
// StoreVector at both vector sizes 2 and 4.
@Test
@IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.AND_VI, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.AND_VI, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
@IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.AND_VI, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.AND_VI, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIfAnd = {"MaxVectorSize", ">=32", "UseCompactObjectHeaders", "false"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
// Load and store are already split
//
//  0 1 - - 4 5 6 7
//  | |     | | | |
//  0 1 - - 4 5 6 7
static Object[] test0(int[] a, int[] b, int mask) {
    for (int i = 0; i < RANGE; i+=8) {
        // 2-pack: lanes 0 and 1.
        int b0 = a[i+0] & mask;
        int b1 = a[i+1] & mask;

        // 4-pack: lanes 4..7 (lanes 2 and 3 intentionally untouched).
        int b4 = a[i+4] & mask;
        int b5 = a[i+5] & mask;
        int b6 = a[i+6] & mask;
        int b7 = a[i+7] & mask;

        b[i+0] = b0;
        b[i+1] = b1;

        b[i+4] = b4;
        b[i+5] = b5;
        b[i+6] = b6;
        b[i+7] = b7;
        // With AlignVector, we need 8-byte alignment of vector loads/stores.
        // UseCompactObjectHeaders=false                 UseCompactObjectHeaders=true
        // adr = base + 16 + 32*i  ->  always            adr = base + 12 + 32*i  ->  never
        // -> vectorize                                  -> no vectorization
    }
    return new Object[]{ a, b };
}
344
// Split-pack test: adjacent loads/stores, but the middle operation differs
// (Add for lanes 0..3, Mul for lanes 4..5), forcing a split into a 4-pack of
// AddVI and a 2-pack of MulVI.
@Test
@IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.ADD_VI, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.MUL_VI, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
@IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.ADD_VI, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.MUL_VI, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIfAnd = {"MaxVectorSize", ">=32", "UseCompactObjectHeaders", "false"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
// Adjacent Load and Store, but split by Add/Mul
static Object[] test1a(int[] a, int[] b, int mask) {
    for (int i = 0; i < RANGE; i+=8) {
        b[i+0] = a[i+0] + mask; // Add
        b[i+1] = a[i+1] + mask;
        b[i+2] = a[i+2] + mask;
        b[i+3] = a[i+3] + mask;

        b[i+4] = a[i+4] * mask; // Mul
        b[i+5] = a[i+5] * mask;
        // With AlignVector, we need 8-byte alignment of vector loads/stores.
        // UseCompactObjectHeaders=false                 UseCompactObjectHeaders=true
        // adr = base + 16 + 32*i  ->  always            adr = base + 12 + 32*i  ->  never
        // -> vectorize                                  -> no vectorization
    }
    return new Object[]{ a, b };
}
379
// Mirror of test1a with the operations swapped: Mul for lanes 0..3 (4-pack)
// and Add for lanes 4..5 (2-pack).
@Test
@IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.ADD_VI, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.MUL_VI, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
@IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.ADD_VI, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.MUL_VI, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIfAnd = {"MaxVectorSize", ">=32", "UseCompactObjectHeaders", "false"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
// Adjacent Load and Store, but split by Add/Mul
static Object[] test1b(int[] a, int[] b, int mask) {
    for (int i = 0; i < RANGE; i+=8) {
        b[i+0] = a[i+0] * mask; // Mul
        b[i+1] = a[i+1] * mask;
        b[i+2] = a[i+2] * mask;
        b[i+3] = a[i+3] * mask;

        b[i+4] = a[i+4] + mask; // Add
        b[i+5] = a[i+5] + mask;
        // With AlignVector, we need 8-byte alignment of vector loads/stores.
        // UseCompactObjectHeaders=false                 UseCompactObjectHeaders=true
        // adr = base + 16 + 32*i  ->  always            adr = base + 12 + 32*i  ->  never
        // -> vectorize                                  -> no vectorization
    }
    return new Object[]{ a, b };
}
414
// Variant of test1a with the 2-pack first: Add for lanes 0..1 (2-pack) and
// Mul for lanes 2..5 (4-pack). Note the tighter CPU requirement (avx2).
@Test
@IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.ADD_VI, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.MUL_VI, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
@IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.ADD_VI, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.MUL_VI, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIfAnd = {"MaxVectorSize", ">=32", "UseCompactObjectHeaders", "false"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
// Adjacent Load and Store, but split by Add/Mul
static Object[] test1c(int[] a, int[] b, int mask) {
    for (int i = 0; i < RANGE; i+=8) {
        b[i+0] = a[i+0] + mask; // Add
        b[i+1] = a[i+1] + mask;

        b[i+2] = a[i+2] * mask; // Mul
        b[i+3] = a[i+3] * mask;
        b[i+4] = a[i+4] * mask;
        b[i+5] = a[i+5] * mask;
        // With AlignVector, we need 8-byte alignment of vector loads/stores.
        // UseCompactObjectHeaders=false                 UseCompactObjectHeaders=true
        // adr = base + 16 + 32*i  ->  always            adr = base + 12 + 32*i  ->  never
        // -> vectorize                                  -> no vectorization
    }
    return new Object[]{ a, b };
}
449
// Mirror of test1c: Mul for lanes 0..1 (2-pack) and Add for lanes 2..5
// (4-pack). Requires avx2 (or asimd/rvv).
@Test
@IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.ADD_VI, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.MUL_VI, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
@IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.ADD_VI, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.MUL_VI, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIfAnd = {"MaxVectorSize", ">=32", "UseCompactObjectHeaders", "false"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
// Adjacent Load and Store, but split by Add/Mul
static Object[] test1d(int[] a, int[] b, int mask) {
    for (int i = 0; i < RANGE; i+=8) {
        b[i+0] = a[i+0] * mask; // Mul
        b[i+1] = a[i+1] * mask;

        b[i+2] = a[i+2] + mask; // Add
        b[i+3] = a[i+3] + mask;
        b[i+4] = a[i+4] + mask;
        b[i+5] = a[i+5] + mask;
        // With AlignVector, we need 8-byte alignment of vector loads/stores.
        // UseCompactObjectHeaders=false                 UseCompactObjectHeaders=true
        // adr = base + 16 + 32*i  ->  always            adr = base + 12 + 32*i  ->  never
        // -> vectorize                                  -> no vectorization
    }
    return new Object[]{ a, b };
}
484
// Split-the-load test: six adjacent loads (lanes 0..5), but the stores land in
// two groups {0,1} and {4..7}, so the load pack must be split to match.
@Test
@IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.AND_VI, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.AND_VI, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
@IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.AND_VI, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.AND_VI, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIfAnd = {"MaxVectorSize", ">=32", "UseCompactObjectHeaders", "false"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
// Split the load
//
//  0 1 2 3 4 5 - -
//  | |  \  \  \  \
//  | |   \  \  \  \
//  | |    \  \  \  \
//  0 1 - - 4  5  6  7
//
static Object[] test2a(int[] a, int[] b, int mask) {
    for (int i = 0; i < RANGE; i+=8) {
        int b0 = a[i+0] & mask;
        int b1 = a[i+1] & mask;
        int b2 = a[i+2] & mask;
        int b3 = a[i+3] & mask;
        int b4 = a[i+4] & mask;
        int b5 = a[i+5] & mask;

        b[i+0] = b0;
        b[i+1] = b1;

        b[i+4] = b2;
        b[i+5] = b3;
        b[i+6] = b4;
        b[i+7] = b5;
        // With AlignVector, we need 8-byte alignment of vector loads/stores.
        // UseCompactObjectHeaders=false                 UseCompactObjectHeaders=true
        // adr = base + 16 + 32*i  ->  always            adr = base + 12 + 32*i  ->  never
        // -> vectorize                                  -> no vectorization
    }
    return new Object[]{ a, b };
}
533
// Split-the-load test: six adjacent loads (lanes 0..5), stores in groups
// {0..3} and {6,7} — load pack splits into a 4-pack and a shifted 2-pack.
@Test
@IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.AND_VI, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.AND_VI, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
@IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.AND_VI, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.AND_VI, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIfAnd = {"MaxVectorSize", ">=32", "UseCompactObjectHeaders", "false"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
// Split the load
//
//  0 1 2 3 4 5 - -
//  | | | |  \  \
//  | | | |   \  \
//  | | | |    \  \
//  0 1 2 3 -- 6  7
//
static Object[] test2b(int[] a, int[] b, int mask) {
    for (int i = 0; i < RANGE; i+=8) {
        int b0 = a[i+0] & mask;
        int b1 = a[i+1] & mask;
        int b2 = a[i+2] & mask;
        int b3 = a[i+3] & mask;
        int b4 = a[i+4] & mask;
        int b5 = a[i+5] & mask;

        b[i+0] = b0;
        b[i+1] = b1;
        b[i+2] = b2;
        b[i+3] = b3;

        b[i+6] = b4;
        b[i+7] = b5;
        // With AlignVector, we need 8-byte alignment of vector loads/stores.
        // UseCompactObjectHeaders=false                 UseCompactObjectHeaders=true
        // adr = base + 16 + 32*i  ->  always            adr = base + 12 + 32*i  ->  never
        // -> vectorize                                  -> no vectorization
    }
    return new Object[]{ a, b };
}
582
// Split-the-load test (inverse of test2a): loads come in groups {0,1} and
// {4..7}, stores are six adjacent lanes 0..5 — store pack must split.
@Test
@IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.AND_VI, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.AND_VI, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
@IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.AND_VI, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.AND_VI, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIfAnd = {"MaxVectorSize", ">=32", "UseCompactObjectHeaders", "false"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
// Split the load
//
//  0 1 - - 4 5 6 7
//  | |    /  /  /  /
//  | |   /  /  /  /
//  | |  /  /  /  /
//  0 1 2  3  4  5 - -
//
static Object[] test2c(int[] a, int[] b, int mask) {
    for (int i = 0; i < RANGE; i+=8) {
        int b0 = a[i+0] & mask;
        int b1 = a[i+1] & mask;

        int b4 = a[i+4] & mask;
        int b5 = a[i+5] & mask;
        int b6 = a[i+6] & mask;
        int b7 = a[i+7] & mask;

        b[i+0] = b0;
        b[i+1] = b1;
        b[i+2] = b4;
        b[i+3] = b5;
        b[i+4] = b6;
        b[i+5] = b7;
        // With AlignVector, we need 8-byte alignment of vector loads/stores.
        // UseCompactObjectHeaders=false                 UseCompactObjectHeaders=true
        // adr = base + 16 + 32*i  ->  always            adr = base + 12 + 32*i  ->  never
        // -> vectorize                                  -> no vectorization
    }
    return new Object[]{ a, b };
}
631
// Split-the-load test (inverse of test2b): loads in groups {0..3} and {6,7},
// stores adjacent at lanes 0..5 — store pack splits into 4-pack + 2-pack.
@Test
@IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.AND_VI, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.AND_VI, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
@IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.AND_VI, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.AND_VI, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIfAnd = {"MaxVectorSize", ">=32", "UseCompactObjectHeaders", "false"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
// Split the load
//
//  0 1 2 3 - - 6 7
//  | | | |    /  /
//  | | | |   /  /
//  | | | |  /  /
//  0 1 2 3 4  5 - -
//
static Object[] test2d(int[] a, int[] b, int mask) {
    for (int i = 0; i < RANGE; i+=8) {
        int b0 = a[i+0] & mask;
        int b1 = a[i+1] & mask;
        int b2 = a[i+2] & mask;
        int b3 = a[i+3] & mask;

        int b6 = a[i+6] & mask;
        int b7 = a[i+7] & mask;

        b[i+0] = b0;
        b[i+1] = b1;
        b[i+2] = b2;
        b[i+3] = b3;
        b[i+4] = b6;
        b[i+5] = b7;
        // With AlignVector, we need 8-byte alignment of vector loads/stores.
        // UseCompactObjectHeaders=false                 UseCompactObjectHeaders=true
        // adr = base + 16 + 32*i  ->  always            adr = base + 12 + 32*i  ->  never
        // -> vectorize                                  -> no vectorization
    }
    return new Object[]{ a, b };
}
680
// Split-pack test with shorts: only lanes 4..7 form a copyable 4-pack; the
// neighbors (lane 3 store of val, lane 8 store of val, and the reduction over
// a1..a3) must be split off. The IR rules only require the size-4 short pack.
@Test
@IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
@IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIfAnd = {"MaxVectorSize", ">=32", "UseCompactObjectHeaders", "false"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
//  0 1 2 3 4 5 6 7 -
//  | | | | | | | |
//  | + + + | | | |
//  |       | | | |
//  | v     | | | |    v
//  | |     | | | |    |
//  1 -  -  3 4 5 6 7  8
static Object[] test3a(short[] a, short[] b, short val) {
    int sum = 0;
    for (int i = 0; i < RANGE; i+=16) {
        short a0 = a[i+0]; // required for alignment / offsets, technical limitation.

        short a1 = a[i+1]; // adjacent to 4-pack, but need to be split off
        short a2 = a[i+2];
        short a3 = a[i+3];

        short a4 = a[i+4]; // 4-pack
        short a5 = a[i+5];
        short a6 = a[i+6];
        short a7 = a[i+7];


        b[i+0] = a0; // required for alignment / offsets, technical limitation.

        sum += a1 + a2 + a3; // not packed

        b[i+3] = val; // adjacent to 4-pack but needs to be split off

        b[i+4] = a4; // 4-pack
        b[i+5] = a5;
        b[i+6] = a6;
        b[i+7] = a7;

        b[i+8] = val; // adjacent to 4-pack but needs to be split off

        // With AlignVector, we need 8-byte alignment of vector loads/stores.
        // UseCompactObjectHeaders=false                 UseCompactObjectHeaders=true
        // adr = base + 16 + 8 + 32*i  ->  always        adr = base + 12 + 8 + 32*i  ->  never
        // -> vectorize                                  -> no vectorization
    }
    return new Object[]{ a, b, new int[]{ sum } };
}
734
735 @Test
736 @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_2, "> 0",
737 IRNode.STORE_VECTOR, "> 0",
738 ".*multiversion.*", "= 0"},
739 phase = CompilePhase.PRINT_IDEAL,
740 applyIf = {"UseAutoVectorizationSpeculativeAliasingChecks", "false"},
741 applyIfPlatform = {"64-bit", "true"},
742 applyIfCPUFeatureOr = {"sse4.1", "true"})
743 // Cyclic dependency with distance 2 -> split into 2-packs
744 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
745 IRNode.STORE_VECTOR, "> 0",
746 ".*multiversion.*", "= 0"},
747 phase = CompilePhase.PRINT_IDEAL,
748 applyIfAnd = {"UseAutoVectorizationSpeculativeAliasingChecks", "true", "AlignVector", "false"},
749 applyIfPlatform = {"64-bit", "true"},
750 applyIfCPUFeatureOr = {"sse4.1", "true"})
1110
1111 b[i+ 8] = (short)(a[i+ 8] + val); // 4-pack
1112 b[i+ 9] = (short)(a[i+ 9] + val);
1113 b[i+10] = (short)(a[i+10] + val);
1114 b[i+11] = (short)(a[i+11] + val);
1115
1116 b[i+12] = (short)(a[i+12] + val); // 2-pack
1117 b[i+13] = (short)(a[i+13] + val);
1118
1119 b[i+14] = (short)(a[i+14] + val);
1120 }
1121 return new Object[]{ a, b };
1122 }
1123
// Split-pack test including reductions: lanes 0..3 feed a Mul into the int
// add-reduction, lanes 4..7 feed an And — the IR rules expect both MulVI and
// AndVI as 4-packs plus an AddReductionV (with the AddVI hoisted out of the loop).
@Test
@IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.MUL_VI, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.AND_VI, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.ADD_VI, IRNode.VECTOR_SIZE_4, "> 0", // reduction moved out of loop
              IRNode.ADD_REDUCTION_V, "> 0"},
    applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
@IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.MUL_VI, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.AND_VI, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.ADD_VI, IRNode.VECTOR_SIZE_4, "> 0", // reduction moved out of loop
              IRNode.ADD_REDUCTION_V, "> 0"},
    applyIfAnd = {"MaxVectorSize", ">=32", "UseCompactObjectHeaders", "false"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
// Split packs including reductions
static Object[] test6a(int[] a, int[] b) {
    int s = 0;
    for (int i = 0; i < RANGE; i+=8) {
        s += a[i+0] * b[i+0];
        s += a[i+1] * b[i+1];
        s += a[i+2] * b[i+2];
        s += a[i+3] * b[i+3];

        s += a[i+4] & b[i+4];
        s += a[i+5] & b[i+5];
        s += a[i+6] & b[i+6];
        s += a[i+7] & b[i+7];
        // With AlignVector, we need 8-byte alignment of vector loads/stores.
        // UseCompactObjectHeaders=false                 UseCompactObjectHeaders=true
        // adr = base + 16 + 32*i  ->  always            adr = base + 12 + 32*i  ->  never
        // -> vectorize                                  -> no vectorization
    }
    return new Object[]{ a, b, new int[]{ s } };
}
1161
// PopulateIndex test: b[i] * i uses the induction variable itself, which
// vectorizes via a PopulateIndex node. Guards against the pack-splitting bug
// where (iv+1), (iv+2), ... get packed but (iv) does not.
@Test
@IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
              IRNode.MUL_VI, "> 0",
              IRNode.POPULATE_INDEX, "> 0"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"avx2", "true", "sve", "true", "rvv", "true"})
// Index Populate:
// There can be an issue when all the (iv + 1), (iv + 2), ...
// get packed, but not (iv). Then we have a pack that is one element
// too short, and we start splitting everything in a bad way.
static Object[] test7a(int[] a, int[] b) {
    for (int i = 0; i < RANGE; i++) {
        a[i] = b[i] * i;
    }
    return new Object[]{ a, b };
}
|
284 }
285 }
286 }
287
288 static void verifyL(String name, int i, long[] g, long[] r) {
289 for (int j = 0; j < g.length; j++) {
290 if (g[j] != r[j]) {
291 throw new RuntimeException("verify " + name + ": arrays must have same content:" +
292 " gold[" + i + "][" + j + "] = " + g[j] +
293 " result[" + i + "][" + j + "] = " + r[j]);
294 }
295 }
296 }
297
// Split-pack test: the scalar pattern already comes "pre-split" into a 2-pack
// {0,1} and a 4-pack {4,5,6,7}; the IR rule expects LoadVectorI, AndVI and
// StoreVector at both vector sizes 2 and 4.
@Test
@IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.AND_VI, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.AND_VI, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIf = {"MaxVectorSize", ">=32"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
// Load and store are already split
//
//  0 1 - - 4 5 6 7
//  | |     | | | |
//  0 1 - - 4 5 6 7
static Object[] test0(int[] a, int[] b, int mask) {
    for (int i = 0; i < RANGE; i+=8) {
        // 2-pack: lanes 0 and 1.
        int b0 = a[i+0] & mask;
        int b1 = a[i+1] & mask;

        // 4-pack: lanes 4..7 (lanes 2 and 3 intentionally untouched).
        int b4 = a[i+4] & mask;
        int b5 = a[i+5] & mask;
        int b6 = a[i+6] & mask;
        int b7 = a[i+7] & mask;

        b[i+0] = b0;
        b[i+1] = b1;

        b[i+4] = b4;
        b[i+5] = b5;
        b[i+6] = b6;
        b[i+7] = b7;
    }
    return new Object[]{ a, b };
}
332
// Split-pack test: adjacent loads/stores, but the middle operation differs
// (Add for lanes 0..3, Mul for lanes 4..5), forcing a split into a 4-pack of
// AddVI and a 2-pack of MulVI.
@Test
@IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.ADD_VI, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.MUL_VI, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIf = {"MaxVectorSize", ">=32"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
// Adjacent Load and Store, but split by Add/Mul
static Object[] test1a(int[] a, int[] b, int mask) {
    for (int i = 0; i < RANGE; i+=8) {
        b[i+0] = a[i+0] + mask; // Add
        b[i+1] = a[i+1] + mask;
        b[i+2] = a[i+2] + mask;
        b[i+3] = a[i+3] + mask;

        b[i+4] = a[i+4] * mask; // Mul
        b[i+5] = a[i+5] * mask;
    }
    return new Object[]{ a, b };
}
355
// Mirror of test1a with the operations swapped: Mul for lanes 0..3 (4-pack)
// and Add for lanes 4..5 (2-pack).
@Test
@IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.ADD_VI, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.MUL_VI, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIf = {"MaxVectorSize", ">=32"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
// Adjacent Load and Store, but split by Add/Mul
static Object[] test1b(int[] a, int[] b, int mask) {
    for (int i = 0; i < RANGE; i+=8) {
        b[i+0] = a[i+0] * mask; // Mul
        b[i+1] = a[i+1] * mask;
        b[i+2] = a[i+2] * mask;
        b[i+3] = a[i+3] * mask;

        b[i+4] = a[i+4] + mask; // Add
        b[i+5] = a[i+5] + mask;
    }
    return new Object[]{ a, b };
}
378
// Variant of test1a with the 2-pack first: Add for lanes 0..1 (2-pack) and
// Mul for lanes 2..5 (4-pack). Note the tighter CPU requirement (avx2).
@Test
@IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.ADD_VI, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.MUL_VI, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIf = {"MaxVectorSize", ">=32"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
// Adjacent Load and Store, but split by Add/Mul
static Object[] test1c(int[] a, int[] b, int mask) {
    for (int i = 0; i < RANGE; i+=8) {
        b[i+0] = a[i+0] + mask; // Add
        b[i+1] = a[i+1] + mask;

        b[i+2] = a[i+2] * mask; // Mul
        b[i+3] = a[i+3] * mask;
        b[i+4] = a[i+4] * mask;
        b[i+5] = a[i+5] * mask;
    }
    return new Object[]{ a, b };
}
401
// Mirror of test1c: Mul for lanes 0..1 (2-pack) and Add for lanes 2..5
// (4-pack). Requires avx2 (or asimd/rvv).
@Test
@IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.ADD_VI, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.MUL_VI, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIf = {"MaxVectorSize", ">=32"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
// Adjacent Load and Store, but split by Add/Mul
static Object[] test1d(int[] a, int[] b, int mask) {
    for (int i = 0; i < RANGE; i+=8) {
        b[i+0] = a[i+0] * mask; // Mul
        b[i+1] = a[i+1] * mask;

        b[i+2] = a[i+2] + mask; // Add
        b[i+3] = a[i+3] + mask;
        b[i+4] = a[i+4] + mask;
        b[i+5] = a[i+5] + mask;
    }
    return new Object[]{ a, b };
}
424
// Split-the-load test: six adjacent loads (lanes 0..5), but the stores land in
// two groups {0,1} and {4..7}, so the load pack must be split to match.
@Test
@IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.AND_VI, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.AND_VI, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIf = {"MaxVectorSize", ">=32"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
// Split the load
//
//  0 1 2 3 4 5 - -
//  | |  \  \  \  \
//  | |   \  \  \  \
//  | |    \  \  \  \
//  0 1 - - 4  5  6  7
//
static Object[] test2a(int[] a, int[] b, int mask) {
    for (int i = 0; i < RANGE; i+=8) {
        int b0 = a[i+0] & mask;
        int b1 = a[i+1] & mask;
        int b2 = a[i+2] & mask;
        int b3 = a[i+3] & mask;
        int b4 = a[i+4] & mask;
        int b5 = a[i+5] & mask;

        b[i+0] = b0;
        b[i+1] = b1;

        b[i+4] = b2;
        b[i+5] = b3;
        b[i+6] = b4;
        b[i+7] = b5;
    }
    return new Object[]{ a, b };
}
461
// Split-the-load test: six adjacent loads (lanes 0..5), stores in groups
// {0..3} and {6,7} — load pack splits into a 4-pack and a shifted 2-pack.
@Test
@IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.AND_VI, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.AND_VI, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIf = {"MaxVectorSize", ">=32"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
// Split the load
//
//  0 1 2 3 4 5 - -
//  | | | |  \  \
//  | | | |   \  \
//  | | | |    \  \
//  0 1 2 3 -- 6  7
//
static Object[] test2b(int[] a, int[] b, int mask) {
    for (int i = 0; i < RANGE; i+=8) {
        int b0 = a[i+0] & mask;
        int b1 = a[i+1] & mask;
        int b2 = a[i+2] & mask;
        int b3 = a[i+3] & mask;
        int b4 = a[i+4] & mask;
        int b5 = a[i+5] & mask;

        b[i+0] = b0;
        b[i+1] = b1;
        b[i+2] = b2;
        b[i+3] = b3;

        b[i+6] = b4;
        b[i+7] = b5;
    }
    return new Object[]{ a, b };
}
498
// Split-the-load test (inverse of test2a): loads come in groups {0,1} and
// {4..7}, stores are six adjacent lanes 0..5 — store pack must split.
@Test
@IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.AND_VI, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.AND_VI, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIf = {"MaxVectorSize", ">=32"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
// Split the load
//
//  0 1 - - 4 5 6 7
//  | |    /  /  /  /
//  | |   /  /  /  /
//  | |  /  /  /  /
//  0 1 2  3  4  5 - -
//
static Object[] test2c(int[] a, int[] b, int mask) {
    for (int i = 0; i < RANGE; i+=8) {
        int b0 = a[i+0] & mask;
        int b1 = a[i+1] & mask;

        int b4 = a[i+4] & mask;
        int b5 = a[i+5] & mask;
        int b6 = a[i+6] & mask;
        int b7 = a[i+7] & mask;

        b[i+0] = b0;
        b[i+1] = b1;
        b[i+2] = b4;
        b[i+3] = b5;
        b[i+4] = b6;
        b[i+5] = b7;
    }
    return new Object[]{ a, b };
}
535
// Split-the-load test (inverse of test2b): loads in groups {0..3} and {6,7},
// stores adjacent at lanes 0..5 — store pack splits into 4-pack + 2-pack.
@Test
@IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.AND_VI, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.AND_VI, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIf = {"MaxVectorSize", ">=32"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
// Split the load
//
//  0 1 2 3 - - 6 7
//  | | | |    /  /
//  | | | |   /  /
//  | | | |  /  /
//  0 1 2 3 4  5 - -
//
static Object[] test2d(int[] a, int[] b, int mask) {
    for (int i = 0; i < RANGE; i+=8) {
        int b0 = a[i+0] & mask;
        int b1 = a[i+1] & mask;
        int b2 = a[i+2] & mask;
        int b3 = a[i+3] & mask;

        int b6 = a[i+6] & mask;
        int b7 = a[i+7] & mask;

        b[i+0] = b0;
        b[i+1] = b1;
        b[i+2] = b2;
        b[i+3] = b3;
        b[i+4] = b6;
        b[i+5] = b7;
    }
    return new Object[]{ a, b };
}
572
// Split-pack test with shorts: only lanes 4..7 form a copyable 4-pack; the
// neighbors (lane 3 store of val, lane 8 store of val, and the reduction over
// a1..a3) must be split off. The IR rule only requires the size-4 short pack.
@Test
@IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIf = {"MaxVectorSize", ">=32"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
//  0 1 2 3 4 5 6 7 -
//  | | | | | | | |
//  | + + + | | | |
//  |       | | | |
//  | v     | | | |    v
//  | |     | | | |    |
//  1 -  -  3 4 5 6 7  8
static Object[] test3a(short[] a, short[] b, short val) {
    int sum = 0;
    for (int i = 0; i < RANGE; i+=16) {
        short a0 = a[i+0]; // required for alignment / offsets, technical limitation.

        short a1 = a[i+1]; // adjacent to 4-pack, but need to be split off
        short a2 = a[i+2];
        short a3 = a[i+3];

        short a4 = a[i+4]; // 4-pack
        short a5 = a[i+5];
        short a6 = a[i+6];
        short a7 = a[i+7];


        b[i+0] = a0; // required for alignment / offsets, technical limitation.

        sum += a1 + a2 + a3; // not packed

        b[i+3] = val; // adjacent to 4-pack but needs to be split off

        b[i+4] = a4; // 4-pack
        b[i+5] = a5;
        b[i+6] = a6;
        b[i+7] = a7;

        b[i+8] = val; // adjacent to 4-pack but needs to be split off
    }
    return new Object[]{ a, b, new int[]{ sum } };
}
616
617 @Test
618 @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_2, "> 0",
619 IRNode.STORE_VECTOR, "> 0",
620 ".*multiversion.*", "= 0"},
621 phase = CompilePhase.PRINT_IDEAL,
622 applyIf = {"UseAutoVectorizationSpeculativeAliasingChecks", "false"},
623 applyIfPlatform = {"64-bit", "true"},
624 applyIfCPUFeatureOr = {"sse4.1", "true"})
625 // Cyclic dependency with distance 2 -> split into 2-packs
626 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
627 IRNode.STORE_VECTOR, "> 0",
628 ".*multiversion.*", "= 0"},
629 phase = CompilePhase.PRINT_IDEAL,
630 applyIfAnd = {"UseAutoVectorizationSpeculativeAliasingChecks", "true", "AlignVector", "false"},
631 applyIfPlatform = {"64-bit", "true"},
632 applyIfCPUFeatureOr = {"sse4.1", "true"})
992
993 b[i+ 8] = (short)(a[i+ 8] + val); // 4-pack
994 b[i+ 9] = (short)(a[i+ 9] + val);
995 b[i+10] = (short)(a[i+10] + val);
996 b[i+11] = (short)(a[i+11] + val);
997
998 b[i+12] = (short)(a[i+12] + val); // 2-pack
999 b[i+13] = (short)(a[i+13] + val);
1000
1001 b[i+14] = (short)(a[i+14] + val);
1002 }
1003 return new Object[]{ a, b };
1004 }
1005
// Split-pack test including reductions: lanes 0..3 feed a Mul into the int
// add-reduction, lanes 4..7 feed an And — the IR rule expects both MulVI and
// AndVI as 4-packs plus an AddReductionV (with the AddVI hoisted out of the loop).
@Test
@IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.MUL_VI, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.AND_VI, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.ADD_VI, IRNode.VECTOR_SIZE_4, "> 0", // reduction moved out of loop
              IRNode.ADD_REDUCTION_V, "> 0"},
    applyIf = {"MaxVectorSize", ">=32"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
// Split packs including reductions
static Object[] test6a(int[] a, int[] b) {
    int s = 0;
    for (int i = 0; i < RANGE; i+=8) {
        s += a[i+0] * b[i+0];
        s += a[i+1] * b[i+1];
        s += a[i+2] * b[i+2];
        s += a[i+3] * b[i+3];

        s += a[i+4] & b[i+4];
        s += a[i+5] & b[i+5];
        s += a[i+6] & b[i+6];
        s += a[i+7] & b[i+7];
    }
    return new Object[]{ a, b, new int[]{ s } };
}
1031
// PopulateIndex test: b[i] * i uses the induction variable itself, which
// vectorizes via a PopulateIndex node. Guards against the pack-splitting bug
// where (iv+1), (iv+2), ... get packed but (iv) does not.
@Test
@IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
              IRNode.MUL_VI, "> 0",
              IRNode.POPULATE_INDEX, "> 0"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"avx2", "true", "sve", "true", "rvv", "true"})
// Index Populate:
// There can be an issue when all the (iv + 1), (iv + 2), ...
// get packed, but not (iv). Then we have a pack that is one element
// too short, and we start splitting everything in a bad way.
static Object[] test7a(int[] a, int[] b) {
    for (int i = 0; i < RANGE; i++) {
        a[i] = b[i] * i;
    }
    return new Object[]{ a, b };
}
|