283 }
284 }
285 }
286
287 static void verifyL(String name, int i, long[] g, long[] r) {
288 for (int j = 0; j < g.length; j++) {
289 if (g[j] != r[j]) {
290 throw new RuntimeException("verify " + name + ": arrays must have same content:" +
291 " gold[" + i + "][" + j + "] = " + g[j] +
292 " result[" + i + "][" + j + "] = " + r[j]);
293 }
294 }
295 }
296
// Loads and stores already come pre-split into a 2-pack (lanes 0,1) and a
// 4-pack (lanes 4..7); the @IR rules expect LoadVector/AndV of both size 2 and size 4.
@Test
@IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.AND_VI, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.AND_VI, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
@IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.AND_VI, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.AND_VI, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIfAnd = {"MaxVectorSize", ">=32", "UseCompactObjectHeaders", "false"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
// Load and store are already split
//
// 0 1 - - 4 5 6 7
// | |     | | | |
// 0 1 - - 4 5 6 7
static Object[] test0(int[] a, int[] b, int mask) {
    for (int i = 0; i < RANGE; i+=8) {
        int b0 = a[i+0] & mask;
        int b1 = a[i+1] & mask;

        int b4 = a[i+4] & mask;
        int b5 = a[i+5] & mask;
        int b6 = a[i+6] & mask;
        int b7 = a[i+7] & mask;

        b[i+0] = b0;
        b[i+1] = b1;

        b[i+4] = b4;
        b[i+5] = b5;
        b[i+6] = b6;
        b[i+7] = b7;
        // With AlignVector, we need 8-byte alignment of vector loads/stores.
        // UseCompactObjectHeaders=false              UseCompactObjectHeaders=true
        // adr = base + 16 + 32*i -> always           adr = base + 12 + 32*i -> never
        // -> vectorize                               -> no vectorization
    }
    return new Object[]{ a, b };
}
343
// Adjacent loads/stores covering lanes 0..5, but the operation differs: four Adds
// then two Muls. SuperWord must split into an AddV 4-pack and a MulV 2-pack.
@Test
@IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.ADD_VI, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.MUL_VI, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
@IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.ADD_VI, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.MUL_VI, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIfAnd = {"MaxVectorSize", ">=32", "UseCompactObjectHeaders", "false"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
// Adjacent Load and Store, but split by Add/Mul
static Object[] test1a(int[] a, int[] b, int mask) {
    for (int i = 0; i < RANGE; i+=8) {
        b[i+0] = a[i+0] + mask; // Add
        b[i+1] = a[i+1] + mask;
        b[i+2] = a[i+2] + mask;
        b[i+3] = a[i+3] + mask;

        b[i+4] = a[i+4] * mask; // Mul
        b[i+5] = a[i+5] * mask;
        // With AlignVector, we need 8-byte alignment of vector loads/stores.
        // UseCompactObjectHeaders=false              UseCompactObjectHeaders=true
        // adr = base + 16 + 32*i -> always           adr = base + 12 + 32*i -> never
        // -> vectorize                               -> no vectorization
    }
    return new Object[]{ a, b };
}
378
// Mirror of test1a: four Muls then two Adds. Expects a MulV 4-pack and an AddV 2-pack.
@Test
@IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.ADD_VI, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.MUL_VI, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
@IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.ADD_VI, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.MUL_VI, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIfAnd = {"MaxVectorSize", ">=32", "UseCompactObjectHeaders", "false"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
// Adjacent Load and Store, but split by Add/Mul
static Object[] test1b(int[] a, int[] b, int mask) {
    for (int i = 0; i < RANGE; i+=8) {
        b[i+0] = a[i+0] * mask; // Mul
        b[i+1] = a[i+1] * mask;
        b[i+2] = a[i+2] * mask;
        b[i+3] = a[i+3] * mask;

        b[i+4] = a[i+4] + mask; // Add
        b[i+5] = a[i+5] + mask;
        // With AlignVector, we need 8-byte alignment of vector loads/stores.
        // UseCompactObjectHeaders=false              UseCompactObjectHeaders=true
        // adr = base + 16 + 32*i -> always           adr = base + 12 + 32*i -> never
        // -> vectorize                               -> no vectorization
    }
    return new Object[]{ a, b };
}
413
// Two Adds followed by four Muls: expects an AddV 2-pack and a MulV 4-pack.
// Note the stricter avx2 requirement compared to test1a/test1b.
@Test
@IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.ADD_VI, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.MUL_VI, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
@IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.ADD_VI, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.MUL_VI, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIfAnd = {"MaxVectorSize", ">=32", "UseCompactObjectHeaders", "false"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
// Adjacent Load and Store, but split by Add/Mul
static Object[] test1c(int[] a, int[] b, int mask) {
    for (int i = 0; i < RANGE; i+=8) {
        b[i+0] = a[i+0] + mask; // Add
        b[i+1] = a[i+1] + mask;

        b[i+2] = a[i+2] * mask; // Mul
        b[i+3] = a[i+3] * mask;
        b[i+4] = a[i+4] * mask;
        b[i+5] = a[i+5] * mask;
        // With AlignVector, we need 8-byte alignment of vector loads/stores.
        // UseCompactObjectHeaders=false              UseCompactObjectHeaders=true
        // adr = base + 16 + 32*i -> always           adr = base + 12 + 32*i -> never
        // -> vectorize                               -> no vectorization
    }
    return new Object[]{ a, b };
}
448
// Mirror of test1c: two Muls followed by four Adds; expects MulV 2-pack and AddV 4-pack.
@Test
@IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.ADD_VI, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.MUL_VI, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
@IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.ADD_VI, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.MUL_VI, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIfAnd = {"MaxVectorSize", ">=32", "UseCompactObjectHeaders", "false"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
// Adjacent Load and Store, but split by Add/Mul
static Object[] test1d(int[] a, int[] b, int mask) {
    for (int i = 0; i < RANGE; i+=8) {
        b[i+0] = a[i+0] * mask; // Mul
        b[i+1] = a[i+1] * mask;

        b[i+2] = a[i+2] + mask; // Add
        b[i+3] = a[i+3] + mask;
        b[i+4] = a[i+4] + mask;
        b[i+5] = a[i+5] + mask;
        // With AlignVector, we need 8-byte alignment of vector loads/stores.
        // UseCompactObjectHeaders=false              UseCompactObjectHeaders=true
        // adr = base + 16 + 32*i -> always           adr = base + 12 + 32*i -> never
        // -> vectorize                               -> no vectorization
    }
    return new Object[]{ a, b };
}
483
// Six contiguous loads (lanes 0..5) feed a 2-pack of stores (0,1) and a 4-pack
// (4..7); the contiguous load pack must be split to match the store packs.
@Test
@IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.AND_VI, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.AND_VI, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
@IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.AND_VI, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.AND_VI, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIfAnd = {"MaxVectorSize", ">=32", "UseCompactObjectHeaders", "false"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
// Split the load
//
// 0 1 2 3 4 5 - -
// | | \ \ \ \
// | |  \ \ \ \
// | |   \ \ \ \
// 0 1 - - 4 5 6 7
//
static Object[] test2a(int[] a, int[] b, int mask) {
    for (int i = 0; i < RANGE; i+=8) {
        int b0 = a[i+0] & mask;
        int b1 = a[i+1] & mask;
        int b2 = a[i+2] & mask;
        int b3 = a[i+3] & mask;
        int b4 = a[i+4] & mask;
        int b5 = a[i+5] & mask;

        b[i+0] = b0;
        b[i+1] = b1;

        b[i+4] = b2;
        b[i+5] = b3;
        b[i+6] = b4;
        b[i+7] = b5;
        // With AlignVector, we need 8-byte alignment of vector loads/stores.
        // UseCompactObjectHeaders=false              UseCompactObjectHeaders=true
        // adr = base + 16 + 32*i -> always           adr = base + 12 + 32*i -> never
        // -> vectorize                               -> no vectorization
    }
    return new Object[]{ a, b };
}
532
// Six contiguous loads split into a store 4-pack (lanes 0..3) and a shifted
// store 2-pack (lanes 6,7) -> the load pack must be split 4+2.
@Test
@IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.AND_VI, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.AND_VI, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
@IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.AND_VI, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.AND_VI, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIfAnd = {"MaxVectorSize", ">=32", "UseCompactObjectHeaders", "false"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
// Split the load
//
// 0 1 2 3 4 5 - -
// | | | | \ \
// | | | |  \ \
// | | | |   \ \
// 0 1 2 3 -- 6 7
//
static Object[] test2b(int[] a, int[] b, int mask) {
    for (int i = 0; i < RANGE; i+=8) {
        int b0 = a[i+0] & mask;
        int b1 = a[i+1] & mask;
        int b2 = a[i+2] & mask;
        int b3 = a[i+3] & mask;
        int b4 = a[i+4] & mask;
        int b5 = a[i+5] & mask;

        b[i+0] = b0;
        b[i+1] = b1;
        b[i+2] = b2;
        b[i+3] = b3;

        b[i+6] = b4;
        b[i+7] = b5;
        // With AlignVector, we need 8-byte alignment of vector loads/stores.
        // UseCompactObjectHeaders=false              UseCompactObjectHeaders=true
        // adr = base + 16 + 32*i -> always           adr = base + 12 + 32*i -> never
        // -> vectorize                               -> no vectorization
    }
    return new Object[]{ a, b };
}
581
// Loads come pre-split (lanes 0,1 and 4..7) but the stores are contiguous
// (lanes 0..5) -> the contiguous store side forces a 2+4 split of the packs.
@Test
@IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.AND_VI, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.AND_VI, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
@IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.AND_VI, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.AND_VI, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIfAnd = {"MaxVectorSize", ">=32", "UseCompactObjectHeaders", "false"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
// Split the load
//
// 0 1 - - 4 5 6 7
// | |    / / / /
// | |   / / / /
// | |  / / / /
// 0 1 2 3 4 5 - -
//
static Object[] test2c(int[] a, int[] b, int mask) {
    for (int i = 0; i < RANGE; i+=8) {
        int b0 = a[i+0] & mask;
        int b1 = a[i+1] & mask;

        int b4 = a[i+4] & mask;
        int b5 = a[i+5] & mask;
        int b6 = a[i+6] & mask;
        int b7 = a[i+7] & mask;

        b[i+0] = b0;
        b[i+1] = b1;
        b[i+2] = b4;
        b[i+3] = b5;
        b[i+4] = b6;
        b[i+5] = b7;
        // With AlignVector, we need 8-byte alignment of vector loads/stores.
        // UseCompactObjectHeaders=false              UseCompactObjectHeaders=true
        // adr = base + 16 + 32*i -> always           adr = base + 12 + 32*i -> never
        // -> vectorize                               -> no vectorization
    }
    return new Object[]{ a, b };
}
630
// Loads pre-split as 4-pack (lanes 0..3) plus shifted 2-pack (lanes 6,7);
// stores are contiguous (lanes 0..5) -> packs must be split 4+2.
@Test
@IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.AND_VI, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.AND_VI, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
@IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.AND_VI, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.AND_VI, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIfAnd = {"MaxVectorSize", ">=32", "UseCompactObjectHeaders", "false"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
// Split the load
//
// 0 1 2 3 - - 6 7
// | | | |    / /
// | | | |   / /
// | | | |  / /
// 0 1 2 3 4 5 - -
//
static Object[] test2d(int[] a, int[] b, int mask) {
    for (int i = 0; i < RANGE; i+=8) {
        int b0 = a[i+0] & mask;
        int b1 = a[i+1] & mask;
        int b2 = a[i+2] & mask;
        int b3 = a[i+3] & mask;

        int b6 = a[i+6] & mask;
        int b7 = a[i+7] & mask;

        b[i+0] = b0;
        b[i+1] = b1;
        b[i+2] = b2;
        b[i+3] = b3;
        b[i+4] = b6;
        b[i+5] = b7;
        // With AlignVector, we need 8-byte alignment of vector loads/stores.
        // UseCompactObjectHeaders=false              UseCompactObjectHeaders=true
        // adr = base + 16 + 32*i -> always           adr = base + 12 + 32*i -> never
        // -> vectorize                               -> no vectorization
    }
    return new Object[]{ a, b };
}
679
// Short-typed variant: only the middle 4-pack (lanes 4..7) should vectorize;
// the neighbors (a1..a3 feeding a scalar sum, and the val stores at i+3/i+8)
// must be split off. @IR expects a LoadVector of size 4.
@Test
@IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
@IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIfAnd = {"MaxVectorSize", ">=32", "UseCompactObjectHeaders", "false"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
// 0 1 2 3 4 5 6 7 -
// | | | | | | | |
// | + + + | | | |
// | |     | | | |
// | v     | | | |  v
// | |     | | | |  |
// 1 - - 3 4 5 6 7  8
static Object[] test3a(short[] a, short[] b, short val) {
    int sum = 0;
    for (int i = 0; i < RANGE; i+=16) {
        short a0 = a[i+0]; // required for alignment / offsets, technical limitation.

        short a1 = a[i+1]; // adjacent to 4-pack, but need to be split off
        short a2 = a[i+2];
        short a3 = a[i+3];

        short a4 = a[i+4]; // 4-pack
        short a5 = a[i+5];
        short a6 = a[i+6];
        short a7 = a[i+7];


        b[i+0] = a0; // required for alignment / offsets, technical limitation.

        sum += a1 + a2 + a3; // not packed

        b[i+3] = val; // adjacent to 4-pack but needs to be split off

        b[i+4] = a4; // 4-pack
        b[i+5] = a5;
        b[i+6] = a6;
        b[i+7] = a7;

        b[i+8] = val; // adjacent to 4-pack but needs to be split off

        // With AlignVector, we need 8-byte alignment of vector loads/stores.
        // UseCompactObjectHeaders=false                  UseCompactObjectHeaders=true
        // adr = base + 16 + 8 + 32*i -> always           adr = base + 12 + 8 + 32*i -> never
        // -> vectorize                                   -> no vectorization
    }
    return new Object[]{ a, b, new int[]{ sum } };
}
733
734 @Test
735 @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_2, "> 0",
736 IRNode.STORE_VECTOR, "> 0",
737 ".*multiversion.*", "= 0"},
738 phase = CompilePhase.PRINT_IDEAL,
739 applyIf = {"UseAutoVectorizationSpeculativeAliasingChecks", "false"},
740 applyIfPlatform = {"64-bit", "true"},
741 applyIfCPUFeatureOr = {"sse4.1", "true"})
742 // Cyclic dependency with distance 2 -> split into 2-packs
743 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
744 IRNode.STORE_VECTOR, "> 0",
745 ".*multiversion.*", "= 0"},
746 phase = CompilePhase.PRINT_IDEAL,
747 applyIfAnd = {"UseAutoVectorizationSpeculativeAliasingChecks", "true", "AlignVector", "false"},
748 applyIfPlatform = {"64-bit", "true"},
749 applyIfCPUFeatureOr = {"sse4.1", "true"})
1109
1110 b[i+ 8] = (short)(a[i+ 8] + val); // 4-pack
1111 b[i+ 9] = (short)(a[i+ 9] + val);
1112 b[i+10] = (short)(a[i+10] + val);
1113 b[i+11] = (short)(a[i+11] + val);
1114
1115 b[i+12] = (short)(a[i+12] + val); // 2-pack
1116 b[i+13] = (short)(a[i+13] + val);
1117
1118 b[i+14] = (short)(a[i+14] + val);
1119 }
1120 return new Object[]{ a, b };
1121 }
1122
// Reduction variant: the Mul 4-pack and And 4-pack both feed the same scalar
// accumulator 's'. Expects vectorized Mul/And plus an AddV (the reduction
// vector accumulation moved out of the loop) and a final ADD_REDUCTION_V.
@Test
@IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.MUL_VI, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.AND_VI, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.ADD_VI, IRNode.VECTOR_SIZE_4, "> 0", // reduction moved out of loop
              IRNode.ADD_REDUCTION_V, "> 0"},
    applyIfAnd = {"MaxVectorSize", ">=32", "AlignVector", "false"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
@IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.MUL_VI, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.AND_VI, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.ADD_VI, IRNode.VECTOR_SIZE_4, "> 0", // reduction moved out of loop
              IRNode.ADD_REDUCTION_V, "> 0"},
    applyIfAnd = {"MaxVectorSize", ">=32", "UseCompactObjectHeaders", "false"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
// Split packs including reductions
static Object[] test6a(int[] a, int[] b) {
    int s = 0;
    for (int i = 0; i < RANGE; i+=8) {
        s += a[i+0] * b[i+0];
        s += a[i+1] * b[i+1];
        s += a[i+2] * b[i+2];
        s += a[i+3] * b[i+3];

        s += a[i+4] & b[i+4];
        s += a[i+5] & b[i+5];
        s += a[i+6] & b[i+6];
        s += a[i+7] & b[i+7];
        // With AlignVector, we need 8-byte alignment of vector loads/stores.
        // UseCompactObjectHeaders=false              UseCompactObjectHeaders=true
        // adr = base + 16 + 32*i -> always           adr = base + 12 + 32*i -> never
        // -> vectorize                               -> no vectorization
    }
    return new Object[]{ a, b, new int[]{ s } };
}
1160
// Uses the induction variable 'i' as a multiplicand, so vectorization needs a
// PopulateIndex node. Guards against the bad-split case described below.
@Test
@IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
              IRNode.MUL_VI, "> 0",
              IRNode.POPULATE_INDEX, "> 0"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"avx2", "true", "sve", "true", "rvv", "true"})
// Index Populate:
// There can be an issue when all the (iv + 1), (iv + 2), ...
// get packed, but not (iv). Then we have a pack that is one element
// too short, and we start splitting everything in a bad way.
static Object[] test7a(int[] a, int[] b) {
    for (int i = 0; i < RANGE; i++) {
        a[i] = b[i] * i;
    }
    return new Object[]{ a, b };
}
|
283 }
284 }
285 }
286
287 static void verifyL(String name, int i, long[] g, long[] r) {
288 for (int j = 0; j < g.length; j++) {
289 if (g[j] != r[j]) {
290 throw new RuntimeException("verify " + name + ": arrays must have same content:" +
291 " gold[" + i + "][" + j + "] = " + g[j] +
292 " result[" + i + "][" + j + "] = " + r[j]);
293 }
294 }
295 }
296
// Loads and stores already come pre-split into a 2-pack (lanes 0,1) and a
// 4-pack (lanes 4..7); @IR expects LoadVector/AndV of both size 2 and size 4.
@Test
@IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.AND_VI, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.AND_VI, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIf = {"MaxVectorSize", ">=32"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
// Load and store are already split
//
// 0 1 - - 4 5 6 7
// | |     | | | |
// 0 1 - - 4 5 6 7
static Object[] test0(int[] a, int[] b, int mask) {
    for (int i = 0; i < RANGE; i+=8) {
        int b0 = a[i+0] & mask;
        int b1 = a[i+1] & mask;

        int b4 = a[i+4] & mask;
        int b5 = a[i+5] & mask;
        int b6 = a[i+6] & mask;
        int b7 = a[i+7] & mask;

        b[i+0] = b0;
        b[i+1] = b1;

        b[i+4] = b4;
        b[i+5] = b5;
        b[i+6] = b6;
        b[i+7] = b7;
    }
    return new Object[]{ a, b };
}
331
// Adjacent loads/stores on lanes 0..5 with differing ops: four Adds then two
// Muls. SuperWord must split into an AddV 4-pack and a MulV 2-pack.
@Test
@IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.ADD_VI, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.MUL_VI, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIf = {"MaxVectorSize", ">=32"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
// Adjacent Load and Store, but split by Add/Mul
static Object[] test1a(int[] a, int[] b, int mask) {
    for (int i = 0; i < RANGE; i+=8) {
        b[i+0] = a[i+0] + mask; // Add
        b[i+1] = a[i+1] + mask;
        b[i+2] = a[i+2] + mask;
        b[i+3] = a[i+3] + mask;

        b[i+4] = a[i+4] * mask; // Mul
        b[i+5] = a[i+5] * mask;
    }
    return new Object[]{ a, b };
}
354
// Mirror of test1a: four Muls then two Adds; expects MulV 4-pack and AddV 2-pack.
@Test
@IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.ADD_VI, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.MUL_VI, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIf = {"MaxVectorSize", ">=32"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
// Adjacent Load and Store, but split by Add/Mul
static Object[] test1b(int[] a, int[] b, int mask) {
    for (int i = 0; i < RANGE; i+=8) {
        b[i+0] = a[i+0] * mask; // Mul
        b[i+1] = a[i+1] * mask;
        b[i+2] = a[i+2] * mask;
        b[i+3] = a[i+3] * mask;

        b[i+4] = a[i+4] + mask; // Add
        b[i+5] = a[i+5] + mask;
    }
    return new Object[]{ a, b };
}
377
// Two Adds followed by four Muls: expects an AddV 2-pack and a MulV 4-pack.
// Note the stricter avx2 requirement compared to test1a/test1b.
@Test
@IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.ADD_VI, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.MUL_VI, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIf = {"MaxVectorSize", ">=32"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
// Adjacent Load and Store, but split by Add/Mul
static Object[] test1c(int[] a, int[] b, int mask) {
    for (int i = 0; i < RANGE; i+=8) {
        b[i+0] = a[i+0] + mask; // Add
        b[i+1] = a[i+1] + mask;

        b[i+2] = a[i+2] * mask; // Mul
        b[i+3] = a[i+3] * mask;
        b[i+4] = a[i+4] * mask;
        b[i+5] = a[i+5] * mask;
    }
    return new Object[]{ a, b };
}
400
// Mirror of test1c: two Muls followed by four Adds; expects MulV 2-pack and AddV 4-pack.
@Test
@IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.ADD_VI, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.MUL_VI, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIf = {"MaxVectorSize", ">=32"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"avx2", "true", "asimd", "true", "rvv", "true"})
// Adjacent Load and Store, but split by Add/Mul
static Object[] test1d(int[] a, int[] b, int mask) {
    for (int i = 0; i < RANGE; i+=8) {
        b[i+0] = a[i+0] * mask; // Mul
        b[i+1] = a[i+1] * mask;

        b[i+2] = a[i+2] + mask; // Add
        b[i+3] = a[i+3] + mask;
        b[i+4] = a[i+4] + mask;
        b[i+5] = a[i+5] + mask;
    }
    return new Object[]{ a, b };
}
423
// Six contiguous loads (lanes 0..5) feed a 2-pack of stores (0,1) and a 4-pack
// (4..7); the contiguous load pack must be split to match the store packs.
@Test
@IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.AND_VI, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.AND_VI, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIf = {"MaxVectorSize", ">=32"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
// Split the load
//
// 0 1 2 3 4 5 - -
// | | \ \ \ \
// | |  \ \ \ \
// | |   \ \ \ \
// 0 1 - - 4 5 6 7
//
static Object[] test2a(int[] a, int[] b, int mask) {
    for (int i = 0; i < RANGE; i+=8) {
        int b0 = a[i+0] & mask;
        int b1 = a[i+1] & mask;
        int b2 = a[i+2] & mask;
        int b3 = a[i+3] & mask;
        int b4 = a[i+4] & mask;
        int b5 = a[i+5] & mask;

        b[i+0] = b0;
        b[i+1] = b1;

        b[i+4] = b2;
        b[i+5] = b3;
        b[i+6] = b4;
        b[i+7] = b5;
    }
    return new Object[]{ a, b };
}
460
// Six contiguous loads split into a store 4-pack (lanes 0..3) and a shifted
// store 2-pack (lanes 6,7) -> the load pack must be split 4+2.
@Test
@IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.AND_VI, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.AND_VI, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIf = {"MaxVectorSize", ">=32"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
// Split the load
//
// 0 1 2 3 4 5 - -
// | | | | \ \
// | | | |  \ \
// | | | |   \ \
// 0 1 2 3 -- 6 7
//
static Object[] test2b(int[] a, int[] b, int mask) {
    for (int i = 0; i < RANGE; i+=8) {
        int b0 = a[i+0] & mask;
        int b1 = a[i+1] & mask;
        int b2 = a[i+2] & mask;
        int b3 = a[i+3] & mask;
        int b4 = a[i+4] & mask;
        int b5 = a[i+5] & mask;

        b[i+0] = b0;
        b[i+1] = b1;
        b[i+2] = b2;
        b[i+3] = b3;

        b[i+6] = b4;
        b[i+7] = b5;
    }
    return new Object[]{ a, b };
}
497
// Loads come pre-split (lanes 0,1 and 4..7) but the stores are contiguous
// (lanes 0..5) -> the contiguous store side forces a 2+4 split of the packs.
@Test
@IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.AND_VI, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.AND_VI, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIf = {"MaxVectorSize", ">=32"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
// Split the load
//
// 0 1 - - 4 5 6 7
// | |    / / / /
// | |   / / / /
// | |  / / / /
// 0 1 2 3 4 5 - -
//
static Object[] test2c(int[] a, int[] b, int mask) {
    for (int i = 0; i < RANGE; i+=8) {
        int b0 = a[i+0] & mask;
        int b1 = a[i+1] & mask;

        int b4 = a[i+4] & mask;
        int b5 = a[i+5] & mask;
        int b6 = a[i+6] & mask;
        int b7 = a[i+7] & mask;

        b[i+0] = b0;
        b[i+1] = b1;
        b[i+2] = b4;
        b[i+3] = b5;
        b[i+4] = b6;
        b[i+5] = b7;
    }
    return new Object[]{ a, b };
}
534
// Loads pre-split as 4-pack (lanes 0..3) plus shifted 2-pack (lanes 6,7);
// stores are contiguous (lanes 0..5) -> packs must be split 4+2.
@Test
@IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.AND_VI, IRNode.VECTOR_SIZE_2, "> 0",
              IRNode.AND_VI, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIf = {"MaxVectorSize", ">=32"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
// Split the load
//
// 0 1 2 3 - - 6 7
// | | | |    / /
// | | | |   / /
// | | | |  / /
// 0 1 2 3 4 5 - -
//
static Object[] test2d(int[] a, int[] b, int mask) {
    for (int i = 0; i < RANGE; i+=8) {
        int b0 = a[i+0] & mask;
        int b1 = a[i+1] & mask;
        int b2 = a[i+2] & mask;
        int b3 = a[i+3] & mask;

        int b6 = a[i+6] & mask;
        int b7 = a[i+7] & mask;

        b[i+0] = b0;
        b[i+1] = b1;
        b[i+2] = b2;
        b[i+3] = b3;
        b[i+4] = b6;
        b[i+5] = b7;
    }
    return new Object[]{ a, b };
}
571
// Short-typed variant: only the middle 4-pack (lanes 4..7) should vectorize;
// the neighbors (a1..a3 feeding a scalar sum, and the val stores at i+3/i+8)
// must be split off. @IR expects a LoadVector of size 4.
@Test
@IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.STORE_VECTOR, "> 0"},
    applyIf = {"MaxVectorSize", ">=32"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
// 0 1 2 3 4 5 6 7 -
// | | | | | | | |
// | + + + | | | |
// | |     | | | |
// | v     | | | |  v
// | |     | | | |  |
// 1 - - 3 4 5 6 7  8
static Object[] test3a(short[] a, short[] b, short val) {
    int sum = 0;
    for (int i = 0; i < RANGE; i+=16) {
        short a0 = a[i+0]; // required for alignment / offsets, technical limitation.

        short a1 = a[i+1]; // adjacent to 4-pack, but need to be split off
        short a2 = a[i+2];
        short a3 = a[i+3];

        short a4 = a[i+4]; // 4-pack
        short a5 = a[i+5];
        short a6 = a[i+6];
        short a7 = a[i+7];


        b[i+0] = a0; // required for alignment / offsets, technical limitation.

        sum += a1 + a2 + a3; // not packed

        b[i+3] = val; // adjacent to 4-pack but needs to be split off

        b[i+4] = a4; // 4-pack
        b[i+5] = a5;
        b[i+6] = a6;
        b[i+7] = a7;

        b[i+8] = val; // adjacent to 4-pack but needs to be split off
    }
    return new Object[]{ a, b, new int[]{ sum } };
}
615
616 @Test
617 @IR(counts = {IRNode.LOAD_VECTOR_S, IRNode.VECTOR_SIZE_2, "> 0",
618 IRNode.STORE_VECTOR, "> 0",
619 ".*multiversion.*", "= 0"},
620 phase = CompilePhase.PRINT_IDEAL,
621 applyIf = {"UseAutoVectorizationSpeculativeAliasingChecks", "false"},
622 applyIfPlatform = {"64-bit", "true"},
623 applyIfCPUFeatureOr = {"sse4.1", "true"})
624 // Cyclic dependency with distance 2 -> split into 2-packs
625 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0",
626 IRNode.STORE_VECTOR, "> 0",
627 ".*multiversion.*", "= 0"},
628 phase = CompilePhase.PRINT_IDEAL,
629 applyIfAnd = {"UseAutoVectorizationSpeculativeAliasingChecks", "true", "AlignVector", "false"},
630 applyIfPlatform = {"64-bit", "true"},
631 applyIfCPUFeatureOr = {"sse4.1", "true"})
991
992 b[i+ 8] = (short)(a[i+ 8] + val); // 4-pack
993 b[i+ 9] = (short)(a[i+ 9] + val);
994 b[i+10] = (short)(a[i+10] + val);
995 b[i+11] = (short)(a[i+11] + val);
996
997 b[i+12] = (short)(a[i+12] + val); // 2-pack
998 b[i+13] = (short)(a[i+13] + val);
999
1000 b[i+14] = (short)(a[i+14] + val);
1001 }
1002 return new Object[]{ a, b };
1003 }
1004
// Reduction variant: the Mul 4-pack and And 4-pack both feed the same scalar
// accumulator 's'. Expects vectorized Mul/And plus an AddV (the reduction
// vector accumulation moved out of the loop) and a final ADD_REDUCTION_V.
@Test
@IR(counts = {IRNode.LOAD_VECTOR_I, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.MUL_VI, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.AND_VI, IRNode.VECTOR_SIZE_4, "> 0",
              IRNode.ADD_VI, IRNode.VECTOR_SIZE_4, "> 0", // reduction moved out of loop
              IRNode.ADD_REDUCTION_V, "> 0"},
    applyIf = {"MaxVectorSize", ">=32"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"sse4.1", "true", "asimd", "true", "rvv", "true"})
// Split packs including reductions
static Object[] test6a(int[] a, int[] b) {
    int s = 0;
    for (int i = 0; i < RANGE; i+=8) {
        s += a[i+0] * b[i+0];
        s += a[i+1] * b[i+1];
        s += a[i+2] * b[i+2];
        s += a[i+3] * b[i+3];

        s += a[i+4] & b[i+4];
        s += a[i+5] & b[i+5];
        s += a[i+6] & b[i+6];
        s += a[i+7] & b[i+7];
    }
    return new Object[]{ a, b, new int[]{ s } };
}
1030
// Uses the induction variable 'i' as a multiplicand, so vectorization needs a
// PopulateIndex node. Guards against the bad-split case described below.
@Test
@IR(counts = {IRNode.LOAD_VECTOR_I, "> 0",
              IRNode.MUL_VI, "> 0",
              IRNode.POPULATE_INDEX, "> 0"},
    applyIfPlatform = {"64-bit", "true"},
    applyIfCPUFeatureOr = {"avx2", "true", "sve", "true", "rvv", "true"})
// Index Populate:
// There can be an issue when all the (iv + 1), (iv + 2), ...
// get packed, but not (iv). Then we have a pack that is one element
// too short, and we start splitting everything in a bad way.
static Object[] test7a(int[] a, int[] b) {
    for (int i = 0; i < RANGE; i++) {
        a[i] = b[i] * i;
    }
    return new Object[]{ a, b };
}
|