src/hotspot/cpu/x86/x86.ad

8341 %}
8342 
8343 instruct vshiftcntimm(vecS dst, immI8 cnt, rRegI tmp) %{
8344   match(Set dst cnt);
8345   effect(TEMP tmp);
8346   format %{ "movl    $tmp,$cnt\t"
8347             "movdl   $dst,$tmp\t! load shift count" %}
8348   ins_encode %{
8349     __ movl($tmp$$Register, $cnt$$constant);
8350     __ movdl($dst$$XMMRegister, $tmp$$Register);
8351   %}
8352   ins_pipe( pipe_slow );
8353 %}
8354 
8355 // Byte vector shift
8356 instruct vshift4B(vecS dst, vecS src, vecS shift, vecS tmp, rRegI scratch) %{
8357   predicate(UseSSE > 3 && n->as_Vector()->length() == 4);
8358   match(Set dst (LShiftVB src shift));
8359   match(Set dst (RShiftVB src shift));
8360   match(Set dst (URShiftVB src shift));
8361   effect(TEMP dst, USE src, USE shift, TEMP tmp, TEMP scratch);
8362   format %{"vextendbw $tmp,$src\n\t"
8363            "vshiftw   $tmp,$shift\n\t"
8364            "movdqu    $dst,[0x00ff00ff0x00ff00ff]\n\t"
8365            "pand      $dst,$tmp\n\t"
8366            "packuswb  $dst,$dst\n\t ! packed4B shift" %}
8367   ins_encode %{
8368     int opcode = this->as_Mach()->ideal_Opcode();
8369 
8370     __ vextendbw(opcode, $tmp$$XMMRegister, $src$$XMMRegister);
8371     __ vshiftw(opcode, $tmp$$XMMRegister, $shift$$XMMRegister);
8372     __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); 
8373     __ pand($dst$$XMMRegister, $tmp$$XMMRegister);
8374     __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
8375   %}
8376   ins_pipe( pipe_slow );
8377 %}
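
Not from the webrev, but useful when reading these rules: every byte-shift rule in this section (packed4B through packed64B) emits the same widen/shift/mask/pack idiom, because x86 SIMD has no 8-bit shift instructions. Below is a minimal scalar C++ sketch of one lane, assuming the usual dispatch of vextendbw to pmovsxbw/pmovzxbw and of vshiftw to psraw/psrlw/psllw based on the ideal opcode; all names in the sketch are illustrative only.

#include <cstdint>
#include <cstdio>

enum ShiftOp { LSHIFT, RSHIFT, URSHIFT };   // LShiftVB / RShiftVB / URShiftVB

static uint8_t shift_byte(uint8_t b, int cnt, ShiftOp op) {
  int16_t w;                                 // vextendbw: byte -> word lane
  switch (op) {
    case RSHIFT:  w = (int16_t)((int8_t)b  >> cnt); break; // pmovsxbw + psraw
    case URSHIFT: w = (int16_t)((uint8_t)b >> cnt); break; // pmovzxbw + psrlw
    default:      w = (int16_t)(b << cnt);          break; // extend + psllw
  }
  return (uint8_t)(w & 0x00ff);              // pand [0x00ff..] + packuswb
}

int main() {
  printf("%02x\n", (unsigned)shift_byte(0xf0, 3, URSHIFT)); // 1e: logical, zero-extended
  printf("%02x\n", (unsigned)shift_byte(0xf0, 3, RSHIFT));  // fe: arithmetic, sign kept
  return 0;
}

After the 0x00ff mask every word holds at most 0xff, so packuswb's unsigned saturation degenerates to plain truncation when the halves are re-packed.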
8378 
8379 instruct vshift8B(vecD dst, vecD src, vecS shift, vecD tmp, rRegI scratch) %{
8380   predicate(UseSSE > 3 && n->as_Vector()->length() == 8);
8381   match(Set dst (LShiftVB src shift));
8382   match(Set dst (RShiftVB src shift));
8383   match(Set dst (URShiftVB src shift));
8384   effect(TEMP dst, USE src, USE shift, TEMP tmp, TEMP scratch);
8385   format %{"vextendbw $tmp,$src\n\t"
8386            "vshiftw   $tmp,$shift\n\t"
8387            "movdqu    $dst,[0x00ff00ff0x00ff00ff]\n\t"
8388            "pand      $dst,$tmp\n\t"
8389            "packuswb  $dst,$dst\n\t ! packed8B shift" %}
8390   ins_encode %{
8391     int opcode = this->as_Mach()->ideal_Opcode();
8392 
8393     __ vextendbw(opcode, $tmp$$XMMRegister, $src$$XMMRegister);
8394     __ vshiftw(opcode, $tmp$$XMMRegister, $shift$$XMMRegister);
8395     __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); 
8396     __ pand($dst$$XMMRegister, $tmp$$XMMRegister);
8397     __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
8398   %}
8399   ins_pipe( pipe_slow );
8400 %}
8401 
8402 instruct vshift16B(vecX dst, vecX src, vecS shift, vecX tmp1, vecX tmp2, rRegI scratch) %{
8403   predicate(UseSSE > 3  && UseAVX <= 1 && n->as_Vector()->length() == 16);
8404   match(Set dst (LShiftVB src shift));
8405   match(Set dst (RShiftVB src shift));
8406   match(Set dst (URShiftVB src shift));
8407   effect(TEMP dst, USE src, USE shift, TEMP tmp1, TEMP tmp2, TEMP scratch);
8408   format %{"vextendbw $tmp1,$src\n\t"
8409            "vshiftw   $tmp1,$shift\n\t"
8410            "pshufd    $tmp2,$src\n\t"
8411            "vextendbw $tmp2,$tmp2\n\t"
8412            "vshiftw   $tmp2,$shift\n\t"
8413            "movdqu    $dst,[0x00ff00ff0x00ff00ff]\n\t"
8414            "pand      $tmp2,$dst\n\t"
8415            "pand      $dst,$tmp1\n\t"
8416            "packuswb  $dst,$tmp2\n\t! packed16B shift" %}
8417   ins_encode %{
8418     int opcode = this->as_Mach()->ideal_Opcode();
8419 
8420     __ vextendbw(opcode, $tmp1$$XMMRegister, $src$$XMMRegister);
8421     __ vshiftw(opcode, $tmp1$$XMMRegister, $shift$$XMMRegister);
8422     __ pshufd($tmp2$$XMMRegister, $src$$XMMRegister, 0xE);
8423     __ vextendbw(opcode, $tmp2$$XMMRegister, $tmp2$$XMMRegister);
8424     __ vshiftw(opcode, $tmp2$$XMMRegister, $shift$$XMMRegister);
8425     __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register);
8426     __ pand($tmp2$$XMMRegister, $dst$$XMMRegister);
8427     __ pand($dst$$XMMRegister, $tmp1$$XMMRegister);
8428     __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister);
8429   %}
8430   ins_pipe( pipe_slow );
8431 %}
8432 
8433 instruct vshift16B_avx(vecX dst, vecX src, vecS shift, vecX tmp, rRegI scratch) %{
8434   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
8435   match(Set dst (LShiftVB src shift));
8436   match(Set dst (RShiftVB src shift));
8437   match(Set dst (URShiftVB src shift));
8438   effect(TEMP dst, USE src, USE shift, TEMP tmp, TEMP scratch);
8439   format %{"vextendbw  $tmp,$src\n\t"
8440            "vshiftw    $tmp,$tmp,$shift\n\t"
8441            "vpand      $tmp,$tmp,[0x00ff00ff0x00ff00ff]\n\t"
8442            "vextracti128_high  $dst,$tmp\n\t"
8443            "vpackuswb  $dst,$tmp,$dst\n\t! packed16B shift" %}
8444   ins_encode %{
8445     int opcode = this->as_Mach()->ideal_Opcode();
8446 
8447     int vector_len = 1;
8448     __ vextendbw(opcode, $tmp$$XMMRegister, $src$$XMMRegister, vector_len);
8449     __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vector_len);
8450     __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vector_len, $scratch$$Register);
8451     __ vextracti128_high($dst$$XMMRegister, $tmp$$XMMRegister);
8452     __ vpackuswb($dst$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, 0);
8453   %}
8454   ins_pipe( pipe_slow );
8455 %}
8456 
8457 instruct vshift32B_avx(vecY dst, vecY src, vecS shift, vecY tmp, rRegI scratch) %{
8458   predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
8459   match(Set dst (LShiftVB src shift));
8460   match(Set dst (RShiftVB src shift));
8461   match(Set dst (URShiftVB src shift));
8462   effect(TEMP dst, USE src, USE shift, TEMP tmp, TEMP scratch);
8463   format %{"vextracti128_high  $tmp,$src\n\t"
8464            "vextendbw  $tmp,$tmp\n\t"
8465            "vextendbw  $dst,$src\n\t"
8466            "vshiftw    $tmp,$tmp,$shift\n\t"
8467            "vshiftw    $dst,$dst,$shift\n\t"
8468            "vpand      $tmp,$tmp,[0x00ff00ff0x00ff00ff]\n\t"
8469            "vpand      $dst,$dst,[0x00ff00ff0x00ff00ff]\n\t"
8470            "vpackuswb  $dst,$dst,$tmp\n\t"
8471            "vpermq     $dst,$dst,0xD8\n\t! packed32B shift" %}
8472   ins_encode %{
8473     int opcode = this->as_Mach()->ideal_Opcode();
8474 
8475     int vector_len = 1;
8476     __ vextracti128_high($tmp$$XMMRegister, $src$$XMMRegister);
8477     __ vextendbw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, vector_len);
8478     __ vextendbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, vector_len);
8479     __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vector_len);
8480     __ vshiftw(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $shift$$XMMRegister, vector_len);
8481     __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vector_len, $scratch$$Register);
8482     __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vector_len, $scratch$$Register);
8483     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
8484     __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vector_len);
8485   %}
8486   ins_pipe( pipe_slow );
8487 %}
8488 
8489 instruct vshift64B_avx(vecZ dst, vecZ src, vecS shift, vecZ tmp1, vecZ tmp2, rRegI scratch) %{
8490   predicate(UseAVX > 2 && n->as_Vector()->length() == 64);
8491   match(Set dst (LShiftVB src shift));
8492   match(Set dst (RShiftVB src shift));
8493   match(Set dst (URShiftVB src shift));
8494   effect(TEMP dst, USE src, USE shift, TEMP tmp1, TEMP tmp2, TEMP scratch);
8495   format %{"vextracti64x4  $tmp1,$src\n\t"
8496            "vextendbw      $tmp1,$tmp1\n\t"
8497            "vextendbw      $tmp2,$src\n\t"
8498            "vshiftw        $tmp1,$tmp1,$shift\n\t"
8499            "vshiftw        $tmp2,$tmp2,$shift\n\t"
8500            "vmovdqu        $dst,[0x00ff00ff0x00ff00ff]\n\t"
8501            "vpbroadcastd   $dst,$dst\n\t"
8502            "vpand          $tmp1,$tmp1,$dst\n\t"
8503            "vpand          $tmp2,$tmp2,$dst\n\t"
8504            "vpackuswb      $dst,$tmp1,$tmp2\n\t"
8505            "evmovdquq      $tmp2, [0x0604020007050301]\n\t"
8506            "vpermq         $dst,$tmp2,$dst\n\t! packed64B shift" %}
8507   ins_encode %{
8508     int opcode = this->as_Mach()->ideal_Opcode();
8509 
8510     int vector_len = 2;
8511     __ vextracti64x4($tmp1$$XMMRegister, $src$$XMMRegister, 1);
8512     __ vextendbw(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, vector_len);
8513     __ vextendbw(opcode, $tmp2$$XMMRegister, $src$$XMMRegister, vector_len);
8514     __ vshiftw(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, $shift$$XMMRegister, vector_len);
8515     __ vshiftw(opcode, $tmp2$$XMMRegister, $tmp2$$XMMRegister, $shift$$XMMRegister, vector_len);
8516     __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register);
8517     __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
8518     __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vector_len);
8519     __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vector_len);
8520     __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vector_len);
8521     __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vector_len, $scratch$$Register);
8522     __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vector_len);
8523   %}
8524   ins_pipe( pipe_slow );
8525 %}
8526 
8527 // Shorts vector logical right shift produces an incorrect Java result
8528 // for negative data because Java code converts a short value into an int with
8529 // sign extension before the shift. But char vectors are fine since chars are
8530 // unsigned values.
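
To make the comment above concrete, here is a small C++ sketch (not part of this change) that mirrors Java's promotion rules with explicit casts:

#include <cstdint>
#include <cstdio>

int main() {
  int16_t  s = -2;      // Java short
  uint16_t c = 0xfffe;  // Java char with the same bit pattern

  // Java promotes the short to int first (sign extension), so
  // s >>> 1 == 0xfffffffe >>> 1 == 0x7fffffff, which narrows back to -1.
  int16_t java_short = (int16_t)((uint32_t)(int32_t)s >> 1);

  // A packed 16-bit logical shift (psrlw) sees only the raw 16 bits:
  uint16_t packed = (uint16_t)s >> 1;                 // 0xfffe >> 1 == 0x7fff

  // A char zero-extends during promotion, so the two results agree:
  uint16_t java_char = (uint16_t)((uint32_t)c >> 1);  // also 0x7fff

  printf("%d %d %d\n", java_short, packed, java_char); // -1 32767 32767
  return 0;
}

The packed result (32767) differs from Java's short result (-1), which is why the comment singles out char vectors as the safe case for logical right shift.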
8531 // Shorts/Chars vector shift
8532 instruct vshift2S(vecS dst, vecS src, vecS shift) %{
8533   predicate(n->as_Vector()->length() == 2);
8534   match(Set dst (LShiftVS src shift));
8535   match(Set dst (RShiftVS src shift));
8536   match(Set dst (URShiftVS src shift));
8537   effect(TEMP dst, USE src, USE shift);
8538   format %{ "vshiftw  $dst,$src,$shift\t! shift packed2S" %}
8539   ins_encode %{
8540     int opcode = this->as_Mach()->ideal_Opcode();
8541     if (UseAVX == 0) { 
8542       if ($dst$$XMMRegister != $src$$XMMRegister)
8543          __ movflt($dst$$XMMRegister, $src$$XMMRegister);
8544       __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
8545     } else {
8546       int vector_len = 0;
8547       __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8548     }
8549   %}
8550   ins_pipe( pipe_slow );
8551 %}
8552 
8553 instruct vshift4S(vecD dst, vecD src, vecS shift) %{
8554   predicate(n->as_Vector()->length() == 4);
8555   match(Set dst (LShiftVS src shift));
8556   match(Set dst (RShiftVS src shift));
8557   match(Set dst (URShiftVS src shift));
8558   effect(TEMP dst, USE src, USE shift);
8559   format %{ "vshiftw  $dst,$src,$shift\t! shift packed4S" %}
8560   ins_encode %{
8561     int opcode = this->as_Mach()->ideal_Opcode();
8562     if (UseAVX == 0) { 
8563       if ($dst$$XMMRegister != $src$$XMMRegister)
8564          __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
8565       __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
8566     
8567     } else {
8568       int vector_len = 0;
8569       __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8570     }
8571   %}
8572   ins_pipe( pipe_slow );
8573 %}
8574 
8575 instruct vshift8S(vecX dst, vecX src, vecS shift) %{
8576   predicate(n->as_Vector()->length() == 8);
8577   match(Set dst (LShiftVS src shift));
8578   match(Set dst (RShiftVS src shift));
8579   match(Set dst (URShiftVS src shift));
8580   effect(TEMP dst, USE src, USE shift);
8581   format %{ "vshiftw  $dst,$src,$shift\t! shift packed8S" %}
8582   ins_encode %{
8583     int opcode = this->as_Mach()->ideal_Opcode();
8584     if (UseAVX == 0) { 
8585       if ($dst$$XMMRegister != $src$$XMMRegister)
8586          __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
8587       __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
8588     } else {
8589       int vector_len = 0;
8590       __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8591     }
8592   %}
8593   ins_pipe( pipe_slow );
8594 %}
8595 
8596 instruct vshift16S(vecY dst, vecY src, vecS shift) %{
8597   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
8598   match(Set dst (LShiftVS src shift));
8599   match(Set dst (RShiftVS src shift));
8600   match(Set dst (URShiftVS src shift));
8601   effect(DEF dst, USE src, USE shift);
8602   format %{ "vshiftw  $dst,$src,$shift\t! shift packed16S" %}
8603   ins_encode %{
8604     int vector_len = 1;
8605     int opcode = this->as_Mach()->ideal_Opcode();
8606     __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8607   %}
8608   ins_pipe( pipe_slow );
8609 %}
8610 
8611 instruct vshift32S(vecZ dst, vecZ src, vecS shift) %{
8612   predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
8613   match(Set dst (LShiftVS src shift));
8614   match(Set dst (RShiftVS src shift));
8615   match(Set dst (URShiftVS src shift));
8616   effect(DEF dst, USE src, USE shift);
8617   format %{ "vshiftw  $dst,$src,$shift\t! shift packed32S" %}
8618   ins_encode %{
8619     int vector_len = 2;
8620     int opcode = this->as_Mach()->ideal_Opcode();
8621     __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8622   %}
8623   ins_pipe( pipe_slow );
8624 %}
8625 
8626 // Integers vector shift
8627 instruct vshift2I(vecD dst, vecD src, vecS shift) %{
8628   predicate(n->as_Vector()->length() == 2);
8629   match(Set dst (LShiftVI src shift));
8630   match(Set dst (RShiftVI src shift));
8631   match(Set dst (URShiftVI src shift));
8632   effect(TEMP dst, USE src, USE shift);
8633   format %{ "vshiftd  $dst,$src,$shift\t! shift packed2I" %}
8634   ins_encode %{
8635     int opcode = this->as_Mach()->ideal_Opcode();
8636     if (UseAVX == 0) { 
8637       if ($dst$$XMMRegister != $src$$XMMRegister)
8638          __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
8639       __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
8640     } else {
8641       int vector_len = 0;
8642       __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8643     }
8644   %}
8645   ins_pipe( pipe_slow );
8646 %}
8647 
8648 instruct vshift4I(vecX dst, vecX src, vecS shift) %{
8649   predicate(n->as_Vector()->length() == 4);
8650   match(Set dst (LShiftVI src shift));
8651   match(Set dst (RShiftVI src shift));
8652   match(Set dst (URShiftVI src shift));
8653   effect(TEMP dst, USE src, USE shift);
8654   format %{ "vshiftd  $dst,$src,$shift\t! shift packed4I" %}
8655   ins_encode %{
8656     int opcode = this->as_Mach()->ideal_Opcode();
8657     if (UseAVX == 0) { 
8658       if ($dst$$XMMRegister != $src$$XMMRegister)
8659          __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
8660       __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
8661     } else {
8662       int vector_len = 0;
8663       __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8664     }
8665   %}
8666   ins_pipe( pipe_slow );
8667 %}
8668 
8669 instruct vshift8I(vecY dst, vecY src, vecS shift) %{
8670   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
8671   match(Set dst (LShiftVI src shift));
8672   match(Set dst (RShiftVI src shift));
8673   match(Set dst (URShiftVI src shift));
8674   effect(DEF dst, USE src, USE shift);
8675   format %{ "vshiftd  $dst,$src,$shift\t! shift packed8I" %}
8676   ins_encode %{
8677     int vector_len = 1;
8678     int opcode = this->as_Mach()->ideal_Opcode();
8679     __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8680   %}
8681   ins_pipe( pipe_slow );
8682 %}
8683 
8684 instruct vshift16I(vecZ dst, vecZ src, vecS shift) %{
8685   predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
8686   match(Set dst (LShiftVI src shift));
8687   match(Set dst (RShiftVI src shift));
8688   match(Set dst (URShiftVI src shift));
8689   effect(DEF dst, USE src, USE shift);
8690   format %{ "vshiftd  $dst,$src,$shift\t! shift packed16I" %}
8691   ins_encode %{
8692     int vector_len = 2;
8693     int opcode = this->as_Mach()->ideal_Opcode();
8694     __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8695   %}
8696   ins_pipe( pipe_slow );
8697 %}
8698 
8699 // Longs vector shift
8700 instruct vshift2L(vecX dst, vecX src, vecS shift) %{
8701   predicate(n->as_Vector()->length() == 2);
8702   match(Set dst (LShiftVL src shift));
8703   match(Set dst (URShiftVL src shift));
8704   effect(TEMP dst, USE src, USE shift);
8705   format %{ "vshiftq  $dst,$src,$shift\t! shift packed2L" %}
8706   ins_encode %{
8707     int opcode = this->as_Mach()->ideal_Opcode();
8708     if (UseAVX == 0) { 
8709       if ($dst$$XMMRegister != $src$$XMMRegister)
8710          __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
8711       __ vshiftq(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
8712     } else {
8713       int vector_len = 0;
8714       __ vshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8715     }
8716   %}
8717   ins_pipe( pipe_slow );
8718 %}
8719 
8720 instruct vshift4L(vecY dst, vecY src, vecS shift) %{
8721   predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
8722   match(Set dst (LShiftVL src shift));
8723   match(Set dst (URShiftVL src shift));
8724   effect(DEF dst, USE src, USE shift);
8725   format %{ "vshiftq  $dst,$src,$shift\t! shift packed4L" %}
8726   ins_encode %{
8727     int vector_len = 1;
8728     int opcode = this->as_Mach()->ideal_Opcode();
8729     __ vshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8730   %}
8731   ins_pipe( pipe_slow );
8732 %}
8733 
8734 instruct vshift8L(vecZ dst, vecZ src, vecS shift) %{
8735   predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
8736   match(Set dst (LShiftVL src shift));
8737   match(Set dst (RShiftVL src shift));
8738   match(Set dst (URShiftVL src shift));
8739   effect(DEF dst, USE src, USE shift);
8740   format %{ "vshiftq  $dst,$src,$shift\t! shift packed8L" %}
8741   ins_encode %{
8742     int vector_len = 2;
8743     int opcode = this->as_Mach()->ideal_Opcode();
8744     __ vshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8745   %}
8746   ins_pipe( pipe_slow );
8747 %}
8748 
8749 // -------------------ArithmeticRightShift -----------------------------------
8750 // Long vector arithmetic right shift
8751 instruct vsra2L_reg(vecX dst, vecX src, vecS shift, vecX tmp, rRegI scratch) %{
8752   predicate(UseSSE >= 2 && n->as_Vector()->length() == 2);
8753   match(Set dst (RShiftVL src shift));
8754   effect(TEMP dst, TEMP tmp, TEMP scratch);
8755   format %{ "movdqu  $dst,$src\n\t"
8756             "psrlq   $dst,$shift\n\t"
8757             "movdqu  $tmp,[0x8000000000000000]\n\t"
8758             "psrlq   $tmp,$shift\n\t"
8759             "pxor    $dst,$tmp\n\t"




8341 %}
8342 
8343 instruct vshiftcntimm(vecS dst, immI8 cnt, rRegI tmp) %{
8344   match(Set dst cnt);
8345   effect(TEMP tmp);
8346   format %{ "movl    $tmp,$cnt\t"
8347             "movdl   $dst,$tmp\t! load shift count" %}
8348   ins_encode %{
8349     __ movl($tmp$$Register, $cnt$$constant);
8350     __ movdl($dst$$XMMRegister, $tmp$$Register);
8351   %}
8352   ins_pipe( pipe_slow );
8353 %}
8354 
8355 // Byte vector shift
8356 instruct vshift4B(vecS dst, vecS src, vecS shift, vecS tmp, rRegI scratch) %{
8357   predicate(UseSSE > 3 && n->as_Vector()->length() == 4);
8358   match(Set dst (LShiftVB src shift));
8359   match(Set dst (RShiftVB src shift));
8360   match(Set dst (URShiftVB src shift));
8361   effect(TEMP dst, TEMP tmp, TEMP scratch);
8362   format %{"vextendbw $tmp,$src\n\t"
8363            "vshiftw   $tmp,$shift\n\t"
8364            "movdqu    $dst,[0x00ff00ff0x00ff00ff]\n\t"
8365            "pand      $dst,$tmp\n\t"
8366            "packuswb  $dst,$dst\n\t ! packed4B shift" %}
8367   ins_encode %{
8368     int opcode = this->as_Mach()->ideal_Opcode();
8369 
8370     __ vextendbw(opcode, $tmp$$XMMRegister, $src$$XMMRegister);
8371     __ vshiftw(opcode, $tmp$$XMMRegister, $shift$$XMMRegister);
8372     __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); 
8373     __ pand($dst$$XMMRegister, $tmp$$XMMRegister);
8374     __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
8375   %}
8376   ins_pipe( pipe_slow );
8377 %}
8378 
8379 instruct vshift8B(vecD dst, vecD src, vecS shift, vecD tmp, rRegI scratch) %{
8380   predicate(UseSSE > 3 && n->as_Vector()->length() == 8);
8381   match(Set dst (LShiftVB src shift));
8382   match(Set dst (RShiftVB src shift));
8383   match(Set dst (URShiftVB src shift));
8384   effect(TEMP dst, TEMP tmp, TEMP scratch);
8385   format %{"vextendbw $tmp,$src\n\t"
8386            "vshiftw   $tmp,$shift\n\t"
8387            "movdqu    $dst,[0x00ff00ff0x00ff00ff]\n\t"
8388            "pand      $dst,$tmp\n\t"
8389            "packuswb  $dst,$dst\n\t ! packed8B shift" %}
8390   ins_encode %{
8391     int opcode = this->as_Mach()->ideal_Opcode();
8392 
8393     __ vextendbw(opcode, $tmp$$XMMRegister, $src$$XMMRegister);
8394     __ vshiftw(opcode, $tmp$$XMMRegister, $shift$$XMMRegister);
8395     __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); 
8396     __ pand($dst$$XMMRegister, $tmp$$XMMRegister);
8397     __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
8398   %}
8399   ins_pipe( pipe_slow );
8400 %}
8401 
8402 instruct vshift16B(vecX dst, vecX src, vecS shift, vecX tmp1, vecX tmp2, rRegI scratch) %{
8403   predicate(UseSSE > 3  && UseAVX <= 1 && n->as_Vector()->length() == 16);
8404   match(Set dst (LShiftVB src shift));
8405   match(Set dst (RShiftVB src shift));
8406   match(Set dst (URShiftVB src shift));
8407   effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch);
8408   format %{"vextendbw $tmp1,$src\n\t"
8409            "vshiftw   $tmp1,$shift\n\t"
8410            "pshufd    $tmp2,$src\n\t"
8411            "vextendbw $tmp2,$tmp2\n\t"
8412            "vshiftw   $tmp2,$shift\n\t"
8413            "movdqu    $dst,[0x00ff00ff0x00ff00ff]\n\t"
8414            "pand      $tmp2,$dst\n\t"
8415            "pand      $dst,$tmp1\n\t"
8416            "packuswb  $dst,$tmp2\n\t! packed16B shift" %}
8417   ins_encode %{
8418     int opcode = this->as_Mach()->ideal_Opcode();
8419 
8420     __ vextendbw(opcode, $tmp1$$XMMRegister, $src$$XMMRegister);
8421     __ vshiftw(opcode, $tmp1$$XMMRegister, $shift$$XMMRegister);
8422     __ pshufd($tmp2$$XMMRegister, $src$$XMMRegister, 0xE);
8423     __ vextendbw(opcode, $tmp2$$XMMRegister, $tmp2$$XMMRegister);
8424     __ vshiftw(opcode, $tmp2$$XMMRegister, $shift$$XMMRegister);
8425     __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register);
8426     __ pand($tmp2$$XMMRegister, $dst$$XMMRegister);
8427     __ pand($dst$$XMMRegister, $tmp1$$XMMRegister);
8428     __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister);
8429   %}
8430   ins_pipe( pipe_slow );
8431 %}
8432 
8433 instruct vshift16B_avx(vecX dst, vecX src, vecS shift, vecX tmp, rRegI scratch) %{
8434   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
8435   match(Set dst (LShiftVB src shift));
8436   match(Set dst (RShiftVB src shift));
8437   match(Set dst (URShiftVB src shift));
8438   effect(TEMP dst, TEMP tmp, TEMP scratch);
8439   format %{"vextendbw  $tmp,$src\n\t"
8440            "vshiftw    $tmp,$tmp,$shift\n\t"
8441            "vpand      $tmp,$tmp,[0x00ff00ff0x00ff00ff]\n\t"
8442            "vextracti128_high  $dst,$tmp\n\t"
8443            "vpackuswb  $dst,$tmp,$dst\n\t! packed16B shift" %}
8444   ins_encode %{
8445     int opcode = this->as_Mach()->ideal_Opcode();
8446 
8447     int vector_len = 1;
8448     __ vextendbw(opcode, $tmp$$XMMRegister, $src$$XMMRegister, vector_len);
8449     __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vector_len);
8450     __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vector_len, $scratch$$Register);
8451     __ vextracti128_high($dst$$XMMRegister, $tmp$$XMMRegister);
8452     __ vpackuswb($dst$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, 0);
8453   %}
8454   ins_pipe( pipe_slow );
8455 %}
8456 
8457 instruct vshift32B_avx(vecY dst, vecY src, vecS shift, vecY tmp, rRegI scratch) %{
8458   predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
8459   match(Set dst (LShiftVB src shift));
8460   match(Set dst (RShiftVB src shift));
8461   match(Set dst (URShiftVB src shift));
8462   effect(TEMP dst, TEMP tmp, TEMP scratch);
8463   format %{"vextracti128_high  $tmp,$src\n\t"
8464            "vextendbw  $tmp,$tmp\n\t"
8465            "vextendbw  $dst,$src\n\t"
8466            "vshiftw    $tmp,$tmp,$shift\n\t"
8467            "vshiftw    $dst,$dst,$shift\n\t"
8468            "vpand      $tmp,$tmp,[0x00ff00ff0x00ff00ff]\n\t"
8469            "vpand      $dst,$dst,[0x00ff00ff0x00ff00ff]\n\t"
8470            "vpackuswb  $dst,$dst,$tmp\n\t"
8471            "vpermq     $dst,$dst,0xD8\n\t! packed32B shift" %}
8472   ins_encode %{
8473     int opcode = this->as_Mach()->ideal_Opcode();
8474 
8475     int vector_len = 1;
8476     __ vextracti128_high($tmp$$XMMRegister, $src$$XMMRegister);
8477     __ vextendbw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, vector_len);
8478     __ vextendbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, vector_len);
8479     __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vector_len);
8480     __ vshiftw(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $shift$$XMMRegister, vector_len);
8481     __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vector_len, $scratch$$Register);
8482     __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vector_len, $scratch$$Register);
8483     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
8484     __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vector_len);
8485   %}
8486   ins_pipe( pipe_slow );
8487 %}
8488 
8489 instruct vshift64B_avx(vecZ dst, vecZ src, vecS shift, vecZ tmp1, vecZ tmp2, rRegI scratch) %{
8490   predicate(UseAVX > 2 && n->as_Vector()->length() == 64);
8491   match(Set dst (LShiftVB src shift));
8492   match(Set dst (RShiftVB src shift));
8493   match(Set dst (URShiftVB src shift));
8494   effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch);
8495   format %{"vextracti64x4  $tmp1,$src\n\t"
8496            "vextendbw      $tmp1,$tmp1\n\t"
8497            "vextendbw      $tmp2,$src\n\t"
8498            "vshiftw        $tmp1,$tmp1,$shift\n\t"
8499            "vshiftw        $tmp2,$tmp2,$shift\n\t"
8500            "vmovdqu        $dst,[0x00ff00ff0x00ff00ff]\n\t"
8501            "vpbroadcastd   $dst,$dst\n\t"
8502            "vpand          $tmp1,$tmp1,$dst\n\t"
8503            "vpand          $tmp2,$tmp2,$dst\n\t"
8504            "vpackuswb      $dst,$tmp1,$tmp2\n\t"
8505            "evmovdquq      $tmp2, [0x0604020007050301]\n\t"
8506            "vpermq         $dst,$tmp2,$dst\n\t! packed64B shift" %}
8507   ins_encode %{
8508     int opcode = this->as_Mach()->ideal_Opcode();
8509 
8510     int vector_len = 2;
8511     __ vextracti64x4($tmp1$$XMMRegister, $src$$XMMRegister, 1);
8512     __ vextendbw(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, vector_len);
8513     __ vextendbw(opcode, $tmp2$$XMMRegister, $src$$XMMRegister, vector_len);
8514     __ vshiftw(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, $shift$$XMMRegister, vector_len);
8515     __ vshiftw(opcode, $tmp2$$XMMRegister, $tmp2$$XMMRegister, $shift$$XMMRegister, vector_len);
8516     __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register);
8517     __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
8518     __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vector_len);
8519     __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vector_len);
8520     __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vector_len);
8521     __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vector_len, $scratch$$Register);
8522     __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vector_len);
8523   %}
8524   ins_pipe( pipe_slow );
8525 %}
8526 
8527 // Shorts vector logical right shift produces an incorrect Java result
8528 // for negative data because Java code converts a short value into an int with
8529 // sign extension before the shift. But char vectors are fine since chars are
8530 // unsigned values.
8531 // Shorts/Chars vector shift
8532 instruct vshift2S(vecS dst, vecS src, vecS shift) %{
8533   predicate(n->as_Vector()->length() == 2);
8534   match(Set dst (LShiftVS src shift));
8535   match(Set dst (RShiftVS src shift));
8536   match(Set dst (URShiftVS src shift));

8537   format %{ "vshiftw  $dst,$src,$shift\t! shift packed2S" %}
8538   ins_encode %{
8539     int opcode = this->as_Mach()->ideal_Opcode();
8540     if (UseAVX == 0) { 
8541       if ($dst$$XMMRegister != $src$$XMMRegister)
8542          __ movflt($dst$$XMMRegister, $src$$XMMRegister);
8543       __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
8544     } else {
8545       int vector_len = 0;
8546       __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8547     }
8548   %}
8549   ins_pipe( pipe_slow );
8550 %}
8551 
8552 instruct vshift4S(vecD dst, vecD src, vecS shift) %{
8553   predicate(n->as_Vector()->length() == 4);
8554   match(Set dst (LShiftVS src shift));
8555   match(Set dst (RShiftVS src shift));
8556   match(Set dst (URShiftVS src shift));

8557   format %{ "vshiftw  $dst,$src,$shift\t! shift packed4S" %}
8558   ins_encode %{
8559     int opcode = this->as_Mach()->ideal_Opcode();
8560     if (UseAVX == 0) { 
8561       if ($dst$$XMMRegister != $src$$XMMRegister)
8562          __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
8563       __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
8564     
8565     } else {
8566       int vector_len = 0;
8567       __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8568     }
8569   %}
8570   ins_pipe( pipe_slow );
8571 %}
8572 
8573 instruct vshift8S(vecX dst, vecX src, vecS shift) %{
8574   predicate(n->as_Vector()->length() == 8);
8575   match(Set dst (LShiftVS src shift));
8576   match(Set dst (RShiftVS src shift));
8577   match(Set dst (URShiftVS src shift));

8578   format %{ "vshiftw  $dst,$src,$shift\t! shift packed8S" %}
8579   ins_encode %{
8580     int opcode = this->as_Mach()->ideal_Opcode();
8581     if (UseAVX == 0) { 
8582       if ($dst$$XMMRegister != $src$$XMMRegister)
8583          __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
8584       __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
8585     } else {
8586       int vector_len = 0;
8587       __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8588     }
8589   %}
8590   ins_pipe( pipe_slow );
8591 %}
8592 
8593 instruct vshift16S(vecY dst, vecY src, vecS shift) %{
8594   predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
8595   match(Set dst (LShiftVS src shift));
8596   match(Set dst (RShiftVS src shift));
8597   match(Set dst (URShiftVS src shift));

8598   format %{ "vshiftw  $dst,$src,$shift\t! shift packed16S" %}
8599   ins_encode %{
8600     int vector_len = 1;
8601     int opcode = this->as_Mach()->ideal_Opcode();
8602     __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8603   %}
8604   ins_pipe( pipe_slow );
8605 %}
8606 
8607 instruct vshift32S(vecZ dst, vecZ src, vecS shift) %{
8608   predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
8609   match(Set dst (LShiftVS src shift));
8610   match(Set dst (RShiftVS src shift));
8611   match(Set dst (URShiftVS src shift));

8612   format %{ "vshiftw  $dst,$src,$shift\t! shift packed32S" %}
8613   ins_encode %{
8614     int vector_len = 2;
8615     int opcode = this->as_Mach()->ideal_Opcode();
8616     __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8617   %}
8618   ins_pipe( pipe_slow );
8619 %}
8620 
8621 // Integers vector shift
8622 instruct vshift2I(vecD dst, vecD src, vecS shift) %{
8623   predicate(n->as_Vector()->length() == 2);
8624   match(Set dst (LShiftVI src shift));
8625   match(Set dst (RShiftVI src shift));
8626   match(Set dst (URShiftVI src shift));

8627   format %{ "vshiftd  $dst,$src,$shift\t! shift packed2I" %}
8628   ins_encode %{
8629     int opcode = this->as_Mach()->ideal_Opcode();
8630     if (UseAVX == 0) { 
8631       if ($dst$$XMMRegister != $src$$XMMRegister)
8632          __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
8633       __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
8634     } else {
8635       int vector_len = 0;
8636       __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8637     }
8638   %}
8639   ins_pipe( pipe_slow );
8640 %}
8641 
8642 instruct vshift4I(vecX dst, vecX src, vecS shift) %{
8643   predicate(n->as_Vector()->length() == 4);
8644   match(Set dst (LShiftVI src shift));
8645   match(Set dst (RShiftVI src shift));
8646   match(Set dst (URShiftVI src shift));

8647   format %{ "vshiftd  $dst,$src,$shift\t! shift packed4I" %}
8648   ins_encode %{
8649     int opcode = this->as_Mach()->ideal_Opcode();
8650     if (UseAVX == 0) { 
8651       if ($dst$$XMMRegister != $src$$XMMRegister)
8652          __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
8653       __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
8654     } else {
8655       int vector_len = 0;
8656       __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8657     }
8658   %}
8659   ins_pipe( pipe_slow );
8660 %}
8661 
8662 instruct vshift8I(vecY dst, vecY src, vecS shift) %{
8663   predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
8664   match(Set dst (LShiftVI src shift));
8665   match(Set dst (RShiftVI src shift));
8666   match(Set dst (URShiftVI src shift));

8667   format %{ "vshiftd  $dst,$src,$shift\t! shift packed8I" %}
8668   ins_encode %{
8669     int vector_len = 1;
8670     int opcode = this->as_Mach()->ideal_Opcode();
8671     __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8672   %}
8673   ins_pipe( pipe_slow );
8674 %}
8675 
8676 instruct vshift16I(vecZ dst, vecZ src, vecS shift) %{
8677   predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
8678   match(Set dst (LShiftVI src shift));
8679   match(Set dst (RShiftVI src shift));
8680   match(Set dst (URShiftVI src shift));

8681   format %{ "vshiftd  $dst,$src,$shift\t! shift packed16I" %}
8682   ins_encode %{
8683     int vector_len = 2;
8684     int opcode = this->as_Mach()->ideal_Opcode();
8685     __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8686   %}
8687   ins_pipe( pipe_slow );
8688 %}
8689 
8690 // Longs vector shift
8691 instruct vshift2L(vecX dst, vecX src, vecS shift) %{
8692   predicate(n->as_Vector()->length() == 2);
8693   match(Set dst (LShiftVL src shift));
8694   match(Set dst (URShiftVL src shift));

8695   format %{ "vshiftq  $dst,$src,$shift\t! shift packed2L" %}
8696   ins_encode %{
8697     int opcode = this->as_Mach()->ideal_Opcode();
8698     if (UseAVX == 0) { 
8699       if ($dst$$XMMRegister != $src$$XMMRegister)
8700          __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
8701       __ vshiftq(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
8702     } else {
8703       int vector_len = 0;
8704       __ vshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8705     }
8706   %}
8707   ins_pipe( pipe_slow );
8708 %}
8709 
8710 instruct vshift4L(vecY dst, vecY src, vecS shift) %{
8711   predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
8712   match(Set dst (LShiftVL src shift));
8713   match(Set dst (URShiftVL src shift));

8714   format %{ "vshiftq  $dst,$src,$shift\t! shift packed4L" %}
8715   ins_encode %{
8716     int vector_len = 1;
8717     int opcode = this->as_Mach()->ideal_Opcode();
8718     __ vshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8719   %}
8720   ins_pipe( pipe_slow );
8721 %}
8722 
8723 instruct vshift8L(vecZ dst, vecZ src, vecS shift) %{
8724   predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
8725   match(Set dst (LShiftVL src shift));
8726   match(Set dst (RShiftVL src shift));
8727   match(Set dst (URShiftVL src shift));

8728   format %{ "vshiftq  $dst,$src,$shift\t! shift packed8L" %}
8729   ins_encode %{
8730     int vector_len = 2;
8731     int opcode = this->as_Mach()->ideal_Opcode();
8732     __ vshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
8733   %}
8734   ins_pipe( pipe_slow );
8735 %}
8736 
8737 // -------------------ArithmeticRightShift -----------------------------------
8738 // Long vector arithmetic right shift
8739 instruct vsra2L_reg(vecX dst, vecX src, vecS shift, vecX tmp, rRegI scratch) %{
8740   predicate(UseSSE >= 2 && n->as_Vector()->length() == 2);
8741   match(Set dst (RShiftVL src shift));
8742   effect(TEMP dst, TEMP tmp, TEMP scratch);
8743   format %{ "movdqu  $dst,$src\n\t"
8744             "psrlq   $dst,$shift\n\t"
8745             "movdqu  $tmp,[0x8000000000000000]\n\t"
8746             "psrlq   $tmp,$shift\n\t"
8747             "pxor    $dst,$tmp\n\t"

