1 //
    2 // Copyright (c) 2011, 2026, Oracle and/or its affiliates. All rights reserved.
    3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
    4 //
    5 // This code is free software; you can redistribute it and/or modify it
    6 // under the terms of the GNU General Public License version 2 only, as
    7 // published by the Free Software Foundation.
    8 //
    9 // This code is distributed in the hope that it will be useful, but WITHOUT
   10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   12 // version 2 for more details (a copy is included in the LICENSE file that
   13 // accompanied this code).
   14 //
   15 // You should have received a copy of the GNU General Public License version
   16 // 2 along with this work; if not, write to the Free Software Foundation,
   17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
   18 //
   19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
   20 // or visit www.oracle.com if you need additional information or have any
   21 // questions.
   22 //
   23 //
   24 
   25 // X86 AMD64 Architecture Description File
   26 
   27 //----------REGISTER DEFINITION BLOCK------------------------------------------
   28 // This information is used by the matcher and the register allocator to
   29 // describe individual registers and classes of registers within the target
   30 // architecture.
   31 
   32 register %{
   33 //----------Architecture Description Register Definitions----------------------
   34 // General Registers
   35 // "reg_def"  name ( register save type, C convention save type,
   36 //                   ideal register type, encoding );
   37 // Register Save Types:
   38 //
   39 // NS  = No-Save:       The register allocator assumes that these registers
   40 //                      can be used without saving upon entry to the method, &
   41 //                      that they do not need to be saved at call sites.
   42 //
   43 // SOC = Save-On-Call:  The register allocator assumes that these registers
   44 //                      can be used without saving upon entry to the method,
   45 //                      but that they must be saved at call sites.
   46 //
   47 // SOE = Save-On-Entry: The register allocator assumes that these registers
   48 //                      must be saved before using them upon entry to the
   49 //                      method, but they do not need to be saved at call
   50 //                      sites.
   51 //
   52 // AS  = Always-Save:   The register allocator assumes that these registers
   53 //                      must be saved before using them upon entry to the
   54 //                      method, & that they must be saved at call sites.
   55 //
   56 // Ideal Register Type is used to determine how to save & restore a
   57 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
   58 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
   59 //
   60 // The encoding number is the actual bit-pattern placed into the opcodes.
   61 
   62 // General Registers
   63 // R8-R15 must be encoded with REX.  (RSP, RBP, RSI, RDI need REX when
   64 // used as byte registers)
   65 
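   66 // Previously set RBX, RSI, and RDI as save-on-entry for Java code.
   67 // Turned off SOE in Java code due to frequent use of uncommon-traps.
   68 // Now that allocator is better, turn on RSI and RDI as SOE registers.
   // NOTE: the reg_defs below give RSI and RDI a Java save type of SOC; they
   // are SOE only in the C convention column, and only under _WIN64.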
   69 
   // General registers RAX, RCX, RDX, RBX and RSP (encodings 0-4), all Op_RegI.
   // Each register has two reg_defs: NAME maps to the register's VMReg and
   // NAME_H to the following VMReg slot (as_VMReg()->next()); presumably the
   // upper half of the 64-bit register -- confirm against the VMReg layout.
   // RAX, RCX and RDX are SOC for both Java and C.  RBX is SOC for Java but SOE
   // in the C convention.  RSP is NS in both.
   70 reg_def RAX  (SOC, SOC, Op_RegI,  0, rax->as_VMReg());
   71 reg_def RAX_H(SOC, SOC, Op_RegI,  0, rax->as_VMReg()->next());
   72
   73 reg_def RCX  (SOC, SOC, Op_RegI,  1, rcx->as_VMReg());
   74 reg_def RCX_H(SOC, SOC, Op_RegI,  1, rcx->as_VMReg()->next());
   75
   76 reg_def RDX  (SOC, SOC, Op_RegI,  2, rdx->as_VMReg());
   77 reg_def RDX_H(SOC, SOC, Op_RegI,  2, rdx->as_VMReg()->next());
   78
   79 reg_def RBX  (SOC, SOE, Op_RegI,  3, rbx->as_VMReg());
   80 reg_def RBX_H(SOC, SOE, Op_RegI,  3, rbx->as_VMReg()->next());
   81
   82 reg_def RSP  (NS,  NS,  Op_RegI,  4, rsp->as_VMReg());
   83 reg_def RSP_H(NS,  NS,  Op_RegI,  4, rsp->as_VMReg()->next());
   84 
   85 // Now that adapter frames are gone, RBP is always saved and restored by the prolog/epilog code.
   // RBP (encoding 5) is therefore defined as NS for Java code, while the C
   // convention column marks it SOE.  RBP_H maps to the following VMReg slot.
   86 reg_def RBP  (NS, SOE, Op_RegI,  5, rbp->as_VMReg());
   87 reg_def RBP_H(NS, SOE, Op_RegI,  5, rbp->as_VMReg()->next());
   88 
   // RSI and RDI (encodings 6 and 7).  The Java save type is SOC in both
   // branches.  The C convention save type depends on the platform: SOE when
   // _WIN64 is defined, SOC otherwise.
   89 #ifdef _WIN64
   90
   // _WIN64: RSI/RDI are save-on-entry in the C convention.
   91 reg_def RSI  (SOC, SOE, Op_RegI,  6, rsi->as_VMReg());
   92 reg_def RSI_H(SOC, SOE, Op_RegI,  6, rsi->as_VMReg()->next());
   93
   94 reg_def RDI  (SOC, SOE, Op_RegI,  7, rdi->as_VMReg());
   95 reg_def RDI_H(SOC, SOE, Op_RegI,  7, rdi->as_VMReg()->next());
   96
   97 #else
   98
   // Non-_WIN64: RSI/RDI are save-on-call in the C convention.
   99 reg_def RSI  (SOC, SOC, Op_RegI,  6, rsi->as_VMReg());
  100 reg_def RSI_H(SOC, SOC, Op_RegI,  6, rsi->as_VMReg()->next());
  101
  102 reg_def RDI  (SOC, SOC, Op_RegI,  7, rdi->as_VMReg());
  103 reg_def RDI_H(SOC, SOC, Op_RegI,  7, rdi->as_VMReg()->next());
  104
  105 #endif
  106 
  // R8-R15 (encodings 8-15), all Op_RegI, each with a NAME/NAME_H pair.
  // R8-R11 are SOC for both Java and C.  R12-R15 are SOC for Java but SOE in
  // the C convention.
  107 reg_def R8   (SOC, SOC, Op_RegI,  8, r8->as_VMReg());
  108 reg_def R8_H (SOC, SOC, Op_RegI,  8, r8->as_VMReg()->next());
  109
  110 reg_def R9   (SOC, SOC, Op_RegI,  9, r9->as_VMReg());
  111 reg_def R9_H (SOC, SOC, Op_RegI,  9, r9->as_VMReg()->next());
  112
  113 reg_def R10  (SOC, SOC, Op_RegI, 10, r10->as_VMReg());
  114 reg_def R10_H(SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());
  115
  116 reg_def R11  (SOC, SOC, Op_RegI, 11, r11->as_VMReg());
  117 reg_def R11_H(SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());
  118
  119 reg_def R12  (SOC, SOE, Op_RegI, 12, r12->as_VMReg());
  120 reg_def R12_H(SOC, SOE, Op_RegI, 12, r12->as_VMReg()->next());
  121
  122 reg_def R13  (SOC, SOE, Op_RegI, 13, r13->as_VMReg());
  123 reg_def R13_H(SOC, SOE, Op_RegI, 13, r13->as_VMReg()->next());
  124
  125 reg_def R14  (SOC, SOE, Op_RegI, 14, r14->as_VMReg());
  126 reg_def R14_H(SOC, SOE, Op_RegI, 14, r14->as_VMReg()->next());
  127
  128 reg_def R15  (SOC, SOE, Op_RegI, 15, r15->as_VMReg());
  129 reg_def R15_H(SOC, SOE, Op_RegI, 15, r15->as_VMReg()->next());
  130 
  // R16-R31 (encodings 16-31), all Op_RegI, each with a NAME/NAME_H pair.
  // Every one is SOC for both the Java and the C convention.
  // NOTE(review): presumably these are the Intel APX extended general
  // registers -- confirm; nothing in this section states it.
  131 reg_def R16  (SOC, SOC, Op_RegI, 16, r16->as_VMReg());
  132 reg_def R16_H(SOC, SOC, Op_RegI, 16, r16->as_VMReg()->next());
  133
  134 reg_def R17  (SOC, SOC, Op_RegI, 17, r17->as_VMReg());
  135 reg_def R17_H(SOC, SOC, Op_RegI, 17, r17->as_VMReg()->next());
  136
  137 reg_def R18  (SOC, SOC, Op_RegI, 18, r18->as_VMReg());
  138 reg_def R18_H(SOC, SOC, Op_RegI, 18, r18->as_VMReg()->next());
  139
  140 reg_def R19  (SOC, SOC, Op_RegI, 19, r19->as_VMReg());
  141 reg_def R19_H(SOC, SOC, Op_RegI, 19, r19->as_VMReg()->next());
  142
  143 reg_def R20  (SOC, SOC, Op_RegI, 20, r20->as_VMReg());
  144 reg_def R20_H(SOC, SOC, Op_RegI, 20, r20->as_VMReg()->next());
  145
  146 reg_def R21  (SOC, SOC, Op_RegI, 21, r21->as_VMReg());
  147 reg_def R21_H(SOC, SOC, Op_RegI, 21, r21->as_VMReg()->next());
  148
  149 reg_def R22  (SOC, SOC, Op_RegI, 22, r22->as_VMReg());
  150 reg_def R22_H(SOC, SOC, Op_RegI, 22, r22->as_VMReg()->next());
  151
  152 reg_def R23  (SOC, SOC, Op_RegI, 23, r23->as_VMReg());
  153 reg_def R23_H(SOC, SOC, Op_RegI, 23, r23->as_VMReg()->next());
  154
  155 reg_def R24  (SOC, SOC, Op_RegI, 24, r24->as_VMReg());
  156 reg_def R24_H(SOC, SOC, Op_RegI, 24, r24->as_VMReg()->next());
  157
  158 reg_def R25  (SOC, SOC, Op_RegI, 25, r25->as_VMReg());
  159 reg_def R25_H(SOC, SOC, Op_RegI, 25, r25->as_VMReg()->next());
  160
  161 reg_def R26  (SOC, SOC, Op_RegI, 26, r26->as_VMReg());
  162 reg_def R26_H(SOC, SOC, Op_RegI, 26, r26->as_VMReg()->next());
  163
  164 reg_def R27  (SOC, SOC, Op_RegI, 27, r27->as_VMReg());
  165 reg_def R27_H(SOC, SOC, Op_RegI, 27, r27->as_VMReg()->next());
  166
  167 reg_def R28  (SOC, SOC, Op_RegI, 28, r28->as_VMReg());
  168 reg_def R28_H(SOC, SOC, Op_RegI, 28, r28->as_VMReg()->next());
  169
  170 reg_def R29  (SOC, SOC, Op_RegI, 29, r29->as_VMReg());
  171 reg_def R29_H(SOC, SOC, Op_RegI, 29, r29->as_VMReg()->next());
  172
  173 reg_def R30  (SOC, SOC, Op_RegI, 30, r30->as_VMReg());
  174 reg_def R30_H(SOC, SOC, Op_RegI, 30, r30->as_VMReg()->next());
  175
  176 reg_def R31  (SOC, SOC, Op_RegI, 31, r31->as_VMReg());
  177 reg_def R31_H(SOC, SOC, Op_RegI, 31, r31->as_VMReg()->next());
  178 
  179 // Allocation order for the general registers
  180 
  181 // Specify priority of register selection within phases of register
  182 // allocation.  Highest priority is first.  A useful heuristic is to
  183 // give registers a low priority when they are required by machine
  184 // instructions, like EAX and EDX on I486, and choose no-save registers
  185 // before save-on-call, & save-on-call before save-on-entry.  Registers
  186 // which participate in fixed calling sequences should come last.
  187 // Registers which are used as pairs must fall on an even boundary.
  //
  // chunk0 lists all 32 general registers, each as its (NAME, NAME_H) pair so
  // that every pair starts on an even position.  R10, R11, R8 and R9 are
  // listed first (highest priority) and RSP is listed last.
  // Do not reorder entries casually: the position in this list is the
  // allocation priority described above.
  188
  189 alloc_class chunk0(R10,         R10_H,
  190                    R11,         R11_H,
  191                    R8,          R8_H,
  192                    R9,          R9_H,
  193                    R12,         R12_H,
  194                    RCX,         RCX_H,
  195                    RBX,         RBX_H,
  196                    RDI,         RDI_H,
  197                    RDX,         RDX_H,
  198                    RSI,         RSI_H,
  199                    RAX,         RAX_H,
  200                    RBP,         RBP_H,
  201                    R13,         R13_H,
  202                    R14,         R14_H,
  203                    R15,         R15_H,
  204                    R16,         R16_H,
  205                    R17,         R17_H,
  206                    R18,         R18_H,
  207                    R19,         R19_H,
  208                    R20,         R20_H,
  209                    R21,         R21_H,
  210                    R22,         R22_H,
  211                    R23,         R23_H,
  212                    R24,         R24_H,
  213                    R25,         R25_H,
  214                    R26,         R26_H,
  215                    R27,         R27_H,
  216                    R28,         R28_H,
  217                    R29,         R29_H,
  218                    R30,         R30_H,
  219                    R31,         R31_H,
  220                    RSP,         RSP_H);
  221 
  222 // XMM registers.  512-bit registers or 16 words each, labeled (a)-p.
  223 // Word a in each register holds a Float, words ab hold a Double.
  224 // The whole registers are used in SSE4.2 version intrinsics,
  225 // array copy stubs and superword operations (see UseSSE42Intrinsics,
  226 // UseXMMForArrayCopy and UseSuperword flags).
  227 // For pre EVEX enabled architectures:
  228 //      XMM8-XMM15 must be encoded with REX (VEX for UseAVX)
  229 // For EVEX enabled architectures:
  230 //      XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
  231 //
  232 // Linux ABI:   No register preserved across function calls
  233 //              XMM0-XMM7 might hold parameters
  234 // Windows ABI: XMM6-XMM15 preserved across function calls
  235 //              XMM0-XMM3 might hold parameters
  236 
  // XMM0 (encoding 0): sixteen Op_RegF reg_defs, XMM0 plus XMM0b..XMM0p, mapped
  // to xmm0's VMReg and its next(1)..next(15) slots.  All sixteen are SOC for
  // both the Java and the C convention.
  237 reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
  238 reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
  239 reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
  240 reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
  241 reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
  242 reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
  243 reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
  244 reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
  245 reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8));
  246 reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9));
  247 reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10));
  248 reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11));
  249 reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12));
  250 reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13));
  251 reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14));
  252 reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15));
  253 
  // XMM1 (encoding 1): XMM1 plus slots XMM1b..XMM1p (xmm1's VMReg and its
  // next(1)..next(15)); all Op_RegF and SOC for both Java and C.
  254 reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
  255 reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
  256 reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
  257 reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
  258 reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
  259 reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
  260 reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
  261 reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
  262 reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8));
  263 reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9));
  264 reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10));
  265 reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11));
  266 reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12));
  267 reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13));
  268 reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14));
  269 reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15));
  270 
  // XMM2 (encoding 2): XMM2 plus slots XMM2b..XMM2p (xmm2's VMReg and its
  // next(1)..next(15)); all Op_RegF and SOC for both Java and C.
  271 reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
  272 reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
  273 reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
  274 reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
  275 reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
  276 reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
  277 reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
  278 reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
  279 reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8));
  280 reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9));
  281 reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10));
  282 reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11));
  283 reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12));
  284 reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13));
  285 reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14));
  286 reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15));
  287 
  // XMM3 (encoding 3): XMM3 plus slots XMM3b..XMM3p (xmm3's VMReg and its
  // next(1)..next(15)); all Op_RegF and SOC for both Java and C.
  288 reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
  289 reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
  290 reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
  291 reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
  292 reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
  293 reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
  294 reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
  295 reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
  296 reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8));
  297 reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9));
  298 reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10));
  299 reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11));
  300 reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12));
  301 reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13));
  302 reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14));
  303 reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15));
  304 
  // XMM4 (encoding 4): XMM4 plus slots XMM4b..XMM4p (xmm4's VMReg and its
  // next(1)..next(15)); all Op_RegF and SOC for both Java and C.
  305 reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
  306 reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
  307 reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
  308 reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
  309 reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
  310 reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
  311 reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
  312 reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
  313 reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8));
  314 reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9));
  315 reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10));
  316 reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11));
  317 reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12));
  318 reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13));
  319 reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14));
  320 reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15));
  321 
  // XMM5 (encoding 5): XMM5 plus slots XMM5b..XMM5p (xmm5's VMReg and its
  // next(1)..next(15)); all Op_RegF and SOC for both Java and C.
  322 reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
  323 reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
  324 reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
  325 reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
  326 reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
  327 reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
  328 reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
  329 reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
  330 reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8));
  331 reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9));
  332 reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10));
  333 reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11));
  334 reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12));
  335 reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13));
  336 reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14));
  337 reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15));
  338 
  // XMM6 (encoding 6): XMM6 plus slots XMM6b..XMM6p (xmm6's VMReg and its
  // next(1)..next(15)); all Op_RegF and SOC for both Java and C.
  // NOTE(review): the XMM header comment in this file says the Windows ABI
  // preserves XMM6-XMM15 across calls, yet the C convention column here is SOC
  // with no _WIN64 variant -- confirm this is intentional.
  339 reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
  340 reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
  341 reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
  342 reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
  343 reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
  344 reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
  345 reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
  346 reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
  347 reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8));
  348 reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9));
  349 reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10));
  350 reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11));
  351 reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12));
  352 reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13));
  353 reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14));
  354 reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15));
  355 
  // XMM7 (encoding 7): XMM7 plus slots XMM7b..XMM7p (xmm7's VMReg and its
  // next(1)..next(15)); all Op_RegF and SOC for both Java and C.
  356 reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
  357 reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
  358 reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
  359 reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
  360 reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
  361 reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
  362 reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
  363 reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
  364 reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8));
  365 reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9));
  366 reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10));
  367 reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11));
  368 reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12));
  369 reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13));
  370 reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14));
  371 reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15));
  372 
  // XMM8 (encoding 8): XMM8 plus slots XMM8b..XMM8p (xmm8's VMReg and its
  // next(1)..next(15)); all Op_RegF and SOC for both Java and C.
  373 reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
  374 reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
  375 reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
  376 reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
  377 reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
  378 reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
  379 reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
  380 reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
  381 reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8));
  382 reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9));
  383 reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10));
  384 reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11));
  385 reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12));
  386 reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13));
  387 reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14));
  388 reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15));
  389 
  // XMM9 (encoding 9): XMM9 plus slots XMM9b..XMM9p (xmm9's VMReg and its
  // next(1)..next(15)); all Op_RegF and SOC for both Java and C.
  390 reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
  391 reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
  392 reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
  393 reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
  394 reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
  395 reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
  396 reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
  397 reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
  398 reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8));
  399 reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9));
  400 reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10));
  401 reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11));
  402 reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12));
  403 reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13));
  404 reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14));
  405 reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15));
  406 
  // XMM10 (encoding 10): XMM10 plus slots XMM10b..XMM10p (xmm10's VMReg and
  // its next(1)..next(15)); all Op_RegF and SOC for both Java and C.
  407 reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
  408 reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
  409 reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
  410 reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
  411 reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
  412 reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
  413 reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
  414 reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
  415 reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8));
  416 reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9));
  417 reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10));
  418 reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11));
  419 reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12));
  420 reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13));
  421 reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14));
  422 reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15));
  423 
  // XMM11 (encoding 11): XMM11 plus slots XMM11b..XMM11p (xmm11's VMReg and
  // its next(1)..next(15)); all Op_RegF and SOC for both Java and C.
  424 reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
  425 reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
  426 reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
  427 reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
  428 reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
  429 reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
  430 reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
  431 reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
  432 reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8));
  433 reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9));
  434 reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10));
  435 reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11));
  436 reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12));
  437 reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13));
  438 reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14));
  439 reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15));
  440 
  // XMM12 (encoding 12): XMM12 plus slots XMM12b..XMM12p (xmm12's VMReg and
  // its next(1)..next(15)); all Op_RegF and SOC for both Java and C.
  441 reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
  442 reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
  443 reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
  444 reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
  445 reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
  446 reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
  447 reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
  448 reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
  449 reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8));
  450 reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9));
  451 reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10));
  452 reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11));
  453 reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12));
  454 reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13));
  455 reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14));
  456 reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15));
  457 
  // XMM13 (encoding 13): XMM13 plus slots XMM13b..XMM13p (xmm13's VMReg and
  // its next(1)..next(15)); all Op_RegF and SOC for both Java and C.
  458 reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
  459 reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
  460 reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
  461 reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
  462 reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
  463 reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
  464 reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
  465 reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
  466 reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8));
  467 reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9));
  468 reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10));
  469 reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11));
  470 reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12));
  471 reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13));
  472 reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14));
  473 reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15));
  474 
  // XMM14 (encoding 14): XMM14 plus slots XMM14b..XMM14p (xmm14's VMReg and
  // its next(1)..next(15)); all Op_RegF and SOC for both Java and C.
  475 reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
  476 reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
  477 reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
  478 reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
  479 reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
  480 reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
  481 reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
  482 reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
  483 reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8));
  484 reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9));
  485 reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10));
  486 reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11));
  487 reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12));
  488 reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13));
  489 reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14));
  490 reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15));
  491 
  // XMM15 (encoding 15): XMM15 plus slots XMM15b..XMM15p (xmm15's VMReg and
  // its next(1)..next(15)); all Op_RegF and SOC for both Java and C.
  492 reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
  493 reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
  494 reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
  495 reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
  496 reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
  497 reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
  498 reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
  499 reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
  500 reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8));
  501 reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9));
  502 reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10));
  503 reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11));
  504 reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12));
  505 reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13));
  506 reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14));
  507 reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15));
  508 
  // XMM16 (encoding 16): XMM16 plus slots XMM16b..XMM16p (xmm16's VMReg and
  // its next(1)..next(15)); all Op_RegF and SOC for both Java and C.
  509 reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg());
  510 reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1));
  511 reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2));
  512 reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3));
  513 reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4));
  514 reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5));
  515 reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6));
  516 reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7));
  517 reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8));
  518 reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9));
  519 reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10));
  520 reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11));
  521 reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12));
  522 reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13));
  523 reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14));
  524 reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15));
  525 
  // XMM17 (encoding 17): XMM17 plus slots XMM17b..XMM17p (xmm17's VMReg and
  // its next(1)..next(15)); all Op_RegF and SOC for both Java and C.
  526 reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg());
  527 reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1));
  528 reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2));
  529 reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3));
  530 reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4));
  531 reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5));
  532 reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6));
  533 reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7));
  534 reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8));
  535 reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9));
  536 reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10));
  537 reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11));
  538 reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12));
  539 reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13));
  540 reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14));
  541 reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15));
  542 
  543 reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()); // XMM18 slot 0; XMM18b..XMM18p below are next(1)..next(15) of the same VMReg, all with encoding 18
  544 reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1));
  545 reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2));
  546 reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3));
  547 reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4));
  548 reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5));
  549 reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6));
  550 reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7));
  551 reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8));
  552 reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9));
  553 reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10));
  554 reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11));
  555 reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12));
  556 reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13));
  557 reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14));
  558 reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15));
  559 
  560 reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()); // XMM19 slot 0; XMM19b..XMM19p below are next(1)..next(15) of the same VMReg, all with encoding 19
  561 reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1));
  562 reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2));
  563 reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3));
  564 reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4));
  565 reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5));
  566 reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6));
  567 reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7));
  568 reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8));
  569 reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9));
  570 reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10));
  571 reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11));
  572 reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12));
  573 reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13));
  574 reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14));
  575 reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15));
  576 
  577 reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()); // XMM20 slot 0; XMM20b..XMM20p below are next(1)..next(15) of the same VMReg, all with encoding 20
  578 reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1));
  579 reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2));
  580 reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3));
  581 reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4));
  582 reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5));
  583 reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6));
  584 reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7));
  585 reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8));
  586 reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9));
  587 reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10));
  588 reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11));
  589 reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12));
  590 reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13));
  591 reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14));
  592 reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15));
  593 
  594 reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()); // XMM21 slot 0; XMM21b..XMM21p below are next(1)..next(15) of the same VMReg, all with encoding 21
  595 reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1));
  596 reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2));
  597 reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3));
  598 reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4));
  599 reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5));
  600 reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6));
  601 reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7));
  602 reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8));
  603 reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9));
  604 reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10));
  605 reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11));
  606 reg_def XMM21m( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12));
  607 reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13));
  608 reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14));
  609 reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15));
  610 
  611 reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()); // XMM22 slot 0; XMM22b..XMM22p below are next(1)..next(15) of the same VMReg, all with encoding 22
  612 reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1));
  613 reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2));
  614 reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3));
  615 reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4));
  616 reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5));
  617 reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6));
  618 reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7));
  619 reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8));
  620 reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9));
  621 reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10));
  622 reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11));
  623 reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12));
  624 reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13));
  625 reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14));
  626 reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15));
  627 
  628 reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()); // XMM23 slot 0; XMM23b..XMM23p below are next(1)..next(15) of the same VMReg, all with encoding 23
  629 reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1));
  630 reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2));
  631 reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3));
  632 reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4));
  633 reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5));
  634 reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6));
  635 reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7));
  636 reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8));
  637 reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9));
  638 reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10));
  639 reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11));
  640 reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12));
  641 reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13));
  642 reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14));
  643 reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15));
  644 
  645 reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()); // XMM24 slot 0; XMM24b..XMM24p below are next(1)..next(15) of the same VMReg, all with encoding 24
  646 reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1));
  647 reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2));
  648 reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3));
  649 reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4));
  650 reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5));
  651 reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6));
  652 reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7));
  653 reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8));
  654 reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9));
  655 reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10));
  656 reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11));
  657 reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12));
  658 reg_def XMM24n( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13));
  659 reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14));
  660 reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15));
  661 
  662 reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()); // XMM25 slot 0; XMM25b..XMM25p below are next(1)..next(15) of the same VMReg, all with encoding 25
  663 reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1));
  664 reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2));
  665 reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3));
  666 reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4));
  667 reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5));
  668 reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6));
  669 reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7));
  670 reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8));
  671 reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9));
  672 reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10));
  673 reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11));
  674 reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12));
  675 reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13));
  676 reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14));
  677 reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15));
  678 
  679 reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()); // XMM26 slot 0; XMM26b..XMM26p below are next(1)..next(15) of the same VMReg, all with encoding 26
  680 reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1));
  681 reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2));
  682 reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3));
  683 reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4));
  684 reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5));
  685 reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6));
  686 reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7));
  687 reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8));
  688 reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9));
  689 reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10));
  690 reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11));
  691 reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12));
  692 reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13));
  693 reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14));
  694 reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15));
  695 
  696 reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()); // XMM27 slot 0; XMM27b..XMM27p below are next(1)..next(15) of the same VMReg, all with encoding 27
  697 reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1));
  698 reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2));
  699 reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3));
  700 reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4));
  701 reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5));
  702 reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6));
  703 reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7));
  704 reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8));
  705 reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9));
  706 reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10));
  707 reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11));
  708 reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12));
  709 reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13));
  710 reg_def XMM27o( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14));
  711 reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15));
  712 
  713 reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()); // XMM28 slot 0; XMM28b..XMM28p below are next(1)..next(15) of the same VMReg, all with encoding 28
  714 reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1));
  715 reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2));
  716 reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3));
  717 reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4));
  718 reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5));
  719 reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6));
  720 reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7));
  721 reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8));
  722 reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9));
  723 reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10));
  724 reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11));
  725 reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12));
  726 reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13));
  727 reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14));
  728 reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15));
  729 
  730 reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()); // XMM29 slot 0; XMM29b..XMM29p below are next(1)..next(15) of the same VMReg, all with encoding 29
  731 reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1));
  732 reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2));
  733 reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3));
  734 reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4));
  735 reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5));
  736 reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6));
  737 reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7));
  738 reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8));
  739 reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9));
  740 reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10));
  741 reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11));
  742 reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12));
  743 reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13));
  744 reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14));
  745 reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15));
  746 
  747 reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()); // XMM30 slot 0; XMM30b..XMM30p below are next(1)..next(15) of the same VMReg, all with encoding 30
  748 reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1));
  749 reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2));
  750 reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3));
  751 reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4));
  752 reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5));
  753 reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6));
  754 reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7));
  755 reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8));
  756 reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9));
  757 reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10));
  758 reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11));
  759 reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12));
  760 reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13));
  761 reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14));
  762 reg_def XMM30p( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15));
  763 
  764 reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()); // XMM31 slot 0; XMM31b..XMM31p below are next(1)..next(15) of the same VMReg, all with encoding 31
  765 reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1));
  766 reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2));
  767 reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3));
  768 reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4));
  769 reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5));
  770 reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6));
  771 reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7));
  772 reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8));
  773 reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9));
  774 reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10));
  775 reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11));
  776 reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12));
  777 reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13));
  778 reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14));
  779 reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15));
  780 
  781 reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad()); // flags register: ideal type 0, encoding 16, and VMRegImpl::Bad() instead of a concrete VMReg; used by the int_flags class and the chunk3 allocation class
  782 
  783 // AVX3 Mask Registers: K1..K7, each defined as a pair (Kn at kN->as_VMReg(), Kn_H at the following slot via next()). No K0 is defined in this group.
  784 reg_def K1   (SOC, SOC, Op_RegI,  1, k1->as_VMReg());
  785 reg_def K1_H (SOC, SOC, Op_RegI,  1, k1->as_VMReg()->next()); // _H shares encoding 1 with K1
  786 
  787 reg_def K2   (SOC, SOC, Op_RegI,  2, k2->as_VMReg());
  788 reg_def K2_H (SOC, SOC, Op_RegI,  2, k2->as_VMReg()->next());
  789 
  790 reg_def K3   (SOC, SOC, Op_RegI,  3, k3->as_VMReg());
  791 reg_def K3_H (SOC, SOC, Op_RegI,  3, k3->as_VMReg()->next());
  792 
  793 reg_def K4   (SOC, SOC, Op_RegI,  4, k4->as_VMReg());
  794 reg_def K4_H (SOC, SOC, Op_RegI,  4, k4->as_VMReg()->next());
  795 
  796 reg_def K5   (SOC, SOC, Op_RegI,  5, k5->as_VMReg());
  797 reg_def K5_H (SOC, SOC, Op_RegI,  5, k5->as_VMReg()->next());
  798 
  799 reg_def K6   (SOC, SOC, Op_RegI,  6, k6->as_VMReg());
  800 reg_def K6_H (SOC, SOC, Op_RegI,  6, k6->as_VMReg()->next());
  801 
  802 reg_def K7   (SOC, SOC, Op_RegI,  7, k7->as_VMReg());
  803 reg_def K7_H (SOC, SOC, Op_RegI,  7, k7->as_VMReg()->next());
  804 
  805 
  806 //----------Architecture Description Register Classes--------------------------
  807 // Several register classes are automatically defined based upon information in
  808 // this architecture description.
  809 // 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
  810 // 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
  811 //
  812 
  813 // Empty register class: no_reg is declared with an empty register list.
  814 reg_class no_reg();
  815 
  816 // Class for all pointer/long registers including APX extended GPRs: every GPR from RAX through R31 is listed as a (reg, reg_H) pair, RSP and R15 included.
  817 reg_class all_reg(RAX, RAX_H,
  818                   RDX, RDX_H,
  819                   RBP, RBP_H,
  820                   RDI, RDI_H,
  821                   RSI, RSI_H,
  822                   RCX, RCX_H,
  823                   RBX, RBX_H,
  824                   RSP, RSP_H,
  825                   R8,  R8_H,
  826                   R9,  R9_H,
  827                   R10, R10_H,
  828                   R11, R11_H,
  829                   R12, R12_H,
  830                   R13, R13_H,
  831                   R14, R14_H,
  832                   R15, R15_H,
  833                   R16, R16_H, // R16..R31: APX extended GPRs
  834                   R17, R17_H,
  835                   R18, R18_H,
  836                   R19, R19_H,
  837                   R20, R20_H,
  838                   R21, R21_H,
  839                   R22, R22_H,
  840                   R23, R23_H,
  841                   R24, R24_H,
  842                   R25, R25_H,
  843                   R26, R26_H,
  844                   R27, R27_H,
  845                   R28, R28_H,
  846                   R29, R29_H,
  847                   R30, R30_H,
  848                   R31, R31_H);
  849 
  850 // Class for all int registers including APX extended GPRs: low halves only (no _H entries). Unlike all_reg, RSP (stack pointer) and R15 (TLS pointer) are not listed.
  851 reg_class all_int_reg(RAX,
  852                       RDX,
  853                       RBP,
  854                       RDI,
  855                       RSI,
  856                       RCX,
  857                       RBX,
  858                       R8,
  859                       R9,
  860                       R10,
  861                       R11,
  862                       R12,
  863                       R13,
  864                       R14, // R15 is not listed; NOTE(review): presumably intentional since R15 is the TLS pointer - confirm
  865                       R16, // R16..R31: APX extended GPRs
  866                       R17,
  867                       R18,
  868                       R19,
  869                       R20,
  870                       R21,
  871                       R22,
  872                       R23,
  873                       R24,
  874                       R25,
  875                       R26,
  876                       R27,
  877                       R28,
  878                       R29,
  879                       R30,
  880                       R31);
  881 
  882 // Class for all pointer registers; the member set is the mask _ANY_REG_mask, which is defined outside this part of the file.
  883 reg_class any_reg %{
  884   return _ANY_REG_mask;
  885 %}
  886 
  887 // Class for all pointer registers (excluding RSP); the member set is the mask _PTR_REG_mask, defined outside this part of the file.
  888 reg_class ptr_reg %{
  889   return _PTR_REG_mask;
  890 %}
  891 
  892 // Class for all pointer registers (excluding RSP and RBP); the member set is the mask _PTR_REG_NO_RBP_mask, defined outside this part of the file.
  893 reg_class ptr_reg_no_rbp %{
  894   return _PTR_REG_NO_RBP_mask;
  895 %}
  896 
  897 // Class for all pointer registers (excluding RAX and RSP); the member set is the mask _PTR_NO_RAX_REG_mask, defined outside this part of the file.
  898 reg_class ptr_no_rax_reg %{
  899   return _PTR_NO_RAX_REG_mask;
  900 %}
  901 
  902 // Class for all pointer registers (excluding RAX, RBX, and RSP); the member set is the mask _PTR_NO_RAX_RBX_REG_mask, defined outside this part of the file.
  903 reg_class ptr_no_rax_rbx_reg %{
  904   return _PTR_NO_RAX_RBX_REG_mask;
  905 %}
  906 
  907 // Class for all long registers (excluding RSP); the member set is the mask _LONG_REG_mask, defined outside this part of the file.
  908 reg_class long_reg %{
  909   return _LONG_REG_mask;
  910 %}
  911 
  912 // Class for all long registers (excluding RAX, RDX and RSP); the member set is the mask _LONG_NO_RAX_RDX_REG_mask, defined outside this part of the file.
  913 reg_class long_no_rax_rdx_reg %{
  914   return _LONG_NO_RAX_RDX_REG_mask;
  915 %}
  916 
  917 // Class for all long registers (excluding RCX and RSP); the member set is the mask _LONG_NO_RCX_REG_mask, defined outside this part of the file.
  918 reg_class long_no_rcx_reg %{
  919   return _LONG_NO_RCX_REG_mask;
  920 %}
  921 
  922 // Class for all long registers (excluding RBP and R13); the member set is the mask _LONG_NO_RBP_R13_REG_mask, defined outside this part of the file. NOTE(review): sibling classes also exclude RSP - confirm whether this one does too.
  923 reg_class long_no_rbp_r13_reg %{
  924   return _LONG_NO_RBP_R13_REG_mask;
  925 %}
  926 
  927 // Class for all int registers (excluding RSP); the member set is the mask _INT_REG_mask, defined outside this part of the file.
  928 reg_class int_reg %{
  929   return _INT_REG_mask;
  930 %}
  931 
  932 // Class for all int registers (excluding RAX, RDX, and RSP); the member set is the mask _INT_NO_RAX_RDX_REG_mask, defined outside this part of the file.
  933 reg_class int_no_rax_rdx_reg %{
  934   return _INT_NO_RAX_RDX_REG_mask;
  935 %}
  936 
  937 // Class for all int registers (excluding RCX and RSP); the member set is the mask _INT_NO_RCX_REG_mask, defined outside this part of the file.
  938 reg_class int_no_rcx_reg %{
  939   return _INT_NO_RCX_REG_mask;
  940 %}
  941 
  942 // Class for all int registers (excluding RBP and R13); the member set is the mask _INT_NO_RBP_R13_REG_mask, defined outside this part of the file. NOTE(review): sibling classes also exclude RSP - confirm whether this one does too.
  943 reg_class int_no_rbp_r13_reg %{
  944   return _INT_NO_RBP_R13_REG_mask;
  945 %}
  946 
  947 // Singleton class for RAX pointer register (RAX together with its RAX_H half)
  948 reg_class ptr_rax_reg(RAX, RAX_H);
  949 
  950 // Singleton class for RBX pointer register (RBX together with its RBX_H half)
  951 reg_class ptr_rbx_reg(RBX, RBX_H);
  952 
  953 // Singleton class for RSI pointer register (RSI together with its RSI_H half)
  954 reg_class ptr_rsi_reg(RSI, RSI_H);
  955 
  956 // Singleton class for RBP pointer register (RBP together with its RBP_H half)
  957 reg_class ptr_rbp_reg(RBP, RBP_H);
  958 
  959 // Singleton class for RDI pointer register (RDI together with its RDI_H half)
  960 reg_class ptr_rdi_reg(RDI, RDI_H);
  961 
  962 // Singleton class for stack pointer (RSP, RSP_H)
  963 reg_class ptr_rsp_reg(RSP, RSP_H);
  964 
  965 // Singleton class for TLS pointer (R15, R15_H)
  966 reg_class ptr_r15_reg(R15, R15_H);
  967 
  968 // Singleton class for RAX long register (same register pair as ptr_rax_reg)
  969 reg_class long_rax_reg(RAX, RAX_H);
  970 
  971 // Singleton class for RCX long register (RCX together with its RCX_H half)
  972 reg_class long_rcx_reg(RCX, RCX_H);
  973 
  974 // Singleton class for RDX long register (RDX together with its RDX_H half)
  975 reg_class long_rdx_reg(RDX, RDX_H);
  976 
  977 // Singleton class for R11 long register (R11 together with its R11_H half)
  978 reg_class long_r11_reg(R11, R11_H);
  979 
  980 // Singleton class for RAX int register (RAX only, no _H half)
  981 reg_class int_rax_reg(RAX);
  982 
  983 // Singleton class for RBX int register (RBX only, no _H half)
  984 reg_class int_rbx_reg(RBX);
  985 
  986 // Singleton class for RCX int register (RCX only, no _H half)
  987 reg_class int_rcx_reg(RCX);
  988 
  989 // Singleton class for RDX int register (RDX only, no _H half)
  990 reg_class int_rdx_reg(RDX);
  991 
  992 // Singleton class for RDI int register (RDI only, no _H half)
  993 reg_class int_rdi_reg(RDI);
  994 
  995 // Singleton class for instruction pointer
  996 // reg_class ip_reg(RIP);
  997 
  998 alloc_class chunk1(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p, // chunk1: every XMM slot, one row per register (base + b..p), XMM0 through XMM31 in ascending order
  999                    XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
 1000                    XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
 1001                    XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
 1002                    XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
 1003                    XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
 1004                    XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
 1005                    XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p,
 1006                    XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
 1007                    XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
 1008                    XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
 1009                    XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
 1010                    XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
 1011                    XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
 1012                    XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
 1013                    XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
 1014                    XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
 1015                    XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
 1016                    XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
 1017                    XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
 1018                    XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
 1019                    XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
 1020                    XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
 1021                    XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
 1022                    XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
 1023                    XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
 1024                    XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
 1025                    XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
 1026                    XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
 1027                    XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
 1028                    XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
 1029                    XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);
 1030 
 1031 alloc_class chunk2(K7, K7_H, // chunk2: the opmask register pairs, listed from K7 down to K1
 1032                    K6, K6_H,
 1033                    K5, K5_H,
 1034                    K4, K4_H,
 1035                    K3, K3_H,
 1036                    K2, K2_H,
 1037                    K1, K1_H);
 1038 
 1039 reg_class  vectmask_reg(K1, K1_H, // class containing all seven opmask register pairs K1..K7
 1040                         K2, K2_H,
 1041                         K3, K3_H,
 1042                         K4, K4_H,
 1043                         K5, K5_H,
 1044                         K6, K6_H,
 1045                         K7, K7_H);
 1046 
 1047 reg_class vectmask_reg_K1(K1, K1_H); // singleton classes: one per opmask register pair, K1 through K7
 1048 reg_class vectmask_reg_K2(K2, K2_H);
 1049 reg_class vectmask_reg_K3(K3, K3_H);
 1050 reg_class vectmask_reg_K4(K4, K4_H);
 1051 reg_class vectmask_reg_K5(K5, K5_H);
 1052 reg_class vectmask_reg_K6(K6, K6_H);
 1053 reg_class vectmask_reg_K7(K7, K7_H);
 1054 
 1055 // The flags allocation class should be last: chunk3 holds only RFLAGS and follows chunk1 (XMM) and chunk2 (opmask).
 1056 alloc_class chunk3(RFLAGS);
 1057 
 1058 // Singleton class for condition codes: contains only RFLAGS.
 1059 reg_class int_flags(RFLAGS);
 1060 
 1061 // Class for pre evex float registers: the base slot of XMM0..XMM15 only (no b..p slots).
 1062 reg_class float_reg_legacy(XMM0,
 1063                     XMM1,
 1064                     XMM2,
 1065                     XMM3,
 1066                     XMM4,
 1067                     XMM5,
 1068                     XMM6,
 1069                     XMM7,
 1070                     XMM8,
 1071                     XMM9,
 1072                     XMM10,
 1073                     XMM11,
 1074                     XMM12,
 1075                     XMM13,
 1076                     XMM14,
 1077                     XMM15);
 1078 
 1079 // Class for evex float registers: the base slot of XMM0..XMM31, i.e. float_reg_legacy plus XMM16..XMM31.
 1080 reg_class float_reg_evex(XMM0,
 1081                     XMM1,
 1082                     XMM2,
 1083                     XMM3,
 1084                     XMM4,
 1085                     XMM5,
 1086                     XMM6,
 1087                     XMM7,
 1088                     XMM8,
 1089                     XMM9,
 1090                     XMM10,
 1091                     XMM11,
 1092                     XMM12,
 1093                     XMM13,
 1094                     XMM14,
 1095                     XMM15,
 1096                     XMM16,
 1097                     XMM17,
 1098                     XMM18,
 1099                     XMM19,
 1100                     XMM20,
 1101                     XMM21,
 1102                     XMM22,
 1103                     XMM23,
 1104                     XMM24,
 1105                     XMM25,
 1106                     XMM26,
 1107                     XMM27,
 1108                     XMM28,
 1109                     XMM29,
 1110                     XMM30,
 1111                     XMM31);
 1112 
 1113 reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} ); // NOTE(review): presumably resolves to float_reg_evex when the predicate holds and float_reg_legacy otherwise - confirm against ADLC
 1114 reg_class_dynamic float_reg_vl(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} ); // same two classes, but the predicate additionally requires supports_avx512vl()
 1115 
 1116 // Class for pre evex double registers: the first two slots (base and b) of XMM0..XMM15.
 1117 reg_class double_reg_legacy(XMM0,  XMM0b,
 1118                      XMM1,  XMM1b,
 1119                      XMM2,  XMM2b,
 1120                      XMM3,  XMM3b,
 1121                      XMM4,  XMM4b,
 1122                      XMM5,  XMM5b,
 1123                      XMM6,  XMM6b,
 1124                      XMM7,  XMM7b,
 1125                      XMM8,  XMM8b,
 1126                      XMM9,  XMM9b,
 1127                      XMM10, XMM10b,
 1128                      XMM11, XMM11b,
 1129                      XMM12, XMM12b,
 1130                      XMM13, XMM13b,
 1131                      XMM14, XMM14b,
 1132                      XMM15, XMM15b);
 1133 
 1134 // Class for evex double registers
 1135 reg_class double_reg_evex(XMM0,  XMM0b,
 1136                      XMM1,  XMM1b,
 1137                      XMM2,  XMM2b,
 1138                      XMM3,  XMM3b,
 1139                      XMM4,  XMM4b,
 1140                      XMM5,  XMM5b,
 1141                      XMM6,  XMM6b,
 1142                      XMM7,  XMM7b,
 1143                      XMM8,  XMM8b,
 1144                      XMM9,  XMM9b,
 1145                      XMM10, XMM10b,
 1146                      XMM11, XMM11b,
 1147                      XMM12, XMM12b,
 1148                      XMM13, XMM13b,
 1149                      XMM14, XMM14b,
 1150                      XMM15, XMM15b,
 1151                      XMM16, XMM16b,
 1152                      XMM17, XMM17b,
 1153                      XMM18, XMM18b,
 1154                      XMM19, XMM19b,
 1155                      XMM20, XMM20b,
 1156                      XMM21, XMM21b,
 1157                      XMM22, XMM22b,
 1158                      XMM23, XMM23b,
 1159                      XMM24, XMM24b,
 1160                      XMM25, XMM25b,
 1161                      XMM26, XMM26b,
 1162                      XMM27, XMM27b,
 1163                      XMM28, XMM28b,
 1164                      XMM29, XMM29b,
 1165                      XMM30, XMM30b,
 1166                      XMM31, XMM31b);
 1167 
 1168 reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} );
 1169 reg_class_dynamic double_reg_vl(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
 1170 
 1171 // Class for pre evex 32bit vector registers
 1172 reg_class vectors_reg_legacy(XMM0,
 1173                       XMM1,
 1174                       XMM2,
 1175                       XMM3,
 1176                       XMM4,
 1177                       XMM5,
 1178                       XMM6,
 1179                       XMM7,
 1180                       XMM8,
 1181                       XMM9,
 1182                       XMM10,
 1183                       XMM11,
 1184                       XMM12,
 1185                       XMM13,
 1186                       XMM14,
 1187                       XMM15);
 1188 
 1189 // Class for evex 32bit vector registers
 1190 reg_class vectors_reg_evex(XMM0,
 1191                       XMM1,
 1192                       XMM2,
 1193                       XMM3,
 1194                       XMM4,
 1195                       XMM5,
 1196                       XMM6,
 1197                       XMM7,
 1198                       XMM8,
 1199                       XMM9,
 1200                       XMM10,
 1201                       XMM11,
 1202                       XMM12,
 1203                       XMM13,
 1204                       XMM14,
 1205                       XMM15,
 1206                       XMM16,
 1207                       XMM17,
 1208                       XMM18,
 1209                       XMM19,
 1210                       XMM20,
 1211                       XMM21,
 1212                       XMM22,
 1213                       XMM23,
 1214                       XMM24,
 1215                       XMM25,
 1216                       XMM26,
 1217                       XMM27,
 1218                       XMM28,
 1219                       XMM29,
 1220                       XMM30,
 1221                       XMM31);
 1222 
 1223 reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} );
 1224 reg_class_dynamic vectors_reg_vlbwdq(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
 1225 
// Class for pre evex 64bit vector registers
 1227 reg_class vectord_reg_legacy(XMM0,  XMM0b,
 1228                       XMM1,  XMM1b,
 1229                       XMM2,  XMM2b,
 1230                       XMM3,  XMM3b,
 1231                       XMM4,  XMM4b,
 1232                       XMM5,  XMM5b,
 1233                       XMM6,  XMM6b,
 1234                       XMM7,  XMM7b,
 1235                       XMM8,  XMM8b,
 1236                       XMM9,  XMM9b,
 1237                       XMM10, XMM10b,
 1238                       XMM11, XMM11b,
 1239                       XMM12, XMM12b,
 1240                       XMM13, XMM13b,
 1241                       XMM14, XMM14b,
 1242                       XMM15, XMM15b);
 1243 
// Class for evex 64bit vector registers
 1245 reg_class vectord_reg_evex(XMM0,  XMM0b,
 1246                       XMM1,  XMM1b,
 1247                       XMM2,  XMM2b,
 1248                       XMM3,  XMM3b,
 1249                       XMM4,  XMM4b,
 1250                       XMM5,  XMM5b,
 1251                       XMM6,  XMM6b,
 1252                       XMM7,  XMM7b,
 1253                       XMM8,  XMM8b,
 1254                       XMM9,  XMM9b,
 1255                       XMM10, XMM10b,
 1256                       XMM11, XMM11b,
 1257                       XMM12, XMM12b,
 1258                       XMM13, XMM13b,
 1259                       XMM14, XMM14b,
 1260                       XMM15, XMM15b,
 1261                       XMM16, XMM16b,
 1262                       XMM17, XMM17b,
 1263                       XMM18, XMM18b,
 1264                       XMM19, XMM19b,
 1265                       XMM20, XMM20b,
 1266                       XMM21, XMM21b,
 1267                       XMM22, XMM22b,
 1268                       XMM23, XMM23b,
 1269                       XMM24, XMM24b,
 1270                       XMM25, XMM25b,
 1271                       XMM26, XMM26b,
 1272                       XMM27, XMM27b,
 1273                       XMM28, XMM28b,
 1274                       XMM29, XMM29b,
 1275                       XMM30, XMM30b,
 1276                       XMM31, XMM31b);
 1277 
 1278 reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} );
 1279 reg_class_dynamic vectord_reg_vlbwdq(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
 1280 
// Class for pre evex 128bit vector registers
 1282 reg_class vectorx_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,
 1283                       XMM1,  XMM1b,  XMM1c,  XMM1d,
 1284                       XMM2,  XMM2b,  XMM2c,  XMM2d,
 1285                       XMM3,  XMM3b,  XMM3c,  XMM3d,
 1286                       XMM4,  XMM4b,  XMM4c,  XMM4d,
 1287                       XMM5,  XMM5b,  XMM5c,  XMM5d,
 1288                       XMM6,  XMM6b,  XMM6c,  XMM6d,
 1289                       XMM7,  XMM7b,  XMM7c,  XMM7d,
 1290                       XMM8,  XMM8b,  XMM8c,  XMM8d,
 1291                       XMM9,  XMM9b,  XMM9c,  XMM9d,
 1292                       XMM10, XMM10b, XMM10c, XMM10d,
 1293                       XMM11, XMM11b, XMM11c, XMM11d,
 1294                       XMM12, XMM12b, XMM12c, XMM12d,
 1295                       XMM13, XMM13b, XMM13c, XMM13d,
 1296                       XMM14, XMM14b, XMM14c, XMM14d,
 1297                       XMM15, XMM15b, XMM15c, XMM15d);
 1298 
// Class for evex 128bit vector registers
 1300 reg_class vectorx_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,
 1301                       XMM1,  XMM1b,  XMM1c,  XMM1d,
 1302                       XMM2,  XMM2b,  XMM2c,  XMM2d,
 1303                       XMM3,  XMM3b,  XMM3c,  XMM3d,
 1304                       XMM4,  XMM4b,  XMM4c,  XMM4d,
 1305                       XMM5,  XMM5b,  XMM5c,  XMM5d,
 1306                       XMM6,  XMM6b,  XMM6c,  XMM6d,
 1307                       XMM7,  XMM7b,  XMM7c,  XMM7d,
 1308                       XMM8,  XMM8b,  XMM8c,  XMM8d,
 1309                       XMM9,  XMM9b,  XMM9c,  XMM9d,
 1310                       XMM10, XMM10b, XMM10c, XMM10d,
 1311                       XMM11, XMM11b, XMM11c, XMM11d,
 1312                       XMM12, XMM12b, XMM12c, XMM12d,
 1313                       XMM13, XMM13b, XMM13c, XMM13d,
 1314                       XMM14, XMM14b, XMM14c, XMM14d,
 1315                       XMM15, XMM15b, XMM15c, XMM15d,
 1316                       XMM16, XMM16b, XMM16c, XMM16d,
 1317                       XMM17, XMM17b, XMM17c, XMM17d,
 1318                       XMM18, XMM18b, XMM18c, XMM18d,
 1319                       XMM19, XMM19b, XMM19c, XMM19d,
 1320                       XMM20, XMM20b, XMM20c, XMM20d,
 1321                       XMM21, XMM21b, XMM21c, XMM21d,
 1322                       XMM22, XMM22b, XMM22c, XMM22d,
 1323                       XMM23, XMM23b, XMM23c, XMM23d,
 1324                       XMM24, XMM24b, XMM24c, XMM24d,
 1325                       XMM25, XMM25b, XMM25c, XMM25d,
 1326                       XMM26, XMM26b, XMM26c, XMM26d,
 1327                       XMM27, XMM27b, XMM27c, XMM27d,
 1328                       XMM28, XMM28b, XMM28c, XMM28d,
 1329                       XMM29, XMM29b, XMM29c, XMM29d,
 1330                       XMM30, XMM30b, XMM30c, XMM30d,
 1331                       XMM31, XMM31b, XMM31c, XMM31d);
 1332 
 1333 reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} );
 1334 reg_class_dynamic vectorx_reg_vlbwdq(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
 1335 
// Class for pre evex 256bit vector registers
 1337 reg_class vectory_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
 1338                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
 1339                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
 1340                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
 1341                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
 1342                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
 1343                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
 1344                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,
 1345                       XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
 1346                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
 1347                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
 1348                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
 1349                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
 1350                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
 1351                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
 1352                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h);
 1353 
// Class for evex 256bit vector registers
 1355 reg_class vectory_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
 1356                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
 1357                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
 1358                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
 1359                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
 1360                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
 1361                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
 1362                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,
 1363                       XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
 1364                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
 1365                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
 1366                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
 1367                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
 1368                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
 1369                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
 1370                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h,
 1371                       XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h,
 1372                       XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h,
 1373                       XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h,
 1374                       XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h,
 1375                       XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h,
 1376                       XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h,
 1377                       XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h,
 1378                       XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h,
 1379                       XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h,
 1380                       XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h,
 1381                       XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h,
 1382                       XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h,
 1383                       XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h,
 1384                       XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h,
 1385                       XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h,
 1386                       XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h);
 1387 
 1388 reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} );
 1389 reg_class_dynamic vectory_reg_vlbwdq(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
 1390 
// Class for evex 512bit vector registers (XMM0-XMM31)
 1392 reg_class vectorz_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
 1393                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
 1394                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
 1395                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
 1396                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
 1397                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
 1398                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
 1399                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p,
 1400                       XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
 1401                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
 1402                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
 1403                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
 1404                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
 1405                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
 1406                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
 1407                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
 1408                       XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
 1409                       XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
 1410                       XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
 1411                       XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
 1412                       XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
 1413                       XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
 1414                       XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
 1415                       XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
 1416                       XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
 1417                       XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
 1418                       XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
 1419                       XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
 1420                       XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
 1421                       XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
 1422                       XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
 1423                       XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);
 1424 
// Class for pre evex 512bit vector registers (restricted to XMM0-XMM15)
 1426 reg_class vectorz_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
 1427                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
 1428                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
 1429                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
 1430                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
 1431                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
 1432                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
 1433                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p,
 1434                       XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
 1435                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
 1436                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
 1437                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
 1438                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
 1439                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
 1440                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
 1441                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p);
 1442 
 1443 reg_class_dynamic vectorz_reg   (vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() %} );
 1444 reg_class_dynamic vectorz_reg_vl(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
 1445 
 1446 reg_class xmm0_reg(XMM0, XMM0b, XMM0c, XMM0d);
 1447 
 1448 %}
 1449 
 1450 
 1451 //----------SOURCE BLOCK-------------------------------------------------------
 1452 // This is a block of C++ code which provides values, functions, and
 1453 // definitions necessary in the rest of the architecture description
 1454 
 1455 source_hpp %{
 1456 
 1457 #include "peephole_x86_64.hpp"
 1458 
// Returns true when both bounds of the long type of the CastLL node 'n' are
// either unbounded (min_jlong / max_jlong) or satisfy Assembler::is_simm32.
// 'n' must be a CastLL node (asserted in the definition).
bool castLL_is_imm32(const Node* n);
 1460 
 1461 %}
 1462 
 1463 source %{
 1464 
 1465 bool castLL_is_imm32(const Node* n) {
 1466   assert(n->is_CastLL(), "must be a CastLL");
 1467   const TypeLong* t = n->bottom_type()->is_long();
 1468   return (t->_lo == min_jlong || Assembler::is_simm32(t->_lo)) && (t->_hi == max_jlong || Assembler::is_simm32(t->_hi));
 1469 }
 1470 
 1471 %}
 1472 
 1473 // Register masks
 1474 source_hpp %{
 1475 
// Register masks that are computed at startup by reg_mask_init() instead of
// being emitted directly by adlc. The definitions live in the following
// source block.

// General purpose register masks (pointer, long and int flavours), each with
// variants that exclude the registers named in the identifier.
extern RegMask _ANY_REG_mask;
extern RegMask _PTR_REG_mask;
extern RegMask _PTR_REG_NO_RBP_mask;
extern RegMask _PTR_NO_RAX_REG_mask;
extern RegMask _PTR_NO_RAX_RBX_REG_mask;
extern RegMask _LONG_REG_mask;
extern RegMask _LONG_NO_RAX_RDX_REG_mask;
extern RegMask _LONG_NO_RCX_REG_mask;
extern RegMask _LONG_NO_RBP_R13_REG_mask;
extern RegMask _INT_REG_mask;
extern RegMask _INT_NO_RAX_RDX_REG_mask;
extern RegMask _INT_NO_RCX_REG_mask;
extern RegMask _INT_NO_RBP_R13_REG_mask;
// Copy of the evex or legacy float register class mask, chosen by
// VM_Version::supports_evex() in reg_mask_init().
extern RegMask _FLOAT_REG_mask;

// The corresponding register masks or-ed with STACK_OR_STACK_SLOTS_mask().
extern RegMask _STACK_OR_PTR_REG_mask;
extern RegMask _STACK_OR_LONG_REG_mask;
extern RegMask _STACK_OR_INT_REG_mask;

// Read-only accessors for the stack-or-register masks.
inline const RegMask& STACK_OR_PTR_REG_mask()  { return _STACK_OR_PTR_REG_mask;  }
inline const RegMask& STACK_OR_LONG_REG_mask() { return _STACK_OR_LONG_REG_mask; }
inline const RegMask& STACK_OR_INT_REG_mask()  { return _STACK_OR_INT_REG_mask;  }
 1498 
 1499 %}
 1500 
 1501 source %{
// Short aliases for the Assembler operand kinds imm_operand and disp32_operand.
#define   RELOC_IMM64    Assembler::imm_operand
#define   RELOC_DISP32   Assembler::disp32_operand

// Emission shorthand: "__ foo(...)" expands to "masm->foo(...)", so a variable
// named 'masm' must be in scope wherever it is used.
#define __ masm->
 1506 
// Storage for the register masks declared extern in the source_hpp block.
// They start out default-constructed and are filled in by reg_mask_init().
RegMask _ANY_REG_mask;
RegMask _PTR_REG_mask;
RegMask _PTR_REG_NO_RBP_mask;
RegMask _PTR_NO_RAX_REG_mask;
RegMask _PTR_NO_RAX_RBX_REG_mask;
RegMask _LONG_REG_mask;
RegMask _LONG_NO_RAX_RDX_REG_mask;
RegMask _LONG_NO_RCX_REG_mask;
RegMask _LONG_NO_RBP_R13_REG_mask;
RegMask _INT_REG_mask;
RegMask _INT_NO_RAX_RDX_REG_mask;
RegMask _INT_NO_RCX_REG_mask;
RegMask _INT_NO_RBP_R13_REG_mask;
RegMask _FLOAT_REG_mask;
RegMask _STACK_OR_PTR_REG_mask;
RegMask _STACK_OR_LONG_REG_mask;
RegMask _STACK_OR_INT_REG_mask;
 1524 
 1525 static bool need_r12_heapbase() {
 1526   return UseCompressedOops;
 1527 }
 1528 
 1529 void reg_mask_init() {
 1530   constexpr Register egprs[] = {r16, r17, r18, r19, r20, r21, r22, r23, r24, r25, r26, r27, r28, r29, r30, r31};
 1531 
 1532   // _ALL_REG_mask is generated by adlc from the all_reg register class below.
 1533   // We derive a number of subsets from it.
 1534   _ANY_REG_mask.assignFrom(_ALL_REG_mask);
 1535 
 1536   if (PreserveFramePointer) {
 1537     _ANY_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1538     _ANY_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
 1539   }
 1540   if (need_r12_heapbase()) {
 1541     _ANY_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()));
 1542     _ANY_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()->next()));
 1543   }
 1544 
 1545   _PTR_REG_mask.assignFrom(_ANY_REG_mask);
 1546   _PTR_REG_mask.remove(OptoReg::as_OptoReg(rsp->as_VMReg()));
 1547   _PTR_REG_mask.remove(OptoReg::as_OptoReg(rsp->as_VMReg()->next()));
 1548   _PTR_REG_mask.remove(OptoReg::as_OptoReg(r15->as_VMReg()));
 1549   _PTR_REG_mask.remove(OptoReg::as_OptoReg(r15->as_VMReg()->next()));
 1550   if (!UseAPX) {
 1551     for (uint i = 0; i < sizeof(egprs)/sizeof(Register); i++) {
 1552       _PTR_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
 1553       _PTR_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()->next()));
 1554     }
 1555   }
 1556 
 1557   _STACK_OR_PTR_REG_mask.assignFrom(_PTR_REG_mask);
 1558   _STACK_OR_PTR_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
 1559 
 1560   _PTR_REG_NO_RBP_mask.assignFrom(_PTR_REG_mask);
 1561   _PTR_REG_NO_RBP_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1562   _PTR_REG_NO_RBP_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
 1563 
 1564   _PTR_NO_RAX_REG_mask.assignFrom(_PTR_REG_mask);
 1565   _PTR_NO_RAX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
 1566   _PTR_NO_RAX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
 1567 
 1568   _PTR_NO_RAX_RBX_REG_mask.assignFrom(_PTR_NO_RAX_REG_mask);
 1569   _PTR_NO_RAX_RBX_REG_mask.remove(OptoReg::as_OptoReg(rbx->as_VMReg()));
 1570   _PTR_NO_RAX_RBX_REG_mask.remove(OptoReg::as_OptoReg(rbx->as_VMReg()->next()));
 1571 
 1572 
 1573   _LONG_REG_mask.assignFrom(_PTR_REG_mask);
 1574   _STACK_OR_LONG_REG_mask.assignFrom(_LONG_REG_mask);
 1575   _STACK_OR_LONG_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
 1576 
 1577   _LONG_NO_RAX_RDX_REG_mask.assignFrom(_LONG_REG_mask);
 1578   _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
 1579   _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
 1580   _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
 1581   _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()->next()));
 1582 
 1583   _LONG_NO_RCX_REG_mask.assignFrom(_LONG_REG_mask);
 1584   _LONG_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
 1585   _LONG_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()->next()));
 1586 
 1587   _LONG_NO_RBP_R13_REG_mask.assignFrom(_LONG_REG_mask);
 1588   _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1589   _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
 1590   _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()));
 1591   _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()->next()));
 1592 
 1593   _INT_REG_mask.assignFrom(_ALL_INT_REG_mask);
 1594   if (!UseAPX) {
 1595     for (uint i = 0; i < sizeof(egprs)/sizeof(Register); i++) {
 1596       _INT_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
 1597     }
 1598   }
 1599 
 1600   if (PreserveFramePointer) {
 1601     _INT_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1602   }
 1603   if (need_r12_heapbase()) {
 1604     _INT_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()));
 1605   }
 1606 
 1607   _STACK_OR_INT_REG_mask.assignFrom(_INT_REG_mask);
 1608   _STACK_OR_INT_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
 1609 
 1610   _INT_NO_RAX_RDX_REG_mask.assignFrom(_INT_REG_mask);
 1611   _INT_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
 1612   _INT_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
 1613 
 1614   _INT_NO_RCX_REG_mask.assignFrom(_INT_REG_mask);
 1615   _INT_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
 1616 
 1617   _INT_NO_RBP_R13_REG_mask.assignFrom(_INT_REG_mask);
 1618   _INT_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1619   _INT_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()));
 1620 
 1621   // _FLOAT_REG_LEGACY_mask/_FLOAT_REG_EVEX_mask is generated by adlc
 1622   // from the float_reg_legacy/float_reg_evex register class.
 1623   _FLOAT_REG_mask.assignFrom(VM_Version::supports_evex() ? _FLOAT_REG_EVEX_mask : _FLOAT_REG_LEGACY_mask);
 1624 }
 1625 
 1626 static bool generate_vzeroupper(Compile* C) {
 1627   return (VM_Version::supports_vzeroupper() && (C->max_vector_size() > 16 || C->clear_upper_avx() == true)) ? true: false;  // Generate vzeroupper
 1628 }
 1629 
 1630 static int clear_avx_size() {
 1631   return generate_vzeroupper(Compile::current()) ? 3: 0;  // vzeroupper
 1632 }
 1633 
 1634 // !!!!! Special hack to get all types of calls to specify the byte offset
 1635 //       from the start of the call to the point where the return address
 1636 //       will point.
 1637 int MachCallStaticJavaNode::ret_addr_offset()
 1638 {
 1639   int offset = 5; // 5 bytes from start of call to where return address points
 1640   offset += clear_avx_size();
 1641   return offset;
 1642 }
 1643 
 1644 int MachCallDynamicJavaNode::ret_addr_offset()
 1645 {
 1646   int offset = 15; // 15 bytes from start of call to where return address points
 1647   offset += clear_avx_size();
 1648   return offset;
 1649 }
 1650 
 1651 int MachCallRuntimeNode::ret_addr_offset() {
 1652   int offset = 13; // movq r10,#addr; callq (r10)
 1653   if (this->ideal_Opcode() != Op_CallLeafVector) {
 1654     offset += clear_avx_size();
 1655   }
 1656   return offset;
 1657 }
 1658 //
 1659 // Compute padding required for nodes which need alignment
 1660 //
 1661 
 1662 // The address of the call instruction needs to be 4-byte aligned to
 1663 // ensure that it does not span a cache line so that it can be patched.
 1664 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
 1665 {
 1666   current_offset += clear_avx_size(); // skip vzeroupper
 1667   current_offset += 1; // skip call opcode byte
 1668   return align_up(current_offset, alignment_required()) - current_offset;
 1669 }
 1670 
 1671 // The address of the call instruction needs to be 4-byte aligned to
 1672 // ensure that it does not span a cache line so that it can be patched.
 1673 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
 1674 {
 1675   current_offset += clear_avx_size(); // skip vzeroupper
 1676   current_offset += 11; // skip movq instruction + call opcode byte
 1677   return align_up(current_offset, alignment_required()) - current_offset;
 1678 }
 1679 
 1680 // This could be in MacroAssembler but it's fairly C2 specific
 1681 static void emit_cmpfp_fixup(MacroAssembler* masm) {
 1682   Label exit;
 1683   __ jccb(Assembler::noParity, exit);
 1684   __ pushf();
 1685   //
 1686   // comiss/ucomiss instructions set ZF,PF,CF flags and
 1687   // zero OF,AF,SF for NaN values.
 1688   // Fixup flags by zeroing ZF,PF so that compare of NaN
 1689   // values returns 'less than' result (CF is set).
 1690   // Leave the rest of flags unchanged.
 1691   //
 1692   //    7 6 5 4 3 2 1 0
 1693   //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
 1694   //    0 0 1 0 1 0 1 1   (0x2B)
 1695   //
 1696   __ andq(Address(rsp, 0), 0xffffff2b);
 1697   __ popf();
 1698   __ bind(exit);
 1699 }
 1700 
 1701 static void emit_cmpfp3(MacroAssembler* masm, Register dst) {
 1702   // If any floating point comparison instruction is used, unordered case always triggers jump
 1703   // for below condition, CF=1 is true when at least one input is NaN
 1704   Label done;
 1705   __ movl(dst, -1);
 1706   __ jcc(Assembler::below, done);
 1707   __ setcc(Assembler::notEqual, dst);
 1708   __ bind(done);
 1709 }
 1710 
 1711 enum FP_PREC {
 1712   fp_prec_hlf,
 1713   fp_prec_flt,
 1714   fp_prec_dbl
 1715 };
 1716 
 1717 static inline void emit_fp_ucom(MacroAssembler* masm, enum FP_PREC pt,
 1718                                 XMMRegister p, XMMRegister q) {
 1719   if (pt == fp_prec_hlf) {
 1720     __ evucomish(p, q);
 1721   } else if (pt == fp_prec_flt) {
 1722     __ ucomiss(p, q);
 1723   } else {
 1724     __ ucomisd(p, q);
 1725   }
 1726 }
 1727 
 1728 static inline void movfp(MacroAssembler* masm, enum FP_PREC pt,
 1729                          XMMRegister dst, XMMRegister src, Register scratch) {
 1730   if (pt == fp_prec_hlf) {
 1731     __ movhlf(dst, src, scratch);
 1732   } else if (pt == fp_prec_flt) {
 1733     __ movflt(dst, src);
 1734   } else {
 1735     __ movdbl(dst, src);
 1736   }
 1737 }
 1738 
 1739 // Math.min()          # Math.max()
 1740 // -----------------------------
 1741 // (v)ucomis[h/s/d]    #
 1742 // ja   -> b           # a
 1743 // jp   -> NaN         # NaN
 1744 // jb   -> a           # b
 1745 // je   -> a | b       # a & b
 1746 static void emit_fp_min_max(MacroAssembler* masm, XMMRegister dst,
 1747                             XMMRegister a, XMMRegister b, Register rt,
 1748                             bool min, enum FP_PREC pt) {
 1749   Label nan, zero, below, above, done;
 1750 
 1751   emit_fp_ucom(masm, pt, a, b);
 1752 
 1753   if (dst->encoding() != (min ? b : a)->encoding()) {
 1754     __ jccb(Assembler::above, above); // CF=0 & ZF=0
 1755   } else {
 1756     __ jccb(Assembler::above, done);
 1757   }
 1758   __ jccb(Assembler::parity, nan);  // PF=1
 1759   __ jccb(Assembler::below, below); // CF=1
 1760 
 1761   // equal
 1762   // Using bitwise operations is a low cost way to compute the correct result
 1763   // for zero and non-zero inputs in this scenario except for NaN, which is
 1764   // handled separately. The mantissa and exponent are valid with either
 1765   // bitwise operation. For zero inputs, the sign bit is chosen according to
 1766   // whether a minimum or maximum value is required.
 1767   if (min) {
 1768     // Negative sign preserved when available (e.g., min(+0, -0) -> -0)
 1769     __ vpor(dst, a, b, Assembler::AVX_128bit);
 1770   } else {
 1771     // Positive sign preserved when available (e.g., max(+0, -0) -> +0)
 1772     __ vpand(dst, a, b, Assembler::AVX_128bit);
 1773   }
 1774   __ jmp(done);
 1775 
 1776   __ bind(above);
 1777   movfp(masm, pt, dst, min ? b : a, rt);
 1778   __ jmp(done);
 1779 
 1780   __ bind(nan);
 1781   if (pt == fp_prec_hlf) {
 1782     __ movl(rt, 0x00007e00); // Float16.NaN
 1783     __ evmovw(dst, rt);
 1784   } else if (pt == fp_prec_flt) {
 1785     __ movl(rt, 0x7fc00000); // Float.NaN
 1786     __ movdl(dst, rt);
 1787   } else {
 1788     __ mov64(rt, 0x7ff8000000000000L); // Double.NaN
 1789     __ movdq(dst, rt);
 1790   }
 1791   __ jmp(done);
 1792 
 1793   __ bind(below);
 1794   movfp(masm, pt, dst, min ? a : b, rt);
 1795 
 1796   __ bind(done);
 1797 }
 1798 
 1799 //=============================================================================
// The output register mask of MachConstantBaseNode is bound to the empty mask.
const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::EMPTY;
 1801 
 1802 int ConstantTable::calculate_table_base_offset() const {
 1803   return 0;  // absolute addressing, no offset
 1804 }
 1805 
 1806 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
 1807 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
 1808   ShouldNotReachHere();
 1809 }
 1810 
 1811 void MachConstantBaseNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const {
 1812   // Empty encoding
 1813 }
 1814 
 1815 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
 1816   return 0;
 1817 }
 1818 
 1819 #ifndef PRODUCT
 1820 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 1821   st->print("# MachConstantBaseNode (empty encoding)");
 1822 }
 1823 #endif
 1824 
 1825 
 1826 //=============================================================================
 1827 #ifndef PRODUCT
 1828 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 1829   Compile* C = ra_->C;
 1830 
 1831   int framesize = C->output()->frame_size_in_bytes();
 1832   int bangsize = C->output()->bang_size_in_bytes();
 1833   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 1834   // Remove wordSize for return addr which is already pushed.
 1835   framesize -= wordSize;
 1836 
 1837   if (C->output()->need_stack_bang(bangsize)) {
 1838     framesize -= wordSize;
 1839     st->print("# stack bang (%d bytes)", bangsize);
 1840     st->print("\n\t");
 1841     st->print("pushq   rbp\t# Save rbp");
 1842     if (PreserveFramePointer) {
 1843         st->print("\n\t");
 1844         st->print("movq    rbp, rsp\t# Save the caller's SP into rbp");
 1845     }
 1846     if (framesize) {
 1847       st->print("\n\t");
 1848       st->print("subq    rsp, #%d\t# Create frame",framesize);
 1849     }
 1850   } else {
 1851     st->print("subq    rsp, #%d\t# Create frame",framesize);
 1852     st->print("\n\t");
 1853     framesize -= wordSize;
 1854     st->print("movq    [rsp + #%d], rbp\t# Save rbp",framesize);
 1855     if (PreserveFramePointer) {
 1856       st->print("\n\t");
 1857       st->print("movq    rbp, rsp\t# Save the caller's SP into rbp");
 1858       if (framesize > 0) {
 1859         st->print("\n\t");
 1860         st->print("addq    rbp, #%d", framesize);
 1861       }
 1862     }
 1863   }
 1864 
 1865   if (VerifyStackAtCalls) {
 1866     st->print("\n\t");
 1867     framesize -= wordSize;
 1868     st->print("movq    [rsp + #%d], 0xbadb100d\t# Majik cookie for stack depth check",framesize);
 1869 #ifdef ASSERT
 1870     st->print("\n\t");
 1871     st->print("# stack alignment check");
 1872 #endif
 1873   }
 1874   if (C->stub_function() != nullptr) {
 1875     st->print("\n\t");
 1876     st->print("cmpl    [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
 1877     st->print("\n\t");
 1878     st->print("je      fast_entry\t");
 1879     st->print("\n\t");
 1880     st->print("call    #nmethod_entry_barrier_stub\t");
 1881     st->print("\n\tfast_entry:");
 1882   }
 1883   st->cr();
 1884 }
 1885 #endif
 1886 
 1887 void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
 1888   Compile* C = ra_->C;
 1889 
 1890   int framesize = C->output()->frame_size_in_bytes();
 1891   int bangsize = C->output()->bang_size_in_bytes();
 1892 
 1893   if (C->clinit_barrier_on_entry()) {
 1894     assert(VM_Version::supports_fast_class_init_checks(), "sanity");
 1895     assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");
 1896 
 1897     Label L_skip_barrier;
 1898     Register klass = rscratch1;
 1899 
 1900     __ mov_metadata(klass, C->method()->holder()->constant_encoding());
 1901     __ clinit_barrier(klass, &L_skip_barrier /*L_fast_path*/);
 1902 
 1903     __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path
 1904 
 1905     __ bind(L_skip_barrier);
 1906   }
 1907 
 1908   __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, false, C->stub_function() != nullptr);
 1909 
 1910   C->output()->set_frame_complete(__ offset());
 1911 
 1912   if (C->has_mach_constant_base_node()) {
 1913     // NOTE: We set the table base offset here because users might be
 1914     // emitted before MachConstantBaseNode.
 1915     ConstantTable& constant_table = C->output()->constant_table();
 1916     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
 1917   }
 1918 }
 1919 
 1920 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
 1921 {
 1922   return MachNode::size(ra_); // too many variables; just compute it
 1923                               // the hard way
 1924 }
 1925 
 1926 int MachPrologNode::reloc() const
 1927 {
 1928   return 0; // a large enough number
 1929 }
 1930 
 1931 //=============================================================================
 1932 #ifndef PRODUCT
 1933 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 1934 {
 1935   Compile* C = ra_->C;
 1936   if (generate_vzeroupper(C)) {
 1937     st->print("vzeroupper");
 1938     st->cr(); st->print("\t");
 1939   }
 1940 
 1941   int framesize = C->output()->frame_size_in_bytes();
 1942   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 1943   // Remove word for return adr already pushed
 1944   // and RBP
 1945   framesize -= 2*wordSize;
 1946 
 1947   if (framesize) {
 1948     st->print_cr("addq    rsp, %d\t# Destroy frame", framesize);
 1949     st->print("\t");
 1950   }
 1951 
 1952   st->print_cr("popq    rbp");
 1953   if (do_polling() && C->is_method_compilation()) {
 1954     st->print("\t");
 1955     st->print_cr("cmpq    rsp, poll_offset[r15_thread] \n\t"
 1956                  "ja      #safepoint_stub\t"
 1957                  "# Safepoint: poll for GC");
 1958   }
 1959 }
 1960 #endif
 1961 
 1962 void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 1963 {
 1964   Compile* C = ra_->C;
 1965 
 1966   if (generate_vzeroupper(C)) {
 1967     // Clear upper bits of YMM registers when current compiled code uses
 1968     // wide vectors to avoid AVX <-> SSE transition penalty during call.
 1969     __ vzeroupper();
 1970   }
 1971 
 1972   int framesize = C->output()->frame_size_in_bytes();
 1973   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 1974   // Remove word for return adr already pushed
 1975   // and RBP
 1976   framesize -= 2*wordSize;
 1977 
 1978   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
 1979 
 1980   if (framesize) {
 1981     __ addq(rsp, framesize);
 1982   }
 1983 
 1984   __ popq(rbp);
 1985 
 1986   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
 1987     __ reserved_stack_check();
 1988   }
 1989 
 1990   if (do_polling() && C->is_method_compilation()) {
 1991     Label dummy_label;
 1992     Label* code_stub = &dummy_label;
 1993     if (!C->output()->in_scratch_emit_size()) {
 1994       C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
 1995       C->output()->add_stub(stub);
 1996       code_stub = &stub->entry();
 1997     }
 1998     __ relocate(relocInfo::poll_return_type);
 1999     __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
 2000   }
 2001 }
 2002 
 2003 uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
 2004 {
 2005   return MachNode::size(ra_); // too many variables; just compute it
 2006                               // the hard way
 2007 }
 2008 
 2009 int MachEpilogNode::reloc() const
 2010 {
 2011   return 2; // a large enough number
 2012 }
 2013 
 2014 const Pipeline* MachEpilogNode::pipeline() const
 2015 {
 2016   return MachNode::pipeline_class();
 2017 }
 2018 
 2019 //=============================================================================
 2020 
 2021 enum RC {
 2022   rc_bad,
 2023   rc_int,
 2024   rc_kreg,
 2025   rc_float,
 2026   rc_stack
 2027 };
 2028 
 2029 static enum RC rc_class(OptoReg::Name reg)
 2030 {
 2031   if( !OptoReg::is_valid(reg)  ) return rc_bad;
 2032 
 2033   if (OptoReg::is_stack(reg)) return rc_stack;
 2034 
 2035   VMReg r = OptoReg::as_VMReg(reg);
 2036 
 2037   if (r->is_Register()) return rc_int;
 2038 
 2039   if (r->is_KRegister()) return rc_kreg;
 2040 
 2041   assert(r->is_XMMRegister(), "must be");
 2042   return rc_float;
 2043 }
 2044 
// Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
// vec_mov_helper: used below for xmm -> xmm copies of vector values; it is
// passed the first/second OptoReg names of source and destination, the ideal
// register kind (ireg) and the stream used when only printing.
static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
                          int src_hi, int dst_hi, uint ireg, outputStream* st);

// vec_spill_helper: used below to move a vector value between an xmm register
// and the stack slot at stack_offset; callers pass is_load == true for
// stack -> xmm and is_load == false for xmm -> stack.
void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
                     int stack_offset, int reg, uint ireg, outputStream* st);
 2051 
 2052 static void vec_stack_to_stack_helper(C2_MacroAssembler *masm, int src_offset,
 2053                                       int dst_offset, uint ireg, outputStream* st) {
 2054   if (masm) {
 2055     switch (ireg) {
 2056     case Op_VecS:
 2057       __ movq(Address(rsp, -8), rax);
 2058       __ movl(rax, Address(rsp, src_offset));
 2059       __ movl(Address(rsp, dst_offset), rax);
 2060       __ movq(rax, Address(rsp, -8));
 2061       break;
 2062     case Op_VecD:
 2063       __ pushq(Address(rsp, src_offset));
 2064       __ popq (Address(rsp, dst_offset));
 2065       break;
 2066     case Op_VecX:
 2067       __ pushq(Address(rsp, src_offset));
 2068       __ popq (Address(rsp, dst_offset));
 2069       __ pushq(Address(rsp, src_offset+8));
 2070       __ popq (Address(rsp, dst_offset+8));
 2071       break;
 2072     case Op_VecY:
 2073       __ vmovdqu(Address(rsp, -32), xmm0);
 2074       __ vmovdqu(xmm0, Address(rsp, src_offset));
 2075       __ vmovdqu(Address(rsp, dst_offset), xmm0);
 2076       __ vmovdqu(xmm0, Address(rsp, -32));
 2077       break;
 2078     case Op_VecZ:
 2079       __ evmovdquq(Address(rsp, -64), xmm0, 2);
 2080       __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
 2081       __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
 2082       __ evmovdquq(xmm0, Address(rsp, -64), 2);
 2083       break;
 2084     default:
 2085       ShouldNotReachHere();
 2086     }
 2087 #ifndef PRODUCT
 2088   } else {
 2089     switch (ireg) {
 2090     case Op_VecS:
 2091       st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
 2092                 "movl    rax, [rsp + #%d]\n\t"
 2093                 "movl    [rsp + #%d], rax\n\t"
 2094                 "movq    rax, [rsp - #8]",
 2095                 src_offset, dst_offset);
 2096       break;
 2097     case Op_VecD:
 2098       st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
 2099                 "popq    [rsp + #%d]",
 2100                 src_offset, dst_offset);
 2101       break;
 2102      case Op_VecX:
 2103       st->print("pushq   [rsp + #%d]\t# 128-bit mem-mem spill\n\t"
 2104                 "popq    [rsp + #%d]\n\t"
 2105                 "pushq   [rsp + #%d]\n\t"
 2106                 "popq    [rsp + #%d]",
 2107                 src_offset, dst_offset, src_offset+8, dst_offset+8);
 2108       break;
 2109     case Op_VecY:
 2110       st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
 2111                 "vmovdqu xmm0, [rsp + #%d]\n\t"
 2112                 "vmovdqu [rsp + #%d], xmm0\n\t"
 2113                 "vmovdqu xmm0, [rsp - #32]",
 2114                 src_offset, dst_offset);
 2115       break;
 2116     case Op_VecZ:
 2117       st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
 2118                 "vmovdqu xmm0, [rsp + #%d]\n\t"
 2119                 "vmovdqu [rsp + #%d], xmm0\n\t"
 2120                 "vmovdqu xmm0, [rsp - #64]",
 2121                 src_offset, dst_offset);
 2122       break;
 2123     default:
 2124       ShouldNotReachHere();
 2125     }
 2126 #endif
 2127   }
 2128 }
 2129 
 2130 uint MachSpillCopyNode::implementation(C2_MacroAssembler* masm,
 2131                                        PhaseRegAlloc* ra_,
 2132                                        bool do_size,
 2133                                        outputStream* st) const {
 2134   assert(masm != nullptr || st  != nullptr, "sanity");
 2135   // Get registers to move
 2136   OptoReg::Name src_second = ra_->get_reg_second(in(1));
 2137   OptoReg::Name src_first = ra_->get_reg_first(in(1));
 2138   OptoReg::Name dst_second = ra_->get_reg_second(this);
 2139   OptoReg::Name dst_first = ra_->get_reg_first(this);
 2140 
 2141   enum RC src_second_rc = rc_class(src_second);
 2142   enum RC src_first_rc = rc_class(src_first);
 2143   enum RC dst_second_rc = rc_class(dst_second);
 2144   enum RC dst_first_rc = rc_class(dst_first);
 2145 
 2146   assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first),
 2147          "must move at least 1 register" );
 2148 
 2149   if (src_first == dst_first && src_second == dst_second) {
 2150     // Self copy, no move
 2151     return 0;
 2152   }
 2153   if (bottom_type()->isa_vect() != nullptr && bottom_type()->isa_pvectmask() == nullptr) {
 2154     uint ireg = ideal_reg();
 2155     assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
 2156     assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
 2157     if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
 2158       // mem -> mem
 2159       int src_offset = ra_->reg2offset(src_first);
 2160       int dst_offset = ra_->reg2offset(dst_first);
 2161       vec_stack_to_stack_helper(masm, src_offset, dst_offset, ireg, st);
 2162     } else if (src_first_rc == rc_float && dst_first_rc == rc_float ) {
 2163       vec_mov_helper(masm, src_first, dst_first, src_second, dst_second, ireg, st);
 2164     } else if (src_first_rc == rc_float && dst_first_rc == rc_stack ) {
 2165       int stack_offset = ra_->reg2offset(dst_first);
 2166       vec_spill_helper(masm, false, stack_offset, src_first, ireg, st);
 2167     } else if (src_first_rc == rc_stack && dst_first_rc == rc_float ) {
 2168       int stack_offset = ra_->reg2offset(src_first);
 2169       vec_spill_helper(masm, true,  stack_offset, dst_first, ireg, st);
 2170     } else {
 2171       ShouldNotReachHere();
 2172     }
 2173     return 0;
 2174   }
 2175   if (src_first_rc == rc_stack) {
 2176     // mem ->
 2177     if (dst_first_rc == rc_stack) {
 2178       // mem -> mem
 2179       assert(src_second != dst_first, "overlap");
 2180       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2181           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2182         // 64-bit
 2183         int src_offset = ra_->reg2offset(src_first);
 2184         int dst_offset = ra_->reg2offset(dst_first);
 2185         if (masm) {
 2186           __ pushq(Address(rsp, src_offset));
 2187           __ popq (Address(rsp, dst_offset));
 2188 #ifndef PRODUCT
 2189         } else {
 2190           st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
 2191                     "popq    [rsp + #%d]",
 2192                      src_offset, dst_offset);
 2193 #endif
 2194         }
 2195       } else {
 2196         // 32-bit
 2197         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2198         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2199         // No pushl/popl, so:
 2200         int src_offset = ra_->reg2offset(src_first);
 2201         int dst_offset = ra_->reg2offset(dst_first);
 2202         if (masm) {
 2203           __ movq(Address(rsp, -8), rax);
 2204           __ movl(rax, Address(rsp, src_offset));
 2205           __ movl(Address(rsp, dst_offset), rax);
 2206           __ movq(rax, Address(rsp, -8));
 2207 #ifndef PRODUCT
 2208         } else {
 2209           st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
 2210                     "movl    rax, [rsp + #%d]\n\t"
 2211                     "movl    [rsp + #%d], rax\n\t"
 2212                     "movq    rax, [rsp - #8]",
 2213                      src_offset, dst_offset);
 2214 #endif
 2215         }
 2216       }
 2217       return 0;
 2218     } else if (dst_first_rc == rc_int) {
 2219       // mem -> gpr
 2220       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2221           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2222         // 64-bit
 2223         int offset = ra_->reg2offset(src_first);
 2224         if (masm) {
 2225           __ movq(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2226 #ifndef PRODUCT
 2227         } else {
 2228           st->print("movq    %s, [rsp + #%d]\t# spill",
 2229                      Matcher::regName[dst_first],
 2230                      offset);
 2231 #endif
 2232         }
 2233       } else {
 2234         // 32-bit
 2235         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2236         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2237         int offset = ra_->reg2offset(src_first);
 2238         if (masm) {
 2239           __ movl(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2240 #ifndef PRODUCT
 2241         } else {
 2242           st->print("movl    %s, [rsp + #%d]\t# spill",
 2243                      Matcher::regName[dst_first],
 2244                      offset);
 2245 #endif
 2246         }
 2247       }
 2248       return 0;
 2249     } else if (dst_first_rc == rc_float) {
 2250       // mem-> xmm
 2251       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2252           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2253         // 64-bit
 2254         int offset = ra_->reg2offset(src_first);
 2255         if (masm) {
 2256           __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2257 #ifndef PRODUCT
 2258         } else {
 2259           st->print("%s  %s, [rsp + #%d]\t# spill",
 2260                      UseXmmLoadAndClearUpper ? "movsd " : "movlpd",
 2261                      Matcher::regName[dst_first],
 2262                      offset);
 2263 #endif
 2264         }
 2265       } else {
 2266         // 32-bit
 2267         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2268         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2269         int offset = ra_->reg2offset(src_first);
 2270         if (masm) {
 2271           __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2272 #ifndef PRODUCT
 2273         } else {
 2274           st->print("movss   %s, [rsp + #%d]\t# spill",
 2275                      Matcher::regName[dst_first],
 2276                      offset);
 2277 #endif
 2278         }
 2279       }
 2280       return 0;
 2281     } else if (dst_first_rc == rc_kreg) {
 2282       // mem -> kreg
 2283       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2284           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2285         // 64-bit
 2286         int offset = ra_->reg2offset(src_first);
 2287         if (masm) {
 2288           __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2289 #ifndef PRODUCT
 2290         } else {
 2291           st->print("kmovq   %s, [rsp + #%d]\t# spill",
 2292                      Matcher::regName[dst_first],
 2293                      offset);
 2294 #endif
 2295         }
 2296       }
 2297       return 0;
 2298     }
 2299   } else if (src_first_rc == rc_int) {
 2300     // gpr ->
 2301     if (dst_first_rc == rc_stack) {
 2302       // gpr -> mem
 2303       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2304           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2305         // 64-bit
 2306         int offset = ra_->reg2offset(dst_first);
 2307         if (masm) {
 2308           __ movq(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
 2309 #ifndef PRODUCT
 2310         } else {
 2311           st->print("movq    [rsp + #%d], %s\t# spill",
 2312                      offset,
 2313                      Matcher::regName[src_first]);
 2314 #endif
 2315         }
 2316       } else {
 2317         // 32-bit
 2318         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2319         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2320         int offset = ra_->reg2offset(dst_first);
 2321         if (masm) {
 2322           __ movl(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
 2323 #ifndef PRODUCT
 2324         } else {
 2325           st->print("movl    [rsp + #%d], %s\t# spill",
 2326                      offset,
 2327                      Matcher::regName[src_first]);
 2328 #endif
 2329         }
 2330       }
 2331       return 0;
 2332     } else if (dst_first_rc == rc_int) {
 2333       // gpr -> gpr
 2334       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2335           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2336         // 64-bit
 2337         if (masm) {
 2338           __ movq(as_Register(Matcher::_regEncode[dst_first]),
 2339                   as_Register(Matcher::_regEncode[src_first]));
 2340 #ifndef PRODUCT
 2341         } else {
 2342           st->print("movq    %s, %s\t# spill",
 2343                      Matcher::regName[dst_first],
 2344                      Matcher::regName[src_first]);
 2345 #endif
 2346         }
 2347         return 0;
 2348       } else {
 2349         // 32-bit
 2350         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2351         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2352         if (masm) {
 2353           __ movl(as_Register(Matcher::_regEncode[dst_first]),
 2354                   as_Register(Matcher::_regEncode[src_first]));
 2355 #ifndef PRODUCT
 2356         } else {
 2357           st->print("movl    %s, %s\t# spill",
 2358                      Matcher::regName[dst_first],
 2359                      Matcher::regName[src_first]);
 2360 #endif
 2361         }
 2362         return 0;
 2363       }
 2364     } else if (dst_first_rc == rc_float) {
 2365       // gpr -> xmm
 2366       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2367           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2368         // 64-bit
 2369         if (masm) {
 2370           __ movdq( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
 2371 #ifndef PRODUCT
 2372         } else {
 2373           st->print("movdq   %s, %s\t# spill",
 2374                      Matcher::regName[dst_first],
 2375                      Matcher::regName[src_first]);
 2376 #endif
 2377         }
 2378       } else {
 2379         // 32-bit
 2380         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2381         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2382         if (masm) {
 2383           __ movdl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
 2384 #ifndef PRODUCT
 2385         } else {
 2386           st->print("movdl   %s, %s\t# spill",
 2387                      Matcher::regName[dst_first],
 2388                      Matcher::regName[src_first]);
 2389 #endif
 2390         }
 2391       }
 2392       return 0;
 2393     } else if (dst_first_rc == rc_kreg) {
 2394       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2395           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2396         // 64-bit
 2397         if (masm) {
 2398           __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
 2399   #ifndef PRODUCT
 2400         } else {
 2401            st->print("kmovq   %s, %s\t# spill",
 2402                        Matcher::regName[dst_first],
 2403                        Matcher::regName[src_first]);
 2404   #endif
 2405         }
 2406       }
 2407       Unimplemented();
 2408       return 0;
 2409     }
 2410   } else if (src_first_rc == rc_float) {
 2411     // xmm ->
 2412     if (dst_first_rc == rc_stack) {
 2413       // xmm -> mem
 2414       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2415           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2416         // 64-bit
 2417         int offset = ra_->reg2offset(dst_first);
 2418         if (masm) {
 2419           __ movdbl( Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
 2420 #ifndef PRODUCT
 2421         } else {
 2422           st->print("movsd   [rsp + #%d], %s\t# spill",
 2423                      offset,
 2424                      Matcher::regName[src_first]);
 2425 #endif
 2426         }
 2427       } else {
 2428         // 32-bit
 2429         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2430         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2431         int offset = ra_->reg2offset(dst_first);
 2432         if (masm) {
 2433           __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
 2434 #ifndef PRODUCT
 2435         } else {
 2436           st->print("movss   [rsp + #%d], %s\t# spill",
 2437                      offset,
 2438                      Matcher::regName[src_first]);
 2439 #endif
 2440         }
 2441       }
 2442       return 0;
 2443     } else if (dst_first_rc == rc_int) {
 2444       // xmm -> gpr
 2445       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2446           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2447         // 64-bit
 2448         if (masm) {
 2449           __ movdq( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 2450 #ifndef PRODUCT
 2451         } else {
 2452           st->print("movdq   %s, %s\t# spill",
 2453                      Matcher::regName[dst_first],
 2454                      Matcher::regName[src_first]);
 2455 #endif
 2456         }
 2457       } else {
 2458         // 32-bit
 2459         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2460         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2461         if (masm) {
 2462           __ movdl( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 2463 #ifndef PRODUCT
 2464         } else {
 2465           st->print("movdl   %s, %s\t# spill",
 2466                      Matcher::regName[dst_first],
 2467                      Matcher::regName[src_first]);
 2468 #endif
 2469         }
 2470       }
 2471       return 0;
 2472     } else if (dst_first_rc == rc_float) {
 2473       // xmm -> xmm
 2474       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2475           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2476         // 64-bit
 2477         if (masm) {
 2478           __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 2479 #ifndef PRODUCT
 2480         } else {
 2481           st->print("%s  %s, %s\t# spill",
 2482                      UseXmmRegToRegMoveAll ? "movapd" : "movsd ",
 2483                      Matcher::regName[dst_first],
 2484                      Matcher::regName[src_first]);
 2485 #endif
 2486         }
 2487       } else {
 2488         // 32-bit
 2489         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2490         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2491         if (masm) {
 2492           __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 2493 #ifndef PRODUCT
 2494         } else {
 2495           st->print("%s  %s, %s\t# spill",
 2496                      UseXmmRegToRegMoveAll ? "movaps" : "movss ",
 2497                      Matcher::regName[dst_first],
 2498                      Matcher::regName[src_first]);
 2499 #endif
 2500         }
 2501       }
 2502       return 0;
 2503     } else if (dst_first_rc == rc_kreg) {
 2504       assert(false, "Illegal spilling");
 2505       return 0;
 2506     }
 2507   } else if (src_first_rc == rc_kreg) {
 2508     if (dst_first_rc == rc_stack) {
 2509       // mem -> kreg
 2510       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2511           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2512         // 64-bit
 2513         int offset = ra_->reg2offset(dst_first);
 2514         if (masm) {
 2515           __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first]));
 2516 #ifndef PRODUCT
 2517         } else {
 2518           st->print("kmovq   [rsp + #%d] , %s\t# spill",
 2519                      offset,
 2520                      Matcher::regName[src_first]);
 2521 #endif
 2522         }
 2523       }
 2524       return 0;
 2525     } else if (dst_first_rc == rc_int) {
 2526       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2527           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2528         // 64-bit
 2529         if (masm) {
 2530           __ kmov(as_Register(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
 2531 #ifndef PRODUCT
 2532         } else {
 2533          st->print("kmovq   %s, %s\t# spill",
 2534                      Matcher::regName[dst_first],
 2535                      Matcher::regName[src_first]);
 2536 #endif
 2537         }
 2538       }
 2539       Unimplemented();
 2540       return 0;
 2541     } else if (dst_first_rc == rc_kreg) {
 2542       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2543           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2544         // 64-bit
 2545         if (masm) {
 2546           __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
 2547 #ifndef PRODUCT
 2548         } else {
 2549          st->print("kmovq   %s, %s\t# spill",
 2550                      Matcher::regName[dst_first],
 2551                      Matcher::regName[src_first]);
 2552 #endif
 2553         }
 2554       }
 2555       return 0;
 2556     } else if (dst_first_rc == rc_float) {
 2557       assert(false, "Illegal spill");
 2558       return 0;
 2559     }
 2560   }
 2561 
 2562   assert(0," foo ");
 2563   Unimplemented();
 2564   return 0;
 2565 }
 2566 
 2567 #ifndef PRODUCT
 2568 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
 2569   implementation(nullptr, ra_, false, st);
 2570 }
 2571 #endif
 2572 
 2573 void MachSpillCopyNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
 2574   implementation(masm, ra_, false, nullptr);
 2575 }
 2576 
 2577 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
 2578   return MachNode::size(ra_);
 2579 }
 2580 
 2581 //=============================================================================
 2582 #ifndef PRODUCT
 2583 void BoxLockNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2584 {
 2585   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2586   int reg = ra_->get_reg_first(this);
 2587   st->print("leaq    %s, [rsp + #%d]\t# box lock",
 2588             Matcher::regName[reg], offset);
 2589 }
 2590 #endif
 2591 
 2592 void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 2593 {
 2594   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2595   int reg = ra_->get_encode(this);
 2596 
 2597   __ lea(as_Register(reg), Address(rsp, offset));
 2598 }
 2599 
 2600 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
 2601 {
 2602   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2603   if (ra_->get_encode(this) > 15) {
 2604     return (offset < 0x80) ? 6 : 9; // REX2
 2605   } else {
 2606     return (offset < 0x80) ? 5 : 8; // REX
 2607   }
 2608 }
 2609 
 2610 //=============================================================================
 2611 #ifndef PRODUCT
 2612 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2613 {
 2614   st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
 2615   st->print_cr("\tcmpl    rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
 2616   st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
 2617 }
 2618 #endif
 2619 
 2620 void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 2621 {
 2622   __ ic_check(InteriorEntryAlignment);
 2623 }
 2624 
 2625 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
 2626 {
 2627   return MachNode::size(ra_); // too many variables; just compute it
 2628                               // the hard way
 2629 }
 2630 
 2631 
 2632 //=============================================================================
 2633 
 2634 bool Matcher::supports_vector_calling_convention(void) {
 2635   return EnableVectorSupport;
 2636 }
 2637 
 2638 static bool is_ndd_demotable_opr1(const MachNode* mdef) {
 2639   return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr1) != 0);
 2640 }
 2641 
 2642 static bool is_ndd_demotable_opr2(const MachNode* mdef) {
 2643   return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr2) != 0);
 2644 }
 2645 
 2646 #ifdef ASSERT
 2647 static bool is_ndd_demotable(const MachNode* mdef) {
 2648   return (is_ndd_demotable_opr1(mdef) || is_ndd_demotable_opr2(mdef));
 2649 }
 2650 #endif
 2651 
 2652 bool Matcher::is_register_biasing_candidate(const MachNode* mdef,
 2653                                             int oper_index) {
 2654   if (mdef == nullptr) {
 2655     return false;
 2656   }
 2657 
 2658   if (mdef->num_opnds() <= oper_index || mdef->operand_index(oper_index) < 0 ||
 2659       mdef->in(mdef->operand_index(oper_index)) == nullptr) {
 2660     assert(oper_index != 1 || !is_ndd_demotable_opr1(mdef), "%s", mdef->Name());
 2661     assert(oper_index != 2 || !is_ndd_demotable_opr2(mdef), "%s", mdef->Name());
 2662     return false;
 2663   }
 2664 
 2665   // Complex memory operand covers multiple incoming edges needed for
 2666   // address computation. Biasing def towards any address component will not
 2667   // result in NDD demotion by assembler.
 2668   if (mdef->operand_num_edges(oper_index) != 1) {
 2669     return false;
 2670   }
 2671 
 2672   // Demotion candidate must be register mask compatible with definition.
 2673   const RegMask& oper_mask = mdef->in_RegMask(mdef->operand_index(oper_index));
 2674   if (!oper_mask.overlap(mdef->out_RegMask())) {
 2675     assert(!is_ndd_demotable(mdef), "%s", mdef->Name());
 2676     return false;
 2677   }
 2678 
 2679   switch (oper_index) {
 2680   // First operand of MachNode corresponding to Intel APX NDD selection
 2681   // pattern can share its assigned register with definition operand if
 2682   // their live ranges do not overlap. In such a scenario we can demote
 2683   // it to legacy map0/map1 instruction by replacing its 4-byte extended
 2684   // EVEX prefix with shorter REX/REX2 encoding. Demotion candidates
 2685   // are decorated with a special flag by instruction selector.
 2686   case 1:
 2687     return is_ndd_demotable_opr1(mdef);
 2688 
 2689   // Definition operand of commutative operation can be biased towards second
 2690   // operand.
 2691   case 2:
 2692     return is_ndd_demotable_opr2(mdef);
 2693 
 2694   // Current scheme only selects up to two biasing candidates
 2695   default:
 2696     assert(false, "unhandled operand index: %s", mdef->Name());
 2697     break;
 2698   }
 2699 
 2700   return false;
 2701 }
 2702 
 2703 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
 2704   assert(EnableVectorSupport, "sanity");
 2705   int lo = XMM0_num;
 2706   int hi = XMM0b_num;
 2707   if (ideal_reg == Op_VecX) hi = XMM0d_num;
 2708   else if (ideal_reg == Op_VecY) hi = XMM0h_num;
 2709   else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
 2710   return OptoRegPair(hi, lo);
 2711 }
 2712 
 2713 // Is this branch offset short enough that a short branch can be used?
 2714 //
 2715 // NOTE: If the platform does not provide any short branch variants, then
 2716 //       this method should return false for offset 0.
 2717 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
 2718   // The passed offset is relative to address of the branch.
 2719   // On 86 a branch displacement is calculated relative to address
 2720   // of a next instruction.
 2721   offset -= br_size;
 2722 
 2723   // the short version of jmpConUCF2 contains multiple branches,
 2724   // making the reach slightly less
 2725   if (rule == jmpConUCF2_rule)
 2726     return (-126 <= offset && offset <= 125);
 2727   return (-128 <= offset && offset <= 127);
 2728 }
 2729 
 2730 #ifdef ASSERT
 2731 // Return whether or not this register is ever used as an argument.
 2732 bool Matcher::can_be_java_arg(int reg)
 2733 {
 2734   return
 2735     reg ==  RDI_num || reg == RDI_H_num ||
 2736     reg ==  RSI_num || reg == RSI_H_num ||
 2737     reg ==  RDX_num || reg == RDX_H_num ||
 2738     reg ==  RCX_num || reg == RCX_H_num ||
 2739     reg ==   R8_num || reg ==  R8_H_num ||
 2740     reg ==   R9_num || reg ==  R9_H_num ||
 2741     reg ==  R12_num || reg == R12_H_num ||
 2742     reg == XMM0_num || reg == XMM0b_num ||
 2743     reg == XMM1_num || reg == XMM1b_num ||
 2744     reg == XMM2_num || reg == XMM2b_num ||
 2745     reg == XMM3_num || reg == XMM3b_num ||
 2746     reg == XMM4_num || reg == XMM4b_num ||
 2747     reg == XMM5_num || reg == XMM5b_num ||
 2748     reg == XMM6_num || reg == XMM6b_num ||
 2749     reg == XMM7_num || reg == XMM7b_num;
 2750 }
 2751 #endif
 2752 
 2753 uint Matcher::int_pressure_limit()
 2754 {
 2755   return (INTPRESSURE == -1) ? _INT_REG_mask.size() : INTPRESSURE;
 2756 }
 2757 
 2758 uint Matcher::float_pressure_limit()
 2759 {
 2760   // After experiment around with different values, the following default threshold
 2761   // works best for LCM's register pressure scheduling on x64.
 2762   uint dec_count  = VM_Version::supports_evex() ? 4 : 2;
 2763   uint default_float_pressure_threshold = _FLOAT_REG_mask.size() - dec_count;
 2764   return (FLOATPRESSURE == -1) ? default_float_pressure_threshold : FLOATPRESSURE;
 2765 }
 2766 
 2767 // Register for DIVI projection of divmodI
 2768 const RegMask& Matcher::divI_proj_mask() {
 2769   return INT_RAX_REG_mask();
 2770 }
 2771 
 2772 // Register for MODI projection of divmodI
 2773 const RegMask& Matcher::modI_proj_mask() {
 2774   return INT_RDX_REG_mask();
 2775 }
 2776 
 2777 // Register for DIVL projection of divmodL
 2778 const RegMask& Matcher::divL_proj_mask() {
 2779   return LONG_RAX_REG_mask();
 2780 }
 2781 
 2782 // Register for MODL projection of divmodL
 2783 const RegMask& Matcher::modL_proj_mask() {
 2784   return LONG_RDX_REG_mask();
 2785 }
 2786 
 2787 %}
 2788 
 2789 source_hpp %{
 2790 // Header information of the source block.
 2791 // Method declarations/definitions which are used outside
 2792 // the ad-scope can conveniently be defined here.
 2793 //
 2794 // To keep related declarations/definitions/uses close together,
// we switch between source %{ %} and source_hpp %{ %} freely as needed.
 2796 
 2797 #include "runtime/vm_version.hpp"
 2798 
 2799 class NativeJump;
 2800 
 2801 class CallStubImpl {
 2802 
 2803   //--------------------------------------------------------------
 2804   //---<  Used for optimization in Compile::shorten_branches  >---
 2805   //--------------------------------------------------------------
 2806 
 2807  public:
 2808   // Size of call trampoline stub.
 2809   static uint size_call_trampoline() {
 2810     return 0; // no call trampolines on this platform
 2811   }
 2812 
 2813   // number of relocations needed by a call trampoline stub
 2814   static uint reloc_call_trampoline() {
 2815     return 0; // no call trampolines on this platform
 2816   }
 2817 };
 2818 
 2819 class HandlerImpl {
 2820 
 2821  public:
 2822 
 2823   static int emit_deopt_handler(C2_MacroAssembler* masm);
 2824 
 2825   static uint size_deopt_handler() {
 2826     // one call and one jmp.
 2827     return 7;
 2828   }
 2829 };
 2830 
 2831 inline Assembler::AvxVectorLen vector_length_encoding(int bytes) {
 2832   switch(bytes) {
 2833     case  4: // fall-through
 2834     case  8: // fall-through
 2835     case 16: return Assembler::AVX_128bit;
 2836     case 32: return Assembler::AVX_256bit;
 2837     case 64: return Assembler::AVX_512bit;
 2838 
 2839     default: {
 2840       ShouldNotReachHere();
 2841       return Assembler::AVX_NoVec;
 2842     }
 2843   }
 2844 }
 2845 
 2846 static inline Assembler::AvxVectorLen vector_length_encoding(const Node* n) {
 2847   return vector_length_encoding(Matcher::vector_length_in_bytes(n));
 2848 }
 2849 
 2850 static inline Assembler::AvxVectorLen vector_length_encoding(const MachNode* use, MachOper* opnd) {
 2851   uint def_idx = use->operand_index(opnd);
 2852   Node* def = use->in(def_idx);
 2853   return vector_length_encoding(def);
 2854 }
 2855 
 2856 static inline bool is_vector_popcount_predicate(BasicType bt) {
 2857   return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
 2858          (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
 2859 }
 2860 
 2861 static inline bool is_clz_non_subword_predicate_evex(BasicType bt, int vlen_bytes) {
 2862   return is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd() &&
 2863            (VM_Version::supports_avx512vl() || vlen_bytes == 64);
 2864 }
 2865 
// Platform-dependent node flags for x86.
// Each flag is a distinct single-bit shift of Node::_last_flag, so the
// values start one bit above it and occupy consecutive bit positions.
class Node::PD {
public:
  enum NodeFlags : uint64_t {
    // Tested by MachNode::compute_padding() to decide whether padding is
    // computed through IntelJccErratum.
    Flag_intel_jcc_erratum    = Node::_last_flag << 1,
    // Flags named after the condition-code bit they set ...
    Flag_sets_carry_flag      = Node::_last_flag << 2,
    Flag_sets_parity_flag     = Node::_last_flag << 3,
    Flag_sets_zero_flag       = Node::_last_flag << 4,
    Flag_sets_overflow_flag   = Node::_last_flag << 5,
    Flag_sets_sign_flag       = Node::_last_flag << 6,
    // ... or clear.
    Flag_clears_carry_flag    = Node::_last_flag << 7,
    Flag_clears_parity_flag   = Node::_last_flag << 8,
    Flag_clears_zero_flag     = Node::_last_flag << 9,
    Flag_clears_overflow_flag = Node::_last_flag << 10,
    Flag_clears_sign_flag     = Node::_last_flag << 11,
    // Tested by is_ndd_demotable_opr1() / is_ndd_demotable_opr2() when
    // selecting register biasing candidates.
    Flag_ndd_demotable_opr1   = Node::_last_flag << 12,
    Flag_ndd_demotable_opr2   = Node::_last_flag << 13,
    // Highest platform-dependent flag; update when appending a new flag.
    _last_flag                = Flag_ndd_demotable_opr2
  };
};
 2885 
 2886 %} // end source_hpp
 2887 
 2888 source %{
 2889 
 2890 #include "opto/addnode.hpp"
 2891 #include "c2_intelJccErratum_x86.hpp"
 2892 
 2893 void PhaseOutput::pd_perform_mach_node_analysis() {
 2894   if (VM_Version::has_intel_jcc_erratum()) {
 2895     int extra_padding = IntelJccErratum::tag_affected_machnodes(C, C->cfg(), C->regalloc());
 2896     _buf_sizes._code += extra_padding;
 2897   }
 2898 }
 2899 
 2900 int MachNode::pd_alignment_required() const {
 2901   if (VM_Version::has_intel_jcc_erratum() && IntelJccErratum::is_jcc_erratum_branch(this)) {
 2902     // Conservatively add worst case padding. We assume that relocInfo::addr_unit() is 1 on x86.
 2903     return IntelJccErratum::largest_jcc_size() + 1;
 2904   } else {
 2905     return 1;
 2906   }
 2907 }
 2908 
 2909 int MachNode::compute_padding(int current_offset) const {
 2910   if (flags() & Node::PD::Flag_intel_jcc_erratum) {
 2911     Compile* C = Compile::current();
 2912     PhaseOutput* output = C->output();
 2913     Block* block = output->block();
 2914     int index = output->index();
 2915     return IntelJccErratum::compute_padding(current_offset, this, block, index, C->regalloc());
 2916   } else {
 2917     return 0;
 2918   }
 2919 }
 2920 
// Emit deopt handler code.
//
// The stub consists of a call to the deopt blob's unpack entry followed by
// a jmp back to that call. Returns the offset of the jmp, or 0 if no stub
// space could be obtained (a "CodeCache is full" failure is recorded then).
int HandlerImpl::emit_deopt_handler(C2_MacroAssembler* masm) {

  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  address base = __ start_a_stub(size_deopt_handler());
  if (base == nullptr) {
    ciEnv::current()->record_failure("CodeCache is full");
    return 0;  // CodeBuffer::expand failed
  }
  // Offset of the stub's first byte; used below to check the emitted size.
  int offset = __ offset();

  // 'start' marks the call so that the jmp emitted after it can target it.
  Label start;
  __ bind(start);

  __ call(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));

  // The offset handed back to the caller is that of the jmp, i.e. the
  // position right after the call.
  int entry_offset = __ offset();

  __ jmp(start);

  // The stub must fit in the space reserved via size_deopt_handler() ...
  assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow %d", (__ offset() - offset));
  // ... and at least NativePostCallNop::first_check_size bytes must follow
  // the returned entry offset.
  assert(__ offset() - entry_offset >= NativePostCallNop::first_check_size,
         "out of bounds read in post-call NOP check");
  __ end_a_stub();
  return entry_offset;
}
 2948 
 2949 static Assembler::Width widthForType(BasicType bt) {
 2950   if (bt == T_BYTE) {
 2951     return Assembler::B;
 2952   } else if (bt == T_SHORT) {
 2953     return Assembler::W;
 2954   } else if (bt == T_INT) {
 2955     return Assembler::D;
 2956   } else {
 2957     assert(bt == T_LONG, "not a long: %s", type2name(bt));
 2958     return Assembler::Q;
 2959   }
 2960 }
 2961 
 2962 //=============================================================================
 2963 
 2964   // Float masks come from different places depending on platform.
 2965   static address float_signmask()  { return StubRoutines::x86::float_sign_mask(); }
 2966   static address float_signflip()  { return StubRoutines::x86::float_sign_flip(); }
 2967   static address double_signmask() { return StubRoutines::x86::double_sign_mask(); }
 2968   static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
 2969   static address vector_short_to_byte_mask() { return StubRoutines::x86::vector_short_to_byte_mask(); }
 2970   static address vector_int_to_byte_mask() { return StubRoutines::x86::vector_int_to_byte_mask(); }
 2971   static address vector_byte_perm_mask() { return StubRoutines::x86::vector_byte_perm_mask(); }
 2972   static address vector_long_sign_mask() { return StubRoutines::x86::vector_long_sign_mask(); }
 2973   static address vector_all_bits_set() { return StubRoutines::x86::vector_all_bits_set(); }
 2974   static address vector_int_mask_cmp_bits() { return StubRoutines::x86::vector_int_mask_cmp_bits(); }
 2975   static address vector_int_to_short_mask() { return StubRoutines::x86::vector_int_to_short_mask(); }
 2976   static address vector_byte_shufflemask() { return StubRoutines::x86::vector_byte_shuffle_mask(); }
 2977   static address vector_short_shufflemask() { return StubRoutines::x86::vector_short_shuffle_mask(); }
 2978   static address vector_int_shufflemask() { return StubRoutines::x86::vector_int_shuffle_mask(); }
 2979   static address vector_long_shufflemask() { return StubRoutines::x86::vector_long_shuffle_mask(); }
 2980   static address vector_32_bit_mask() { return StubRoutines::x86::vector_32_bit_mask(); }
 2981   static address vector_64_bit_mask() { return StubRoutines::x86::vector_64_bit_mask(); }
 2982   static address vector_float_signflip() { return StubRoutines::x86::vector_float_sign_flip();}
 2983   static address vector_double_signflip() { return StubRoutines::x86::vector_double_sign_flip();}
 2984 
 2985 //=============================================================================
 2986 bool Matcher::match_rule_supported(int opcode) {
 2987   if (!has_match_rule(opcode)) {
 2988     return false; // no match rule present
 2989   }
 2990   switch (opcode) {
 2991     case Op_AbsVL:
 2992     case Op_StoreVectorScatter:
 2993       if (UseAVX < 3) {
 2994         return false;
 2995       }
 2996       break;
 2997     case Op_PopCountI:
 2998     case Op_PopCountL:
 2999       if (!UsePopCountInstruction) {
 3000         return false;
 3001       }
 3002       break;
 3003     case Op_PopCountVI:
 3004       if (UseAVX < 2) {
 3005         return false;
 3006       }
 3007       break;
 3008     case Op_CompressV:
 3009     case Op_ExpandV:
 3010     case Op_PopCountVL:
 3011       if (UseAVX < 2) {
 3012         return false;
 3013       }
 3014       break;
 3015     case Op_MulVI:
 3016       if ((UseSSE < 4) && (UseAVX < 1)) { // only with SSE4_1 or AVX
 3017         return false;
 3018       }
 3019       break;
 3020     case Op_MulVL:
 3021       if (UseSSE < 4) { // only with SSE4_1 or AVX
 3022         return false;
 3023       }
 3024       break;
 3025     case Op_MulReductionVL:
 3026       if (VM_Version::supports_avx512dq() == false) {
 3027         return false;
 3028       }
 3029       break;
 3030     case Op_AbsVB:
 3031     case Op_AbsVS:
 3032     case Op_AbsVI:
 3033     case Op_AddReductionVI:
 3034     case Op_AndReductionV:
 3035     case Op_OrReductionV:
 3036     case Op_XorReductionV:
 3037       if (UseSSE < 3) { // requires at least SSSE3
 3038         return false;
 3039       }
 3040       break;
 3041     case Op_MaxHF:
 3042     case Op_MinHF:
 3043       if (!VM_Version::supports_avx512vlbw()) {
 3044         return false;
 3045       }  // fallthrough
 3046     case Op_AddHF:
 3047     case Op_DivHF:
 3048     case Op_FmaHF:
 3049     case Op_MulHF:
 3050     case Op_ReinterpretS2HF:
 3051     case Op_ReinterpretHF2S:
 3052     case Op_SubHF:
 3053     case Op_SqrtHF:
 3054       if (!VM_Version::supports_avx512_fp16()) {
 3055         return false;
 3056       }
 3057       break;
 3058     case Op_VectorLoadShuffle:
 3059     case Op_VectorRearrange:
 3060     case Op_MulReductionVI:
 3061       if (UseSSE < 4) { // requires at least SSE4
 3062         return false;
 3063       }
 3064       break;
 3065     case Op_IsInfiniteF:
 3066     case Op_IsInfiniteD:
 3067       if (!VM_Version::supports_avx512dq()) {
 3068         return false;
 3069       }
 3070       break;
 3071     case Op_SqrtVD:
 3072     case Op_SqrtVF:
 3073     case Op_VectorMaskCmp:
 3074     case Op_VectorCastB2X:
 3075     case Op_VectorCastS2X:
 3076     case Op_VectorCastI2X:
 3077     case Op_VectorCastL2X:
 3078     case Op_VectorCastF2X:
 3079     case Op_VectorCastD2X:
 3080     case Op_VectorUCastB2X:
 3081     case Op_VectorUCastS2X:
 3082     case Op_VectorUCastI2X:
 3083     case Op_VectorMaskCast:
 3084       if (UseAVX < 1) { // enabled for AVX only
 3085         return false;
 3086       }
 3087       break;
 3088     case Op_PopulateIndex:
 3089       if (UseAVX < 2) {
 3090         return false;
 3091       }
 3092       break;
 3093     case Op_RoundVF:
 3094       if (UseAVX < 2) { // enabled for AVX2 only
 3095         return false;
 3096       }
 3097       break;
 3098     case Op_RoundVD:
 3099       if (UseAVX < 3) {
 3100         return false;  // enabled for AVX3 only
 3101       }
 3102       break;
 3103     case Op_CompareAndSwapL:
 3104     case Op_CompareAndSwapP:
 3105       break;
 3106     case Op_StrIndexOf:
 3107       if (!UseSSE42Intrinsics) {
 3108         return false;
 3109       }
 3110       break;
 3111     case Op_StrIndexOfChar:
 3112       if (!UseSSE42Intrinsics) {
 3113         return false;
 3114       }
 3115       break;
 3116     case Op_OnSpinWait:
 3117       if (VM_Version::supports_on_spin_wait() == false) {
 3118         return false;
 3119       }
 3120       break;
 3121     case Op_MulVB:
 3122     case Op_LShiftVB:
 3123     case Op_RShiftVB:
 3124     case Op_URShiftVB:
 3125     case Op_VectorInsert:
 3126     case Op_VectorLoadMask:
 3127     case Op_VectorStoreMask:
 3128     case Op_VectorBlend:
 3129       if (UseSSE < 4) {
 3130         return false;
 3131       }
 3132       break;
 3133     case Op_MaxD:
 3134     case Op_MaxF:
 3135     case Op_MinD:
 3136     case Op_MinF:
 3137       if (UseAVX < 1) { // enabled for AVX only
 3138         return false;
 3139       }
 3140       break;
 3141     case Op_CacheWB:
 3142     case Op_CacheWBPreSync:
 3143     case Op_CacheWBPostSync:
 3144       if (!VM_Version::supports_data_cache_line_flush()) {
 3145         return false;
 3146       }
 3147       break;
 3148     case Op_ExtractB:
 3149     case Op_ExtractL:
 3150     case Op_ExtractI:
 3151     case Op_RoundDoubleMode:
 3152       if (UseSSE < 4) {
 3153         return false;
 3154       }
 3155       break;
 3156     case Op_RoundDoubleModeV:
 3157       if (VM_Version::supports_avx() == false) {
 3158         return false; // 128bit vroundpd is not available
 3159       }
 3160       break;
 3161     case Op_LoadVectorGather:
 3162     case Op_LoadVectorGatherMasked:
 3163       if (UseAVX < 2) {
 3164         return false;
 3165       }
 3166       break;
 3167     case Op_FmaF:
 3168     case Op_FmaD:
 3169     case Op_FmaVD:
 3170     case Op_FmaVF:
 3171       if (!UseFMA) {
 3172         return false;
 3173       }
 3174       break;
 3175     case Op_MacroLogicV:
 3176       if (UseAVX < 3 || !UseVectorMacroLogic) {
 3177         return false;
 3178       }
 3179       break;
 3180 
 3181     case Op_VectorCmpMasked:
 3182     case Op_VectorMaskGen:
 3183       if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
 3184         return false;
 3185       }
 3186       break;
 3187     case Op_VectorMaskFirstTrue:
 3188     case Op_VectorMaskLastTrue:
 3189     case Op_VectorMaskTrueCount:
 3190     case Op_VectorMaskToLong:
 3191       if (UseAVX < 1) {
 3192          return false;
 3193       }
 3194       break;
 3195     case Op_RoundF:
 3196     case Op_RoundD:
 3197       break;
 3198     case Op_CopySignD:
 3199     case Op_CopySignF:
 3200       if (UseAVX < 3)  {
 3201         return false;
 3202       }
 3203       if (!VM_Version::supports_avx512vl()) {
 3204         return false;
 3205       }
 3206       break;
 3207     case Op_CompressBits:
 3208     case Op_ExpandBits:
 3209       if (!VM_Version::supports_bmi2()) {
 3210         return false;
 3211       }
 3212       break;
 3213     case Op_CompressM:
 3214       if (!VM_Version::supports_avx512vl() || !VM_Version::supports_bmi2()) {
 3215         return false;
 3216       }
 3217       break;
 3218     case Op_ConvF2HF:
 3219     case Op_ConvHF2F:
 3220       if (!VM_Version::supports_float16()) {
 3221         return false;
 3222       }
 3223       break;
 3224     case Op_VectorCastF2HF:
 3225     case Op_VectorCastHF2F:
 3226       if (!VM_Version::supports_f16c() && !VM_Version::supports_evex()) {
 3227         return false;
 3228       }
 3229       break;
 3230   }
 3231   return true;  // Match rules are supported by default.
 3232 }
 3233 
 3234 //------------------------------------------------------------------------
 3235 
 3236 static inline bool is_pop_count_instr_target(BasicType bt) {
 3237   return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
 3238          (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
 3239 }
 3240 
 3241 bool Matcher::match_rule_supported_auto_vectorization(int opcode, int vlen, BasicType bt) {
 3242   return match_rule_supported_vector(opcode, vlen, bt);
 3243 }
 3244 
 3245 // Identify extra cases that we might want to provide match rules for vector nodes and
 3246 // other intrinsics guarded with vector length (vlen) and element type (bt).
 3247 bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
 3248   if (!match_rule_supported(opcode)) {
 3249     return false;
 3250   }
 3251   // Matcher::vector_size_supported() restricts vector sizes in the following way (see Matcher::vector_width_in_bytes):
 3252   //   * SSE2 supports 128bit vectors for all types;
 3253   //   * AVX1 supports 256bit vectors only for FLOAT and DOUBLE types;
 3254   //   * AVX2 supports 256bit vectors for all types;
 3255   //   * AVX512F supports 512bit vectors only for INT, FLOAT, and DOUBLE types;
 3256   //   * AVX512BW supports 512bit vectors for BYTE, SHORT, and CHAR types.
 3257   // There's also a limit on minimum vector size supported: 2 elements (or 4 bytes for BYTE).
 3258   // And MaxVectorSize is taken into account as well.
 3259   if (!vector_size_supported(bt, vlen)) {
 3260     return false;
 3261   }
 3262   // Special cases which require vector length follow:
 3263   //   * implementation limitations
 3264   //   * some 512bit vector operations on FLOAT and DOUBLE types require AVX512DQ
 3265   //   * 128bit vroundpd instruction is present only in AVX1
 3266   int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
 3267   switch (opcode) {
 3268     case Op_MaxVHF:
 3269     case Op_MinVHF:
 3270       if (!VM_Version::supports_avx512bw()) {
 3271         return false;
 3272       }
 3273     case Op_AddVHF:
 3274     case Op_DivVHF:
 3275     case Op_FmaVHF:
 3276     case Op_MulVHF:
 3277     case Op_SubVHF:
 3278     case Op_SqrtVHF:
 3279       if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3280         return false;
 3281       }
 3282       if (!VM_Version::supports_avx512_fp16()) {
 3283         return false;
 3284       }
 3285       break;
 3286     case Op_AbsVF:
 3287     case Op_NegVF:
 3288       if ((vlen == 16) && (VM_Version::supports_avx512dq() == false)) {
 3289         return false; // 512bit vandps and vxorps are not available
 3290       }
 3291       break;
 3292     case Op_AbsVD:
 3293     case Op_NegVD:
 3294       if ((vlen == 8) && (VM_Version::supports_avx512dq() == false)) {
 3295         return false; // 512bit vpmullq, vandpd and vxorpd are not available
 3296       }
 3297       break;
 3298     case Op_RotateRightV:
 3299     case Op_RotateLeftV:
 3300       if (bt != T_INT && bt != T_LONG) {
 3301         return false;
 3302       } // fallthrough
 3303     case Op_MacroLogicV:
 3304       if (!VM_Version::supports_evex() ||
 3305           ((size_in_bits != 512) && !VM_Version::supports_avx512vl())) {
 3306         return false;
 3307       }
 3308       break;
 3309     case Op_ClearArray:
 3310     case Op_VectorMaskGen:
 3311     case Op_VectorCmpMasked:
 3312       if (!VM_Version::supports_avx512bw()) {
 3313         return false;
 3314       }
 3315       if ((size_in_bits != 512) && !VM_Version::supports_avx512vl()) {
 3316         return false;
 3317       }
 3318       break;
 3319     case Op_LoadVectorMasked:
 3320     case Op_StoreVectorMasked:
 3321       if (!VM_Version::supports_avx512bw() && (is_subword_type(bt) || UseAVX < 1)) {
 3322         return false;
 3323       }
 3324       break;
 3325     case Op_UMinV:
 3326     case Op_UMaxV:
 3327       if (UseAVX == 0) {
 3328         return false;
 3329       }
 3330       break;
 3331     case Op_UMinReductionV:
 3332     case Op_UMaxReductionV:
 3333       if (UseAVX == 0) {
 3334         return false;
 3335       }
 3336       if (bt == T_LONG && !VM_Version::supports_avx512vl()) {
 3337         return false;
 3338       }
 3339       if (UseAVX > 2 && size_in_bits == 512 && !VM_Version::supports_avx512vl()) {
 3340         return false;
 3341       }
 3342       break;
 3343     case Op_MaxV:
 3344     case Op_MinV:
 3345       if (UseSSE < 4 && is_integral_type(bt)) {
 3346         return false;
 3347       }
 3348       if ((bt == T_FLOAT || bt == T_DOUBLE)) {
 3349           // Float/Double intrinsics are enabled for AVX family currently.
 3350           if (UseAVX == 0) {
 3351             return false;
 3352           }
 3353           if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) { // 512 bit Float/Double intrinsics need AVX512DQ
 3354             return false;
 3355           }
 3356       }
 3357       break;
 3358     case Op_CallLeafVector:
 3359       if (size_in_bits == 512 && !VM_Version::supports_avx512vlbwdq()) {
 3360         return false;
 3361       }
 3362       break;
 3363     case Op_AddReductionVI:
 3364       if (bt == T_INT && (UseSSE < 3 || !VM_Version::supports_ssse3())) {
 3365         return false;
 3366       }
 3367       // fallthrough
 3368     case Op_AndReductionV:
 3369     case Op_OrReductionV:
 3370     case Op_XorReductionV:
 3371       if (is_subword_type(bt) && (UseSSE < 4)) {
 3372         return false;
 3373       }
 3374       break;
 3375     case Op_MinReductionV:
 3376     case Op_MaxReductionV:
 3377       if ((bt == T_INT || is_subword_type(bt)) && UseSSE < 4) {
 3378         return false;
 3379       } else if (bt == T_LONG && (UseAVX < 3 || !VM_Version::supports_avx512vlbwdq())) {
 3380         return false;
 3381       }
 3382       // Float/Double intrinsics enabled for AVX family.
 3383       if (UseAVX == 0 && (bt == T_FLOAT || bt == T_DOUBLE)) {
 3384         return false;
 3385       }
 3386       if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) {
 3387         return false;
 3388       }
 3389       break;
 3390     case Op_VectorBlend:
 3391       if (UseAVX == 0 && size_in_bits < 128) {
 3392         return false;
 3393       }
 3394       break;
 3395     case Op_VectorTest:
 3396       if (UseSSE < 4) {
 3397         return false; // Implementation limitation
 3398       } else if (size_in_bits < 32) {
 3399         return false; // Implementation limitation
 3400       }
 3401       break;
 3402     case Op_VectorLoadShuffle:
 3403     case Op_VectorRearrange:
 3404       if(vlen == 2) {
 3405         return false; // Implementation limitation due to how shuffle is loaded
 3406       } else if (size_in_bits == 256 && UseAVX < 2) {
 3407         return false; // Implementation limitation
 3408       }
 3409       break;
 3410     case Op_VectorLoadMask:
 3411     case Op_VectorMaskCast:
 3412       if (size_in_bits == 256 && UseAVX < 2) {
 3413         return false; // Implementation limitation
 3414       }
 3415       // fallthrough
 3416     case Op_VectorStoreMask:
 3417       if (vlen == 2) {
 3418         return false; // Implementation limitation
 3419       }
 3420       break;
 3421     case Op_PopulateIndex:
 3422       if (size_in_bits > 256 && !VM_Version::supports_avx512bw()) {
 3423         return false;
 3424       }
 3425       break;
 3426     case Op_VectorCastB2X:
 3427     case Op_VectorCastS2X:
 3428     case Op_VectorCastI2X:
 3429       if (bt != T_DOUBLE && size_in_bits == 256 && UseAVX < 2) {
 3430         return false;
 3431       }
 3432       break;
 3433     case Op_VectorCastL2X:
 3434       if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) {
 3435         return false;
 3436       } else if (!is_integral_type(bt) && !VM_Version::supports_avx512dq()) {
 3437         return false;
 3438       }
 3439       break;
 3440     case Op_VectorCastF2X: {
 3441         // As per JLS section 5.1.3 narrowing conversion to sub-word types
 3442         // happen after intermediate conversion to integer and special handling
 3443         // code needs AVX2 vpcmpeqd instruction for 256 bit vectors.
 3444         int src_size_in_bits = type2aelembytes(T_FLOAT) * vlen * BitsPerByte;
 3445         if (is_integral_type(bt) && src_size_in_bits == 256 && UseAVX < 2) {
 3446           return false;
 3447         }
 3448       }
 3449       // fallthrough
 3450     case Op_VectorCastD2X:
 3451       if (bt == T_LONG && !VM_Version::supports_avx512dq()) {
 3452         return false;
 3453       }
 3454       break;
 3455     case Op_VectorCastF2HF:
 3456     case Op_VectorCastHF2F:
 3457       if (!VM_Version::supports_f16c() &&
 3458          ((!VM_Version::supports_evex() ||
 3459          ((size_in_bits != 512) && !VM_Version::supports_avx512vl())))) {
 3460         return false;
 3461       }
 3462       break;
 3463     case Op_RoundVD:
 3464       if (!VM_Version::supports_avx512dq()) {
 3465         return false;
 3466       }
 3467       break;
 3468     case Op_MulReductionVI:
 3469       if (bt == T_BYTE && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
 3470         return false;
 3471       }
 3472       break;
 3473     case Op_LoadVectorGatherMasked:
 3474       if (!is_subword_type(bt) && size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3475         return false;
 3476       }
 3477       if (is_subword_type(bt) &&
 3478          ((size_in_bits > 256 && !VM_Version::supports_avx512bw()) ||
 3479           (size_in_bits < 64)                                      ||
 3480           (bt == T_SHORT && !VM_Version::supports_bmi2()))) {
 3481         return false;
 3482       }
 3483       break;
 3484     case Op_StoreVectorScatterMasked:
 3485     case Op_StoreVectorScatter:
 3486       if (is_subword_type(bt)) {
 3487         return false;
 3488       } else if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3489         return false;
 3490       }
 3491       // fallthrough
 3492     case Op_LoadVectorGather:
 3493       if (!is_subword_type(bt) && size_in_bits == 64) {
 3494         return false;
 3495       }
 3496       if (is_subword_type(bt) && size_in_bits < 64) {
 3497         return false;
 3498       }
 3499       break;
 3500     case Op_SaturatingAddV:
 3501     case Op_SaturatingSubV:
 3502       if (UseAVX < 1) {
 3503         return false; // Implementation limitation
 3504       }
 3505       if (is_subword_type(bt) && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
 3506         return false;
 3507       }
 3508       break;
 3509     case Op_SelectFromTwoVector:
 3510        if (size_in_bits < 128) {
 3511          return false;
 3512        }
 3513        if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3514          return false;
 3515        }
 3516        if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
 3517          return false;
 3518        }
 3519        if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
 3520          return false;
 3521        }
 3522        if ((bt == T_INT || bt == T_FLOAT || bt == T_DOUBLE) && !VM_Version::supports_evex()) {
 3523          return false;
 3524        }
 3525        break;
 3526     case Op_MaskAll:
 3527       if (!VM_Version::supports_evex()) {
 3528         return false;
 3529       }
 3530       if ((vlen > 16 || is_subword_type(bt)) && !VM_Version::supports_avx512bw()) {
 3531         return false;
 3532       }
 3533       if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3534         return false;
 3535       }
 3536       break;
 3537     case Op_VectorMaskCmp:
 3538       if (vlen < 2 || size_in_bits < 32) {
 3539         return false;
 3540       }
 3541       break;
 3542     case Op_CompressM:
 3543       if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
 3544         return false;
 3545       }
 3546       break;
 3547     case Op_CompressV:
 3548     case Op_ExpandV:
 3549       if (is_subword_type(bt) && !VM_Version::supports_avx512_vbmi2()) {
 3550         return false;
 3551       }
 3552       if (size_in_bits < 128 ) {
 3553         return false;
 3554       }
 3555     case Op_VectorLongToMask:
 3556       if (UseAVX < 1) {
 3557         return false;
 3558       }
 3559       if (UseAVX < 3 && !VM_Version::supports_bmi2()) {
 3560         return false;
 3561       }
 3562       break;
 3563     case Op_SignumVD:
 3564     case Op_SignumVF:
 3565       if (UseAVX < 1) {
 3566         return false;
 3567       }
 3568       break;
 3569     case Op_PopCountVI:
 3570     case Op_PopCountVL: {
 3571         if (!is_pop_count_instr_target(bt) &&
 3572             (size_in_bits == 512) && !VM_Version::supports_avx512bw()) {
 3573           return false;
 3574         }
 3575       }
 3576       break;
 3577     case Op_ReverseV:
 3578     case Op_ReverseBytesV:
 3579       if (UseAVX < 2) {
 3580         return false;
 3581       }
 3582       break;
 3583     case Op_CountTrailingZerosV:
 3584     case Op_CountLeadingZerosV:
 3585       if (UseAVX < 2) {
 3586         return false;
 3587       }
 3588       break;
 3589   }
 3590   return true;  // Per default match rules are supported.
 3591 }
 3592 
 3593 bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) {
 3594   // ADLC based match_rule_supported routine checks for the existence of pattern based
 3595   // on IR opcode. Most of the unary/binary/ternary masked operation share the IR nodes
 3596   // of their non-masked counterpart with mask edge being the differentiator.
 3597   // This routine does a strict check on the existence of masked operation patterns
 3598   // by returning a default false value for all the other opcodes apart from the
 3599   // ones whose masked instruction patterns are defined in this file.
 3600   if (!match_rule_supported_vector(opcode, vlen, bt)) {
 3601     return false;
 3602   }
 3603 
 3604   int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
 3605   if (size_in_bits != 512 && !VM_Version::supports_avx512vl()) {
 3606     return false;
 3607   }
 3608   switch(opcode) {
 3609     // Unary masked operations
 3610     case Op_AbsVB:
 3611     case Op_AbsVS:
 3612       if(!VM_Version::supports_avx512bw()) {
 3613         return false;  // Implementation limitation
 3614       }
 3615     case Op_AbsVI:
 3616     case Op_AbsVL:
 3617       return true;
 3618 
 3619     // Ternary masked operations
 3620     case Op_FmaVF:
 3621     case Op_FmaVD:
 3622       return true;
 3623 
 3624     case Op_MacroLogicV:
 3625       if(bt != T_INT && bt != T_LONG) {
 3626         return false;
 3627       }
 3628       return true;
 3629 
 3630     // Binary masked operations
 3631     case Op_AddVB:
 3632     case Op_AddVS:
 3633     case Op_SubVB:
 3634     case Op_SubVS:
 3635     case Op_MulVS:
 3636     case Op_LShiftVS:
 3637     case Op_RShiftVS:
 3638     case Op_URShiftVS:
 3639       assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
 3640       if (!VM_Version::supports_avx512bw()) {
 3641         return false;  // Implementation limitation
 3642       }
 3643       return true;
 3644 
 3645     case Op_MulVL:
 3646       assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
 3647       if (!VM_Version::supports_avx512dq()) {
 3648         return false;  // Implementation limitation
 3649       }
 3650       return true;
 3651 
 3652     case Op_AndV:
 3653     case Op_OrV:
 3654     case Op_XorV:
 3655     case Op_RotateRightV:
 3656     case Op_RotateLeftV:
 3657       if (bt != T_INT && bt != T_LONG) {
 3658         return false; // Implementation limitation
 3659       }
 3660       return true;
 3661 
 3662     case Op_VectorLoadMask:
 3663       assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
 3664       if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
 3665         return false;
 3666       }
 3667       return true;
 3668 
 3669     case Op_AddVI:
 3670     case Op_AddVL:
 3671     case Op_AddVF:
 3672     case Op_AddVD:
 3673     case Op_SubVI:
 3674     case Op_SubVL:
 3675     case Op_SubVF:
 3676     case Op_SubVD:
 3677     case Op_MulVI:
 3678     case Op_MulVF:
 3679     case Op_MulVD:
 3680     case Op_DivVF:
 3681     case Op_DivVD:
 3682     case Op_SqrtVF:
 3683     case Op_SqrtVD:
 3684     case Op_LShiftVI:
 3685     case Op_LShiftVL:
 3686     case Op_RShiftVI:
 3687     case Op_RShiftVL:
 3688     case Op_URShiftVI:
 3689     case Op_URShiftVL:
 3690     case Op_LoadVectorMasked:
 3691     case Op_StoreVectorMasked:
 3692     case Op_LoadVectorGatherMasked:
 3693     case Op_StoreVectorScatterMasked:
 3694       return true;
 3695 
 3696     case Op_UMinV:
 3697     case Op_UMaxV:
 3698       if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3699         return false;
 3700       } // fallthrough
 3701     case Op_MaxV:
 3702     case Op_MinV:
 3703       if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
 3704         return false; // Implementation limitation
 3705       }
 3706       if (is_floating_point_type(bt) && !VM_Version::supports_avx10_2()) {
 3707         return false; // Implementation limitation
 3708       }
 3709       return true;
 3710     case Op_SaturatingAddV:
 3711     case Op_SaturatingSubV:
 3712       if (!is_subword_type(bt)) {
 3713         return false;
 3714       }
 3715       if (size_in_bits < 128 || !VM_Version::supports_avx512bw()) {
 3716         return false; // Implementation limitation
 3717       }
 3718       return true;
 3719 
 3720     case Op_VectorMaskCmp:
 3721       if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
 3722         return false; // Implementation limitation
 3723       }
 3724       return true;
 3725 
 3726     case Op_VectorRearrange:
 3727       if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
 3728         return false; // Implementation limitation
 3729       }
 3730       if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
 3731         return false; // Implementation limitation
 3732       } else if ((bt == T_INT || bt == T_FLOAT) && size_in_bits < 256) {
 3733         return false; // Implementation limitation
 3734       }
 3735       return true;
 3736 
 3737     // Binary Logical operations
 3738     case Op_AndVMask:
 3739     case Op_OrVMask:
 3740     case Op_XorVMask:
 3741       if (vlen > 16 && !VM_Version::supports_avx512bw()) {
 3742         return false; // Implementation limitation
 3743       }
 3744       return true;
 3745 
 3746     case Op_PopCountVI:
 3747     case Op_PopCountVL:
 3748       if (!is_pop_count_instr_target(bt)) {
 3749         return false;
 3750       }
 3751       return true;
 3752 
 3753     case Op_MaskAll:
 3754       return true;
 3755 
 3756     case Op_CountLeadingZerosV:
 3757       if (is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd()) {
 3758         return true;
 3759       }
 3760     default:
 3761       return false;
 3762   }
 3763 }
 3764 
 3765 bool Matcher::vector_needs_partial_operations(Node* node, const TypeVect* vt) {
 3766   return false;
 3767 }
 3768 
 3769 // Return true if Vector::rearrange needs preparation of the shuffle argument
 3770 bool Matcher::vector_rearrange_requires_load_shuffle(BasicType elem_bt, int vlen) {
 3771   switch (elem_bt) {
 3772     case T_BYTE:  return false;
 3773     case T_SHORT: return !VM_Version::supports_avx512bw();
 3774     case T_INT:   return !VM_Version::supports_avx();
 3775     case T_LONG:  return vlen < 8 && !VM_Version::supports_avx512vl();
 3776     default:
 3777       ShouldNotReachHere();
 3778       return false;
 3779   }
 3780 }
 3781 
 3782 bool Matcher::mask_op_prefers_predicate(int opcode, const TypeVect* vt) {
 3783   // Prefer predicate if the mask type is "TypePVectMask".
 3784   return vt->isa_pvectmask() != nullptr;
 3785 }
 3786 
 3787 MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* generic_opnd, uint ideal_reg, bool is_temp) {
 3788   assert(Matcher::is_generic_vector(generic_opnd), "not generic");
 3789   bool legacy = (generic_opnd->opcode() == LEGVEC);
 3790   if (!VM_Version::supports_avx512vlbwdq() && // KNL
 3791       is_temp && !legacy && (ideal_reg == Op_VecZ)) {
 3792     // Conservatively specialize 512bit vec TEMP operands to legVecZ (zmm0-15) on KNL.
 3793     return new legVecZOper();
 3794   }
 3795   if (legacy) {
 3796     switch (ideal_reg) {
 3797       case Op_VecS: return new legVecSOper();
 3798       case Op_VecD: return new legVecDOper();
 3799       case Op_VecX: return new legVecXOper();
 3800       case Op_VecY: return new legVecYOper();
 3801       case Op_VecZ: return new legVecZOper();
 3802     }
 3803   } else {
 3804     switch (ideal_reg) {
 3805       case Op_VecS: return new vecSOper();
 3806       case Op_VecD: return new vecDOper();
 3807       case Op_VecX: return new vecXOper();
 3808       case Op_VecY: return new vecYOper();
 3809       case Op_VecZ: return new vecZOper();
 3810     }
 3811   }
 3812   ShouldNotReachHere();
 3813   return nullptr;
 3814 }
 3815 
 3816 bool Matcher::is_reg2reg_move(MachNode* m) {
 3817   switch (m->rule()) {
 3818     case MoveVec2Leg_rule:
 3819     case MoveLeg2Vec_rule:
 3820     case MoveF2VL_rule:
 3821     case MoveF2LEG_rule:
 3822     case MoveVL2F_rule:
 3823     case MoveLEG2F_rule:
 3824     case MoveD2VL_rule:
 3825     case MoveD2LEG_rule:
 3826     case MoveVL2D_rule:
 3827     case MoveLEG2D_rule:
 3828       return true;
 3829     default:
 3830       return false;
 3831   }
 3832 }
 3833 
 3834 bool Matcher::is_generic_vector(MachOper* opnd) {
 3835   switch (opnd->opcode()) {
 3836     case VEC:
 3837     case LEGVEC:
 3838       return true;
 3839     default:
 3840       return false;
 3841   }
 3842 }
 3843 
 3844 //------------------------------------------------------------------------
 3845 
 3846 const RegMask* Matcher::predicate_reg_mask(void) {
 3847   return &_VECTMASK_REG_mask;
 3848 }
 3849 
 3850 // Max vector size in bytes. 0 if not supported.
 3851 int Matcher::vector_width_in_bytes(BasicType bt) {
 3852   assert(is_java_primitive(bt), "only primitive type vectors");
 3853   // SSE2 supports 128bit vectors for all types.
 3854   // AVX2 supports 256bit vectors for all types.
 3855   // AVX2/EVEX supports 512bit vectors for all types.
 3856   int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16;
 3857   // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
 3858   if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
 3859     size = (UseAVX > 2) ? 64 : 32;
 3860   if (UseAVX > 2 && (bt == T_BYTE || bt == T_SHORT || bt == T_CHAR))
 3861     size = (VM_Version::supports_avx512bw()) ? 64 : 32;
 3862   // Use flag to limit vector size.
 3863   size = MIN2(size,(int)MaxVectorSize);
 3864   // Minimum 2 values in vector (or 4 for bytes).
 3865   switch (bt) {
 3866   case T_DOUBLE:
 3867   case T_LONG:
 3868     if (size < 16) return 0;
 3869     break;
 3870   case T_FLOAT:
 3871   case T_INT:
 3872     if (size < 8) return 0;
 3873     break;
 3874   case T_BOOLEAN:
 3875     if (size < 4) return 0;
 3876     break;
 3877   case T_CHAR:
 3878     if (size < 4) return 0;
 3879     break;
 3880   case T_BYTE:
 3881     if (size < 4) return 0;
 3882     break;
 3883   case T_SHORT:
 3884     if (size < 4) return 0;
 3885     break;
 3886   default:
 3887     ShouldNotReachHere();
 3888   }
 3889   return size;
 3890 }
 3891 
 3892 // Limits on vector size (number of elements) loaded into vector.
 3893 int Matcher::max_vector_size(const BasicType bt) {
 3894   return vector_width_in_bytes(bt)/type2aelembytes(bt);
 3895 }
 3896 int Matcher::min_vector_size(const BasicType bt) {
 3897   int max_size = max_vector_size(bt);
 3898   // Min size which can be loaded into vector is 4 bytes.
 3899   int size = (type2aelembytes(bt) == 1) ? 4 : 2;
 3900   // Support for calling svml double64 vectors
 3901   if (bt == T_DOUBLE) {
 3902     size = 1;
 3903   }
 3904   return MIN2(size,max_size);
 3905 }
 3906 
 3907 int Matcher::max_vector_size_auto_vectorization(const BasicType bt) {
 3908   // Limit the max vector size for auto vectorization to 256 bits (32 bytes)
 3909   // by default on Cascade Lake
 3910   if (VM_Version::is_default_intel_cascade_lake()) {
 3911     return MIN2(Matcher::max_vector_size(bt), 32 / type2aelembytes(bt));
 3912   }
 3913   return Matcher::max_vector_size(bt);
 3914 }
 3915 
 3916 int Matcher::scalable_vector_reg_size(const BasicType bt) {
 3917   return -1;
 3918 }
 3919 
 3920 // Vector ideal reg corresponding to specified size in bytes
 3921 uint Matcher::vector_ideal_reg(int size) {
 3922   assert(MaxVectorSize >= size, "");
 3923   switch(size) {
 3924     case  4: return Op_VecS;
 3925     case  8: return Op_VecD;
 3926     case 16: return Op_VecX;
 3927     case 32: return Op_VecY;
 3928     case 64: return Op_VecZ;
 3929   }
 3930   ShouldNotReachHere();
 3931   return 0;
 3932 }
 3933 
 3934 // Check for shift by small constant as well
 3935 static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) {
 3936   if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() &&
 3937       shift->in(2)->get_int() <= 3 &&
 3938       // Are there other uses besides address expressions?
 3939       !matcher->is_visited(shift)) {
 3940     address_visited.set(shift->_idx); // Flag as address_visited
 3941     mstack.push(shift->in(2), Matcher::Visit);
 3942     Node *conv = shift->in(1);
 3943     // Allow Matcher to match the rule which bypass
 3944     // ConvI2L operation for an array index on LP64
 3945     // if the index value is positive.
 3946     if (conv->Opcode() == Op_ConvI2L &&
 3947         conv->as_Type()->type()->is_long()->_lo >= 0 &&
 3948         // Are there other uses besides address expressions?
 3949         !matcher->is_visited(conv)) {
 3950       address_visited.set(conv->_idx); // Flag as address_visited
 3951       mstack.push(conv->in(1), Matcher::Pre_Visit);
 3952     } else {
 3953       mstack.push(conv, Matcher::Pre_Visit);
 3954     }
 3955     return true;
 3956   }
 3957   return false;
 3958 }
 3959 
 3960 // This function identifies sub-graphs in which a 'load' node is
 3961 // input to two different nodes, and such that it can be matched
 3962 // with BMI instructions like blsi, blsr, etc.
 3963 // Example : for b = -a[i] & a[i] can be matched to blsi r32, m32.
 3964 // The graph is (AndL (SubL Con0 LoadL*) LoadL*), where LoadL*
 3965 // refers to the same node.
 3966 //
 3967 // Match the generic fused operations pattern (op1 (op2 Con{ConType} mop) mop)
 3968 // This is a temporary solution until we make DAGs expressible in ADL.
 3969 template<typename ConType>
 3970 class FusedPatternMatcher {
 3971   Node* _op1_node;
 3972   Node* _mop_node;
 3973   int _con_op;
 3974 
 3975   static int match_next(Node* n, int next_op, int next_op_idx) {
 3976     if (n->in(1) == nullptr || n->in(2) == nullptr) {
 3977       return -1;
 3978     }
 3979 
 3980     if (next_op_idx == -1) { // n is commutative, try rotations
 3981       if (n->in(1)->Opcode() == next_op) {
 3982         return 1;
 3983       } else if (n->in(2)->Opcode() == next_op) {
 3984         return 2;
 3985       }
 3986     } else {
 3987       assert(next_op_idx > 0 && next_op_idx <= 2, "Bad argument index");
 3988       if (n->in(next_op_idx)->Opcode() == next_op) {
 3989         return next_op_idx;
 3990       }
 3991     }
 3992     return -1;
 3993   }
 3994 
 3995  public:
 3996   FusedPatternMatcher(Node* op1_node, Node* mop_node, int con_op) :
 3997     _op1_node(op1_node), _mop_node(mop_node), _con_op(con_op) { }
 3998 
 3999   bool match(int op1, int op1_op2_idx,  // op1 and the index of the op1->op2 edge, -1 if op1 is commutative
 4000              int op2, int op2_con_idx,  // op2 and the index of the op2->con edge, -1 if op2 is commutative
 4001              typename ConType::NativeType con_value) {
 4002     if (_op1_node->Opcode() != op1) {
 4003       return false;
 4004     }
 4005     if (_mop_node->outcnt() > 2) {
 4006       return false;
 4007     }
 4008     op1_op2_idx = match_next(_op1_node, op2, op1_op2_idx);
 4009     if (op1_op2_idx == -1) {
 4010       return false;
 4011     }
 4012     // Memory operation must be the other edge
 4013     int op1_mop_idx = (op1_op2_idx & 1) + 1;
 4014 
 4015     // Check that the mop node is really what we want
 4016     if (_op1_node->in(op1_mop_idx) == _mop_node) {
 4017       Node* op2_node = _op1_node->in(op1_op2_idx);
 4018       if (op2_node->outcnt() > 1) {
 4019         return false;
 4020       }
 4021       assert(op2_node->Opcode() == op2, "Should be");
 4022       op2_con_idx = match_next(op2_node, _con_op, op2_con_idx);
 4023       if (op2_con_idx == -1) {
 4024         return false;
 4025       }
 4026       // Memory operation must be the other edge
 4027       int op2_mop_idx = (op2_con_idx & 1) + 1;
 4028       // Check that the memory operation is the same node
 4029       if (op2_node->in(op2_mop_idx) == _mop_node) {
 4030         // Now check the constant
 4031         const Type* con_type = op2_node->in(op2_con_idx)->bottom_type();
 4032         if (con_type != Type::TOP && ConType::as_self(con_type)->get_con() == con_value) {
 4033           return true;
 4034         }
 4035       }
 4036     }
 4037     return false;
 4038   }
 4039 };
 4040 
 4041 static bool is_bmi_pattern(Node* n, Node* m) {
 4042   assert(VM_Version::supports_bmi1() && VM_Version::supports_avx(), "sanity");
 4043   if (n != nullptr && m != nullptr) {
 4044     if (m->Opcode() == Op_LoadI) {
 4045       FusedPatternMatcher<TypeInt> bmii(n, m, Op_ConI);
 4046       return bmii.match(Op_AndI, -1, Op_SubI,  1,  0)  ||
 4047              bmii.match(Op_AndI, -1, Op_AddI, -1, -1)  ||
 4048              bmii.match(Op_XorI, -1, Op_AddI, -1, -1);
 4049     } else if (m->Opcode() == Op_LoadL) {
 4050       FusedPatternMatcher<TypeLong> bmil(n, m, Op_ConL);
 4051       return bmil.match(Op_AndL, -1, Op_SubL,  1,  0) ||
 4052              bmil.match(Op_AndL, -1, Op_AddL, -1, -1) ||
 4053              bmil.match(Op_XorL, -1, Op_AddL, -1, -1);
 4054     }
 4055   }
 4056   return false;
 4057 }
 4058 
 4059 // Should the matcher clone input 'm' of node 'n'?
 4060 bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) {
 4061   // If 'n' and 'm' are part of a graph for BMI instruction, clone the input 'm'.
 4062   if (VM_Version::supports_bmi1() && VM_Version::supports_avx() && is_bmi_pattern(n, m)) {
 4063     mstack.push(m, Visit);
 4064     return true;
 4065   }
 4066   if (is_vshift_con_pattern(n, m)) { // ShiftV src (ShiftCntV con)
 4067     mstack.push(m, Visit);           // m = ShiftCntV
 4068     return true;
 4069   }
 4070   if (is_encode_and_store_pattern(n, m)) {
 4071     mstack.push(m, Visit);
 4072     return true;
 4073   }
 4074   return false;
 4075 }
 4076 
 4077 // Should the Matcher clone shifts on addressing modes, expecting them
 4078 // to be subsumed into complex addressing expressions or compute them
 4079 // into registers?
 4080 bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
 4081   Node *off = m->in(AddPNode::Offset);
 4082   if (off->is_Con()) {
 4083     address_visited.test_set(m->_idx); // Flag as address_visited
 4084     Node *adr = m->in(AddPNode::Address);
 4085 
 4086     // Intel can handle 2 adds in addressing mode, with one of them using an immediate offset.
 4087     // AtomicAdd is not an addressing expression.
 4088     // Cheap to find it by looking for screwy base.
 4089     if (adr->is_AddP() &&
 4090         !adr->in(AddPNode::Base)->is_top() &&
 4091         !adr->in(AddPNode::Offset)->is_Con() &&
 4092         off->get_long() == (int) (off->get_long()) && // immL32
 4093         // Are there other uses besides address expressions?
 4094         !is_visited(adr)) {
 4095       address_visited.set(adr->_idx); // Flag as address_visited
 4096       Node *shift = adr->in(AddPNode::Offset);
 4097       if (!clone_shift(shift, this, mstack, address_visited)) {
 4098         mstack.push(shift, Pre_Visit);
 4099       }
 4100       mstack.push(adr->in(AddPNode::Address), Pre_Visit);
 4101       mstack.push(adr->in(AddPNode::Base), Pre_Visit);
 4102     } else {
 4103       mstack.push(adr, Pre_Visit);
 4104     }
 4105 
 4106     // Clone X+offset as it also folds into most addressing expressions
 4107     mstack.push(off, Visit);
 4108     mstack.push(m->in(AddPNode::Base), Pre_Visit);
 4109     return true;
 4110   } else if (clone_shift(off, this, mstack, address_visited)) {
 4111     address_visited.test_set(m->_idx); // Flag as address_visited
 4112     mstack.push(m->in(AddPNode::Address), Pre_Visit);
 4113     mstack.push(m->in(AddPNode::Base), Pre_Visit);
 4114     return true;
 4115   }
 4116   return false;
 4117 }
 4118 
 4119 static inline Assembler::ComparisonPredicate booltest_pred_to_comparison_pred(int bt) {
 4120   switch (bt) {
 4121     case BoolTest::eq:
 4122       return Assembler::eq;
 4123     case BoolTest::ne:
 4124       return Assembler::neq;
 4125     case BoolTest::le:
 4126     case BoolTest::ule:
 4127       return Assembler::le;
 4128     case BoolTest::ge:
 4129     case BoolTest::uge:
 4130       return Assembler::nlt;
 4131     case BoolTest::lt:
 4132     case BoolTest::ult:
 4133       return Assembler::lt;
 4134     case BoolTest::gt:
 4135     case BoolTest::ugt:
 4136       return Assembler::nle;
 4137     default : ShouldNotReachHere(); return Assembler::_false;
 4138   }
 4139 }
 4140 
 4141 static inline Assembler::ComparisonPredicateFP booltest_pred_to_comparison_pred_fp(int bt) {
 4142   switch (bt) {
 4143   case BoolTest::eq: return Assembler::EQ_OQ;  // ordered non-signaling
 4144   // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare.
 4145   case BoolTest::ne: return Assembler::NEQ_UQ; // unordered non-signaling
 4146   case BoolTest::le: return Assembler::LE_OQ;  // ordered non-signaling
 4147   case BoolTest::ge: return Assembler::GE_OQ;  // ordered non-signaling
 4148   case BoolTest::lt: return Assembler::LT_OQ;  // ordered non-signaling
 4149   case BoolTest::gt: return Assembler::GT_OQ;  // ordered non-signaling
 4150   default: ShouldNotReachHere(); return Assembler::FALSE_OS;
 4151   }
 4152 }
 4153 
 4154 // Helper methods for MachSpillCopyNode::implementation().
// Register-to-register move of a vector value of ideal register kind 'ireg'
// from src_lo to dst_lo.
//  - masm != nullptr: the move instruction is emitted.
//  - masm == nullptr: in non-PRODUCT builds a textual form of the move is
//    printed to 'st'; in PRODUCT builds nothing happens.
// src_hi/dst_hi are only used by the sanity assert below.
static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
                          int src_hi, int dst_hi, uint ireg, outputStream* st) {
  // Except for VecS, source and destination must each be an even-aligned,
  // adjacent lo/hi pair.
  assert(ireg == Op_VecS || // 32bit vector
         ((src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
          (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi),
         "no non-adjacent vector moves" );
  if (masm) {
    switch (ireg) {
    case Op_VecS: // copy whole register
    case Op_VecD:
    case Op_VecX:
      // movdqu is used when UseAVX < 3 or AVX512VL is supported; otherwise
      // lane 0 is extracted with vextractf32x4.
      // NOTE(review): presumably because the 128bit EVEX move needs AVX512VL - confirm.
      if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
        __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
      } else {
        __ vextractf32x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
     }
      break;
    case Op_VecY:
      // Same scheme as above, with vmovdqu / vextractf64x4.
      if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
        __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
      } else {
        __ vextractf64x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
     }
      break;
    case Op_VecZ:
      // NOTE(review): the last argument (2) is presumably the 512bit vector length - confirm.
      __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2);
      break;
    default:
      ShouldNotReachHere();
    }
#ifndef PRODUCT
  } else {
    // Printing only. The mnemonic is chosen by 'ireg' alone, so it does not
    // reflect the vextract*/evmovdquq forms the emitting branch may select.
    switch (ireg) {
    case Op_VecS:
    case Op_VecD:
    case Op_VecX:
      st->print("movdqu  %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      break;
    case Op_VecY:
    case Op_VecZ:
      st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      break;
    default:
      ShouldNotReachHere();
    }
#endif
  }
}
 4203 
 4204 void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
 4205                      int stack_offset, int reg, uint ireg, outputStream* st) {
        // Moves a vector value of ideal register kind 'ireg' between register
        // 'reg' and the stack slot at [rsp + stack_offset].  is_load selects
        // stack -> register, otherwise register -> stack.  With a non-null
        // 'masm' the instruction(s) are emitted; with a null 'masm' a textual
        // form is printed to 'st' (that branch exists in non-PRODUCT builds only).
 4206   if (masm) {
 4207     if (is_load) {
 4208       switch (ireg) {
 4209       case Op_VecS:
 4210         __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
 4211         break;
 4212       case Op_VecD:
 4213         __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
 4214         break;
 4215       case Op_VecX:
 4216         if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4217           __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
 4218         } else {
                // Alternative load sequence: clear the destination register with
                // vpxor, then insert the 128 bits from the stack into lane 0.
 4219           __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
 4220           __ vinsertf32x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0);
 4221         }
 4222         break;
 4223       case Op_VecY:
 4224         if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4225           __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
 4226         } else {
                // Same as the VecX alternative, inserting 256 bits into lane 0.
 4227           __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
 4228           __ vinsertf64x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0);
 4229         }
 4230         break;
 4231       case Op_VecZ:
 4232         __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2);
 4233         break;
 4234       default:
 4235         ShouldNotReachHere();
 4236       }
 4237     } else { // store
 4238       switch (ireg) {
 4239       case Op_VecS:
 4240         __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
 4241         break;
 4242       case Op_VecD:
 4243         __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
 4244         break;
 4245       case Op_VecX:
 4246         if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4247           __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
 4248         }
 4249         else {
                // Alternative store: extract lane 0 (128 bits) directly to memory.
 4250           __ vextractf32x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
 4251         }
 4252         break;
 4253       case Op_VecY:
 4254         if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4255           __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
 4256         }
 4257         else {
                // Alternative store: extract lane 0 (256 bits) directly to memory.
 4258           __ vextractf64x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
 4259         }
 4260         break;
 4261       case Op_VecZ:
 4262         __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2);
 4263         break;
 4264       default:
 4265         ShouldNotReachHere();
 4266       }
 4267     }
 4268 #ifndef PRODUCT
 4269   } else {
          // Debug listing only.  The printed mnemonic depends solely on 'ireg'
          // and the direction; it does not reflect the vpxor/vinsert or vextract
          // alternatives chosen by the emitting branch above.
 4270     if (is_load) {
 4271       switch (ireg) {
 4272       case Op_VecS:
 4273         st->print("movd    %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
 4274         break;
 4275       case Op_VecD:
 4276         st->print("movq    %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
 4277         break;
 4278        case Op_VecX:
 4279         st->print("movdqu  %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
 4280         break;
 4281       case Op_VecY:
 4282       case Op_VecZ:
 4283         st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
 4284         break;
 4285       default:
 4286         ShouldNotReachHere();
 4287       }
 4288     } else { // store
 4289       switch (ireg) {
 4290       case Op_VecS:
 4291         st->print("movd    [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
 4292         break;
 4293       case Op_VecD:
 4294         st->print("movq    [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
 4295         break;
 4296        case Op_VecX:
 4297         st->print("movdqu  [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
 4298         break;
 4299       case Op_VecY:
 4300       case Op_VecZ:
 4301         st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
 4302         break;
 4303       default:
 4304         ShouldNotReachHere();
 4305       }
 4306     }
 4307 #endif
 4308   }
 4309 }
 4310 
 4311 template <class T>
 4312 static inline GrowableArray<jbyte>* vreplicate_imm(BasicType bt, T con, int len) {
        // Builds a byte array holding 'len' consecutive copies of the constant
        // 'con', each converted to the element type named by 'bt' and stored in
        // the host's in-memory representation (via memcpy).  The result has
        // type2aelembytes(bt) * len bytes.  Supported element types are T_BYTE,
        // T_SHORT, T_INT, T_LONG, T_FLOAT and T_DOUBLE; any other 'bt' trips the
        // assert in the default case.
 4313   int size = type2aelembytes(bt) * len;
        // NOTE(review): the three arguments are presumably (initial capacity,
        // initial length, fill value) -- confirm against GrowableArray.
 4314   GrowableArray<jbyte>* val = new GrowableArray<jbyte>(size, size, 0);
 4315   for (int i = 0; i < len; i++) {
 4316     int offset = i * type2aelembytes(bt);
 4317     switch (bt) {
            // Indexing by 'i' rather than 'offset' here assumes
            // type2aelembytes(T_BYTE) == 1 -- TODO confirm.
 4318       case T_BYTE: val->at(i) = con; break;
 4319       case T_SHORT: {
 4320         jshort c = con;
 4321         memcpy(val->adr_at(offset), &c, sizeof(jshort));
 4322         break;
 4323       }
 4324       case T_INT: {
 4325         jint c = con;
 4326         memcpy(val->adr_at(offset), &c, sizeof(jint));
 4327         break;
 4328       }
 4329       case T_LONG: {
 4330         jlong c = con;
 4331         memcpy(val->adr_at(offset), &c, sizeof(jlong));
 4332         break;
 4333       }
 4334       case T_FLOAT: {
 4335         jfloat c = con;
 4336         memcpy(val->adr_at(offset), &c, sizeof(jfloat));
 4337         break;
 4338       }
 4339       case T_DOUBLE: {
 4340         jdouble c = con;
 4341         memcpy(val->adr_at(offset), &c, sizeof(jdouble));
 4342         break;
 4343       }
 4344       default: assert(false, "%s", type2name(bt));
 4345     }
 4346   }
 4347   return val;
 4348 }
 4349 
 4350 static inline jlong high_bit_set(BasicType bt) {
        // Returns a 64-bit mask in which the most significant bit of every
        // element of type 'bt' is set: every byte for T_BYTE, every 16-bit lane
        // for T_SHORT, every 32-bit lane for T_INT, and bit 63 only for T_LONG.
        // Any other type is a programming error (ShouldNotReachHere).
 4351   switch (bt) {
 4352     case T_BYTE:  return 0x8080808080808080;
 4353     case T_SHORT: return 0x8000800080008000;
 4354     case T_INT:   return 0x8000000080000000;
 4355     case T_LONG:  return 0x8000000000000000;
 4356     default:
 4357       ShouldNotReachHere();
 4358       return 0;
 4359   }
 4360 }
 4361 
 4362 #ifndef PRODUCT
        // Debug listing: prints the nop together with its padding size (_count).
 4363   void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
 4364     st->print("nop \t# %d bytes pad for loops and calls", _count);
 4365   }
 4366 #endif
 4367 
 4368   void MachNopNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc*) const {
          // Emits the padding by passing _count to the assembler's nop().
 4369     __ nop(_count);
 4370   }
 4371 
 4372   uint MachNopNode::size(PhaseRegAlloc*) const {
          // The node's size is reported as _count, the same value that format()
          // prints as the number of padding bytes.
 4373     return _count;
 4374   }
 4375 
 4376 #ifndef PRODUCT
        // Debug listing: prints a fixed marker for the breakpoint node.
 4377   void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const {
 4378     st->print("# breakpoint");
 4379   }
 4380 #endif
 4381 
 4382   void MachBreakpointNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc* ra_) const {
          // A breakpoint is emitted as a single int3 instruction.
 4383     __ int3();
 4384   }
 4385 
 4386   uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const {
          // No size of its own is computed here; defers to MachNode::size().
 4387     return MachNode::size(ra_);
 4388   }
 4389 
 4390 %}
 4391 
 4392 //----------ENCODING BLOCK-----------------------------------------------------
 4393 // This block specifies the encoding classes used by the compiler to
 4394 // output byte streams.  Encoding classes are parameterized macros
 4395 // used by Machine Instruction Nodes in order to generate the bit
 4396 // encoding of the instruction.  Operands specify their base encoding
 4397 // interface with the interface keyword.  Four interfaces are currently
 4398 // supported: REG_INTER, CONST_INTER, MEMORY_INTER, &
 4399 // COND_INTER.  REG_INTER causes an operand to generate a function
 4400 // which returns its register number when queried.  CONST_INTER causes
 4401 // an operand to generate a function which returns the value of the
 4402 // constant when queried.  MEMORY_INTER causes an operand to generate
 4403 // four functions which return the Base Register, the Index Register,
 4404 // the Scale Value, and the Offset Value of the operand when queried.
 4405 // COND_INTER causes an operand to generate six functions which return
 4406 // the encoding code (ie - encoding bits for the instruction)
 4407 // associated with each basic boolean condition for a conditional
 4408 // instruction.
 4409 //
 4410 // Instructions specify two basic values for encoding.  Again, a
 4411 // function is available to check if the constant displacement is an
 4412 // oop. They use the ins_encode keyword to specify their encoding
 4413 // classes (which must be a sequence of enc_class names, and their
 4414 // parameters, specified in the encoding block), and they use the
 4415 // opcode keyword to specify, in order, their primary, secondary, and
 4416 // tertiary opcode.  Only the opcode sections which a particular
 4417 // instruction needs for encoding need to be specified.
 4418 encode %{
 4419   enc_class cdql_enc(no_rax_rdx_RegI div)
 4420   %{
 4421     // Full implementation of Java idiv and irem; checks for
 4422     // special case as described in JVM spec., p.243 & p.271.
 4423     //
 4424     //         normal case                           special case
 4425     //
 4426     // input : rax: dividend                         min_int
 4427     //         reg: divisor                          -1
 4428     //
 4429     // output: rax: quotient  (= rax idiv reg)       min_int
 4430     //         rdx: remainder (= rax irem reg)       0
 4431     //
 4432     //  Code sequence:
 4433     //
 4434     //    0:   3d 00 00 00 80          cmp    $0x80000000,%eax
 4435     //    5:   75 07/08                jne    e <normal>
 4436     //    7:   33 d2                   xor    %edx,%edx
 4437     //  [div >= 8 -> offset + 1]
 4438     //  [REX_B]
 4439     //    9:   83 f9 ff                cmp    $0xffffffffffffffff,$div
 4440     //    c:   74 03/04                je     11 <done>
 4441     // 000000000000000e <normal>:
 4442     //    e:   99                      cltd
 4443     //  [div >= 8 -> offset + 1]
 4444     //  [REX_B]
 4445     //    f:   f7 f9                   idiv   $div
 4446     // 0000000000000011 <done>:
 4447     Label normal;
 4448     Label done;
 4449 
 4450     // cmp    $0x80000000,%eax
 4451     __ cmpl(as_Register(RAX_enc), 0x80000000);
 4452 
 4453     // jne    e <normal>
 4454     __ jccb(Assembler::notEqual, normal);
 4455 
 4456     // xor    %edx,%edx
 4457     __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));
 4458 
 4459     // cmp    $0xffffffffffffffff,$div
 4460     __ cmpl($div$$Register, -1);
 4461 
 4462     // je     11 <done>
 4463     __ jccb(Assembler::equal, done);
 4464 
 4465     // <normal>
 4466     // cltd
 4467     __ bind(normal);
 4468     __ cdql();
 4469 
 4470     // idivl
 4471     // <done>
 4472     __ idivl($div$$Register);
 4473     __ bind(done);
 4474   %}
 4475 
 4476   enc_class cdqq_enc(no_rax_rdx_RegL div)
 4477   %{
 4478     // Full implementation of Java ldiv and lrem; checks for
 4479     // special case as described in JVM spec., p.243 & p.271.
 4480     //
 4481     //         normal case                           special case
 4482     //
 4483     // input : rax: dividend                         min_long
 4484     //         reg: divisor                          -1
 4485     //
 4486     // output: rax: quotient  (= rax idiv reg)       min_long
 4487     //         rdx: remainder (= rax irem reg)       0
 4488     //
 4489     //  Code sequence:
 4490     //
 4491     //    0:   48 ba 00 00 00 00 00    mov    $0x8000000000000000,%rdx
 4492     //    7:   00 00 80
 4493     //    a:   48 39 d0                cmp    %rdx,%rax
 4494     //    d:   75 08                   jne    17 <normal>
 4495     //    f:   33 d2                   xor    %edx,%edx
 4496     //   11:   48 83 f9 ff             cmp    $0xffffffffffffffff,$div
 4497     //   15:   74 05                   je     1c <done>
 4498     // 0000000000000017 <normal>:
 4499     //   17:   48 99                   cqto
 4500     //   19:   48 f7 f9                idiv   $div
 4501     // 000000000000001c <done>:
 4502     Label normal;
 4503     Label done;
 4504 
 4505     // mov    $0x8000000000000000,%rdx
 4506     __ mov64(as_Register(RDX_enc), 0x8000000000000000);
 4507 
 4508     // cmp    %rdx,%rax
 4509     __ cmpq(as_Register(RAX_enc), as_Register(RDX_enc));
 4510 
 4511     // jne    17 <normal>
 4512     __ jccb(Assembler::notEqual, normal);
 4513 
 4514     // xor    %edx,%edx
 4515     __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));
 4516 
 4517     // cmp    $0xffffffffffffffff,$div
 4518     __ cmpq($div$$Register, -1);
 4519 
 4520     // je     1c <done>
 4521     __ jccb(Assembler::equal, done);
 4522 
 4523     // <normal>
 4524     // cqto
 4525     __ bind(normal);
 4526     __ cdqq();
 4527 
 4528     // idivq
 4529     // <done>
 4530     __ idivq($div$$Register);
 4531     __ bind(done);
 4532   %}
 4533 
 4534   enc_class clear_avx %{
          // Optionally emits vzeroupper.  The offsets recorded around it are used
          // only by the assert, which checks that the number of bytes emitted
          // equals clear_avx_size().
 4535     DEBUG_ONLY(int off0 = __ offset());
 4536     if (generate_vzeroupper(Compile::current())) {
 4537       // Clear the upper bits of the YMM registers when the current compiled
 4538       // code uses wide vectors, to avoid the AVX <-> SSE transition penalty
 4539       // during the call.
 4540       __ vzeroupper();
 4541     }
 4542     DEBUG_ONLY(int off1 = __ offset());
 4543     assert(off1 - off0 == clear_avx_size(), "correct size prediction");
 4544   %}
 4545 
 4546   enc_class Java_To_Runtime(method meth) %{
          // Call to a runtime entry point: load the target address into r10,
          // call through r10, then emit the post-call nop.
 4547     __ lea(r10, RuntimeAddress((address)$meth$$method));
 4548     __ call(r10);
 4549     __ post_call_nop();
 4550   %}
 4551 
 4552   enc_class Java_Static_Call(method meth)
 4553   %{
 4554     // JAVA STATIC CALL
 4555     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to
 4556     // determine who we intended to call.
          //
          // Three cases follow:
          //  - no _method: a plain call to the runtime address;
          //  - the _ensureMaterializedForStackWalk intrinsic: the call is elided
          //    and replaced by a 5-byte nop;
          //  - otherwise: a relocated call plus a to-interpreter stub, which is
          //    either shared or emitted here.
 4557     if (!_method) {
 4558       __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, $meth$$method)));
 4559     } else if (_method->intrinsic_id() == vmIntrinsicID::_ensureMaterializedForStackWalk) {
 4560       // The NOP here is purely to ensure that eliding a call to
 4561       // JVM_EnsureMaterializedForStackWalk doesn't change the code size.
 4562       __ nop(5);
 4563       __ block_comment("call JVM_EnsureMaterializedForStackWalk (elided)");
 4564     } else {
 4565       int method_index = resolved_method_index(masm);
            // Optimized virtual calls and static calls differ only in the
            // relocation type attached to the call.
 4566       RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
 4567                                                   : static_call_Relocation::spec(method_index);
            // Record the call's pc and offset before emitting it; the stub code
            // below refers back to them.
 4568       address mark = __ pc();
 4569       int call_offset = __ offset();
 4570       __ call(AddressLiteral(CAST_FROM_FN_PTR(address, $meth$$method), rspec));
 4571       if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) {
 4572         // Calls of the same statically bound method can share
 4573         // a stub to the interpreter.
 4574         __ code()->shared_stub_to_interp_for(_method, call_offset);
 4575       } else {
 4576         // Emit stubs for static call.
 4577         address stub = CompiledDirectCall::emit_to_interp_stub(masm, mark);
 4578         __ clear_inst_mark();
 4579         if (stub == nullptr) {
                // Stub emission failed: record the failure and stop emitting
                // (the post-call nop below is skipped on this path).
 4580           ciEnv::current()->record_failure("CodeCache is full");
 4581           return;
 4582         }
 4583       }
 4584     }
 4585     __ post_call_nop();
 4586   %}
 4587 
 4588   enc_class Java_Dynamic_Call(method meth) %{
          // Emits the call via the assembler's ic_call(), passing the resolved
          // method index, followed by the post-call nop.
 4589     __ ic_call((address)$meth$$method, resolved_method_index(masm));
 4590     __ post_call_nop();
 4591   %}
 4592 
 4593   enc_class call_epilog %{
          // Emits code only when VerifyStackAtCalls is set.
 4594     if (VerifyStackAtCalls) {
 4595       // Check that stack depth is unchanged: find magic cookie on stack
 4596       int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
 4597       Label L;
            // The word at [rsp + framesize] must still hold the cookie 0xbadb100d.
 4598       __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
 4599       __ jccb(Assembler::equal, L);
 4600       // Die if stack mismatch
 4601       __ int3();
 4602       __ bind(L);
 4603     }
 4604   %}
 4605 
 4606 %}
 4607 
 4608 //----------FRAME--------------------------------------------------------------
 4609 // Definition of frame structure and management information.
 4610 //
 4611 //  S T A C K   L A Y O U T    Allocators stack-slot number
 4612 //                             |   (to get allocators register number
 4613 //  G  Owned by    |        |  v    add OptoReg::stack0())
 4614 //  r   CALLER     |        |
 4615 //  o     |        +--------+      pad to even-align allocators stack-slot
 4616 //  w     V        |  pad0  |        numbers; owned by CALLER
 4617 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
 4618 //  h     ^        |   in   |  5
 4619 //        |        |  args  |  4   Holes in incoming args owned by SELF
 4620 //  |     |        |        |  3
 4621 //  |     |        +--------+
 4622 //  V     |        | old out|      Empty on Intel, window on Sparc
 4623 //        |    old |preserve|      Must be even aligned.
 4624 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
 4625 //        |        |   in   |  3   area for Intel ret address
 4626 //     Owned by    |preserve|      Empty on Sparc.
 4627 //       SELF      +--------+
 4628 //        |        |  pad2  |  2   pad to align old SP
 4629 //        |        +--------+  1
 4630 //        |        | locks  |  0
 4631 //        |        +--------+----> OptoReg::stack0(), even aligned
 4632 //        |        |  pad1  | 11   pad to align new SP
 4633 //        |        +--------+
 4634 //        |        |        | 10
 4635 //        |        | spills |  9   spills
 4636 //        V        |        |  8   (pad0 slot for callee)
 4637 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
 4638 //        ^        |  out   |  7
 4639 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
 4640 //     Owned by    +--------+
 4641 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
 4642 //        |    new |preserve|      Must be even-aligned.
 4643 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
 4644 //        |        |        |
 4645 //
 4646 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
 4647 //         known from SELF's arguments and the Java calling convention.
 4648 //         Region 6-7 is determined per call site.
 4649 // Note 2: If the calling convention leaves holes in the incoming argument
 4650 //         area, those holes are owned by SELF.  Holes in the outgoing area
 4651 //         are owned by the CALLEE.  Holes should not be necessary in the
 4652 //         incoming area, as the Java calling convention is completely under
 4653 //         the control of the AD file.  Doubles can be sorted and packed to
 4654 //         avoid holes.  Holes in the outgoing arguments may be necessary for
 4655 //         varargs C calling conventions.
 4656 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
 4657 //         even aligned with pad0 as needed.
 4658 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
 4659 //         region 6-11 is even aligned; it may be padded out more so that
 4660 //         the region from SP to FP meets the minimum stack alignment.
 4661 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
 4662 //         alignment.  Region 11, pad1, may be dynamically extended so that
 4663 //         SP meets the minimum alignment.
 4664 
 4665 frame
 4666 %{
 4667   // The inline cache register is part of the calling convention
 4668   // between compiled code and the interpreter.
 4669   inline_cache_reg(RAX);                // Inline Cache Register
 4670 
 4671   // Optional: name the operand used by cisc-spilling to access
 4672   // [stack_pointer + offset]
 4673   cisc_spilling_operand_name(indOffset32);
 4674 
 4675   // Number of stack slots consumed by locking an object
 4676   sync_stack_slots(2);
 4677 
 4678   // Compiled code's Frame Pointer
 4679   frame_pointer(RSP);
 4680 
 4681   // Stack alignment requirement
 4682   stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)
 4683 
 4684   // Number of outgoing stack slots killed above the out_preserve_stack_slots
 4685   // for calls to C.  Supports the var-args backing area for register parms.
 4686   varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);
 4687 
 4688   // The after-PROLOG location of the return address.  Location of
 4689   // return address specifies a type (REG or STACK) and a number
 4690   // representing the register number (i.e. - use a register name) or
 4691   // stack slot.
 4692   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
 4693   // Otherwise, it is above the locks and verification slot and alignment word
 4694   return_addr(STACK - 2 +
 4695               align_up((Compile::current()->in_preserve_stack_slots() +
 4696                         Compile::current()->fixed_slots()),
 4697                        stack_alignment_in_slots()));
 4698 
 4699   // Location of compiled Java return values.  Same as C for now.
 4700   return_value
 4701   %{
          // Only ideal register kinds in the range Op_RegI..Op_RegL are handled.
 4702     assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
 4703            "only return normal values");
 4704 
          // lo[] and hi[] are indexed by ideal_reg and supply the two halves of
          // the returned OptoRegPair.  The first two entries of each table are
          // unlabeled zero placeholders.
 4705     static const int lo[Op_RegL + 1] = {
 4706       0,
 4707       0,
 4708       RAX_num,  // Op_RegN
 4709       RAX_num,  // Op_RegI
 4710       RAX_num,  // Op_RegP
 4711       XMM0_num, // Op_RegF
 4712       XMM0_num, // Op_RegD
 4713       RAX_num   // Op_RegL
 4714     };
          // hi[] is OptoReg::Bad for the Op_RegN, Op_RegI and Op_RegF entries
          // (presumably because those values occupy a single slot -- confirm).
 4715     static const int hi[Op_RegL + 1] = {
 4716       0,
 4717       0,
 4718       OptoReg::Bad, // Op_RegN
 4719       OptoReg::Bad, // Op_RegI
 4720       RAX_H_num,    // Op_RegP
 4721       OptoReg::Bad, // Op_RegF
 4722       XMM0b_num,    // Op_RegD
 4723       RAX_H_num     // Op_RegL
 4724     };
 4725     // Excluded flags and vector registers.
 4726     assert(ARRAY_SIZE(hi) == _last_machine_leaf - 8, "missing type");
 4727     return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
 4728   %}
 4729 %}
 4730 
 4731 //----------ATTRIBUTES---------------------------------------------------------
      // NOTE(review): the value in parentheses is presumably the default for each
      // attribute; operand definitions later in this file set op_cost explicitly
      // (e.g. op_cost(10)).
 4732 //----------Operand Attributes-------------------------------------------------
 4733 op_attrib op_cost(0);        // Required cost attribute
 4734 
 4735 //----------Instruction Attributes---------------------------------------------
 4736 ins_attrib ins_cost(100);       // Required cost attribute
 4737 ins_attrib ins_size(8);         // Required size attribute (in bits)
 4738 ins_attrib ins_short_branch(0); // Required flag: is this instruction
 4739                                 // a non-matching short branch variant
 4740                                 // of some long branch?
 4741 ins_attrib ins_alignment(1);    // Required alignment attribute (must
 4742                                 // be a power of 2) specifies the
 4743                                 // alignment that some part of the
 4744                                 // instruction (not necessarily the
 4745                                 // start) requires.  If > 1, a
 4746                                 // compute_padding() function must be
 4747                                 // provided for the instruction
 4748 
 4749 // Whether this node is expanded during code emission into a sequence of
 4750 // instructions and the first instruction can perform an implicit null check.
 4751 ins_attrib ins_is_late_expanded_null_check_candidate(false);
 4752 
 4753 //----------OPERANDS-----------------------------------------------------------
 4754 // Operand definitions must precede instruction definitions for correct parsing
 4755 // in the ADLC because operands constitute user defined types which are used in
 4756 // instruction definitions.
 4757 
 4758 //----------Simple Operands----------------------------------------------------
 4759 // Immediate Operands
 4760 // Integer Immediate: matches any int constant (ConI), no predicate
 4761 operand immI()
 4762 %{
 4763   match(ConI);
 4764 
 4765   op_cost(10);
 4766   format %{ %}
 4767   interface(CONST_INTER);
 4768 %}
 4769 
 4770 // Int constant 0 (constant for test vs zero)
 4771 operand immI_0()
 4772 %{
 4773   predicate(n->get_int() == 0);
 4774   match(ConI);
 4775 
 4776   op_cost(0);
 4777   format %{ %}
 4778   interface(CONST_INTER);
 4779 %}
 4780 
 4781 // Int constant 1 (constant for increment)
 4782 operand immI_1()
 4783 %{
 4784   predicate(n->get_int() == 1);
 4785   match(ConI);
 4786 
 4787   op_cost(0);
 4788   format %{ %}
 4789   interface(CONST_INTER);
 4790 %}
 4791 
 4792 // Int constant -1 (constant for decrement)
 4793 operand immI_M1()
 4794 %{
 4795   predicate(n->get_int() == -1);
 4796   match(ConI);
 4797 
 4798   op_cost(0);
 4799   format %{ %}
 4800   interface(CONST_INTER);
 4801 %}
 4802 
 4803 operand immI_2()
 4804 %{
        // Matches only the int constant 2.
 4805   predicate(n->get_int() == 2);
 4806   match(ConI);
 4807 
 4808   op_cost(0);
 4809   format %{ %}
 4810   interface(CONST_INTER);
 4811 %}
 4812 
 4813 operand immI_4()
 4814 %{
        // Matches only the int constant 4.
 4815   predicate(n->get_int() == 4);
 4816   match(ConI);
 4817 
 4818   op_cost(0);
 4819   format %{ %}
 4820   interface(CONST_INTER);
 4821 %}
 4822 
 4823 operand immI_8()
 4824 %{
        // Matches only the int constant 8.
 4825   predicate(n->get_int() == 8);
 4826   match(ConI);
 4827 
 4828   op_cost(0);
 4829   format %{ %}
 4830   interface(CONST_INTER);
 4831 %}
 4832 
 4833 // Valid scale values for addressing modes: int constants 0..3 inclusive
 4834 operand immI2()
 4835 %{
 4836   predicate(0 <= n->get_int() && (n->get_int() <= 3));
 4837   match(ConI);
 4838 
 4839   format %{ %}
 4840   interface(CONST_INTER);
 4841 %}
 4842 
 4843 operand immU7()
 4844 %{
        // Unsigned 7-bit int immediate: 0..0x7F inclusive.
 4845   predicate((0 <= n->get_int()) && (n->get_int() <= 0x7F));
 4846   match(ConI);
 4847 
 4848   op_cost(5);
 4849   format %{ %}
 4850   interface(CONST_INTER);
 4851 %}
 4852 
 4853 operand immI8()
 4854 %{
        // Signed 8-bit int immediate: -0x80..0x7F inclusive.
 4855   predicate((-0x80 <= n->get_int()) && (n->get_int() < 0x80));
 4856   match(ConI);
 4857 
 4858   op_cost(5);
 4859   format %{ %}
 4860   interface(CONST_INTER);
 4861 %}
 4862 
 4863 operand immU8()
 4864 %{
        // Unsigned 8-bit int immediate: 0..255 inclusive.
 4865   predicate((0 <= n->get_int()) && (n->get_int() <= 255));
 4866   match(ConI);
 4867 
 4868   op_cost(5);
 4869   format %{ %}
 4870   interface(CONST_INTER);
 4871 %}
 4872 
 4873 operand immI16()
 4874 %{
        // Signed 16-bit int immediate: -32768..32767 inclusive.
 4875   predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
 4876   match(ConI);
 4877 
 4878   op_cost(10);
 4879   format %{ %}
 4880   interface(CONST_INTER);
 4881 %}
 4882 
 4883 // Int Immediate non-negative (any int constant >= 0)
 4884 operand immU31()
 4885 %{
 4886   predicate(n->get_int() >= 0);
 4887   match(ConI);
 4888 
 4889   op_cost(0);
 4890   format %{ %}
 4891   interface(CONST_INTER);
 4892 %}
 4893 
 4894 // Pointer Immediate: matches any pointer constant (ConP), no predicate
 4895 operand immP()
 4896 %{
 4897   match(ConP);
 4898 
 4899   op_cost(10);
 4900   format %{ %}
 4901   interface(CONST_INTER);
 4902 %}
 4903 
 4904 // Null Pointer Immediate: a ConP whose pointer value is 0
 4905 operand immP0()
 4906 %{
 4907   predicate(n->get_ptr() == 0);
 4908   match(ConP);
 4909 
 4910   op_cost(5);
 4911   format %{ %}
 4912   interface(CONST_INTER);
 4913 %}
 4914 
 4915 // Narrow Pointer Immediate: matches any ConN, no predicate
 4916 operand immN() %{
 4917   match(ConN);
 4918 
 4919   op_cost(10);
 4920   format %{ %}
 4921   interface(CONST_INTER);
 4922 %}
 4923 
 4924 operand immNKlass() %{
        // Matches any ConNKlass constant, no predicate.
 4925   match(ConNKlass);
 4926 
 4927   op_cost(10);
 4928   format %{ %}
 4929   interface(CONST_INTER);
 4930 %}
 4931 
 4932 // Narrow Null Pointer Immediate: a ConN whose narrow constant is 0
 4933 operand immN0() %{
 4934   predicate(n->get_narrowcon() == 0);
 4935   match(ConN);
 4936 
 4937   op_cost(5);
 4938   format %{ %}
 4939   interface(CONST_INTER);
 4940 %}
 4941 
 4942 operand immP31()
 4943 %{
        // Pointer constant that carries no relocation and whose value has no
        // bits set at or above bit 31.
 4944   predicate(n->as_Type()->type()->is_ptr()->reloc() == relocInfo::none
 4945             && (n->get_ptr() >> 31) == 0);
 4946   match(ConP);
 4947 
 4948   op_cost(5);
 4949   format %{ %}
 4950   interface(CONST_INTER);
 4951 %}
 4952 
 4953 
 4954 // Long Immediate: matches any long constant (ConL), no predicate
 4955 operand immL()
 4956 %{
 4957   match(ConL);
 4958 
 4959   op_cost(20);
 4960   format %{ %}
 4961   interface(CONST_INTER);
 4962 %}
 4963 
 4964 // Long Immediate 8-bit signed: -0x80..0x7F inclusive
 4965 operand immL8()
 4966 %{
 4967   predicate(-0x80L <= n->get_long() && n->get_long() < 0x80L);
 4968   match(ConL);
 4969 
 4970   op_cost(5);
 4971   format %{ %}
 4972   interface(CONST_INTER);
 4973 %}
 4974 
 4975 // Long Immediate 32-bit unsigned: value equals its own low 32 bits zero-extended
 4976 operand immUL32()
 4977 %{
 4978   predicate(n->get_long() == (unsigned int) (n->get_long()));
 4979   match(ConL);
 4980 
 4981   op_cost(10);
 4982   format %{ %}
 4983   interface(CONST_INTER);
 4984 %}
 4985 
 4986 // Long Immediate 32-bit signed: value equals its own low 32 bits sign-extended
 4987 operand immL32()
 4988 %{
 4989   predicate(n->get_long() == (int) (n->get_long()));
 4990   match(ConL);
 4991 
 4992   op_cost(15);
 4993   format %{ %}
 4994   interface(CONST_INTER);
 4995 %}
 4996 
 4997 operand immL_Pow2()
 4998 %{
        // Long constant which, viewed as unsigned, is a power of two
        // (per is_power_of_2).
 4999   predicate(is_power_of_2((julong)n->get_long()));
 5000   match(ConL);
 5001 
 5002   op_cost(15);
 5003   format %{ %}
 5004   interface(CONST_INTER);
 5005 %}
 5006 
 5007 operand immL_NotPow2()
 5008 %{
        // Long constant whose bitwise complement, viewed as unsigned, is a
        // power of two (per is_power_of_2).
 5009   predicate(is_power_of_2((julong)~n->get_long()));
 5010   match(ConL);
 5011 
 5012   op_cost(15);
 5013   format %{ %}
 5014   interface(CONST_INTER);
 5015 %}
 5016 
 5017 // Long Immediate zero: matches only the long constant 0
 5018 operand immL0()
 5019 %{
 5020   predicate(n->get_long() == 0L);
 5021   match(ConL);
 5022 
 5023   op_cost(10);
 5024   format %{ %}
 5025   interface(CONST_INTER);
 5026 %}
 5027 
 5028 // Long constant 1 (constant for increment)
 5029 operand immL1()
 5030 %{
 5031   predicate(n->get_long() == 1);
 5032   match(ConL);
 5033 
 5034   format %{ %}
 5035   interface(CONST_INTER);
 5036 %}
 5037 
 5038 // Long constant -1 (constant for decrement)
 5039 operand immL_M1()
 5040 %{
 5041   predicate(n->get_long() == -1);
 5042   match(ConL);
 5043 
 5044   format %{ %}
 5045   interface(CONST_INTER);
 5046 %}
 5047 
 5048 // Long Immediate: low 32-bit mask (exactly 0xFFFFFFFF)
 5049 operand immL_32bits()
 5050 %{
 5051   predicate(n->get_long() == 0xFFFFFFFFL);
 5052   match(ConL);
 5053   op_cost(20);
 5054 
 5055   format %{ %}
 5056   interface(CONST_INTER);
 5057 %}
 5058 
 5059 // Int Immediate: 2^n-1, positive (value > 0 and value + 1 is a power of two)
 5060 operand immI_Pow2M1()
 5061 %{
 5062   predicate((n->get_int() > 0)
 5063             && is_power_of_2((juint)n->get_int() + 1));
 5064   match(ConI);
 5065 
 5066   op_cost(20);
 5067   format %{ %}
 5068   interface(CONST_INTER);
 5069 %}
 5070 
 5071 // Float Immediate zero
 5072 operand immF0()
 5073 %{
        // NOTE(review): the test goes through jint_cast, presumably comparing the
        // raw bit pattern so that only +0.0f (not -0.0f) matches -- confirm.
 5074   predicate(jint_cast(n->getf()) == 0);
 5075   match(ConF);
 5076 
 5077   op_cost(5);
 5078   format %{ %}
 5079   interface(CONST_INTER);
 5080 %}
 5081 
 5082 // Float Immediate: matches any float constant (ConF), no predicate
 5083 operand immF()
 5084 %{
 5085   match(ConF);
 5086 
 5087   op_cost(15);
 5088   format %{ %}
 5089   interface(CONST_INTER);
 5090 %}
 5091 
 5092 // Half Float Immediate: matches any ConH, no predicate
 5093 operand immH()
 5094 %{
 5095   match(ConH);
 5096 
 5097   op_cost(15);
 5098   format %{ %}
 5099   interface(CONST_INTER);
 5100 %}
 5101 
 5102 // Double Immediate zero
 5103 operand immD0()
 5104 %{
        // NOTE(review): the test goes through jlong_cast, presumably comparing the
        // raw bit pattern so that only +0.0 (not -0.0) matches -- confirm.
 5105   predicate(jlong_cast(n->getd()) == 0);
 5106   match(ConD);
 5107 
 5108   op_cost(5);
 5109   format %{ %}
 5110   interface(CONST_INTER);
 5111 %}
 5112 
 5113 // Double Immediate: matches any double constant (ConD), no predicate
 5114 operand immD()
 5115 %{
 5116   match(ConD);
 5117 
 5118   op_cost(15);
 5119   format %{ %}
 5120   interface(CONST_INTER);
 5121 %}
 5122 
 5123 // Immediates for special shifts (sign extend)
 5124 
// Int Immediate: the constant 16
// One of the immediates grouped under "special shifts (sign extend)".
// Matches a ConI node only when its value is exactly 16.
operand immI_16()
%{
  predicate(n->get_int() == 16);
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}
 5134 
// Int Immediate: the constant 24
// Matches a ConI node only when its value is exactly 24.
operand immI_24()
%{
  predicate(n->get_int() == 24);
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}
 5143 
// Int Immediate: the constant 255 (constant for byte-wide masking)
// Matches a ConI node only when its value is exactly 255.
operand immI_255()
%{
  predicate(n->get_int() == 255);
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}
 5153 
// Int Immediate: the constant 65535 (constant for short-wide masking)
// Matches a ConI node only when its value is exactly 65535.
operand immI_65535()
%{
  predicate(n->get_int() == 65535);
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}
 5163 
// Long Immediate: the constant 255 (constant for byte-wide masking)
// Matches a ConL node only when its value is exactly 255.
operand immL_255()
%{
  predicate(n->get_long() == 255);
  match(ConL);

  format %{ %}
  interface(CONST_INTER);
%}
 5173 
// Long Immediate: the constant 65535 (constant for short-wide masking)
// Matches a ConL node only when its value is exactly 65535.
operand immL_65535()
%{
  predicate(n->get_long() == 65535);
  match(ConL);

  format %{ %}
  interface(CONST_INTER);
%}
 5183 
// AOT Runtime Constants Address
// Pointer immediate: matches a ConP node only when
// AOTRuntimeConstants::contains() accepts its pointer value.
// Declared with op_cost(0).
operand immAOTRuntimeConstantsAddress()
%{
  // Check if the address is in the range of AOT Runtime Constants
  predicate(AOTRuntimeConstants::contains((address)(n->get_ptr())));
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
 5195 
// Vector mask register operand
// Matches RegVectMask and allocates in register class vectmask_reg.
operand kReg()
%{
  constraint(ALLOC_IN_RC(vectmask_reg));
  match(RegVectMask);
  format %{%}
  interface(REG_INTER);
%}
 5203 
 5204 // Register Operands
// Integer Register
// General int register operand, allocated in register class int_reg.
// Besides RegI it also matches the fixed-register int operands listed
// below (rax, rbx, rcx, rdx, rdi).
operand rRegI()
%{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegI);

  match(rax_RegI);
  match(rbx_RegI);
  match(rcx_RegI);
  match(rdx_RegI);
  match(rdi_RegI);

  format %{ %}
  interface(REG_INTER);
%}
 5220 
// Special Registers
// Int operand allocated in register class int_rax_reg; printed as "RAX".
// Matches RegI and rRegI.
operand rax_RegI()
%{
  constraint(ALLOC_IN_RC(int_rax_reg));
  match(RegI);
  match(rRegI);

  format %{ "RAX" %}
  interface(REG_INTER);
%}
 5231 
// Special Registers
// Int operand allocated in register class int_rbx_reg; printed as "RBX".
// Matches RegI and rRegI.
operand rbx_RegI()
%{
  constraint(ALLOC_IN_RC(int_rbx_reg));
  match(RegI);
  match(rRegI);

  format %{ "RBX" %}
  interface(REG_INTER);
%}
 5242 
// Int operand allocated in register class int_rcx_reg; printed as "RCX".
// Matches RegI and rRegI.
operand rcx_RegI()
%{
  constraint(ALLOC_IN_RC(int_rcx_reg));
  match(RegI);
  match(rRegI);

  format %{ "RCX" %}
  interface(REG_INTER);
%}
 5252 
// Int operand allocated in register class int_rdx_reg; printed as "RDX".
// Matches RegI and rRegI.
operand rdx_RegI()
%{
  constraint(ALLOC_IN_RC(int_rdx_reg));
  match(RegI);
  match(rRegI);

  format %{ "RDX" %}
  interface(REG_INTER);
%}
 5262 
// Int operand allocated in register class int_rdi_reg; printed as "RDI".
// Matches RegI and rRegI.
operand rdi_RegI()
%{
  constraint(ALLOC_IN_RC(int_rdi_reg));
  match(RegI);
  match(rRegI);

  format %{ "RDI" %}
  interface(REG_INTER);
%}
 5272 
// Int operand allocated in register class int_no_rax_rdx_reg.
// Of the fixed-register int operands it matches only rbx, rcx and rdi;
// rax_RegI and rdx_RegI are not listed.
operand no_rax_rdx_RegI()
%{
  constraint(ALLOC_IN_RC(int_no_rax_rdx_reg));
  match(RegI);
  match(rbx_RegI);
  match(rcx_RegI);
  match(rdi_RegI);

  format %{ %}
  interface(REG_INTER);
%}
 5284 
// Int operand allocated in register class int_no_rbp_r13_reg.
// Matches RegI, rRegI and every fixed-register int operand defined above
// (rax, rbx, rcx, rdx, rdi).
operand no_rbp_r13_RegI()
%{
  constraint(ALLOC_IN_RC(int_no_rbp_r13_reg));
  match(RegI);
  match(rRegI);
  match(rax_RegI);
  match(rbx_RegI);
  match(rcx_RegI);
  match(rdx_RegI);
  match(rdi_RegI);

  format %{ %}
  interface(REG_INTER);
%}
 5299 
// Pointer Register
// Pointer operand allocated in register class any_reg. Matches RegP, rRegP
// and the fixed-register pointer operands rax, rbx, rdi, rsi, rbp and r15.
// The memory operands below use it as the type of their base register.
operand any_RegP()
%{
  constraint(ALLOC_IN_RC(any_reg));
  match(RegP);
  match(rax_RegP);
  match(rbx_RegP);
  match(rdi_RegP);
  match(rsi_RegP);
  match(rbp_RegP);
  match(r15_RegP);
  match(rRegP);

  format %{ %}
  interface(REG_INTER);
%}
 5316 
// General pointer register operand, allocated in register class ptr_reg.
// Matches RegP and the fixed-register pointer operands rax, rbx, rdi, rsi,
// rbp and r15.
operand rRegP()
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(RegP);
  match(rax_RegP);
  match(rbx_RegP);
  match(rdi_RegP);
  match(rsi_RegP);
  match(rbp_RegP);  // See Q&A below about
  match(r15_RegP);  // r15_RegP and rbp_RegP.

  format %{ %}
  interface(REG_INTER);
%}
 5331 
// Narrow (compressed) pointer register operand.
// Matches RegN and is allocated in the int register class int_reg.
operand rRegN() %{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegN);

  format %{ %}
  interface(REG_INTER);
%}
 5339 
 5340 // Question: Why is r15_RegP (the read-only TLS register) a match for rRegP?
 5341 // Answer: Operand match rules govern the DFA as it processes instruction inputs.
 5342 // It's fine for an instruction input that expects rRegP to match a r15_RegP.
 5343 // The output of an instruction is controlled by the allocator, which respects
 5344 // register class masks, not match rules.  Unless an instruction mentions
// r15_RegP or any_RegP explicitly as its output, r15 will not be considered
// by the allocator as a candidate for that output.
 5347 // The same logic applies to rbp_RegP being a match for rRegP: If PreserveFramePointer==true,
 5348 // the RBP is used as a proper frame pointer and is not included in ptr_reg. As a
 5349 // result, RBP is not included in the output of the instruction either.
 5350 
// This operand is not allowed to use RBP even if
// RBP is not used to hold the frame pointer.
// Allocated in register class ptr_reg_no_rbp. Of the fixed-register pointer
// operands it matches only rbx, rsi and rdi.
operand no_rbp_RegP()
%{
  constraint(ALLOC_IN_RC(ptr_reg_no_rbp));
  match(RegP);
  match(rbx_RegP);
  match(rsi_RegP);
  match(rdi_RegP);

  format %{ %}
  interface(REG_INTER);
%}
 5364 
// Special Registers
// Return a pointer value
// Pointer operand allocated in register class ptr_rax_reg.
// Matches RegP and rRegP.
operand rax_RegP()
%{
  constraint(ALLOC_IN_RC(ptr_rax_reg));
  match(RegP);
  match(rRegP);

  format %{ %}
  interface(REG_INTER);
%}
 5376 
// Special Registers
// Return a compressed pointer value
// Narrow pointer operand allocated in register class int_rax_reg.
// Matches RegN and rRegN.
operand rax_RegN()
%{
  constraint(ALLOC_IN_RC(int_rax_reg));
  match(RegN);
  match(rRegN);

  format %{ %}
  interface(REG_INTER);
%}
 5388 
// Used in AtomicAdd
// Pointer operand allocated in register class ptr_rbx_reg.
// Matches RegP and rRegP.
operand rbx_RegP()
%{
  constraint(ALLOC_IN_RC(ptr_rbx_reg));
  match(RegP);
  match(rRegP);

  format %{ %}
  interface(REG_INTER);
%}
 5399 
// Pointer operand allocated in register class ptr_rsi_reg.
// Matches RegP and rRegP.
operand rsi_RegP()
%{
  constraint(ALLOC_IN_RC(ptr_rsi_reg));
  match(RegP);
  match(rRegP);

  format %{ %}
  interface(REG_INTER);
%}
 5409 
// Pointer operand allocated in register class ptr_rbp_reg.
// Matches RegP and rRegP.
operand rbp_RegP()
%{
  constraint(ALLOC_IN_RC(ptr_rbp_reg));
  match(RegP);
  match(rRegP);

  format %{ %}
  interface(REG_INTER);
%}
 5419 
// Used in rep stosq
// Pointer operand allocated in register class ptr_rdi_reg.
// Matches RegP and rRegP.
operand rdi_RegP()
%{
  constraint(ALLOC_IN_RC(ptr_rdi_reg));
  match(RegP);
  match(rRegP);

  format %{ %}
  interface(REG_INTER);
%}
 5430 
// Pointer operand allocated in register class ptr_r15_reg.
// Matches RegP and rRegP. The Q&A comment earlier in this file describes
// r15 as the read-only TLS register.
operand r15_RegP()
%{
  constraint(ALLOC_IN_RC(ptr_r15_reg));
  match(RegP);
  match(rRegP);

  format %{ %}
  interface(REG_INTER);
%}
 5440 
// Long Register
// General long register operand, allocated in register class long_reg.
// Besides RegL it also matches rax_RegL and rdx_RegL.
operand rRegL()
%{
  constraint(ALLOC_IN_RC(long_reg));
  match(RegL);
  match(rax_RegL);
  match(rdx_RegL);

  format %{ %}
  interface(REG_INTER);
%}
 5451 
// Special Registers
// Long operand allocated in register class long_no_rax_rdx_reg.
// Matches RegL and rRegL.
operand no_rax_rdx_RegL()
%{
  constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));
  match(RegL);
  match(rRegL);

  format %{ %}
  interface(REG_INTER);
%}
 5462 
// Long operand allocated in register class long_rax_reg; printed as "RAX".
// Matches RegL and rRegL.
operand rax_RegL()
%{
  constraint(ALLOC_IN_RC(long_rax_reg));
  match(RegL);
  match(rRegL);

  format %{ "RAX" %}
  interface(REG_INTER);
%}
 5472 
// Long operand allocated in register class long_rcx_reg.
// Matches RegL and rRegL.
operand rcx_RegL()
%{
  constraint(ALLOC_IN_RC(long_rcx_reg));
  match(RegL);
  match(rRegL);

  format %{ %}
  interface(REG_INTER);
%}
 5482 
// Long operand allocated in register class long_rdx_reg.
// Matches RegL and rRegL.
operand rdx_RegL()
%{
  constraint(ALLOC_IN_RC(long_rdx_reg));
  match(RegL);
  match(rRegL);

  format %{ %}
  interface(REG_INTER);
%}
 5492 
// Long operand allocated in register class long_r11_reg.
// Matches RegL and rRegL.
operand r11_RegL()
%{
  constraint(ALLOC_IN_RC(long_r11_reg));
  match(RegL);
  match(rRegL);

  format %{ %}
  interface(REG_INTER);
%}
 5502 
// Long operand allocated in register class long_no_rbp_r13_reg.
// Matches RegL, rRegL and the fixed-register long operands rax, rcx, rdx.
operand no_rbp_r13_RegL()
%{
  constraint(ALLOC_IN_RC(long_no_rbp_r13_reg));
  match(RegL);
  match(rRegL);
  match(rax_RegL);
  match(rcx_RegL);
  match(rdx_RegL);

  format %{ %}
  interface(REG_INTER);
%}
 5515 
// Flags register, used as output of compare instructions
// Matches RegFlags, allocated in register class int_flags; printed as
// "RFLAGS".
operand rFlagsReg()
%{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  format %{ "RFLAGS" %}
  interface(REG_INTER);
%}
 5525 
// Flags register, used as output of FLOATING POINT compare instructions
// Same register class (int_flags) and match rule (RegFlags) as rFlagsReg;
// it is a distinct operand type and is printed as "RFLAGS_U".
operand rFlagsRegU()
%{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  format %{ "RFLAGS_U" %}
  interface(REG_INTER);
%}
 5535 
// Flags register operand printed as "RFLAGS_U_CF".
// Matches RegFlags in register class int_flags, but only when APX is not in
// use or AVX10.2 is not supported. Its predicate is the exact negation of
// the one on rFlagsRegUCFE.
operand rFlagsRegUCF() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  predicate(!UseAPX || !VM_Version::supports_avx10_2());

  format %{ "RFLAGS_U_CF" %}
  interface(REG_INTER);
%}
 5544 
// Flags register operand printed as "RFLAGS_U_CFE".
// Matches RegFlags in register class int_flags, but only when both UseAPX
// is set and AVX10.2 is supported. Its predicate is the exact negation of
// the one on rFlagsRegUCF.
operand rFlagsRegUCFE() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  predicate(UseAPX && VM_Version::supports_avx10_2());

  format %{ "RFLAGS_U_CFE" %}
  interface(REG_INTER);
%}
 5553 
// Float register operands
// Matches RegF; allocated in register class float_reg.
operand regF() %{
   constraint(ALLOC_IN_RC(float_reg));
   match(RegF);

   format %{ %}
   interface(REG_INTER);
%}
 5562 
// Float register operands
// Matches RegF; allocated in register class float_reg_legacy.
operand legRegF() %{
   constraint(ALLOC_IN_RC(float_reg_legacy));
   match(RegF);

   format %{ %}
   interface(REG_INTER);
%}
 5571 
// Float register operands
// Matches RegF; allocated in register class float_reg_vl.
operand vlRegF() %{
   constraint(ALLOC_IN_RC(float_reg_vl));
   match(RegF);

   format %{ %}
   interface(REG_INTER);
%}
 5580 
// Double register operands
// Matches RegD; allocated in register class double_reg.
operand regD() %{
   constraint(ALLOC_IN_RC(double_reg));
   match(RegD);

   format %{ %}
   interface(REG_INTER);
%}
 5589 
// Double register operands
// Matches RegD; allocated in register class double_reg_legacy.
operand legRegD() %{
   constraint(ALLOC_IN_RC(double_reg_legacy));
   match(RegD);

   format %{ %}
   interface(REG_INTER);
%}
 5598 
// Double register operands
// Matches RegD; allocated in register class double_reg_vl.
operand vlRegD() %{
   constraint(ALLOC_IN_RC(double_reg_vl));
   match(RegD);

   format %{ %}
   interface(REG_INTER);
%}
 5607 
 5608 //----------Memory Operands----------------------------------------------------
 5609 // Direct Memory Operand
 5610 // operand direct(immP addr)
 5611 // %{
 5612 //   match(addr);
 5613 
 5614 //   format %{ "[$addr]" %}
 5615 //   interface(MEMORY_INTER) %{
 5616 //     base(0xFFFFFFFF);
 5617 //     index(0x4);
 5618 //     scale(0x0);
 5619 //     disp($addr);
 5620 //   %}
 5621 // %}
 5622 
// Indirect Memory Operand
// Address form [reg]: the pointer register itself is the address.
operand indirect(any_RegP reg)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(reg);

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp(0x0);   // No Displacement
  %}
%}
 5637 
// Indirect Memory Plus Short Offset Operand
// Address form [reg + off], where off is an immL8 constant.
operand indOffset8(any_RegP reg, immL8 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);

  format %{ "[$reg + $off (8-bit)]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($off);
  %}
%}
 5652 
// Indirect Memory Plus Long Offset Operand
// Address form [reg + off], where off is an immL32 constant.
operand indOffset32(any_RegP reg, immL32 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);

  format %{ "[$reg + $off (32-bit)]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($off);
  %}
%}
 5667 
// Indirect Memory Plus Index Register Plus Offset Operand
// Address form [reg + lreg + off]: long index register, unscaled, plus an
// immL32 displacement.
operand indIndexOffset(any_RegP reg, rRegL lreg, immL32 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (AddP reg lreg) off);

  op_cost(10);
  format %{"[$reg + $off + $lreg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale(0x0);  // No Scale
    disp($off);
  %}
%}
 5683 
// Indirect Memory Plus Index Register Operand
// Address form [reg + lreg]: long index register, unscaled, with no
// displacement.
operand indIndex(any_RegP reg, rRegL lreg)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg lreg);

  op_cost(10);
  format %{"[$reg + $lreg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale(0x0);  // No Scale
    disp(0x0);   // No Displacement
  %}
%}
 5699 
// Indirect Memory Times Scale Plus Index Register
// Address form [reg + (lreg << scale)]: the shift count of the matched
// LShiftL (an immI2 constant) becomes the scale field.
operand indIndexScale(any_RegP reg, rRegL lreg, immI2 scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg (LShiftL lreg scale));

  op_cost(10);
  format %{"[$reg + $lreg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale($scale);
    disp(0x0);   // No Displacement
  %}
%}
 5715 
// Indirect Memory Times Scale Plus Positive Index Register
// Address form [reg + (ConvI2L(idx) << scale)], with the int register idx
// used directly as the index.
// The predicate walks n->in(3)->in(1) (presumably the ConvI2L node under the
// LShiftL -- confirm) and requires the lower bound (_lo) of its long type
// to be >= 0, i.e. the index is known to be non-negative.
operand indPosIndexScale(any_RegP reg, rRegI idx, immI2 scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  predicate(n->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
  match(AddP reg (LShiftL (ConvI2L idx) scale));

  op_cost(10);
  format %{"[$reg + pos $idx << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($idx);
    scale($scale);
    disp(0x0);   // No Displacement
  %}
%}
 5731 
// Indirect Memory Times Scale Plus Index Register Plus Offset Operand
// Address form [reg + (lreg << scale) + off]: long index register, immI2
// shift count as scale, immL32 displacement.
operand indIndexScaleOffset(any_RegP reg, immL32 off, rRegL lreg, immI2 scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (AddP reg (LShiftL lreg scale)) off);

  op_cost(10);
  format %{"[$reg + $off + $lreg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale($scale);
    disp($off);
  %}
%}
 5747 
// Indirect Memory Plus Positive Index Register Plus Offset Operand
// Address form [reg + ConvI2L(idx) + off], with the int register idx used
// directly as the unscaled index.
// The predicate walks n->in(2)->in(3) (presumably the ConvI2L node of the
// inner AddP -- confirm) and requires the lower bound (_lo) of its long
// type to be >= 0, i.e. the index is known to be non-negative.
operand indPosIndexOffset(any_RegP reg, immL32 off, rRegI idx)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  predicate(n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
  match(AddP (AddP reg (ConvI2L idx)) off);

  op_cost(10);
  format %{"[$reg + $off + $idx]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($idx);
    scale(0x0);  // No Scale
    disp($off);
  %}
%}
 5764 
// Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
// Address form [reg + (ConvI2L(idx) << scale) + off], with the int register
// idx used directly as the index.
// The predicate walks n->in(2)->in(3)->in(1) (presumably the ConvI2L node
// under the LShiftL of the inner AddP -- confirm) and requires the lower
// bound (_lo) of its long type to be >= 0.
operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
  match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);

  op_cost(10);
  format %{"[$reg + $off + $idx << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($idx);
    scale($scale);
    disp($off);
  %}
%}
 5781 
// Indirect Narrow Oop Plus Offset Operand
// Note: the x86 architecture doesn't support "scale * index + offset" without
// a base, so we can't free r12 even with CompressedOops::base() == nullptr.
// Only used when compressed oops are enabled with a shift of times_8.
// The decode is folded into the address: R12 is the base, the narrow
// register is the index, and the scale is fixed at 3.
operand indCompressedOopOffset(rRegN reg, immL32 off) %{
  predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);

  op_cost(10);
  format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
  interface(MEMORY_INTER) %{
    base(0xc); // R12
    index($reg);
    scale(0x3);
    disp($off);
  %}
%}
 5799 
// Indirect Memory Operand (narrow oop base)
// Address form [reg], where reg holds a narrow oop. Only used when
// CompressedOops::shift() == 0; the narrow register is then used directly
// as the base in place of the matched DecodeN.
operand indirectNarrow(rRegN reg)
%{
  predicate(CompressedOops::shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(DecodeN reg);

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp(0x0);   // No Displacement
  %}
%}
 5815 
// Indirect Memory Plus Short Offset Operand (narrow oop base)
// Address form [reg + off] with an immL8 offset, where reg holds a narrow
// oop. Only used when CompressedOops::shift() == 0.
operand indOffset8Narrow(rRegN reg, immL8 off)
%{
  predicate(CompressedOops::shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);

  format %{ "[$reg + $off (8-bit)]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($off);
  %}
%}
 5831 
// Indirect Memory Plus Long Offset Operand (narrow oop base)
// Address form [reg + off] with an immL32 offset, where reg holds a narrow
// oop. Only used when CompressedOops::shift() == 0.
operand indOffset32Narrow(rRegN reg, immL32 off)
%{
  predicate(CompressedOops::shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);

  format %{ "[$reg + $off (32-bit)]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($off);
  %}
%}
 5847 
// Indirect Memory Plus Index Register Plus Offset Operand (narrow oop base)
// Address form [reg + lreg + off], where reg holds a narrow oop.
// Only used when CompressedOops::shift() == 0.
operand indIndexOffsetNarrow(rRegN reg, rRegL lreg, immL32 off)
%{
  predicate(CompressedOops::shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (AddP (DecodeN reg) lreg) off);

  op_cost(10);
  format %{"[$reg + $off + $lreg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale(0x0);  // No Scale
    disp($off);
  %}
%}
 5864 
// Indirect Memory Plus Index Register Operand (narrow oop base)
// Address form [reg + lreg] with no displacement, where reg holds a narrow
// oop. Only used when CompressedOops::shift() == 0.
operand indIndexNarrow(rRegN reg, rRegL lreg)
%{
  predicate(CompressedOops::shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) lreg);

  op_cost(10);
  format %{"[$reg + $lreg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale(0x0);  // No Scale
    disp(0x0);   // No Displacement
  %}
%}
 5881 
// Indirect Memory Times Scale Plus Index Register (narrow oop base)
// Address form [reg + (lreg << scale)], where reg holds a narrow oop.
// Only used when CompressedOops::shift() == 0.
operand indIndexScaleNarrow(rRegN reg, rRegL lreg, immI2 scale)
%{
  predicate(CompressedOops::shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) (LShiftL lreg scale));

  op_cost(10);
  format %{"[$reg + $lreg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale($scale);
    disp(0x0);   // No Displacement
  %}
%}
 5898 
// Indirect Memory Times Scale Plus Index Register Plus Offset Operand
// (narrow oop base)
// Address form [reg + (lreg << scale) + off], where reg holds a narrow oop.
// Only used when CompressedOops::shift() == 0.
operand indIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegL lreg, immI2 scale)
%{
  predicate(CompressedOops::shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);

  op_cost(10);
  format %{"[$reg + $off + $lreg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale($scale);
    disp($off);
  %}
%}
 5915 
// Indirect Memory Plus Positive Index Register Plus Offset Operand
// (narrow oop base)
// Address form [reg + ConvI2L(idx) + off], where reg holds a narrow oop and
// the int register idx is used directly as the unscaled index.
// Requires CompressedOops::shift() == 0 and a non-negative index: the
// predicate walks n->in(2)->in(3) (presumably the ConvI2L node -- confirm)
// and checks that the lower bound (_lo) of its long type is >= 0.
operand indPosIndexOffsetNarrow(rRegN reg, immL32 off, rRegI idx)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
  match(AddP (AddP (DecodeN reg) (ConvI2L idx)) off);

  op_cost(10);
  format %{"[$reg + $off + $idx]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($idx);
    scale(0x0);  // No Scale
    disp($off);
  %}
%}
 5932 
// Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
// (narrow oop base)
// Address form [reg + (ConvI2L(idx) << scale) + off], where reg holds a
// narrow oop and the int register idx is used directly as the index.
// Requires CompressedOops::shift() == 0 and a non-negative index: the
// predicate walks n->in(2)->in(3)->in(1) (presumably the ConvI2L node --
// confirm) and checks that the lower bound (_lo) of its long type is >= 0.
operand indPosIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegI idx, immI2 scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
  match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L idx) scale)) off);

  op_cost(10);
  format %{"[$reg + $off + $idx << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($idx);
    scale($scale);
    disp($off);
  %}
%}
 5949 
 5950 //----------Special Memory Operands--------------------------------------------
 5951 // Stack Slot Operand - This operand is used for loading and storing temporary
 5952 //                      values on the stack where a match requires a value to
 5953 //                      flow through memory.
// Stack slot holding a pointer (sRegP): addressed as RSP plus the slot's
// stack offset, allocated in register class stack_slots.
operand stackSlotP(sRegP reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // RSP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}
 5967 
// Stack slot holding an int (sRegI): addressed as RSP plus the slot's
// stack offset, allocated in register class stack_slots.
operand stackSlotI(sRegI reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // RSP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}
 5981 
// Stack slot holding a float (sRegF): addressed as RSP plus the slot's
// stack offset, allocated in register class stack_slots.
operand stackSlotF(sRegF reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // RSP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}
 5995 
// Stack slot holding a double (sRegD): addressed as RSP plus the slot's
// stack offset, allocated in register class stack_slots.
operand stackSlotD(sRegD reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // RSP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}
// Stack slot holding a long (sRegL): addressed as RSP plus the slot's
// stack offset, allocated in register class stack_slots.
operand stackSlotL(sRegL reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // RSP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}
 6022 
 6023 //----------Conditional Branch Operands----------------------------------------
 6024 // Comparison Op  - This is the operation of the comparison, and is limited to
 6025 //                  the following set of codes:
 6026 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
 6027 //
 6028 // Other attributes of the comparison, such as unsignedness, are specified
 6029 // by the comparison instruction that sets a condition code flags register.
 6030 // That result is represented by a flags operand whose subtype is appropriate
 6031 // to the unsignedness (etc.) of the comparison.
 6032 //
 6033 // Later, the instruction which matches both the Comparison Op (a Bool) and
 6034 // the flags (produced by the Cmp) specifies the coding of the comparison op
 6035 // by matching a specific subtype of Bool operand below, such as cmpOpU.
 6036 
// Comparison Code
// Matches any Bool node (no predicate). Each test is given a condition-code
// value and a mnemonic suffix: e/ne for equality, l/ge/le/g for ordering,
// and o/no for overflow.
operand cmpOp()
%{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0xc, "l");
    greater_equal(0xd, "ge");
    less_equal(0xe, "le");
    greater(0xf, "g");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}
 6054 
// Comparison Code, unsigned compare.  Used by FP also, with
// C2 (unordered) turned into GT or LT already.  The other bits
// C0 and C3 are turned into Carry & Zero flags.
// Matches any Bool node (no predicate). It differs from cmpOp only in the
// four ordering tests, which use b/ae/be/a in place of l/ge/le/g.
operand cmpOpU()
%{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "ae");
    less_equal(0x6, "be");
    greater(0x7, "a");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}
 6074 
 6075 
// Floating comparisons that don't require any fixup for the unordered case.
// If both inputs of the comparison are the same, ZF is always set so we
// don't need to use cmpOpUCF2 for eq/ne.
// Only used when APX is not in use or AVX10.2 is not supported. Matches a
// Bool whose test is lt, ge, le or gt, or any Bool whose compare has the
// same node as both inputs. Here equal/not_equal are encoded as np/p
// rather than e/ne.
operand cmpOpUCF() %{
  match(Bool);
  predicate((!UseAPX || !VM_Version::supports_avx10_2()) &&
            (n->as_Bool()->_test._test == BoolTest::lt ||
             n->as_Bool()->_test._test == BoolTest::ge ||
             n->as_Bool()->_test._test == BoolTest::le ||
             n->as_Bool()->_test._test == BoolTest::gt ||
             n->in(1)->in(1) == n->in(1)->in(2)));
  format %{ "" %}
  interface(COND_INTER) %{
    equal(0xb, "np");
    not_equal(0xa, "p");
    less(0x2, "b");
    greater_equal(0x3, "ae");
    less_equal(0x6, "be");
    greater(0x7, "a");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}
 6099 
 6100 
// Floating comparisons that can be fixed up with extra conditional jumps
// Only used when APX is not in use or AVX10.2 is not supported. Matches a
// Bool whose test is ne or eq and whose compare has two different input
// nodes; these are the cases cmpOpUCF does not accept.
operand cmpOpUCF2() %{
  match(Bool);
  predicate((!UseAPX || !VM_Version::supports_avx10_2()) &&
            (n->as_Bool()->_test._test == BoolTest::ne ||
             n->as_Bool()->_test._test == BoolTest::eq) &&
            n->in(1)->in(1) != n->in(1)->in(2));
  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "ae");
    less_equal(0x6, "be");
    greater(0x7, "a");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}
 6120 
 6121 
// Floating point comparisons that set condition flags to test more directly,
// Unsigned tests are used for G (>) and GE (>=) conditions while signed tests
// are used for L (<) and LE (<=) conditions. It's important to convert these
// latter conditions to ones that use unsigned tests before passing into an
// instruction because the preceding comparison might be based on a three way
// comparison (CmpF3 or CmpD3) that also assigns unordered outcomes to -1.
//
// Only used when both UseAPX is set and AVX10.2 is supported. Matches a
// Bool whose test is ne, eq, lt, ge, le or gt. The condition table is the
// same as cmpOpU's: e/ne, b/ae/be/a and o/no.
operand cmpOpUCFE()
%{
  match(Bool);
  predicate((UseAPX && VM_Version::supports_avx10_2()) &&
            (n->as_Bool()->_test._test == BoolTest::ne ||
             n->as_Bool()->_test._test == BoolTest::eq ||
             n->as_Bool()->_test._test == BoolTest::lt ||
             n->as_Bool()->_test._test == BoolTest::ge ||
             n->as_Bool()->_test._test == BoolTest::le ||
             n->as_Bool()->_test._test == BoolTest::gt));

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "ae");
    less_equal(0x6, "be");
    greater(0x7, "a");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}
 6151 
// Operands for bound floating point register arguments
// Matches VecX; allocated in register class xmm0_reg.
operand rxmm0() %{
  constraint(ALLOC_IN_RC(xmm0_reg));
  match(VecX);
  format%{%}
  interface(REG_INTER);
%}
 6159 
 6160 // Vectors
 6161 
// Dummy generic vector class. Should be used for all vector operands.
// Replaced with vec[SDXYZ] during post-selection pass.
// Matches every vector ideal type (VecS, VecD, VecX, VecY, VecZ) and uses
// the "dynamic" register class.
operand vec() %{
  constraint(ALLOC_IN_RC(dynamic));
  match(VecX);
  match(VecY);
  match(VecZ);
  match(VecS);
  match(VecD);

  format %{ %}
  interface(REG_INTER);
%}
 6175 
// Dummy generic legacy vector class. Should be used for all legacy vector operands.
// Replaced with legVec[SDXYZ] during post-selection cleanup.
// Note: legacy register class is used to avoid extra (unneeded in 32-bit VM)
// runtime code generation via reg_class_dynamic.
// Matches every vector ideal type (VecS, VecD, VecX, VecY, VecZ).
operand legVec() %{
  constraint(ALLOC_IN_RC(dynamic));
  match(VecX);
  match(VecY);
  match(VecZ);
  match(VecS);
  match(VecD);

  format %{ %}
  interface(REG_INTER);
%}
 6191 
// Replaces vec during post-selection cleanup. See above.
// Matches VecS; allocated in register class vectors_reg_vlbwdq.
operand vecS() %{
  constraint(ALLOC_IN_RC(vectors_reg_vlbwdq));
  match(VecS);

  format %{ %}
  interface(REG_INTER);
%}
 6200 
// Replaces legVec during post-selection cleanup. See above.
// Matches VecS; allocated in register class vectors_reg_legacy.
operand legVecS() %{
  constraint(ALLOC_IN_RC(vectors_reg_legacy));
  match(VecS);

  format %{ %}
  interface(REG_INTER);
%}
 6209 
// Replaces vec during post-selection cleanup. See above.
// Matches VecD; allocated in register class vectord_reg_vlbwdq.
operand vecD() %{
  constraint(ALLOC_IN_RC(vectord_reg_vlbwdq));
  match(VecD);

  format %{ %}
  interface(REG_INTER);
%}
 6218 
// Replaces legVec during post-selection cleanup. See above.
// Matches VecD; allocated in register class vectord_reg_legacy.
operand legVecD() %{
  constraint(ALLOC_IN_RC(vectord_reg_legacy));
  match(VecD);

  format %{ %}
  interface(REG_INTER);
%}
 6227 
// Replaces vec during post-selection cleanup. See above.
// Matches VecX; allocated in register class vectorx_reg_vlbwdq.
operand vecX() %{
  constraint(ALLOC_IN_RC(vectorx_reg_vlbwdq));
  match(VecX);

  format %{ %}
  interface(REG_INTER);
%}
 6236 
// Replaces legVec during post-selection cleanup. See above.
// Matches VecX; allocated in register class vectorx_reg_legacy.
operand legVecX() %{
  constraint(ALLOC_IN_RC(vectorx_reg_legacy));
  match(VecX);

  format %{ %}
  interface(REG_INTER);
%}
 6245 
// Replaces vec during post-selection cleanup. See above.
// Matches VecY; allocated in register class vectory_reg_vlbwdq.
operand vecY() %{
  constraint(ALLOC_IN_RC(vectory_reg_vlbwdq));
  match(VecY);

  format %{ %}
  interface(REG_INTER);
%}
 6254 
// Replaces legVec during post-selection cleanup. See above.
// Matches VecY; allocated in register class vectory_reg_legacy.
operand legVecY() %{
  constraint(ALLOC_IN_RC(vectory_reg_legacy));
  match(VecY);

  format %{ %}
  interface(REG_INTER);
%}
 6263 
// Replaces vec during post-selection cleanup. See above.
// Matches VecZ; allocated in register class vectorz_reg.
operand vecZ() %{
  constraint(ALLOC_IN_RC(vectorz_reg));
  match(VecZ);

  format %{ %}
  interface(REG_INTER);
%}
 6272 
 6273 // VecZ register operand in class vectorz_reg_legacy. Replaces legVec during post-selection cleanup. See above.
 6274 operand legVecZ() %{
 6275   constraint(ALLOC_IN_RC(vectorz_reg_legacy)); // register class this operand is allocated from
 6276   match(VecZ);
 6277 
 6278   format %{ %}
 6279   interface(REG_INTER);
 6280 %}
 6281 
 6282 //----------OPERAND CLASSES----------------------------------------------------
 6283 // Operand Classes are groups of operands that are used to simplify
 6284 // instruction definitions by not requiring the AD writer to specify separate
 6285 // instructions for every form of operand when the instruction accepts
 6286 // multiple operand types with the same basic encoding and format.  The classic
 6287 // case of this is memory operands.
 6288 
 6289 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
 6290                indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
 6291                indCompressedOopOffset,
 6292                indirectNarrow, indOffset8Narrow, indOffset32Narrow,
 6293                indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
 6294                indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow); // all address-form operands accepted wherever a rule declares a 'memory' operand
 6295 
 6296 //----------PIPELINE-----------------------------------------------------------
 6297 // Rules which define the behavior of the target architecture's pipeline.
 6298 pipeline %{
 6299 
 6300 //----------ATTRIBUTES---------------------------------------------------------
 6301 attributes %{
 6302   variable_size_instructions;        // Variable size instructions
 6303   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
 6304   instruction_unit_size = 1;         // Instruction size unit is 1 byte
 6305   instruction_fetch_unit_size = 16;  // The processor fetches one line
 6306   instruction_fetch_units = 1;       // of 16 bytes
 6307 %}
 6308 
 6309 //----------RESOURCES----------------------------------------------------------
 6310 // Resources are the functional units available to the machine
 6311 
 6312 // Generic P2/P3 pipeline
 6313 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
 6314 // 3 instructions decoded per cycle.
 6315 // 2 load/store ops per cycle, 1 branch, 1 FPU,
 6316 // 3 ALU op, only ALU0 handles mul instructions.
 6317 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
 6318            MS0, MS1, MS2, MEM = MS0 | MS1 | MS2,
 6319            BR, FPU,
 6320            ALU0, ALU1, ALU2, ALU = ALU0 | ALU1 | ALU2); // NOTE(review): three MS units are declared although the text above says 2 load/store ops per cycle - confirm
 6321 
 6322 //----------PIPELINE DESCRIPTION-----------------------------------------------
 6323 // Pipeline Description specifies the stages in the machine's pipeline
 6324 
 6325 // Generic P2/P3 pipeline
 6326 pipe_desc(S0, S1, S2, S3, S4, S5); // six stages; the pipe classes below refer to them by these names
 6327 
 6328 //----------PIPELINE CLASSES---------------------------------------------------
 6329 // Pipeline Classes describe the stages in which input and output are
 6330 // referenced by the hardware pipeline.
 6331 
 6332 // Naming convention: ialu or fpu
 6333 // Then: _reg
 6334 // Then: _reg if there is a 2nd register
 6335 // Then: _long if it's a pair of instructions implementing a long
 6336 // Then: _fat if it requires the big decoder
 6337 //   Or: _mem if it requires the big decoder and a memory unit.
 6338 
 6339 // Integer ALU reg operation: one instruction that reads and then writes dst
 6340 pipe_class ialu_reg(rRegI dst)
 6341 %{
 6342     single_instruction;
 6343     dst    : S4(write); // result written in stage S4
 6344     dst    : S3(read);  // same register read in stage S3
 6345     DECODE : S0;        // any decoder
 6346     ALU    : S3;        // any alu
 6347 %}
 6348 
 6349 // Long ALU reg operation: modelled as a pair of instructions on one register
 6350 pipe_class ialu_reg_long(rRegL dst)
 6351 %{
 6352     instruction_count(2);
 6353     dst    : S4(write);
 6354     dst    : S3(read);
 6355     DECODE : S0(2);     // any 2 decoders
 6356     ALU    : S3(2);     // any 2 alus (three are declared in resources)
 6357 %}
 6358 
 6359 // Integer ALU reg operation using big decoder (D0 instead of any decoder)
 6360 pipe_class ialu_reg_fat(rRegI dst)
 6361 %{
 6362     single_instruction;
 6363     dst    : S4(write);
 6364     dst    : S3(read);
 6365     D0     : S0;        // big decoder only
 6366     ALU    : S3;        // any alu
 6367 %}
 6368 
 6369 // Integer ALU reg-reg operation: src read in S3, dst written in S4
 6370 pipe_class ialu_reg_reg(rRegI dst, rRegI src)
 6371 %{
 6372     single_instruction;
 6373     dst    : S4(write);
 6374     src    : S3(read);
 6375     DECODE : S0;        // any decoder
 6376     ALU    : S3;        // any alu
 6377 %}
 6378 
 6379 // Integer ALU operation using big decoder; note that src is declared as a memory operand here
 6380 pipe_class ialu_reg_reg_fat(rRegI dst, memory src)
 6381 %{
 6382     single_instruction;
 6383     dst    : S4(write);
 6384     src    : S3(read);
 6385     D0     : S0;        // big decoder only
 6386     ALU    : S3;        // any alu (no MEM unit is reserved by this class)
 6387 %}
 6388 
 6389 // Integer ALU reg-mem operation: memory read in S3, ALU in S4, dst written in S5
 6390 pipe_class ialu_reg_mem(rRegI dst, memory mem)
 6391 %{
 6392     single_instruction;
 6393     dst    : S5(write);
 6394     mem    : S3(read);
 6395     D0     : S0;        // big decoder only
 6396     ALU    : S4;        // any alu
 6397     MEM    : S3;        // any mem
 6398 %}
 6399 
 6400 // Integer mem operation (prefetch): memory operand only, no ALU reserved
 6401 pipe_class ialu_mem(memory mem)
 6402 %{
 6403     single_instruction;
 6404     mem    : S3(read);
 6405     D0     : S0;        // big decoder only
 6406     MEM    : S3;        // any mem
 6407 %}
 6408 
 6409 // Integer Store to Memory: address operand read in S3, stored register read in S5
 6410 pipe_class ialu_mem_reg(memory mem, rRegI src)
 6411 %{
 6412     single_instruction;
 6413     mem    : S3(read);
 6414     src    : S5(read);
 6415     D0     : S0;        // big decoder only
 6416     ALU    : S4;        // any alu
 6417     MEM    : S3;        // any mem
 6418 %}
 6419 
 6420 // // Long Store to Memory
 6421 // pipe_class ialu_mem_long_reg(memory mem, rRegL src)
 6422 // %{
 6423 //     instruction_count(2);
 6424 //     mem    : S3(read);
 6425 //     src    : S5(read);
 6426 //     D0     : S0(2);          // big decoder only; twice
 6427 //     ALU    : S4(2);     // any 2 alus
 6428 //     MEM    : S3(2);  // Both mems
 6429 // %}
 6430 
 6431 // Integer Store to Memory with no register source (immediate form, going by the class name)
 6432 pipe_class ialu_mem_imm(memory mem)
 6433 %{
 6434     single_instruction;
 6435     mem    : S3(read);
 6436     D0     : S0;        // big decoder only
 6437     ALU    : S4;        // any alu
 6438     MEM    : S3;        // any mem
 6439 %}
 6440 
 6441 // Integer ALU0 reg-reg operation: like ialu_reg_reg but pinned to D0 and ALU0
 6442 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src)
 6443 %{
 6444     single_instruction;
 6445     dst    : S4(write);
 6446     src    : S3(read);
 6447     D0     : S0;        // Big decoder only
 6448     ALU0   : S3;        // only alu0
 6449 %}
 6450 
 6451 // Integer ALU0 reg-mem operation: like ialu_reg_mem but pinned to ALU0
 6452 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem)
 6453 %{
 6454     single_instruction;
 6455     dst    : S5(write);
 6456     mem    : S3(read);
 6457     D0     : S0;        // big decoder only
 6458     ALU0   : S4;        // ALU0 only
 6459     MEM    : S3;        // any mem
 6460 %}
 6461 
 6462 // Integer ALU reg-reg operation whose result is the flags register cr
 6463 pipe_class ialu_cr_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2)
 6464 %{
 6465     single_instruction;
 6466     cr     : S4(write); // flags written in S4
 6467     src1   : S3(read);
 6468     src2   : S3(read);
 6469     DECODE : S0;        // any decoder
 6470     ALU    : S3;        // any alu
 6471 %}
 6472 
 6473 // Integer ALU reg-imm operation whose result is the flags register cr
 6474 pipe_class ialu_cr_reg_imm(rFlagsReg cr, rRegI src1)
 6475 %{
 6476     single_instruction;
 6477     cr     : S4(write); // flags written in S4
 6478     src1   : S3(read);
 6479     DECODE : S0;        // any decoder
 6480     ALU    : S3;        // any alu
 6481 %}
 6482 
 6483 // Integer ALU reg-mem operation whose result is the flags register cr
 6484 pipe_class ialu_cr_reg_mem(rFlagsReg cr, rRegI src1, memory src2)
 6485 %{
 6486     single_instruction;
 6487     cr     : S4(write); // flags written in S4
 6488     src1   : S3(read);
 6489     src2   : S3(read);
 6490     D0     : S0;        // big decoder only
 6491     ALU    : S4;        // any alu
 6492     MEM    : S3;        // any mem
 6493 %}
 6494 
 6495 // Four-instruction sequence that only reads p, q and y (presumably a compare-less-than idiom - confirm at its users)
 6496 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y)
 6497 %{
 6498     instruction_count(4);
 6499     y      : S4(read);
 6500     q      : S3(read);
 6501     p      : S3(read);
 6502     DECODE : S0(4);     // 4 decode slots, any decoder
 6503 %}
 6504 
 6505 // Conditional move reg-reg: reads src and the flags in S3, writes dst in S4
 6506 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, rFlagsReg cr)
 6507 %{
 6508     single_instruction;
 6509     dst    : S4(write);
 6510     src    : S3(read);
 6511     cr     : S3(read);
 6512     DECODE : S0;        // any decoder (no ALU unit is reserved)
 6513 %}
 6514 
 6515 // Conditional move reg-mem: as pipe_cmov_reg, with a memory source and a MEM unit
 6516 pipe_class pipe_cmov_mem( rFlagsReg cr, rRegI dst, memory src)
 6517 %{
 6518     single_instruction;
 6519     dst    : S4(write);
 6520     src    : S3(read);
 6521     cr     : S3(read);
 6522     DECODE : S0;        // any decoder
 6523     MEM    : S3;        // any mem
 6524 %}
 6525 
 6526 // Conditional move reg-reg long: declared single_instruction yet reserves two decode slots
 6527 pipe_class pipe_cmov_reg_long( rFlagsReg cr, rRegL dst, rRegL src)
 6528 %{
 6529     single_instruction;
 6530     dst    : S4(write);
 6531     src    : S3(read);
 6532     cr     : S3(read);
 6533     DECODE : S0(2);     // any 2 decoders
 6534 %}
 6535 
 6536 // Float single-register operation: two instructions, dst is only declared as read
 6537 pipe_class fpu_reg(regD dst)
 6538 %{
 6539     instruction_count(2);
 6540     dst    : S3(read);
 6541     DECODE : S0(2);     // any 2 decoders
 6542     FPU    : S3;        // the single FPU unit
 6543 %}
 6544 
 6545 // Float reg-reg operation: two instructions, src read in S3, dst written in S4
 6546 pipe_class fpu_reg_reg(regD dst, regD src)
 6547 %{
 6548     instruction_count(2);
 6549     dst    : S4(write);
 6550     src    : S3(read);
 6551     DECODE : S0(2);     // any 2 decoders
 6552     FPU    : S3;        // the single FPU unit
 6553 %}
 6554 
 6555 // Float reg-reg-reg operation: three instructions, two register sources
 6556 pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2)
 6557 %{
 6558     instruction_count(3);
 6559     dst    : S4(write);
 6560     src1   : S3(read);
 6561     src2   : S3(read);
 6562     DECODE : S0(3);     // any 3 decoders
 6563     FPU    : S3(2);     // FPU reserved with a count of 2
 6564 %}
 6565 
 6566 // Float reg-reg-reg-reg operation: four instructions, three register sources
 6567 pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3)
 6568 %{
 6569     instruction_count(4);
 6570     dst    : S4(write);
 6571     src1   : S3(read);
 6572     src2   : S3(read);
 6573     src3   : S3(read);
 6574     DECODE : S0(4);     // 4 decode slots (only 3 decoders are declared)
 6575     FPU    : S3(2);     // FPU reserved with a count of 2
 6576 %}
 6577 
 6578 // Float reg-mem-reg-reg operation: four instructions, first source comes from memory
 6579 pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3)
 6580 %{
 6581     instruction_count(4);
 6582     dst    : S4(write);
 6583     src1   : S3(read);
 6584     src2   : S3(read);
 6585     src3   : S3(read);
 6586     DECODE : S1(3);     // any 3 decoders
 6587     D0     : S0;        // Big decoder only
 6588     FPU    : S3(2);     // FPU reserved with a count of 2
 6589     MEM    : S3;        // any mem
 6590 %}
 6591 
 6592 // Float reg-mem operation: two instructions, memory read in S3, dst written in S5
 6593 pipe_class fpu_reg_mem(regD dst, memory mem)
 6594 %{
 6595     instruction_count(2);
 6596     dst    : S5(write);
 6597     mem    : S3(read);
 6598     D0     : S0;        // big decoder only
 6599     DECODE : S1;        // any decoder for FPU POP
 6600     FPU    : S4;        // the single FPU unit
 6601     MEM    : S3;        // any mem
 6602 %}
 6603 
 6604 // Float reg-reg-mem operation: three instructions, one register and one memory source
 6605 pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem)
 6606 %{
 6607     instruction_count(3);
 6608     dst    : S5(write);
 6609     src1   : S3(read);
 6610     mem    : S3(read);
 6611     D0     : S0;        // big decoder only
 6612     DECODE : S1(2);     // any decoder for FPU POP
 6613     FPU    : S4;        // the single FPU unit
 6614     MEM    : S3;        // any mem
 6615 %}
 6616 
 6617 // Float mem-reg operation: two instructions; both operands are declared as reads
 6618 pipe_class fpu_mem_reg(memory mem, regD src)
 6619 %{
 6620     instruction_count(2);
 6621     src    : S5(read);
 6622     mem    : S3(read);
 6623     DECODE : S0;        // any decoder for FPU PUSH
 6624     D0     : S1;        // big decoder only
 6625     FPU    : S4;        // the single FPU unit
 6626     MEM    : S3;        // any mem
 6627 %}
 6628 
 6629 pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2) // float mem-reg-reg: memory operand plus two register sources
 6630 %{
 6631     instruction_count(3);
 6632     src1   : S3(read);
 6633     src2   : S3(read);
 6634     mem    : S3(read);
 6635     DECODE : S0(2);     // any decoder for FPU PUSH
 6636     D0     : S1;        // big decoder only
 6637     FPU    : S4;        // the single FPU unit
 6638     MEM    : S3;        // any mem
 6639 %}
 6640 
 6641 pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2) // float mem-reg-mem: two memory operands, so MEM is reserved twice
 6642 %{
 6643     instruction_count(3);
 6644     src1   : S3(read);
 6645     src2   : S3(read);
 6646     mem    : S4(read);
 6647     DECODE : S0;        // any decoder for FPU PUSH
 6648     D0     : S0(2);     // big decoder only
 6649     FPU    : S4;        // the single FPU unit
 6650     MEM    : S3(2);     // any mem
 6651 %}
 6652 
 6653 pipe_class fpu_mem_mem(memory dst, memory src1) // float mem-mem: two instructions, no FPU unit reserved
 6654 %{
 6655     instruction_count(2);
 6656     src1   : S3(read);
 6657     dst    : S4(read);  // the destination memory operand is declared as a read
 6658     D0     : S0(2);     // big decoder only
 6659     MEM    : S3(2);     // any mem
 6660 %}
 6661 
 6662 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) // float mem-mem-mem: three instructions, three memory operands
 6663 %{
 6664     instruction_count(3);
 6665     src1   : S3(read);
 6666     src2   : S3(read);
 6667     dst    : S4(read);  // the destination memory operand is declared as a read
 6668     D0     : S0(3);     // big decoder only
 6669     FPU    : S4;        // the single FPU unit
 6670     MEM    : S3(3);     // any mem
 6671 %}
 6672 
 6673 pipe_class fpu_mem_reg_con(memory mem, regD src1) // float mem-reg with a constant (going by the class name); MEM is reserved twice
 6674 %{
 6675     instruction_count(3);
 6676     src1   : S4(read);
 6677     mem    : S4(read);
 6678     DECODE : S0;        // any decoder for FPU PUSH
 6679     D0     : S0(2);     // big decoder only
 6680     FPU    : S4;        // the single FPU unit
 6681     MEM    : S3(2);     // any mem
 6682 %}
 6683 
 6684 // Float load constant: two instructions, only dst is an operand; a MEM unit is reserved for the load
 6685 pipe_class fpu_reg_con(regD dst)
 6686 %{
 6687     instruction_count(2);
 6688     dst    : S5(write);
 6689     D0     : S0;        // big decoder only for the load
 6690     DECODE : S1;        // any decoder for FPU POP
 6691     FPU    : S4;        // the single FPU unit
 6692     MEM    : S3;        // any mem
 6693 %}
 6694 
 6695 // Float reg-reg operation with a constant load: three instructions, src read in S3
 6696 pipe_class fpu_reg_reg_con(regD dst, regD src)
 6697 %{
 6698     instruction_count(3);
 6699     dst    : S5(write);
 6700     src    : S3(read);
 6701     D0     : S0;        // big decoder only for the load
 6702     DECODE : S1(2);     // any decoder for FPU POP
 6703     FPU    : S4;        // the single FPU unit
 6704     MEM    : S3;        // any mem
 6705 %}
 6706 
 6707 // Unconditional branch: one instruction that reserves only the branch unit
 6708 pipe_class pipe_jmp(label labl)
 6709 %{
 6710     single_instruction;
 6711     BR   : S3;          // branch unit in S3
 6712 %}
 6713 
 6714 // Conditional branch: reads the flags in S1, branch unit in S3
 6715 pipe_class pipe_jcc(cmpOp cmp, rFlagsReg cr, label labl)
 6716 %{
 6717     single_instruction;
 6718     cr    : S1(read);   // flags consumed early, in S1
 6719     BR    : S3;         // branch unit in S3
 6720 %}
 6721 
 6722 // Allocation idiom: serialized compare-exchange style sequence with a fixed latency of 6
 6723 pipe_class pipe_cmpxchg(rRegP dst, rRegP heap_ptr)
 6724 %{
 6725     instruction_count(1); force_serialization;
 6726     fixed_latency(6);
 6727     heap_ptr : S3(read);
 6728     DECODE   : S0(3);    // 3 decode slots
 6729     D0       : S2;       // big decoder, in S2
 6730     MEM      : S3;       // any mem
 6731     ALU      : S3(2);    // any 2 alus
 6732     dst      : S5(write);
 6733     BR       : S5;       // branch unit in S5
 6734 %}
 6735 
 6736 // Generic big/slow expanded idiom: 10 instructions over multiple bundles, serialized, latency 100
 6737 pipe_class pipe_slow()
 6738 %{
 6739     instruction_count(10); multiple_bundles; force_serialization;
 6740     fixed_latency(100);
 6741     D0  : S0(2);        // big decoder, count 2
 6742     MEM : S3(2);        // any mem, count 2
 6743 %}
 6744 
 6745 // The real do-nothing guy: zero instructions and no resources
 6746 pipe_class empty()
 6747 %{
 6748     instruction_count(0);
 6749 %}
 6750 
 6751 // Define the class for the Nop node: MachNop is given the 'empty' pipe class
 6752 define
 6753 %{
 6754    MachNop = empty;
 6755 %}
 6756 
 6757 %}
 6758 
 6759 //----------INSTRUCTIONS-------------------------------------------------------
 6760 //
 6761 // match      -- States which machine-independent subtree may be replaced
 6762 //               by this instruction.
 6763 // ins_cost   -- The estimated cost of this instruction is used by instruction
 6764 //               selection to identify a minimum cost tree of machine
 6765 //               instructions that matches a tree of machine-independent
 6766 //               instructions.
 6767 // format     -- A string providing the disassembly for this instruction.
 6768 //               The value of an instruction's operand may be inserted
 6769 //               by referring to it with a '$' prefix.
 6770 // opcode     -- Three instruction opcodes may be provided.  These are referred
 6771 //               to within an encode class as $primary, $secondary, and $tertiary
 6772 //               respectively.  The primary opcode is commonly used to
 6773 //               indicate the type of machine instruction, while secondary
 6774 //               and tertiary are often used for prefix options or addressing
 6775 //               modes.
 6776 // ins_encode -- A list of encode classes with parameters. The encode class
 6777 //               name must have been defined in an 'enc_class' specification
 6778 //               in the encode section of the architecture description.
 6779 
 6780 // ============================================================================
 6781 
 6782 instruct ShouldNotReachHere() %{ // rule for Halt nodes
 6783   match(Halt);
 6784   format %{ "stop\t# ShouldNotReachHere" %}
 6785   ins_encode %{
 6786     if (is_reachable()) { // nothing at all is emitted when is_reachable() is false
 6787       const char* str = __ code_string(_halt_reason);
 6788       __ stop(str); // stop with the halt reason text
 6789     }
 6790   %}
 6791   ins_pipe(pipe_slow);
 6792 %}
 6793 
 6794 // ============================================================================
 6795 
 6796 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
 6797 // Placeholder float move regF -> vlRegF; never encoded
 6798 instruct MoveF2VL(vlRegF dst, regF src) %{
 6799   match(Set dst src);
 6800   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
 6801   ins_encode %{
 6802     ShouldNotReachHere(); // reaching the encoder with this rule is an error
 6803   %}
 6804   ins_pipe( fpu_reg_reg );
 6805 %}
 6806 
 6807 // Placeholder float move regF -> legRegF; never encoded
 6808 instruct MoveF2LEG(legRegF dst, regF src) %{
 6809   match(Set dst src);
 6810   format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
 6811   ins_encode %{
 6812     ShouldNotReachHere(); // reaching the encoder with this rule is an error
 6813   %}
 6814   ins_pipe( fpu_reg_reg );
 6815 %}
 6816 
 6817 // Placeholder float move vlRegF -> regF; never encoded
 6818 instruct MoveVL2F(regF dst, vlRegF src) %{
 6819   match(Set dst src);
 6820   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
 6821   ins_encode %{
 6822     ShouldNotReachHere(); // reaching the encoder with this rule is an error
 6823   %}
 6824   ins_pipe( fpu_reg_reg );
 6825 %}
 6826 
 6827 // Placeholder float move legRegF -> regF; never encoded
 6828 instruct MoveLEG2F(regF dst, legRegF src) %{
 6829   match(Set dst src);
 6830   format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
 6831   ins_encode %{
 6832     ShouldNotReachHere(); // reaching the encoder with this rule is an error
 6833   %}
 6834   ins_pipe( fpu_reg_reg );
 6835 %}
 6836 
 6837 // Placeholder double move regD -> vlRegD; never encoded
 6838 instruct MoveD2VL(vlRegD dst, regD src) %{
 6839   match(Set dst src);
 6840   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
 6841   ins_encode %{
 6842     ShouldNotReachHere(); // reaching the encoder with this rule is an error
 6843   %}
 6844   ins_pipe( fpu_reg_reg );
 6845 %}
 6846 
 6847 // Placeholder double move regD -> legRegD; never encoded
 6848 instruct MoveD2LEG(legRegD dst, regD src) %{
 6849   match(Set dst src);
 6850   format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
 6851   ins_encode %{
 6852     ShouldNotReachHere(); // reaching the encoder with this rule is an error
 6853   %}
 6854   ins_pipe( fpu_reg_reg );
 6855 %}
 6856 
 6857 // Placeholder double move vlRegD -> regD; never encoded
 6858 instruct MoveVL2D(regD dst, vlRegD src) %{
 6859   match(Set dst src);
 6860   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
 6861   ins_encode %{
 6862     ShouldNotReachHere(); // reaching the encoder with this rule is an error
 6863   %}
 6864   ins_pipe( fpu_reg_reg );
 6865 %}
 6866 
 6867 // Placeholder double move legRegD -> regD; never encoded
 6868 instruct MoveLEG2D(regD dst, legRegD src) %{
 6869   match(Set dst src);
 6870   format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
 6871   ins_encode %{
 6872     ShouldNotReachHere(); // reaching the encoder with this rule is an error
 6873   %}
 6874   ins_pipe( fpu_reg_reg );
 6875 %}
 6876 
 6877 //----------Load/Store/Move Instructions---------------------------------------
 6878 //----------Load Instructions--------------------------------------------------
 6879 
 6880 // Load Byte (8 bit signed): a LoadB becomes one movsbl from memory into an int register
 6881 instruct loadB(rRegI dst, memory mem)
 6882 %{
 6883   match(Set dst (LoadB mem));
 6884 
 6885   ins_cost(125);
 6886   format %{ "movsbl  $dst, $mem\t# byte" %}
 6887 
 6888   ins_encode %{
 6889     __ movsbl($dst$$Register, $mem$$Address);
 6890   %}
 6891 
 6892   ins_pipe(ialu_reg_mem); // scheduled as an integer reg-mem operation
 6893 %}
 6894 
 6895 // Load Byte (8 bit signed) into Long Register: folds the ConvI2L into one movsbq
 6896 instruct loadB2L(rRegL dst, memory mem)
 6897 %{
 6898   match(Set dst (ConvI2L (LoadB mem)));
 6899 
 6900   ins_cost(125);
 6901   format %{ "movsbq  $dst, $mem\t# byte -> long" %}
 6902 
 6903   ins_encode %{
 6904     __ movsbq($dst$$Register, $mem$$Address);
 6905   %}
 6906 
 6907   ins_pipe(ialu_reg_mem); // scheduled as an integer reg-mem operation
 6908 %}
 6909 
 6910 // Load Unsigned Byte (8 bit UNsigned): a LoadUB becomes one movzbl from memory
 6911 instruct loadUB(rRegI dst, memory mem)
 6912 %{
 6913   match(Set dst (LoadUB mem));
 6914 
 6915   ins_cost(125);
 6916   format %{ "movzbl  $dst, $mem\t# ubyte" %}
 6917 
 6918   ins_encode %{
 6919     __ movzbl($dst$$Register, $mem$$Address);
 6920   %}
 6921 
 6922   ins_pipe(ialu_reg_mem); // scheduled as an integer reg-mem operation
 6923 %}
 6924 
 6925 // Load Unsigned Byte (8 bit UNsigned) into Long Register: folds the ConvI2L into one movzbq
 6926 instruct loadUB2L(rRegL dst, memory mem)
 6927 %{
 6928   match(Set dst (ConvI2L (LoadUB mem)));
 6929 
 6930   ins_cost(125);
 6931   format %{ "movzbq  $dst, $mem\t# ubyte -> long" %}
 6932 
 6933   ins_encode %{
 6934     __ movzbq($dst$$Register, $mem$$Address);
 6935   %}
 6936 
 6937   ins_pipe(ialu_reg_mem); // scheduled as an integer reg-mem operation
 6938 %}
 6939 
 6940 // Load Unsigned Byte (8 bit UNsigned) with 32-bit mask into Long Register: movzbq followed by andl with the narrowed mask
 6941 instruct loadUB2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
 6942   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
 6943   effect(KILL cr); // the flags register is declared killed by this rule
 6944 
 6945   format %{ "movzbq  $dst, $mem\t# ubyte & 32-bit mask -> long\n\t"
 6946             "andl    $dst, right_n_bits($mask, 8)" %}
 6947   ins_encode %{
 6948     Register Rdst = $dst$$Register;
 6949     __ movzbq(Rdst, $mem$$Address);
 6950     __ andl(Rdst, $mask$$constant & right_n_bits(8)); // immediate is the mask combined with right_n_bits(8); the format text above prints it differently
 6951   %}
 6952   ins_pipe(ialu_reg_mem);
 6953 %}
 6954 
 6955 // Load Short (16 bit signed): a LoadS becomes one movswl from memory
 6956 instruct loadS(rRegI dst, memory mem)
 6957 %{
 6958   match(Set dst (LoadS mem));
 6959 
 6960   ins_cost(125);
 6961   format %{ "movswl $dst, $mem\t# short" %}
 6962 
 6963   ins_encode %{
 6964     __ movswl($dst$$Register, $mem$$Address);
 6965   %}
 6966 
 6967   ins_pipe(ialu_reg_mem); // scheduled as an integer reg-mem operation
 6968 %}
 6969 
 6970 // Load Short (16 bit signed) to Byte (8 bit signed): the shift-left-24 / shift-right-24 pair is replaced by a byte load
 6971 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 6972   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour)); // both shift counts are the same immI_24 operand
 6973 
 6974   ins_cost(125);
 6975   format %{ "movsbl $dst, $mem\t# short -> byte" %}
 6976   ins_encode %{
 6977     __ movsbl($dst$$Register, $mem$$Address);
 6978   %}
 6979   ins_pipe(ialu_reg_mem);
 6980 %}
 6981 
 6982 // Load Short (16 bit signed) into Long Register: folds the ConvI2L into one movswq
 6983 instruct loadS2L(rRegL dst, memory mem)
 6984 %{
 6985   match(Set dst (ConvI2L (LoadS mem)));
 6986 
 6987   ins_cost(125);
 6988   format %{ "movswq $dst, $mem\t# short -> long" %}
 6989 
 6990   ins_encode %{
 6991     __ movswq($dst$$Register, $mem$$Address);
 6992   %}
 6993 
 6994   ins_pipe(ialu_reg_mem); // scheduled as an integer reg-mem operation
 6995 %}
 6996 
 6997 // Load Unsigned Short/Char (16 bit UNsigned): a LoadUS becomes one movzwl from memory
 6998 instruct loadUS(rRegI dst, memory mem)
 6999 %{
 7000   match(Set dst (LoadUS mem));
 7001 
 7002   ins_cost(125);
 7003   format %{ "movzwl  $dst, $mem\t# ushort/char" %}
 7004 
 7005   ins_encode %{
 7006     __ movzwl($dst$$Register, $mem$$Address);
 7007   %}
 7008 
 7009   ins_pipe(ialu_reg_mem); // scheduled as an integer reg-mem operation
 7010 %}
 7011 
 7012 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed): the shift pair is replaced by a byte load
 7013 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 7014   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour)); // both shift counts are the same immI_24 operand
 7015 
 7016   ins_cost(125);
 7017   format %{ "movsbl $dst, $mem\t# ushort -> byte" %}
 7018   ins_encode %{
 7019     __ movsbl($dst$$Register, $mem$$Address);
 7020   %}
 7021   ins_pipe(ialu_reg_mem);
 7022 %}
 7023 
 7024 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register: folds the ConvI2L into one movzwq
 7025 instruct loadUS2L(rRegL dst, memory mem)
 7026 %{
 7027   match(Set dst (ConvI2L (LoadUS mem)));
 7028 
 7029   ins_cost(125);
 7030   format %{ "movzwq  $dst, $mem\t# ushort/char -> long" %}
 7031 
 7032   ins_encode %{
 7033     __ movzwq($dst$$Register, $mem$$Address);
 7034   %}
 7035 
 7036   ins_pipe(ialu_reg_mem); // scheduled as an integer reg-mem operation
 7037 %}
 7038 
 7039 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register: load and mask collapse into one movzbq
 7040 instruct loadUS2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
 7041   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
 7042 
 7043   format %{ "movzbq  $dst, $mem\t# ushort/char & 0xFF -> long" %}
 7044   ins_encode %{
 7045     __ movzbq($dst$$Register, $mem$$Address); // the mask operand itself is not used in the encoding
 7046   %}
 7047   ins_pipe(ialu_reg_mem);
 7048 %}
 7049 
 7050 // Load Unsigned Short/Char (16 bit UNsigned) with 32-bit mask into Long Register: movzwq followed by andl with the narrowed mask
 7051 instruct loadUS2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
 7052   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
 7053   effect(KILL cr); // the flags register is declared killed by this rule
 7054 
 7055   format %{ "movzwq  $dst, $mem\t# ushort/char & 32-bit mask -> long\n\t"
 7056             "andl    $dst, right_n_bits($mask, 16)" %}
 7057   ins_encode %{
 7058     Register Rdst = $dst$$Register;
 7059     __ movzwq(Rdst, $mem$$Address);
 7060     __ andl(Rdst, $mask$$constant & right_n_bits(16)); // immediate is the mask combined with right_n_bits(16); the format text above prints it differently
 7061   %}
 7062   ins_pipe(ialu_reg_mem);
 7063 %}
 7064 
 7065 // Load Integer: a LoadI becomes one movl from memory
 7066 instruct loadI(rRegI dst, memory mem)
 7067 %{
 7068   match(Set dst (LoadI mem));
 7069 
 7070   ins_cost(125);
 7071   format %{ "movl    $dst, $mem\t# int" %}
 7072 
 7073   ins_encode %{
 7074     __ movl($dst$$Register, $mem$$Address);
 7075   %}
 7076 
 7077   ins_pipe(ialu_reg_mem); // scheduled as an integer reg-mem operation
 7078 %}
 7079 
 7080 // Load Integer (32 bit signed) to Byte (8 bit signed): the shift-left-24 / shift-right-24 pair is replaced by a byte load
 7081 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 7082   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour)); // both shift counts are the same immI_24 operand
 7083 
 7084   ins_cost(125);
 7085   format %{ "movsbl  $dst, $mem\t# int -> byte" %}
 7086   ins_encode %{
 7087     __ movsbl($dst$$Register, $mem$$Address);
 7088   %}
 7089   ins_pipe(ialu_reg_mem);
 7090 %}
 7091 
 7092 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned): the AndI with 255 is replaced by a zero-extending byte load
 7093 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
 7094   match(Set dst (AndI (LoadI mem) mask));
 7095 
 7096   ins_cost(125);
 7097   format %{ "movzbl  $dst, $mem\t# int -> ubyte" %}
 7098   ins_encode %{
 7099     __ movzbl($dst$$Register, $mem$$Address); // the mask operand itself is not used in the encoding
 7100   %}
 7101   ins_pipe(ialu_reg_mem);
 7102 %}
 7103 
 7104 // Load Integer (32 bit signed) to Short (16 bit signed): the shift-left-16 / shift-right-16 pair is replaced by a short load
 7105 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
 7106   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen)); // both shift counts are the same immI_16 operand
 7107 
 7108   ins_cost(125);
 7109   format %{ "movswl  $dst, $mem\t# int -> short" %}
 7110   ins_encode %{
 7111     __ movswl($dst$$Register, $mem$$Address);
 7112   %}
 7113   ins_pipe(ialu_reg_mem);
 7114 %}
 7115 
 7116 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned): the AndI with 65535 is replaced by a zero-extending short load
 7117 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
 7118   match(Set dst (AndI (LoadI mem) mask));
 7119 
 7120   ins_cost(125);
 7121   format %{ "movzwl  $dst, $mem\t# int -> ushort/char" %}
 7122   ins_encode %{
 7123     __ movzwl($dst$$Register, $mem$$Address); // the mask operand itself is not used in the encoding
 7124   %}
 7125   ins_pipe(ialu_reg_mem);
 7126 %}
 7127 
 7128 // Load Integer into Long Register: folds the ConvI2L into one movslq
 7129 instruct loadI2L(rRegL dst, memory mem)
 7130 %{
 7131   match(Set dst (ConvI2L (LoadI mem)));
 7132 
 7133   ins_cost(125);
 7134   format %{ "movslq  $dst, $mem\t# int -> long" %}
 7135 
 7136   ins_encode %{
 7137     __ movslq($dst$$Register, $mem$$Address);
 7138   %}
 7139 
 7140   ins_pipe(ialu_reg_mem); // scheduled as an integer reg-mem operation
 7141 %}
 7142 
 7143 // Load Integer with mask 0xFF into Long Register: load, mask and ConvI2L collapse into one movzbq
 7144 instruct loadI2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
 7145   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 7146 
 7147   format %{ "movzbq  $dst, $mem\t# int & 0xFF -> long" %}
 7148   ins_encode %{
 7149     __ movzbq($dst$$Register, $mem$$Address); // the mask operand itself is not used in the encoding
 7150   %}
 7151   ins_pipe(ialu_reg_mem);
 7152 %}
 7153 
 7154 // Load Integer with mask 0xFFFF into Long Register: load, mask and ConvI2L collapse into one movzwq
 7155 instruct loadI2L_immI_65535(rRegL dst, memory mem, immI_65535 mask) %{
 7156   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 7157 
 7158   format %{ "movzwq  $dst, $mem\t# int & 0xFFFF -> long" %}
 7159   ins_encode %{
 7160     __ movzwq($dst$$Register, $mem$$Address); // the mask operand itself is not used in the encoding
 7161   %}
 7162   ins_pipe(ialu_reg_mem);
 7163 %}
 7164 
 7165 // Load Integer with a 31-bit mask into Long Register: movl followed by andl with the mask constant
 7166 instruct loadI2L_immU31(rRegL dst, memory mem, immU31 mask, rFlagsReg cr) %{
 7167   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 7168   effect(KILL cr); // the flags register is declared killed by this rule
 7169 
 7170   format %{ "movl    $dst, $mem\t# int & 31-bit mask -> long\n\t"
 7171             "andl    $dst, $mask" %}
 7172   ins_encode %{
 7173     Register Rdst = $dst$$Register;
 7174     __ movl(Rdst, $mem$$Address);
 7175     __ andl(Rdst, $mask$$constant); // mask is used as-is, with no narrowing
 7176   %}
 7177   ins_pipe(ialu_reg_mem);
 7178 %}
 7179 
 7180 // Load Unsigned Integer into Long Register: ConvI2L plus AndL with the immL_32bits mask becomes one movl
 7181 instruct loadUI2L(rRegL dst, memory mem, immL_32bits mask)
 7182 %{
 7183   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
 7184 
 7185   ins_cost(125);
 7186   format %{ "movl    $dst, $mem\t# uint -> long" %}
 7187 
 7188   ins_encode %{
 7189     __ movl($dst$$Register, $mem$$Address); // the mask operand itself is not used in the encoding
 7190   %}
 7191 
 7192   ins_pipe(ialu_reg_mem);
 7193 %}
 7194 
 7195 // Load Long: a LoadL becomes one movq from memory
 7196 instruct loadL(rRegL dst, memory mem)
 7197 %{
 7198   match(Set dst (LoadL mem));
 7199 
 7200   ins_cost(125);
 7201   format %{ "movq    $dst, $mem\t# long" %}
 7202 
 7203   ins_encode %{
 7204     __ movq($dst$$Register, $mem$$Address);
 7205   %}
 7206 
 7207   ins_pipe(ialu_reg_mem); // XXX
 7208 %}
 7209 
 7210 // Load Range: a LoadRange becomes one movl from memory into an int register
 7211 instruct loadRange(rRegI dst, memory mem)
 7212 %{
 7213   match(Set dst (LoadRange mem));
 7214 
 7215   ins_cost(125); // XXX
 7216   format %{ "movl    $dst, $mem\t# range" %}
 7217   ins_encode %{
 7218     __ movl($dst$$Register, $mem$$Address);
 7219   %}
 7220   ins_pipe(ialu_reg_mem);
 7221 %}
 7222 
 7223 // Load Pointer: a LoadP with no barrier data becomes one movq from memory
 7224 instruct loadP(rRegP dst, memory mem)
 7225 %{
 7226   match(Set dst (LoadP mem));
 7227   predicate(n->as_Load()->barrier_data() == 0); // rule applies only when the load carries no barrier data
 7228 
 7229   ins_cost(125); // XXX
 7230   format %{ "movq    $dst, $mem\t# ptr" %}
 7231   ins_encode %{
 7232     __ movq($dst$$Register, $mem$$Address);
 7233   %}
 7234   ins_pipe(ialu_reg_mem); // XXX
 7235 %}
 7236 
 7237 // Load Compressed Pointer: a LoadN with no barrier data becomes one movl from memory
 7238 instruct loadN(rRegN dst, memory mem)
 7239 %{
 7240    predicate(n->as_Load()->barrier_data() == 0); // rule applies only when the load carries no barrier data
 7241    match(Set dst (LoadN mem));
 7242 
 7243    ins_cost(125); // XXX
 7244    format %{ "movl    $dst, $mem\t# compressed ptr" %}
 7245    ins_encode %{
 7246      __ movl($dst$$Register, $mem$$Address);
 7247    %}
 7248    ins_pipe(ialu_reg_mem); // XXX
 7249 %}
 7250 
 7251 
 7252 // Load Klass Pointer: a LoadKlass becomes one movq from memory
 7253 instruct loadKlass(rRegP dst, memory mem)
 7254 %{
 7255   match(Set dst (LoadKlass mem));
 7256 
 7257   ins_cost(125); // XXX
 7258   format %{ "movq    $dst, $mem\t# class" %}
 7259   ins_encode %{
 7260     __ movq($dst$$Register, $mem$$Address);
 7261   %}
 7262   ins_pipe(ialu_reg_mem); // XXX
 7263 %}
 7264 
 7265 // Load narrow Klass Pointer when compact object headers are off: one movl from memory
 7266 instruct loadNKlass(rRegN dst, memory mem)
 7267 %{
 7268   predicate(!UseCompactObjectHeaders); // the UseCompactObjectHeaders case is handled by loadNKlassCompactHeaders
 7269   match(Set dst (LoadNKlass mem));
 7270 
 7271   ins_cost(125); // XXX
 7272   format %{ "movl    $dst, $mem\t# compressed klass ptr" %}
 7273   ins_encode %{
 7274     __ movl($dst$$Register, $mem$$Address);
 7275   %}
 7276   ins_pipe(ialu_reg_mem); // XXX
 7277 %}
 7278 
 7279 instruct loadNKlassCompactHeaders(rRegN dst, memory mem, rFlagsReg cr)
 7280 %{
 7281   predicate(UseCompactObjectHeaders);
 7282   match(Set dst (LoadNKlass mem));
 7283   effect(KILL cr);
 7284   ins_cost(125);
 7285   format %{
 7286     "movl    $dst, $mem\t# compressed klass ptr, shifted\n\t"
 7287     "shrl    $dst, markWord::klass_shift"
 7288   %}
 7289   ins_encode %{
 7290     // $mem addresses obj-start + Type::klass_offset(). Rebase it to obj-start so that the
 7291     // load reads the object's mark-word, then shift the result by markWord::klass_shift.
 7292     Register klass_reg = $dst$$Register;
 7293     Address  mark_addr = ($mem$$Address).plus_disp(-Type::klass_offset());
 7294     if (!UseAPX) {
 7295       __ movl(klass_reg, mark_addr);
 7296       __ shrl(klass_reg, markWord::klass_shift);
 7297     } else {
 7298       __ eshrl(klass_reg, mark_addr, markWord::klass_shift, false);
 7299     }
 7300   %}
 7301   ins_pipe(ialu_reg_mem);
 7302 %}
 7303 
 7304 // Load Float
 7305 instruct loadF(regF dst, memory mem)
 7306 %{
        // Matches LoadF and loads the float at $mem into the XMM register $dst
        // through the movflt macro (printed as movss in the format).
 7307   match(Set dst (LoadF mem));
 7308 
 7309   ins_cost(145); // XXX
 7310   format %{ "movss   $dst, $mem\t# float" %}
 7311   ins_encode %{
 7312     __ movflt($dst$$XMMRegister, $mem$$Address);
 7313   %}
 7314   ins_pipe(pipe_slow); // XXX
 7315 %}
 7316 
 7317 // Load Double
 7318 instruct loadD_partial(regD dst, memory mem)
 7319 %{
        // LoadD rule selected when UseXmmLoadAndClearUpper is off. It calls the
        // same movdbl macro as loadD; only the predicate and the printed
        // mnemonic (movlpd) differ between the two rules.
 7320   predicate(!UseXmmLoadAndClearUpper);
 7321   match(Set dst (LoadD mem));
 7322 
 7323   ins_cost(145); // XXX
 7324   format %{ "movlpd  $dst, $mem\t# double" %}
 7325   ins_encode %{
 7326     __ movdbl($dst$$XMMRegister, $mem$$Address);
 7327   %}
 7328   ins_pipe(pipe_slow); // XXX
 7329 %}
 7330 
 7331 instruct loadD(regD dst, memory mem)
 7332 %{
        // LoadD rule selected when UseXmmLoadAndClearUpper is on; loads the
        // double at $mem through the movdbl macro (printed as movsd).
 7333   predicate(UseXmmLoadAndClearUpper);
 7334   match(Set dst (LoadD mem));
 7335 
 7336   ins_cost(145); // XXX
 7337   format %{ "movsd   $dst, $mem\t# double" %}
 7338   ins_encode %{
 7339     __ movdbl($dst$$XMMRegister, $mem$$Address);
 7340   %}
 7341   ins_pipe(pipe_slow); // XXX
 7342 %}
 7343 
 7344 instruct loadAOTRCAddress(rRegP dst, immAOTRuntimeConstantsAddress con)
 7345 %{
        // Materializes a constant of operand class immAOTRuntimeConstantsAddress
        // in $dst. The constant is passed, cast to address, to the
        // load_aotrc_address macro; the format prints it as a leaq.
 7346   match(Set dst con);
 7347 
 7348   format %{ "leaq  $dst, $con\t# AOT Runtime Constants Address" %}
 7349 
 7350   ins_encode %{
 7351     __ load_aotrc_address($dst$$Register, (address)$con$$constant);
 7352   %}
 7353 
 7354   ins_pipe(ialu_reg_fat);
 7355 %}
 7356 
 7357 // min = java.lang.Math.min(float a, float b)
 7358 // max = java.lang.Math.max(float a, float b)
 7359 instruct minmaxF_reg_avx10_2(regF dst, regF a, regF b)
 7360 %{
        // Float min/max for non-reduction nodes when AVX10.2 is supported.
        // One rule serves both MaxF and MinF: the matched ideal opcode is
        // forwarded to sminmax_fp_avx10_2 together with T_FLOAT and k0.
 7361   predicate(VM_Version::supports_avx10_2() && !VLoopReductions::is_reduction(n));
 7362   match(Set dst (MaxF a b));
 7363   match(Set dst (MinF a b));
 7364 
 7365   format %{ "minmaxF $dst, $a, $b" %}
 7366   ins_encode %{
 7367     int opcode = this->ideal_Opcode();
 7368     __ sminmax_fp_avx10_2(opcode, T_FLOAT, $dst$$XMMRegister, k0, $a$$XMMRegister, $b$$XMMRegister);
 7369   %}
 7370   ins_pipe( pipe_slow );
 7371 %}
 7372 
 7373 instruct minmaxF_reduction_reg_avx10_2(regF dst, regF a, regF b, rRegI rtmp, rFlagsReg cr)
 7374 %{
        // Float min/max for nodes that are loop reductions when AVX10.2 is
        // supported. One rule serves both MaxF and MinF; rtmp is a TEMP general
        // register and the flags register is declared killed.
 7375   predicate(VM_Version::supports_avx10_2() && VLoopReductions::is_reduction(n));
 7376   match(Set dst (MaxF a b));
 7377   match(Set dst (MinF a b));
 7378   effect(USE a, USE b, TEMP rtmp, KILL cr);
 7379 
 7380   format %{ "minmaxF_reduction $dst, $a, $b \t! using $rtmp as TEMP" %}
 7381   ins_encode %{
 7382     // The matched ideal opcode decides whether min or max is emitted.
 7383     const bool is_min = (this->ideal_Opcode() == Op_MinF);
 7384     emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
 7385                     $rtmp$$Register, is_min, fp_prec_flt /*pt*/);
 7386   %}
 7387   ins_pipe( pipe_slow );
 7388 %}
 7389 
 7390 // min = java.lang.Math.min(float a, float b)
 7391 // max = java.lang.Math.max(float a, float b)
 7392 instruct minmaxF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp)
 7393 %{
        // Float min/max for non-reduction nodes on AVX hardware without AVX10.2.
        // Uses three XMM temporaries and the 128-bit form of vminmax_fp.
 7394   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
 7395   match(Set dst (MaxF a b));
 7396   match(Set dst (MinF a b));
 7397   effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
 7398 
 7399   format %{ "minmaxF $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
 7400   ins_encode %{
 7401     // vminmax_fp is handed Op_MinV/Op_MaxV, so map the scalar ideal opcode first.
 7402     const int vector_opcode = (this->ideal_Opcode() == Op_MinF) ? Op_MinV : Op_MaxV;
 7403     __ vminmax_fp(vector_opcode, T_FLOAT, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
 7404                   $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
 7405   %}
 7406   ins_pipe( pipe_slow );
 7407 %}
 7408 
 7409 instruct minmaxF_reduction_reg(legRegF dst, legRegF a, legRegF b, rRegI rtmp, rFlagsReg cr)
 7410 %{
        // Float min/max for nodes that are loop reductions on AVX hardware
        // without AVX10.2. One rule serves both MaxF and MinF; rtmp is a TEMP
        // general register and the flags register is declared killed.
 7411   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
 7412   match(Set dst (MaxF a b));
 7413   match(Set dst (MinF a b));
 7414   effect(USE a, USE b, TEMP rtmp, KILL cr);
 7415 
 7416   format %{ "minmaxF_reduction $dst, $a, $b \t! using $rtmp as TEMP" %}
 7417   ins_encode %{
 7418     // The matched ideal opcode decides whether min or max is emitted.
 7419     const bool is_min = (this->ideal_Opcode() == Op_MinF);
 7420     emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
 7421                     $rtmp$$Register, is_min, fp_prec_flt /*pt*/);
 7422   %}
 7423   ins_pipe( pipe_slow );
 7424 %}
 7425 
 7426 // min = java.lang.Math.min(double a, double b)
 7427 // max = java.lang.Math.max(double a, double b)
 7428 instruct minmaxD_reg_avx10_2(regD dst, regD a, regD b)
 7429 %{
        // Double min/max for non-reduction nodes when AVX10.2 is supported.
        // One rule serves both MaxD and MinD: the matched ideal opcode is
        // forwarded to sminmax_fp_avx10_2 together with T_DOUBLE and k0.
 7430   predicate(VM_Version::supports_avx10_2() && !VLoopReductions::is_reduction(n));
 7431   match(Set dst (MaxD a b));
 7432   match(Set dst (MinD a b));
 7433 
 7434   format %{ "minmaxD $dst, $a, $b" %}
 7435   ins_encode %{
 7436     int opcode = this->ideal_Opcode();
 7437     __ sminmax_fp_avx10_2(opcode, T_DOUBLE, $dst$$XMMRegister, k0, $a$$XMMRegister, $b$$XMMRegister);
 7438   %}
 7439   ins_pipe( pipe_slow );
 7440 %}
 7441 
 7442 instruct minmaxD_reduction_reg_avx10_2(regD dst, regD a, regD b, rRegI rtmp, rFlagsReg cr)
 7443 %{
        // Double min/max for nodes that are loop reductions when AVX10.2 is
        // supported. One rule serves both MaxD and MinD.
        // NOTE(review): rtmp is rRegI here while minmaxD_reduction_reg declares
        // it as rRegL -- confirm which class emit_fp_min_max needs for doubles.
 7444   predicate(VM_Version::supports_avx10_2() && VLoopReductions::is_reduction(n));
 7445   match(Set dst (MaxD a b));
 7446   match(Set dst (MinD a b));
 7447   effect(USE a, USE b, TEMP rtmp, KILL cr);
 7448 
 7449   format %{ "minmaxD_reduction $dst, $a, $b \t! using $rtmp as TEMP" %}
 7450   ins_encode %{
 7451     // The matched ideal opcode decides whether min or max is emitted.
 7452     const bool is_min = (this->ideal_Opcode() == Op_MinD);
 7453     emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
 7454                     $rtmp$$Register, is_min, fp_prec_dbl /*pt*/);
 7455   %}
 7456   ins_pipe( pipe_slow );
 7457 %}
 7458 
 7459 // min = java.lang.Math.min(double a, double b)
 7460 // max = java.lang.Math.max(double a, double b)
 7461 instruct minmaxD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp)
 7462 %{
        // Double min/max for non-reduction nodes on AVX hardware without AVX10.2.
        // Uses three XMM temporaries and the 128-bit form of vminmax_fp.
 7463   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
 7464   match(Set dst (MaxD a b));
 7465   match(Set dst (MinD a b));
 7466   effect(USE a, USE b, TEMP atmp, TEMP btmp, TEMP tmp);
 7467 
 7468   format %{ "minmaxD $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
 7469   ins_encode %{
 7470     // vminmax_fp is handed Op_MinV/Op_MaxV, so map the scalar ideal opcode first.
 7471     const int vector_opcode = (this->ideal_Opcode() == Op_MinD) ? Op_MinV : Op_MaxV;
 7472     __ vminmax_fp(vector_opcode, T_DOUBLE, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
 7473                   $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
 7474   %}
 7475   ins_pipe( pipe_slow );
 7476 %}
 7477 
 7478 instruct minmaxD_reduction_reg(legRegD dst, legRegD a, legRegD b, rRegL rtmp, rFlagsReg cr)
 7479 %{
        // Double min/max for nodes that are loop reductions on AVX hardware
        // without AVX10.2. One rule serves both MaxD and MinD; rtmp is a TEMP
        // long register and the flags register is declared killed.
 7480   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
 7481   match(Set dst (MaxD a b));
 7482   match(Set dst (MinD a b));
 7483   effect(USE a, USE b, TEMP rtmp, KILL cr);
 7484 
 7485   format %{ "minmaxD_reduction $dst, $a, $b \t! using $rtmp as TEMP" %}
 7486   ins_encode %{
 7487     // The matched ideal opcode decides whether min or max is emitted.
 7488     const bool is_min = (this->ideal_Opcode() == Op_MinD);
 7489     emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
 7490                     $rtmp$$Register, is_min, fp_prec_dbl /*pt*/);
 7491   %}
 7492   ins_pipe( pipe_slow );
 7493 %}
 7494 
 7495 // Load Effective Address
 7496 instruct leaP8(rRegP dst, indOffset8 mem)
 7497 %{
        // Matches a bare indOffset8 address operand (no Load node around it)
        // and computes that effective address into $dst with leaq.
 7498   match(Set dst mem);
 7499 
 7500   ins_cost(110); // XXX
 7501   format %{ "leaq    $dst, $mem\t# ptr 8" %}
 7502   ins_encode %{
 7503     __ leaq($dst$$Register, $mem$$Address);
 7504   %}
 7505   ins_pipe(ialu_reg_reg_fat);
 7506 %}
 7507 
 7508 instruct leaP32(rRegP dst, indOffset32 mem)
 7509 %{
        // Matches a bare indOffset32 address operand and computes that
        // effective address into $dst with leaq.
 7510   match(Set dst mem);
 7511 
 7512   ins_cost(110);
 7513   format %{ "leaq    $dst, $mem\t# ptr 32" %}
 7514   ins_encode %{
 7515     __ leaq($dst$$Register, $mem$$Address);
 7516   %}
 7517   ins_pipe(ialu_reg_reg_fat);
 7518 %}
 7519 
 7520 instruct leaPIdxOff(rRegP dst, indIndexOffset mem)
 7521 %{
        // Matches a bare indIndexOffset address operand and computes that
        // effective address into $dst with leaq.
 7522   match(Set dst mem);
 7523 
 7524   ins_cost(110);
 7525   format %{ "leaq    $dst, $mem\t# ptr idxoff" %}
 7526   ins_encode %{
 7527     __ leaq($dst$$Register, $mem$$Address);
 7528   %}
 7529   ins_pipe(ialu_reg_reg_fat);
 7530 %}
 7531 
 7532 instruct leaPIdxScale(rRegP dst, indIndexScale mem)
 7533 %{
        // Matches a bare indIndexScale address operand and computes that
        // effective address into $dst with leaq.
 7534   match(Set dst mem);
 7535 
 7536   ins_cost(110);
 7537   format %{ "leaq    $dst, $mem\t# ptr idxscale" %}
 7538   ins_encode %{
 7539     __ leaq($dst$$Register, $mem$$Address);
 7540   %}
 7541   ins_pipe(ialu_reg_reg_fat);
 7542 %}
 7543 
 7544 instruct leaPPosIdxScale(rRegP dst, indPosIndexScale mem)
 7545 %{
        // Matches a bare indPosIndexScale address operand and computes that
        // effective address into $dst with leaq. The format tag names this
        // rule's own addressing mode so its debug output can be told apart
        // from leaPIdxScale.
 7546   match(Set dst mem);
 7547 
 7548   ins_cost(110);
 7549   format %{ "leaq    $dst, $mem\t# ptr posidxscale" %}
 7550   ins_encode %{
 7551     __ leaq($dst$$Register, $mem$$Address);
 7552   %}
 7553   ins_pipe(ialu_reg_reg_fat);
 7554 %}
 7555 
 7556 instruct leaPIdxScaleOff(rRegP dst, indIndexScaleOffset mem)
 7557 %{
        // Matches a bare indIndexScaleOffset address operand and computes that
        // effective address into $dst with leaq.
 7558   match(Set dst mem);
 7559 
 7560   ins_cost(110);
 7561   format %{ "leaq    $dst, $mem\t# ptr idxscaleoff" %}
 7562   ins_encode %{
 7563     __ leaq($dst$$Register, $mem$$Address);
 7564   %}
 7565   ins_pipe(ialu_reg_reg_fat);
 7566 %}
 7567 
 7568 instruct leaPPosIdxOff(rRegP dst, indPosIndexOffset mem)
 7569 %{
        // Matches a bare indPosIndexOffset address operand and computes that
        // effective address into $dst with leaq.
 7570   match(Set dst mem);
 7571 
 7572   ins_cost(110);
 7573   format %{ "leaq    $dst, $mem\t# ptr posidxoff" %}
 7574   ins_encode %{
 7575     __ leaq($dst$$Register, $mem$$Address);
 7576   %}
 7577   ins_pipe(ialu_reg_reg_fat);
 7578 %}
 7579 
 7580 instruct leaPPosIdxScaleOff(rRegP dst, indPosIndexScaleOffset mem)
 7581 %{
        // Matches a bare indPosIndexScaleOffset address operand and computes
        // that effective address into $dst with leaq.
 7582   match(Set dst mem);
 7583 
 7584   ins_cost(110);
 7585   format %{ "leaq    $dst, $mem\t# ptr posidxscaleoff" %}
 7586   ins_encode %{
 7587     __ leaq($dst$$Register, $mem$$Address);
 7588   %}
 7589   ins_pipe(ialu_reg_reg_fat);
 7590 %}
 7591 
 7592 // Load Effective Address which uses Narrow (32-bits) oop
 7593 instruct leaPCompressedOopOffset(rRegP dst, indCompressedOopOffset mem)
 7594 %{
        // leaq over an indCompressedOopOffset address operand. Enabled only
        // with UseCompressedOops and a non-zero CompressedOops::shift(); the
        // *Narrow lea rules cover the shift() == 0 case.
 7595   predicate(UseCompressedOops && (CompressedOops::shift() != 0));
 7596   match(Set dst mem);
 7597 
 7598   ins_cost(110);
 7599   format %{ "leaq    $dst, $mem\t# ptr compressedoopoff32" %}
 7600   ins_encode %{
 7601     __ leaq($dst$$Register, $mem$$Address);
 7602   %}
 7603   ins_pipe(ialu_reg_reg_fat);
 7604 %}
 7605 
 7606 instruct leaP8Narrow(rRegP dst, indOffset8Narrow mem)
 7607 %{
        // leaq over an indOffset8Narrow address operand; enabled only when
        // CompressedOops::shift() == 0.
 7608   predicate(CompressedOops::shift() == 0);
 7609   match(Set dst mem);
 7610 
 7611   ins_cost(110); // XXX
 7612   format %{ "leaq    $dst, $mem\t# ptr off8narrow" %}
 7613   ins_encode %{
 7614     __ leaq($dst$$Register, $mem$$Address);
 7615   %}
 7616   ins_pipe(ialu_reg_reg_fat);
 7617 %}
 7618 
 7619 instruct leaP32Narrow(rRegP dst, indOffset32Narrow mem)
 7620 %{
        // leaq over an indOffset32Narrow address operand; enabled only when
        // CompressedOops::shift() == 0.
 7621   predicate(CompressedOops::shift() == 0);
 7622   match(Set dst mem);
 7623 
 7624   ins_cost(110);
 7625   format %{ "leaq    $dst, $mem\t# ptr off32narrow" %}
 7626   ins_encode %{
 7627     __ leaq($dst$$Register, $mem$$Address);
 7628   %}
 7629   ins_pipe(ialu_reg_reg_fat);
 7630 %}
 7631 
 7632 instruct leaPIdxOffNarrow(rRegP dst, indIndexOffsetNarrow mem)
 7633 %{
        // leaq over an indIndexOffsetNarrow address operand; enabled only when
        // CompressedOops::shift() == 0.
 7634   predicate(CompressedOops::shift() == 0);
 7635   match(Set dst mem);
 7636 
 7637   ins_cost(110);
 7638   format %{ "leaq    $dst, $mem\t# ptr idxoffnarrow" %}
 7639   ins_encode %{
 7640     __ leaq($dst$$Register, $mem$$Address);
 7641   %}
 7642   ins_pipe(ialu_reg_reg_fat);
 7643 %}
 7644 
 7645 instruct leaPIdxScaleNarrow(rRegP dst, indIndexScaleNarrow mem)
 7646 %{
        // leaq over an indIndexScaleNarrow address operand; enabled only when
        // CompressedOops::shift() == 0.
 7647   predicate(CompressedOops::shift() == 0);
 7648   match(Set dst mem);
 7649 
 7650   ins_cost(110);
 7651   format %{ "leaq    $dst, $mem\t# ptr idxscalenarrow" %}
 7652   ins_encode %{
 7653     __ leaq($dst$$Register, $mem$$Address);
 7654   %}
 7655   ins_pipe(ialu_reg_reg_fat);
 7656 %}
 7657 
 7658 instruct leaPIdxScaleOffNarrow(rRegP dst, indIndexScaleOffsetNarrow mem)
 7659 %{
        // leaq over an indIndexScaleOffsetNarrow address operand; enabled only
        // when CompressedOops::shift() == 0.
 7660   predicate(CompressedOops::shift() == 0);
 7661   match(Set dst mem);
 7662 
 7663   ins_cost(110);
 7664   format %{ "leaq    $dst, $mem\t# ptr idxscaleoffnarrow" %}
 7665   ins_encode %{
 7666     __ leaq($dst$$Register, $mem$$Address);
 7667   %}
 7668   ins_pipe(ialu_reg_reg_fat);
 7669 %}
 7670 
 7671 instruct leaPPosIdxOffNarrow(rRegP dst, indPosIndexOffsetNarrow mem)
 7672 %{
        // leaq over an indPosIndexOffsetNarrow address operand; enabled only
        // when CompressedOops::shift() == 0.
 7673   predicate(CompressedOops::shift() == 0);
 7674   match(Set dst mem);
 7675 
 7676   ins_cost(110);
 7677   format %{ "leaq    $dst, $mem\t# ptr posidxoffnarrow" %}
 7678   ins_encode %{
 7679     __ leaq($dst$$Register, $mem$$Address);
 7680   %}
 7681   ins_pipe(ialu_reg_reg_fat);
 7682 %}
 7683 
 7684 instruct leaPPosIdxScaleOffNarrow(rRegP dst, indPosIndexScaleOffsetNarrow mem)
 7685 %{
        // leaq over an indPosIndexScaleOffsetNarrow address operand; enabled
        // only when CompressedOops::shift() == 0.
 7686   predicate(CompressedOops::shift() == 0);
 7687   match(Set dst mem);
 7688 
 7689   ins_cost(110);
 7690   format %{ "leaq    $dst, $mem\t# ptr posidxscaleoffnarrow" %}
 7691   ins_encode %{
 7692     __ leaq($dst$$Register, $mem$$Address);
 7693   %}
 7694   ins_pipe(ialu_reg_reg_fat);
 7695 %}
 7696 
 7697 instruct loadConI(rRegI dst, immI src)
 7698 %{
        // General int constant load: movl of the immediate into $dst. No
        // ins_cost is given here, whereas the zero-only rule loadConI0 sets
        // ins_cost(50).
 7699   match(Set dst src);
 7700 
 7701   format %{ "movl    $dst, $src\t# int" %}
 7702   ins_encode %{
 7703     __ movl($dst$$Register, $src$$constant);
 7704   %}
 7705   ins_pipe(ialu_reg_fat); // XXX
 7706 %}
 7707 
 7708 instruct loadConI0(rRegI dst, immI_0 src, rFlagsReg cr)
 7709 %{
        // Int zero: cleared with xorl $dst, $dst instead of a move. The xor
        // modifies the flags, hence the KILL cr effect.
 7710   match(Set dst src);
 7711   effect(KILL cr);
 7712 
 7713   ins_cost(50);
 7714   format %{ "xorl    $dst, $dst\t# int" %}
 7715   ins_encode %{
 7716     __ xorl($dst$$Register, $dst$$Register);
 7717   %}
 7718   ins_pipe(ialu_reg);
 7719 %}
 7720 
 7721 instruct loadConL(rRegL dst, immL src)
 7722 %{
        // General long constant load through mov64. At cost 150 it is the most
        // expensive of the long-constant rules in this group (loadConL0: 50,
        // loadConUL32: 60, loadConL32: 70).
 7723   match(Set dst src);
 7724 
 7725   ins_cost(150);
 7726   format %{ "movq    $dst, $src\t# long" %}
 7727   ins_encode %{
 7728     __ mov64($dst$$Register, $src$$constant);
 7729   %}
 7730   ins_pipe(ialu_reg);
 7731 %}
 7732 
 7733 instruct loadConL0(rRegL dst, immL0 src, rFlagsReg cr)
 7734 %{
        // Long zero: cleared with a 32-bit xorl. On x86-64 a 32-bit register
        // write zero-extends into the upper half, so the whole 64-bit register
        // ends up zero. The xor modifies the flags, hence KILL cr.
 7735   match(Set dst src);
 7736   effect(KILL cr);
 7737 
 7738   ins_cost(50);
 7739   format %{ "xorl    $dst, $dst\t# long" %}
 7740   ins_encode %{
 7741     __ xorl($dst$$Register, $dst$$Register);
 7742   %}
 7743   ins_pipe(ialu_reg); // XXX
 7744 %}
 7745 
 7746 instruct loadConUL32(rRegL dst, immUL32 src)
 7747 %{
        // Long constant of operand class immUL32 (unsigned 32-bit, per the
        // format text): loaded with movl, relying on the zero-extension of
        // 32-bit register writes on x86-64.
 7748   match(Set dst src);
 7749 
 7750   ins_cost(60);
 7751   format %{ "movl    $dst, $src\t# long (unsigned 32-bit)" %}
 7752   ins_encode %{
 7753     __ movl($dst$$Register, $src$$constant);
 7754   %}
 7755   ins_pipe(ialu_reg);
 7756 %}
 7757 
 7758 instruct loadConL32(rRegL dst, immL32 src)
 7759 %{
        // Long constant of operand class immL32: loaded with movq and an
        // immediate operand rather than the costlier mov64 used by loadConL.
 7760   match(Set dst src);
 7761 
 7762   ins_cost(70);
 7763   format %{ "movq    $dst, $src\t# long (32-bit)" %}
 7764   ins_encode %{
 7765     __ movq($dst$$Register, $src$$constant);
 7766   %}
 7767   ins_pipe(ialu_reg);
 7768 %}
 7769 
 7770 instruct loadConP(rRegP dst, immP con) %{
        // General pointer constant load through mov64. The constant's
        // relocation ($con->constant_reloc()) and RELOC_IMM64 are passed along
        // to the assembler with the value.
 7771   match(Set dst con);
 7772 
 7773   format %{ "movq    $dst, $con\t# ptr" %}
 7774   ins_encode %{
 7775     __ mov64($dst$$Register, $con$$constant, $con->constant_reloc(), RELOC_IMM64);
 7776   %}
 7777   ins_pipe(ialu_reg_fat); // XXX
 7778 %}
 7779 
 7780 instruct loadConP0(rRegP dst, immP0 src, rFlagsReg cr)
 7781 %{
        // Null pointer constant: cleared with xorl $dst, $dst. The xor
        // modifies the flags, hence the KILL cr effect.
 7782   match(Set dst src);
 7783   effect(KILL cr);
 7784 
 7785   ins_cost(50);
 7786   format %{ "xorl    $dst, $dst\t# ptr" %}
 7787   ins_encode %{
 7788     __ xorl($dst$$Register, $dst$$Register);
 7789   %}
 7790   ins_pipe(ialu_reg);
 7791 %}
 7792 
 7793 instruct loadConP31(rRegP dst, immP31 src, rFlagsReg cr)
 7794 %{
        // Pointer constant of operand class immP31 (positive 32-bit, per the
        // format text): loaded with movl.
        // NOTE(review): the rule declares KILL cr although the encoding is a
        // plain movl -- confirm whether the flags kill is still required.
 7795   match(Set dst src);
 7796   effect(KILL cr);
 7797 
 7798   ins_cost(60);
 7799   format %{ "movl    $dst, $src\t# ptr (positive 32-bit)" %}
 7800   ins_encode %{
 7801     __ movl($dst$$Register, $src$$constant);
 7802   %}
 7803   ins_pipe(ialu_reg);
 7804 %}
 7805 
 7806 instruct loadConF(regF dst, immF con) %{
        // Float constant: loaded with movflt from the address that
        // $constantaddress($con) yields (the constant table, per the format).
 7807   match(Set dst con);
 7808   ins_cost(125);
 7809   format %{ "movss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
 7810   ins_encode %{
 7811     __ movflt($dst$$XMMRegister, $constantaddress($con));
 7812   %}
 7813   ins_pipe(pipe_slow);
 7814 %}
 7815 
 7816 instruct loadConH(regF dst, immH con) %{
        // Half-float constant (immH): loaded into a float register with movflt
        // from the address that $constantaddress($con) yields, the same
        // sequence loadConF uses.
 7817   match(Set dst con);
 7818   ins_cost(125);
 7819   format %{ "movss   $dst, [$constantaddress]\t# load from constant table: halffloat=$con" %}
 7820   ins_encode %{
 7821     __ movflt($dst$$XMMRegister, $constantaddress($con));
 7822   %}
 7823   ins_pipe(pipe_slow);
 7824 %}
 7825 
 7826 instruct loadConN0(rRegN dst, immN0 src, rFlagsReg cr) %{
        // Compressed null pointer: cleared with xorq $dst, $dst. The xor
        // modifies the flags, hence KILL cr. The format prints the operands
        // that are actually encoded ($dst, $dst).
 7827   match(Set dst src);
 7828   effect(KILL cr);
 7829   format %{ "xorq    $dst, $dst\t# compressed null pointer" %}
 7830   ins_encode %{
 7831     __ xorq($dst$$Register, $dst$$Register);
 7832   %}
 7833   ins_pipe(ialu_reg);
 7834 %}
 7835 
 7836 instruct loadConN(rRegN dst, immN src) %{
        // Materializes a non-null compressed oop constant in $dst via
        // set_narrow_oop.
 7837   match(Set dst src);
 7838 
 7839   ins_cost(125);
 7840   format %{ "movl    $dst, $src\t# compressed ptr" %}
 7841   ins_encode %{
 7842     address oop_addr = (address)$src$$constant;
 7843     if (oop_addr != nullptr) {
 7844       __ set_narrow_oop($dst$$Register, (jobject)$src$$constant);
 7845     } else {
 7846       ShouldNotReachHere(); // a null constant is not expected by this rule
 7847     }
 7848   %}
 7849   ins_pipe(ialu_reg_fat); // XXX
 7850 %}
 7851 
 7852 instruct loadConNKlass(rRegN dst, immNKlass src) %{
        // Materializes a non-null compressed klass constant in $dst via
        // set_narrow_klass.
 7853   match(Set dst src);
 7854 
 7855   ins_cost(125);
 7856   format %{ "movl    $dst, $src\t# compressed klass ptr" %}
 7857   ins_encode %{
 7858     address klass_addr = (address)$src$$constant;
 7859     if (klass_addr != nullptr) {
 7860       __ set_narrow_klass($dst$$Register, (Klass*)$src$$constant);
 7861     } else {
 7862       ShouldNotReachHere(); // a null constant is not expected by this rule
 7863     }
 7864   %}
 7865   ins_pipe(ialu_reg_fat); // XXX
 7866 %}
 7867 
 7868 instruct loadConF0(regF dst, immF0 src)
 7869 %{
        // Float 0.0 constant: produced by xorps $dst, $dst at cost 100 rather
        // than the constant-table load of loadConF (cost 125).
 7870   match(Set dst src);
 7871   ins_cost(100);
 7872 
 7873   format %{ "xorps   $dst, $dst\t# float 0.0" %}
 7874   ins_encode %{
 7875     __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
 7876   %}
 7877   ins_pipe(pipe_slow);
 7878 %}
 7879 
 7880 // Use the same format since predicate() can not be used here.
 7881 instruct loadConD(regD dst, immD con) %{
        // Double constant: loaded with movdbl from the address that
        // $constantaddress($con) yields (the constant table, per the format).
 7882   match(Set dst con);
 7883   ins_cost(125);
 7884   format %{ "movsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
 7885   ins_encode %{
 7886     __ movdbl($dst$$XMMRegister, $constantaddress($con));
 7887   %}
 7888   ins_pipe(pipe_slow);
 7889 %}
 7890 
 7891 instruct loadConD0(regD dst, immD0 src)
 7892 %{
        // Double 0.0 constant: produced by xorpd $dst, $dst at cost 100 rather
        // than the constant-table load of loadConD (cost 125).
 7893   match(Set dst src);
 7894   ins_cost(100);
 7895 
 7896   format %{ "xorpd   $dst, $dst\t# double 0.0" %}
 7897   ins_encode %{
 7898     __ xorpd($dst$$XMMRegister, $dst$$XMMRegister);
 7899   %}
 7900   ins_pipe(pipe_slow);
 7901 %}
 7902 
 7903 instruct loadSSI(rRegI dst, stackSlotI src)
 7904 %{
        // Moves an int from a stack slot operand into a register with movl.
 7905   match(Set dst src);
 7906 
 7907   ins_cost(125);
 7908   format %{ "movl    $dst, $src\t# int stk" %}
 7909   ins_encode %{
 7910     __ movl($dst$$Register, $src$$Address);
 7911   %}
 7912   ins_pipe(ialu_reg_mem);
 7913 %}
 7914 
 7915 instruct loadSSL(rRegL dst, stackSlotL src)
 7916 %{
        // Moves a long from a stack slot operand into a register with movq.
 7917   match(Set dst src);
 7918 
 7919   ins_cost(125);
 7920   format %{ "movq    $dst, $src\t# long stk" %}
 7921   ins_encode %{
 7922     __ movq($dst$$Register, $src$$Address);
 7923   %}
 7924   ins_pipe(ialu_reg_mem);
 7925 %}
 7926 
 7927 instruct loadSSP(rRegP dst, stackSlotP src)
 7928 %{
        // Moves a pointer from a stack slot operand into a register with movq.
 7929   match(Set dst src);
 7930 
 7931   ins_cost(125);
 7932   format %{ "movq    $dst, $src\t# ptr stk" %}
 7933   ins_encode %{
 7934     __ movq($dst$$Register, $src$$Address);
 7935   %}
 7936   ins_pipe(ialu_reg_mem);
 7937 %}
 7938 
 7939 instruct loadSSF(regF dst, stackSlotF src)
 7940 %{
        // Moves a float from a stack slot into an XMM register with movflt.
        // Unlike the integer stack-slot loads, the address is built explicitly
        // from rsp and the slot's displacement instead of $src$$Address.
 7941   match(Set dst src);
 7942 
 7943   ins_cost(125);
 7944   format %{ "movss   $dst, $src\t# float stk" %}
 7945   ins_encode %{
 7946     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
 7947   %}
 7948   ins_pipe(pipe_slow); // XXX
 7949 %}
 7950 
 7951 // Use the same format since predicate() can not be used here.
 7952 instruct loadSSD(regD dst, stackSlotD src)
 7953 %{
        // Moves a double from a stack slot into an XMM register with movdbl.
        // The address is built explicitly from rsp and the slot's displacement.
 7954   match(Set dst src);
 7955 
 7956   ins_cost(125);
 7957   format %{ "movsd   $dst, $src\t# double stk" %}
 7958   ins_encode  %{
 7959     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
 7960   %}
 7961   ins_pipe(pipe_slow); // XXX
 7962 %}
 7963 
 7964 // Prefetch instructions for allocation.
 7965 // Must be safe to execute with invalid address (cannot fault).
 7966 
 7967 instruct prefetchAlloc( memory mem ) %{
        // PrefetchAllocation rule selected when AllocatePrefetchInstr == 3;
        // emits prefetchw on $mem.
 7968   predicate(AllocatePrefetchInstr==3);
 7969   match(PrefetchAllocation mem);
 7970   ins_cost(125);
 7971 
 7972   format %{ "PREFETCHW $mem\t# Prefetch allocation into level 1 cache and mark modified" %}
 7973   ins_encode %{
 7974     __ prefetchw($mem$$Address);
 7975   %}
 7976   ins_pipe(ialu_mem);
 7977 %}
 7978 
 7979 instruct prefetchAllocNTA( memory mem ) %{
        // PrefetchAllocation rule selected when AllocatePrefetchInstr == 0;
        // emits prefetchnta on $mem.
 7980   predicate(AllocatePrefetchInstr==0);
 7981   match(PrefetchAllocation mem);
 7982   ins_cost(125);
 7983 
 7984   format %{ "PREFETCHNTA $mem\t# Prefetch allocation to non-temporal cache for write" %}
 7985   ins_encode %{
 7986     __ prefetchnta($mem$$Address);
 7987   %}
 7988   ins_pipe(ialu_mem);
 7989 %}
 7990 
 7991 instruct prefetchAllocT0( memory mem ) %{
        // PrefetchAllocation rule selected when AllocatePrefetchInstr == 1;
        // emits prefetcht0 on $mem.
 7992   predicate(AllocatePrefetchInstr==1);
 7993   match(PrefetchAllocation mem);
 7994   ins_cost(125);
 7995 
 7996   format %{ "PREFETCHT0 $mem\t# Prefetch allocation to level 1 and 2 caches for write" %}
 7997   ins_encode %{
 7998     __ prefetcht0($mem$$Address);
 7999   %}
 8000   ins_pipe(ialu_mem);
 8001 %}
 8002 
 8003 instruct prefetchAllocT2( memory mem ) %{
        // PrefetchAllocation rule selected when AllocatePrefetchInstr == 2;
        // emits prefetcht2 on $mem.
 8004   predicate(AllocatePrefetchInstr==2);
 8005   match(PrefetchAllocation mem);
 8006   ins_cost(125);
 8007 
 8008   format %{ "PREFETCHT2 $mem\t# Prefetch allocation to level 2 cache for write" %}
 8009   ins_encode %{
 8010     __ prefetcht2($mem$$Address);
 8011   %}
 8012   ins_pipe(ialu_mem);
 8013 %}
 8014 
 8015 //----------Store Instructions-------------------------------------------------
 8016 
 8017 // Store Byte
 8018 instruct storeB(memory mem, rRegI src)
 8019 %{
        // Matches StoreB and writes the register $src to $mem with movb.
 8020   match(Set mem (StoreB mem src));
 8021 
 8022   ins_cost(125); // XXX
 8023   format %{ "movb    $mem, $src\t# byte" %}
 8024   ins_encode %{
 8025     __ movb($mem$$Address, $src$$Register);
 8026   %}
 8027   ins_pipe(ialu_mem_reg);
 8028 %}
 8029 
 8030 // Store Char/Short
 8031 instruct storeC(memory mem, rRegI src)
 8032 %{
        // Matches StoreC (char/short) and writes the register $src to $mem
        // with movw.
 8033   match(Set mem (StoreC mem src));
 8034 
 8035   ins_cost(125); // XXX
 8036   format %{ "movw    $mem, $src\t# char/short" %}
 8037   ins_encode %{
 8038     __ movw($mem$$Address, $src$$Register);
 8039   %}
 8040   ins_pipe(ialu_mem_reg);
 8041 %}
 8042 
 8043 // Store Integer
 8044 instruct storeI(memory mem, rRegI src)
 8045 %{
        // Matches StoreI and writes the int register $src to $mem with movl.
 8046   match(Set mem (StoreI mem src));
 8047 
 8048   ins_cost(125); // XXX
 8049   format %{ "movl    $mem, $src\t# int" %}
 8050   ins_encode %{
 8051     __ movl($mem$$Address, $src$$Register);
 8052   %}
 8053   ins_pipe(ialu_mem_reg);
 8054 %}
 8055 
 8056 // Store Long
 8057 instruct storeL(memory mem, rRegL src)
 8058 %{
        // Matches StoreL and writes the long register $src to $mem with movq.
 8059   match(Set mem (StoreL mem src));
 8060 
 8061   ins_cost(125); // XXX
 8062   format %{ "movq    $mem, $src\t# long" %}
 8063   ins_encode %{
 8064     __ movq($mem$$Address, $src$$Register);
 8065   %}
 8066   ins_pipe(ialu_mem_reg); // XXX
 8067 %}
 8068 
 8069 // Store Pointer
 8070 instruct storeP(memory mem, any_RegP src)
 8071 %{
        // Plain pointer store (StoreP -> movq) from any pointer register. Only
        // applies to stores whose barrier_data() is 0.
 8072   predicate(n->as_Store()->barrier_data() == 0);
 8073   match(Set mem (StoreP mem src));
 8074 
 8075   ins_cost(125); // XXX
 8076   format %{ "movq    $mem, $src\t# ptr" %}
 8077   ins_encode %{
 8078     __ movq($mem$$Address, $src$$Register);
 8079   %}
 8080   ins_pipe(ialu_mem_reg);
 8081 %}
 8082 
 8083 instruct storeImmP0(memory mem, immP0 zero)
 8084 %{
        // Null pointer store that writes r12 instead of an immediate. Enabled
        // only with UseCompressedOops, a null CompressedOops::base() and no
        // barrier data; per the format text r12 (R12_heapbase) is zero then.
 8085   predicate(UseCompressedOops && (CompressedOops::base() == nullptr) && n->as_Store()->barrier_data() == 0);
 8086   match(Set mem (StoreP mem zero));
 8087 
 8088   ins_cost(125); // XXX
 8089   format %{ "movq    $mem, R12\t# ptr (R12_heapbase==0)" %}
 8090   ins_encode %{
 8091     __ movq($mem$$Address, r12);
 8092   %}
 8093   ins_pipe(ialu_mem_reg);
 8094 %}
 8095 
 8096 // Store Null Pointer, mark word, or other simple pointer constant.
 8097 instruct storeImmP(memory mem, immP31 src)
 8098 %{
        // Stores a pointer constant of operand class immP31 as an immediate
        // with movq. Only applies to stores whose barrier_data() is 0. It
        // costs 150, more than the register-sourced stores (125).
 8099   predicate(n->as_Store()->barrier_data() == 0);
 8100   match(Set mem (StoreP mem src));
 8101 
 8102   ins_cost(150); // XXX
 8103   format %{ "movq    $mem, $src\t# ptr" %}
 8104   ins_encode %{
 8105     __ movq($mem$$Address, $src$$constant);
 8106   %}
 8107   ins_pipe(ialu_mem_imm);
 8108 %}
 8109 
 8110 // Store Compressed Pointer
 8111 instruct storeN(memory mem, rRegN src)
 8112 %{
        // Compressed pointer store (StoreN -> movl) from a narrow-oop
        // register. Only applies to stores whose barrier_data() is 0.
 8113   predicate(n->as_Store()->barrier_data() == 0);
 8114   match(Set mem (StoreN mem src));
 8115 
 8116   ins_cost(125); // XXX
 8117   format %{ "movl    $mem, $src\t# compressed ptr" %}
 8118   ins_encode %{
 8119     __ movl($mem$$Address, $src$$Register);
 8120   %}
 8121   ins_pipe(ialu_mem_reg);
 8122 %}
 8123 
 8124 instruct storeNKlass(memory mem, rRegN src)
 8125 %{
        // Matches StoreNKlass and writes the compressed klass pointer held in
        // $src to $mem with movl.
 8126   match(Set mem (StoreNKlass mem src));
 8127 
 8128   ins_cost(125); // XXX
 8129   format %{ "movl    $mem, $src\t# compressed klass ptr" %}
 8130   ins_encode %{
 8131     __ movl($mem$$Address, $src$$Register);
 8132   %}
 8133   ins_pipe(ialu_mem_reg);
 8134 %}
 8135 
 8136 instruct storeImmN0(memory mem, immN0 zero)
 8137 %{
        // Compressed null store that writes r12 with movl instead of an
        // immediate. Enabled only with a null CompressedOops::base() and no
        // barrier data; per the format text r12 (R12_heapbase) is zero then.
 8138   predicate(CompressedOops::base() == nullptr && n->as_Store()->barrier_data() == 0);
 8139   match(Set mem (StoreN mem zero));
 8140 
 8141   ins_cost(125); // XXX
 8142   format %{ "movl    $mem, R12\t# compressed ptr (R12_heapbase==0)" %}
 8143   ins_encode %{
 8144     __ movl($mem$$Address, r12);
 8145   %}
 8146   ins_pipe(ialu_mem_reg);
 8147 %}
 8148 
 8149 instruct storeImmN(memory mem, immN src)
 8150 %{
        // Stores a compressed oop constant directly to memory. A non-null
        // constant goes through set_narrow_oop; a null constant is written as
        // an immediate 0. Only applies to stores whose barrier_data() is 0.
 8151   predicate(n->as_Store()->barrier_data() == 0);
 8152   match(Set mem (StoreN mem src));
 8153 
 8154   ins_cost(150); // XXX
 8155   format %{ "movl    $mem, $src\t# compressed ptr" %}
 8156   ins_encode %{
 8157     address oop_addr = (address)$src$$constant;
 8158     if (oop_addr != nullptr) {
 8159       __ set_narrow_oop($mem$$Address, (jobject)$src$$constant);
 8160     } else {
 8161       __ movl($mem$$Address, 0);
 8162     }
 8163   %}
 8164   ins_pipe(ialu_mem_imm);
 8165 %}
 8166 
 8167 instruct storeImmNKlass(memory mem, immNKlass src)
 8168 %{
        // Stores a compressed klass constant directly to memory through
        // set_narrow_klass. Unlike storeImmN there is no null check on the
        // constant here.
 8169   match(Set mem (StoreNKlass mem src));
 8170 
 8171   ins_cost(150); // XXX
 8172   format %{ "movl    $mem, $src\t# compressed klass ptr" %}
 8173   ins_encode %{
 8174     __ set_narrow_klass($mem$$Address, (Klass*)$src$$constant);
 8175   %}
 8176   ins_pipe(ialu_mem_imm);
 8177 %}
 8178 
 8179 // Store Integer Immediate
 8180 instruct storeImmI0(memory mem, immI_0 zero)
 8181 %{
        // Int zero store that writes r12 with movl instead of an immediate.
        // Enabled only with UseCompressedOops and a null
        // CompressedOops::base(); per the format text r12 is zero then.
 8182   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8183   match(Set mem (StoreI mem zero));
 8184 
 8185   ins_cost(125); // XXX
 8186   format %{ "movl    $mem, R12\t# int (R12_heapbase==0)" %}
 8187   ins_encode %{
 8188     __ movl($mem$$Address, r12);
 8189   %}
 8190   ins_pipe(ialu_mem_reg);
 8191 %}
 8192 
 8193 instruct storeImmI(memory mem, immI src)
 8194 %{
        // Stores an int constant to memory as an immediate movl. It costs 150,
        // more than the r12-based zero store storeImmI0 (125).
 8195   match(Set mem (StoreI mem src));
 8196 
 8197   ins_cost(150);
 8198   format %{ "movl    $mem, $src\t# int" %}
 8199   ins_encode %{
 8200     __ movl($mem$$Address, $src$$constant);
 8201   %}
 8202   ins_pipe(ialu_mem_imm);
 8203 %}
 8204 
 8205 // Store Long Immediate
 8206 instruct storeImmL0(memory mem, immL0 zero)
 8207 %{
        // Long zero store that writes r12 with movq instead of an immediate.
        // Enabled only with UseCompressedOops and a null
        // CompressedOops::base(); per the format text r12 is zero then.
 8208   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8209   match(Set mem (StoreL mem zero));
 8210 
 8211   ins_cost(125); // XXX
 8212   format %{ "movq    $mem, R12\t# long (R12_heapbase==0)" %}
 8213   ins_encode %{
 8214     __ movq($mem$$Address, r12);
 8215   %}
 8216   ins_pipe(ialu_mem_reg);
 8217 %}
 8218 
 8219 instruct storeImmL(memory mem, immL32 src)
 8220 %{
        // Stores a long constant of operand class immL32 to memory as an
        // immediate movq.
 8221   match(Set mem (StoreL mem src));
 8222 
 8223   ins_cost(150);
 8224   format %{ "movq    $mem, $src\t# long" %}
 8225   ins_encode %{
 8226     __ movq($mem$$Address, $src$$constant);
 8227   %}
 8228   ins_pipe(ialu_mem_imm);
 8229 %}
 8230 
 8231 // Store Short/Char Immediate
 8232 instruct storeImmC0(memory mem, immI_0 zero)
 8233 %{
        // Short/char zero store that writes r12 with movw instead of an
        // immediate. Enabled only with UseCompressedOops and a null
        // CompressedOops::base(); per the format text r12 is zero then.
 8234   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8235   match(Set mem (StoreC mem zero));
 8236 
 8237   ins_cost(125); // XXX
 8238   format %{ "movw    $mem, R12\t# short/char (R12_heapbase==0)" %}
 8239   ins_encode %{
 8240     __ movw($mem$$Address, r12);
 8241   %}
 8242   ins_pipe(ialu_mem_reg);
 8243 %}
 8244 
 8245 instruct storeImmI16(memory mem, immI16 src)
 8246 %{
        // Stores a 16-bit constant (immI16) for a StoreC as an immediate movw.
        // The rule is only available when the UseStoreImmI16 flag is set.
 8247   predicate(UseStoreImmI16);
 8248   match(Set mem (StoreC mem src));
 8249 
 8250   ins_cost(150);
 8251   format %{ "movw    $mem, $src\t# short/char" %}
 8252   ins_encode %{
 8253     __ movw($mem$$Address, $src$$constant);
 8254   %}
 8255   ins_pipe(ialu_mem_imm);
 8256 %}
 8257 
 8258 // Store Byte Immediate
 8259 instruct storeImmB0(memory mem, immI_0 zero)
 8260 %{
        // Byte zero store that writes r12 with movb instead of an immediate.
        // Enabled only with UseCompressedOops and a null
        // CompressedOops::base(); per the format text r12 is zero then.
        // The format tag reads "byte" to match the StoreB being encoded.
 8261   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8262   match(Set mem (StoreB mem zero));
 8263 
 8264   ins_cost(125); // XXX
 8265   format %{ "movb    $mem, R12\t# byte (R12_heapbase==0)" %}
 8266   ins_encode %{
 8267     __ movb($mem$$Address, r12);
 8268   %}
 8269   ins_pipe(ialu_mem_reg);
 8270 %}
 8271 
 8272 instruct storeImmB(memory mem, immI8 src)
 8273 %{
        // Stores an 8-bit constant (immI8) for a StoreB as an immediate movb.
 8274   match(Set mem (StoreB mem src));
 8275 
 8276   ins_cost(150); // XXX
 8277   format %{ "movb    $mem, $src\t# byte" %}
 8278   ins_encode %{
 8279     __ movb($mem$$Address, $src$$constant);
 8280   %}
 8281   ins_pipe(ialu_mem_imm);
 8282 %}
 8283 
 8284 // Store Float
 8285 instruct storeF(memory mem, regF src)
 8286 %{
        // Matches StoreF and writes the XMM register $src to $mem through the
        // movflt macro (printed as movss).
 8287   match(Set mem (StoreF mem src));
 8288 
 8289   ins_cost(95); // XXX
 8290   format %{ "movss   $mem, $src\t# float" %}
 8291   ins_encode %{
 8292     __ movflt($mem$$Address, $src$$XMMRegister);
 8293   %}
 8294   ins_pipe(pipe_slow); // XXX
 8295 %}
 8296 
 8297 // Store immediate Float value (it is faster than store from XMM register)
 8298 instruct storeF0(memory mem, immF0 zero)
 8299 %{
        // Float 0.0 store that writes r12 with an integer movl. Enabled only
        // with UseCompressedOops and a null CompressedOops::base(); per the
        // format text r12 is zero then. At cost 25 it is cheaper than
        // storeF_imm (50) and storeF (95).
 8300   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8301   match(Set mem (StoreF mem zero));
 8302 
 8303   ins_cost(25); // XXX
 8304   format %{ "movl    $mem, R12\t# float 0. (R12_heapbase==0)" %}
 8305   ins_encode %{
 8306     __ movl($mem$$Address, r12);
 8307   %}
 8308   ins_pipe(ialu_mem_reg);
 8309 %}
 8310 
 8311 instruct storeF_imm(memory mem, immF src)
 8312 %{
        // Stores a float constant (immF) with an integer movl: the constant is
        // passed through jint_cast and written as a 32-bit immediate, so no XMM
        // register is involved.
 8313   match(Set mem (StoreF mem src));
 8314
 8315   ins_cost(50);
 8316   format %{ "movl    $mem, $src\t# float" %}
 8317   ins_encode %{
 8318     __ movl($mem$$Address, jint_cast($src$$constant));
 8319   %}
 8320   ins_pipe(ialu_mem_imm);
 8321 %}
 8322 
 8323 // Store Double
 8324 instruct storeD(memory mem, regD src)
 8325 %{
        // Matches a StoreD from a double register operand (regD) and emits
        // movdbl from the XMM register to the memory operand.
 8326   match(Set mem (StoreD mem src));
 8327
 8328   ins_cost(95); // XXX
 8329   format %{ "movsd   $mem, $src\t# double" %}
 8330   ins_encode %{
 8331     __ movdbl($mem$$Address, $src$$XMMRegister);
 8332   %}
 8333   ins_pipe(pipe_slow); // XXX
 8334 %}
 8335 
 8336 // Store immediate double 0.0 (it is faster than store from XMM register)
 8337 instruct storeD0_imm(memory mem, immD0 src)
 8338 %{
        // Stores double zero (immD0) using movq with the constant as an
        // immediate. The predicate is the logical negation of the one on
        // storeD0, so this rule covers the configurations where the r12-based
        // form is not selectable.
 8339   predicate(!UseCompressedOops || (CompressedOops::base() != nullptr));
 8340   match(Set mem (StoreD mem src));
 8341
 8342   ins_cost(50);
 8343   format %{ "movq    $mem, $src\t# double 0." %}
 8344   ins_encode %{
 8345     __ movq($mem$$Address, $src$$constant);
 8346   %}
 8347   ins_pipe(ialu_mem_imm);
 8348 %}
 8349 
 8350 instruct storeD0(memory mem, immD0 zero)
 8351 %{
        // Stores double zero (immD0) as a 64-bit store of r12. Only selectable
        // when compressed oops are in use with a null heap base; the format
        // string records the R12_heapbase==0 assumption.
 8352   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8353   match(Set mem (StoreD mem zero));
 8354
 8355   ins_cost(25); // XXX
 8356   format %{ "movq    $mem, R12\t# double 0. (R12_heapbase==0)" %}
 8357   ins_encode %{
 8358     __ movq($mem$$Address, r12);
 8359   %}
 8360   ins_pipe(ialu_mem_reg);
 8361 %}
 8362 
 8363 instruct storeSSI(stackSlotI dst, rRegI src)
 8364 %{
        // Copies an int register into an int stack slot operand with a 32-bit
        // movl. The match rule is a plain Set with no ideal store node.
 8365   match(Set dst src);
 8366
 8367   ins_cost(100);
 8368   format %{ "movl    $dst, $src\t# int stk" %}
 8369   ins_encode %{
 8370     __ movl($dst$$Address, $src$$Register);
 8371   %}
 8372   ins_pipe( ialu_mem_reg );
 8373 %}
 8374 
 8375 instruct storeSSL(stackSlotL dst, rRegL src)
 8376 %{
        // Copies a long register into a long stack slot operand with a 64-bit
        // movq. The match rule is a plain Set with no ideal store node.
 8377   match(Set dst src);
 8378
 8379   ins_cost(100);
 8380   format %{ "movq    $dst, $src\t# long stk" %}
 8381   ins_encode %{
 8382     __ movq($dst$$Address, $src$$Register);
 8383   %}
 8384   ins_pipe(ialu_mem_reg);
 8385 %}
 8386 
 8387 instruct storeSSP(stackSlotP dst, rRegP src)
 8388 %{
        // Copies a pointer register into a pointer stack slot operand with a
        // 64-bit movq. The match rule is a plain Set with no ideal store node.
 8389   match(Set dst src);
 8390
 8391   ins_cost(100);
 8392   format %{ "movq    $dst, $src\t# ptr stk" %}
 8393   ins_encode %{
 8394     __ movq($dst$$Address, $src$$Register);
 8395   %}
 8396   ins_pipe(ialu_mem_reg);
 8397 %}
 8398 
 8399 instruct storeSSF(stackSlotF dst, regF src)
 8400 %{
        // Copies a float XMM register into a float stack slot operand. Unlike
        // the integer stack-slot stores, the address is built explicitly as
        // rsp plus the slot displacement.
 8401   match(Set dst src);
 8402
 8403   ins_cost(95); // XXX
 8404   format %{ "movss   $dst, $src\t# float stk" %}
 8405   ins_encode %{
 8406     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
 8407   %}
 8408   ins_pipe(pipe_slow); // XXX
 8409 %}
 8410 
 8411 instruct storeSSD(stackSlotD dst, regD src)
 8412 %{
        // Copies a double XMM register into a double stack slot operand. The
        // address is built explicitly as rsp plus the slot displacement.
 8413   match(Set dst src);
 8414
 8415   ins_cost(95); // XXX
 8416   format %{ "movsd   $dst, $src\t# double stk" %}
 8417   ins_encode %{
 8418     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
 8419   %}
 8420   ins_pipe(pipe_slow); // XXX
 8421 %}
 8422 
 8423 instruct cacheWB(indirect addr)
 8424 %{
        // Matches a CacheWB node and emits cache_wb for the line addressed by
        // the base register. Only selectable when
        // VM_Version::supports_data_cache_line_flush() is true.
 8425   predicate(VM_Version::supports_data_cache_line_flush());
 8426   match(CacheWB addr);
 8427
 8428   ins_cost(100);
 8429   format %{"cache wb $addr" %}
 8430   ins_encode %{
          // The operand must be base-only: no index register and a zero
          // displacement, so the address is rebuilt from the base alone.
 8431     assert($addr->index_position() < 0, "should be");
 8432     assert($addr$$disp == 0, "should be");
 8433     __ cache_wb(Address($addr$$base$$Register, 0));
 8434   %}
 8435   ins_pipe(pipe_slow); // XXX
 8436 %}
 8437 
 8438 instruct cacheWBPreSync()
 8439 %{
        // Matches a CacheWBPreSync node and emits cache_wbsync with its
        // argument set to true. Only selectable when
        // VM_Version::supports_data_cache_line_flush() is true.
 8440   predicate(VM_Version::supports_data_cache_line_flush());
 8441   match(CacheWBPreSync);
 8442
 8443   ins_cost(100);
 8444   format %{"cache wb presync" %}
 8445   ins_encode %{
 8446     __ cache_wbsync(true);
 8447   %}
 8448   ins_pipe(pipe_slow); // XXX
 8449 %}
 8450 
 8451 instruct cacheWBPostSync()
 8452 %{
        // Matches a CacheWBPostSync node and emits cache_wbsync with its
        // argument set to false. Only selectable when
        // VM_Version::supports_data_cache_line_flush() is true.
 8453   predicate(VM_Version::supports_data_cache_line_flush());
 8454   match(CacheWBPostSync);
 8455
 8456   ins_cost(100);
 8457   format %{"cache wb postsync" %}
 8458   ins_encode %{
 8459     __ cache_wbsync(false);
 8460   %}
 8461   ins_pipe(pipe_slow); // XXX
 8462 %}
 8463 
 8464 //----------BSWAP Instructions-------------------------------------------------
 8465 instruct bytes_reverse_int(rRegI dst) %{
        // Matches ReverseBytesI in place (dst is both input and result) and
        // emits a single bswapl.
 8466   match(Set dst (ReverseBytesI dst));
 8467
 8468   format %{ "bswapl  $dst" %}
 8469   ins_encode %{
 8470     __ bswapl($dst$$Register);
 8471   %}
 8472   ins_pipe( ialu_reg );
 8473 %}
 8474 
 8475 instruct bytes_reverse_long(rRegL dst) %{
        // Matches ReverseBytesL in place (dst is both input and result) and
        // emits a single bswapq.
 8476   match(Set dst (ReverseBytesL dst));
 8477
 8478   format %{ "bswapq  $dst" %}
 8479   ins_encode %{
 8480     __ bswapq($dst$$Register);
 8481   %}
 8482   ins_pipe( ialu_reg);
 8483 %}
 8484 
 8485 instruct bytes_reverse_unsigned_short(rRegI dst, rFlagsReg cr) %{
        // In-place ReverseBytesUS: swap all four bytes, then shift the swapped
        // pair down from the upper half with a logical (zero-filling) shift.
        // The flags register is declared killed.
 8486   match(Set dst (ReverseBytesUS dst));
 8487   effect(KILL cr);
 8488
 8489   format %{ "bswapl  $dst\n\t"
 8490             "shrl    $dst,16\n\t" %}
 8491   ins_encode %{
          const Register value = $dst$$Register;
 8492     __ bswapl(value);
 8493     __ shrl(value, 16);
 8494   %}
 8495   ins_pipe( ialu_reg );
 8496 %}
 8497 
 8498 instruct bytes_reverse_short(rRegI dst, rFlagsReg cr) %{
        // In-place ReverseBytesS: swap all four bytes, then shift the swapped
        // pair down from the upper half with an arithmetic (sign-propagating)
        // shift. The flags register is declared killed.
 8499   match(Set dst (ReverseBytesS dst));
 8500   effect(KILL cr);
 8501
        // The mnemonic is printed as "sarl" to match the 32-bit instruction that
        // is emitted below and the "shrl" printed by the unsigned variant.
 8502   format %{ "bswapl  $dst\n\t"
 8503             "sarl    $dst,16\n\t" %}
 8504   ins_encode %{
 8505     __ bswapl($dst$$Register);
 8506     __ sarl($dst$$Register, 16);
 8507   %}
 8508   ins_pipe( ialu_reg );
 8509 %}
 8510 
 8511 //---------- Zeros Count Instructions ------------------------------------------
 8512 
 8513 instruct countLeadingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
        // Matches CountLeadingZerosI on a register input and emits lzcntl.
        // Selected only when UseCountLeadingZerosInstruction is set; the flags
        // register is declared killed.
 8514   predicate(UseCountLeadingZerosInstruction);
 8515   match(Set dst (CountLeadingZerosI src));
 8516   effect(KILL cr);
 8517
 8518   format %{ "lzcntl  $dst, $src\t# count leading zeros (int)" %}
 8519   ins_encode %{
 8520     __ lzcntl($dst$$Register, $src$$Register);
 8521   %}
 8522   ins_pipe(ialu_reg);
 8523 %}
 8524 
 8525 instruct countLeadingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
        // Memory-operand form: folds the LoadI into lzcntl so the value is
        // counted directly from memory. Selected only when
        // UseCountLeadingZerosInstruction is set.
 8526   predicate(UseCountLeadingZerosInstruction);
 8527   match(Set dst (CountLeadingZerosI (LoadI src)));
 8528   effect(KILL cr);
 8529   ins_cost(175);
 8530   format %{ "lzcntl  $dst, $src\t# count leading zeros (int)" %}
 8531   ins_encode %{
 8532     __ lzcntl($dst$$Register, $src$$Address);
 8533   %}
 8534   ins_pipe(ialu_reg_mem);
 8535 %}
 8536 
 8537 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, rFlagsReg cr) %{
        // Fallback for CountLeadingZerosI when UseCountLeadingZerosInstruction
        // is off: derives the count from the bit index produced by bsrl.
 8538   predicate(!UseCountLeadingZerosInstruction);
 8539   match(Set dst (CountLeadingZerosI src));
 8540   effect(KILL cr);
 8541
 8542   format %{ "bsrl    $dst, $src\t# count leading zeros (int)\n\t"
 8543             "jnz     skip\n\t"
 8544             "movl    $dst, -1\n"
 8545       "skip:\n\t"
 8546             "negl    $dst\n\t"
 8547             "addl    $dst, 31" %}
 8548   ins_encode %{
 8549     const Register result = $dst$$Register;
 8550     const Register input  = $src$$Register;
 8551     Label have_index;
 8552     __ bsrl(result, input);
          // When the branch is not taken the index is forced to -1, so the
          // arithmetic below yields -(-1) + 31 = 32.
 8553     __ jccb(Assembler::notZero, have_index);
 8554     __ movl(result, -1);
 8555     __ bind(have_index);
          // count = (BitsPerInt - 1) - index
 8556     __ negl(result);
 8557     __ addl(result, BitsPerInt - 1);
 8558   %}
 8559   ins_pipe(ialu_reg);
 8560 %}
 8561 
 8562 instruct countLeadingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
        // Matches CountLeadingZerosL on a long register input and emits lzcntq;
        // the result operand is an int register. Selected only when
        // UseCountLeadingZerosInstruction is set.
 8563   predicate(UseCountLeadingZerosInstruction);
 8564   match(Set dst (CountLeadingZerosL src));
 8565   effect(KILL cr);
 8566
 8567   format %{ "lzcntq  $dst, $src\t# count leading zeros (long)" %}
 8568   ins_encode %{
 8569     __ lzcntq($dst$$Register, $src$$Register);
 8570   %}
 8571   ins_pipe(ialu_reg);
 8572 %}
 8573 
 8574 instruct countLeadingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
        // Memory-operand form: folds the LoadL into lzcntq so the value is
        // counted directly from memory. Selected only when
        // UseCountLeadingZerosInstruction is set.
 8575   predicate(UseCountLeadingZerosInstruction);
 8576   match(Set dst (CountLeadingZerosL (LoadL src)));
 8577   effect(KILL cr);
 8578   ins_cost(175);
 8579   format %{ "lzcntq  $dst, $src\t# count leading zeros (long)" %}
 8580   ins_encode %{
 8581     __ lzcntq($dst$$Register, $src$$Address);
 8582   %}
 8583   ins_pipe(ialu_reg_mem);
 8584 %}
 8585 
 8586 instruct countLeadingZerosL_bsr(rRegI dst, rRegL src, rFlagsReg cr) %{
        // Fallback for CountLeadingZerosL when UseCountLeadingZerosInstruction
        // is off: derives the count from the bit index produced by bsrq.
 8587   predicate(!UseCountLeadingZerosInstruction);
 8588   match(Set dst (CountLeadingZerosL src));
 8589   effect(KILL cr);
 8590
 8591   format %{ "bsrq    $dst, $src\t# count leading zeros (long)\n\t"
 8592             "jnz     skip\n\t"
 8593             "movl    $dst, -1\n"
 8594       "skip:\n\t"
 8595             "negl    $dst\n\t"
 8596             "addl    $dst, 63" %}
 8597   ins_encode %{
 8598     const Register result = $dst$$Register;
 8599     const Register input  = $src$$Register;
 8600     Label have_index;
 8601     __ bsrq(result, input);
          // When the branch is not taken the index is forced to -1, so the
          // arithmetic below yields -(-1) + 63 = 64.
 8602     __ jccb(Assembler::notZero, have_index);
 8603     __ movl(result, -1);
 8604     __ bind(have_index);
          // count = (BitsPerLong - 1) - index
 8605     __ negl(result);
 8606     __ addl(result, BitsPerLong - 1);
 8607   %}
 8608   ins_pipe(ialu_reg);
 8609 %}
 8610 
 8611 instruct countTrailingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
        // Matches CountTrailingZerosI on a register input and emits tzcntl.
        // Selected only when UseCountTrailingZerosInstruction is set; the flags
        // register is declared killed.
 8612   predicate(UseCountTrailingZerosInstruction);
 8613   match(Set dst (CountTrailingZerosI src));
 8614   effect(KILL cr);
 8615
 8616   format %{ "tzcntl    $dst, $src\t# count trailing zeros (int)" %}
 8617   ins_encode %{
 8618     __ tzcntl($dst$$Register, $src$$Register);
 8619   %}
 8620   ins_pipe(ialu_reg);
 8621 %}
 8622 
 8623 instruct countTrailingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
        // Memory-operand form: folds the LoadI into tzcntl so the value is
        // counted directly from memory. Selected only when
        // UseCountTrailingZerosInstruction is set.
 8624   predicate(UseCountTrailingZerosInstruction);
 8625   match(Set dst (CountTrailingZerosI (LoadI src)));
 8626   effect(KILL cr);
 8627   ins_cost(175);
 8628   format %{ "tzcntl    $dst, $src\t# count trailing zeros (int)" %}
 8629   ins_encode %{
 8630     __ tzcntl($dst$$Register, $src$$Address);
 8631   %}
 8632   ins_pipe(ialu_reg_mem);
 8633 %}
 8634 
 8635 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, rFlagsReg cr) %{
        // Fallback for CountTrailingZerosI when
        // UseCountTrailingZerosInstruction is off: uses the bit index from bsfl
        // and substitutes BitsPerInt when the branch below is not taken.
 8636   predicate(!UseCountTrailingZerosInstruction);
 8637   match(Set dst (CountTrailingZerosI src));
 8638   effect(KILL cr);
 8639
 8640   format %{ "bsfl    $dst, $src\t# count trailing zeros (int)\n\t"
 8641             "jnz     done\n\t"
 8642             "movl    $dst, 32\n"
 8643       "done:" %}
 8644   ins_encode %{
 8645     const Register result = $dst$$Register;
 8646     Label have_index;
 8647     __ bsfl(result, $src$$Register);
 8648     __ jccb(Assembler::notZero, have_index);
          // Fall-through path: report the full width of an int.
 8649     __ movl(result, BitsPerInt);
 8650     __ bind(have_index);
 8651   %}
 8652   ins_pipe(ialu_reg);
 8653 %}
 8654 
 8655 instruct countTrailingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
        // Matches CountTrailingZerosL on a long register input and emits
        // tzcntq; the result operand is an int register. Selected only when
        // UseCountTrailingZerosInstruction is set.
 8656   predicate(UseCountTrailingZerosInstruction);
 8657   match(Set dst (CountTrailingZerosL src));
 8658   effect(KILL cr);
 8659
 8660   format %{ "tzcntq    $dst, $src\t# count trailing zeros (long)" %}
 8661   ins_encode %{
 8662     __ tzcntq($dst$$Register, $src$$Register);
 8663   %}
 8664   ins_pipe(ialu_reg);
 8665 %}
 8666 
 8667 instruct countTrailingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
        // Memory-operand form: folds the LoadL into tzcntq so the value is
        // counted directly from memory. Selected only when
        // UseCountTrailingZerosInstruction is set.
 8668   predicate(UseCountTrailingZerosInstruction);
 8669   match(Set dst (CountTrailingZerosL (LoadL src)));
 8670   effect(KILL cr);
 8671   ins_cost(175);
 8672   format %{ "tzcntq    $dst, $src\t# count trailing zeros (long)" %}
 8673   ins_encode %{
 8674     __ tzcntq($dst$$Register, $src$$Address);
 8675   %}
 8676   ins_pipe(ialu_reg_mem);
 8677 %}
 8678 
 8679 instruct countTrailingZerosL_bsf(rRegI dst, rRegL src, rFlagsReg cr) %{
        // Fallback for CountTrailingZerosL when
        // UseCountTrailingZerosInstruction is off: uses the bit index from bsfq
        // and substitutes BitsPerLong when the branch below is not taken.
 8680   predicate(!UseCountTrailingZerosInstruction);
 8681   match(Set dst (CountTrailingZerosL src));
 8682   effect(KILL cr);
 8683
 8684   format %{ "bsfq    $dst, $src\t# count trailing zeros (long)\n\t"
 8685             "jnz     done\n\t"
 8686             "movl    $dst, 64\n"
 8687       "done:" %}
 8688   ins_encode %{
 8689     const Register result = $dst$$Register;
 8690     Label have_index;
 8691     __ bsfq(result, $src$$Register);
 8692     __ jccb(Assembler::notZero, have_index);
          // Fall-through path: report the full width of a long.
 8693     __ movl(result, BitsPerLong);
 8694     __ bind(have_index);
 8695   %}
 8696   ins_pipe(ialu_reg);
 8697 %}
 8698 
 8699 //--------------- Reverse Operation Instructions ----------------
 8700 instruct bytes_reversebit_int(rRegI dst, rRegI src, rRegI rtmp, rFlagsReg cr) %{
        // Matches ReverseI when GFNI is not supported. Delegates to the
        // reverseI macro with no XMM temporaries (xnoreg) and one general
        // purpose temporary; dst is declared TEMP and the flags are killed.
 8701   predicate(!VM_Version::supports_gfni());
 8702   match(Set dst (ReverseI src));
 8703   effect(TEMP dst, TEMP rtmp, KILL cr);
 8704   format %{ "reverse_int $dst $src\t! using $rtmp as TEMP" %}
 8705   ins_encode %{
 8706     __ reverseI($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp$$Register);
 8707   %}
 8708   ins_pipe( ialu_reg );
 8709 %}
 8710 
 8711 instruct bytes_reversebit_int_gfni(rRegI dst, rRegI src, vlRegF xtmp1, vlRegF xtmp2, rRegL rtmp, rFlagsReg cr) %{
        // Matches ReverseI when GFNI is supported. Delegates to the reverseI
        // macro, passing two XMM temporaries and one general purpose temporary;
        // dst is declared TEMP and the flags are killed.
 8712   predicate(VM_Version::supports_gfni());
 8713   match(Set dst (ReverseI src));
 8714   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
 8715   format %{ "reverse_int $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
 8716   ins_encode %{
 8717     __ reverseI($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register);
 8718   %}
 8719   ins_pipe( ialu_reg );
 8720 %}
 8721 
 8722 instruct bytes_reversebit_long(rRegL dst, rRegL src, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
        // Matches ReverseL when GFNI is not supported. Delegates to the
        // reverseL macro with no XMM temporaries (xnoreg) and two general
        // purpose temporaries; dst is declared TEMP and the flags are killed.
 8723   predicate(!VM_Version::supports_gfni());
 8724   match(Set dst (ReverseL src));
 8725   effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, KILL cr);
 8726   format %{ "reverse_long $dst $src\t! using $rtmp1 and $rtmp2 as TEMP" %}
 8727   ins_encode %{
 8728     __ reverseL($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp1$$Register, $rtmp2$$Register);
 8729   %}
 8730   ins_pipe( ialu_reg );
 8731 %}
 8732 
 8733 instruct bytes_reversebit_long_gfni(rRegL dst, rRegL src, vlRegD xtmp1, vlRegD xtmp2, rRegL rtmp, rFlagsReg cr) %{
        // Matches ReverseL when GFNI is supported. Delegates to the reverseL
        // macro, passing two XMM temporaries and one general purpose temporary;
        // the second general purpose temporary slot is passed as noreg.
 8734   predicate(VM_Version::supports_gfni());
 8735   match(Set dst (ReverseL src));
 8736   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
 8737   format %{ "reverse_long $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
 8738   ins_encode %{
 8739     __ reverseL($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register, noreg);
 8740   %}
 8741   ins_pipe( ialu_reg );
 8742 %}
 8743 
 8744 //---------- Population Count Instructions -------------------------------------
 8745 
 8746 instruct popCountI(rRegI dst, rRegI src, rFlagsReg cr) %{
        // Matches PopCountI on a register input and emits popcntl. Selected
        // only when UsePopCountInstruction is set; the flags are killed.
 8747   predicate(UsePopCountInstruction);
 8748   match(Set dst (PopCountI src));
 8749   effect(KILL cr);
 8750
 8751   format %{ "popcnt  $dst, $src" %}
 8752   ins_encode %{
 8753     __ popcntl($dst$$Register, $src$$Register);
 8754   %}
 8755   ins_pipe(ialu_reg);
 8756 %}
 8757 
 8758 instruct popCountI_mem(rRegI dst, memory mem, rFlagsReg cr) %{
        // Memory-operand form: folds the LoadI into popcntl. Selected only when
        // UsePopCountInstruction is set; the flags are killed.
        // NOTE(review): uses ins_pipe(ialu_reg) and no explicit ins_cost, while
        // the lzcnt/tzcnt memory forms use ialu_reg_mem and cost 175 - confirm
        // whether the difference is intended.
 8759   predicate(UsePopCountInstruction);
 8760   match(Set dst (PopCountI (LoadI mem)));
 8761   effect(KILL cr);
 8762
 8763   format %{ "popcnt  $dst, $mem" %}
 8764   ins_encode %{
 8765     __ popcntl($dst$$Register, $mem$$Address);
 8766   %}
 8767   ins_pipe(ialu_reg);
 8768 %}
 8769 
 8770 // Note: Long.bitCount(long) returns an int.
 8771 instruct popCountL(rRegI dst, rRegL src, rFlagsReg cr) %{
        // Matches PopCountL on a long register input and emits popcntq; the
        // result operand is an int register. Selected only when
        // UsePopCountInstruction is set; the flags are killed.
 8772   predicate(UsePopCountInstruction);
 8773   match(Set dst (PopCountL src));
 8774   effect(KILL cr);
 8775
 8776   format %{ "popcnt  $dst, $src" %}
 8777   ins_encode %{
 8778     __ popcntq($dst$$Register, $src$$Register);
 8779   %}
 8780   ins_pipe(ialu_reg);
 8781 %}
 8782 
 8783 // Note: Long.bitCount(long) returns an int.
 8784 instruct popCountL_mem(rRegI dst, memory mem, rFlagsReg cr) %{
        // Memory-operand form: folds the LoadL into popcntq; the result operand
        // is an int register. Selected only when UsePopCountInstruction is set.
        // NOTE(review): uses ins_pipe(ialu_reg) and no explicit ins_cost, while
        // the lzcnt/tzcnt memory forms use ialu_reg_mem and cost 175 - confirm
        // whether the difference is intended.
 8785   predicate(UsePopCountInstruction);
 8786   match(Set dst (PopCountL (LoadL mem)));
 8787   effect(KILL cr);
 8788
 8789   format %{ "popcnt  $dst, $mem" %}
 8790   ins_encode %{
 8791     __ popcntq($dst$$Register, $mem$$Address);
 8792   %}
 8793   ins_pipe(ialu_reg);
 8794 %}
 8795 
 8796 
 8797 //----------MemBar Instructions-----------------------------------------------
 8798 // Memory barrier flavors
 8799 
 8800 instruct membar_acquire()
 8801 %{
        // Matches both MemBarAcquire and LoadFence. Emits nothing: the encoding
        // is empty, the size is 0 and the cost is 0.
 8802   match(MemBarAcquire);
 8803   match(LoadFence);
 8804   ins_cost(0);
 8805
 8806   size(0);
 8807   format %{ "MEMBAR-acquire ! (empty encoding)" %}
 8808   ins_encode();
 8809   ins_pipe(empty);
 8810 %}
 8811 
 8812 instruct membar_acquire_lock()
 8813 %{
        // Matches MemBarAcquireLock. Emits nothing (empty encoding, size 0);
        // the format string gives the reason: a CMPXCHG in FastLock precedes it.
 8814   match(MemBarAcquireLock);
 8815   ins_cost(0);
 8816
 8817   size(0);
 8818   format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
 8819   ins_encode();
 8820   ins_pipe(empty);
 8821 %}
 8822 
 8823 instruct membar_release()
 8824 %{
        // Matches both MemBarRelease and StoreFence. Emits nothing: the
        // encoding is empty, the size is 0 and the cost is 0.
 8825   match(MemBarRelease);
 8826   match(StoreFence);
 8827   ins_cost(0);
 8828
 8829   size(0);
 8830   format %{ "MEMBAR-release ! (empty encoding)" %}
 8831   ins_encode();
 8832   ins_pipe(empty);
 8833 %}
 8834 
 8835 instruct membar_release_lock()
 8836 %{
        // Matches MemBarReleaseLock. Emits nothing (empty encoding, size 0);
        // the format string gives the reason: a FastUnlock follows it.
 8837   match(MemBarReleaseLock);
 8838   ins_cost(0);
 8839
 8840   size(0);
 8841   format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
 8842   ins_encode();
 8843   ins_pipe(empty);
 8844 %}
 8845 
 8846 instruct membar_storeload(rFlagsReg cr) %{
        // Matches MemBarStoreLoad and emits membar(Assembler::StoreLoad), which
        // the format string prints as a locked add to the top of the stack.
        // The flags register is declared killed.
 8847   match(MemBarStoreLoad);
 8848   effect(KILL cr);
 8849   ins_cost(400);
 8850
 8851   format %{
 8852     $$template
 8853     $$emit$$"lock addl [rsp + #0], 0\t! membar_storeload"
 8854   %}
 8855   ins_encode %{
 8856     __ membar(Assembler::StoreLoad);
 8857   %}
 8858   ins_pipe(pipe_slow);
 8859 %}
 8860 
 8861 instruct membar_volatile(rFlagsReg cr) %{
        // Matches MemBarVolatile and emits membar(Assembler::StoreLoad), which
        // the format string prints as a locked add to the top of the stack.
        // The flags register is declared killed. Cost is 400; the zero-cost
        // unnecessary_membar_volatile rule matches the same node under a
        // predicate.
 8862   match(MemBarVolatile);
 8863   effect(KILL cr);
 8864   ins_cost(400);
 8865
 8866   format %{
 8867     $$template
 8868     $$emit$$"lock addl [rsp + #0], 0\t! membar_volatile"
 8869   %}
 8870   ins_encode %{
 8871     __ membar(Assembler::StoreLoad);
 8872   %}
 8873   ins_pipe(pipe_slow);
 8874 %}
 8875 
 8876 instruct unnecessary_membar_volatile()
 8877 %{
        // Zero-cost, zero-size alternative for MemBarVolatile with an empty
        // encoding. Selectable only when Matcher::post_store_load_barrier(n)
        // returns true for the node being matched.
 8878   match(MemBarVolatile);
 8879   predicate(Matcher::post_store_load_barrier(n));
 8880   ins_cost(0);
 8881
 8882   size(0);
 8883   format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
 8884   ins_encode();
 8885   ins_pipe(empty);
 8886 %}
 8887 
 8888 instruct membar_full(rFlagsReg cr) %{
        // Matches MemBarFull and emits membar(Assembler::StoreLoad), which the
        // format string prints as a locked add to the top of the stack. The
        // flags register is declared killed.
 8889   match(MemBarFull);
 8890   effect(KILL cr);
 8891   ins_cost(400);
 8892
 8893   format %{
 8894     $$template
 8895     $$emit$$"lock addl [rsp + #0], 0\t! membar_full"
 8896   %}
 8897   ins_encode %{
 8898     __ membar(Assembler::StoreLoad);
 8899   %}
 8900   ins_pipe(pipe_slow);
 8901 %}
 8902 
 8903 instruct membar_storestore() %{
        // Matches both MemBarStoreStore and StoreStoreFence. Emits nothing: the
        // encoding is empty, the size is 0 and the cost is 0.
 8904   match(MemBarStoreStore);
 8905   match(StoreStoreFence);
 8906   ins_cost(0);
 8907
 8908   size(0);
 8909   format %{ "MEMBAR-storestore (empty encoding)" %}
 8910   ins_encode( );
 8911   ins_pipe(empty);
 8912 %}
 8913 
 8914 //----------Move Instructions--------------------------------------------------
 8915 
 8916 instruct castX2P(rRegP dst, rRegL src)
 8917 %{
        // Reinterprets a long as a pointer (CastX2P). A register move is only
        // emitted when source and destination were assigned different
        // registers; otherwise the rule produces no code.
 8918   match(Set dst (CastX2P src));
 8919
 8920   format %{ "movq    $dst, $src\t# long->ptr" %}
 8921   ins_encode %{
 8922     const bool needs_move = ($dst$$reg != $src$$reg);
          if (needs_move) {
 8923       __ movptr($dst$$Register, $src$$Register);
 8924     }
 8925   %}
 8926   ins_pipe(ialu_reg_reg); // XXX
 8927 %}
 8928 
 8929 instruct castP2X(rRegL dst, rRegP src)
 8930 %{
        // Reinterprets a pointer as a long (CastP2X). A register move is only
        // emitted when source and destination were assigned different
        // registers; otherwise the rule produces no code.
 8931   match(Set dst (CastP2X src));
 8932
 8933   format %{ "movq    $dst, $src\t# ptr -> long" %}
 8934   ins_encode %{
 8935     const bool needs_move = ($dst$$reg != $src$$reg);
          if (needs_move) {
 8936       __ movptr($dst$$Register, $src$$Register);
 8937     }
 8938   %}
 8939   ins_pipe(ialu_reg_reg); // XXX
 8940 %}
 8941 
 8942 // Convert oop into int for vectors alignment masking
 8943 instruct convP2I(rRegI dst, rRegP src)
 8944 %{
        // Collapses ConvL2I(CastP2X ptr) into one 32-bit register move (movl)
        // from the pointer register to the int result.
 8945   match(Set dst (ConvL2I (CastP2X src)));
 8946
 8947   format %{ "movl    $dst, $src\t# ptr -> int" %}
 8948   ins_encode %{
 8949     __ movl($dst$$Register, $src$$Register);
 8950   %}
 8951   ins_pipe(ialu_reg_reg); // XXX
 8952 %}
 8953 
 8954 // Convert compressed oop into int for vectors alignment masking
 8955 // in case of 32bit oops (heap < 4Gb).
 8956 instruct convN2I(rRegI dst, rRegN src)
 8957 %{
        // Collapses ConvL2I(CastP2X(DecodeN narrow)) into one 32-bit register
        // move of the narrow value itself, skipping the decode. Only selectable
        // when the compressed-oop shift is 0.
 8958   predicate(CompressedOops::shift() == 0);
 8959   match(Set dst (ConvL2I (CastP2X (DecodeN src))));
 8960
 8961   format %{ "movl    $dst, $src\t# compressed ptr -> int" %}
 8962   ins_encode %{
 8963     __ movl($dst$$Register, $src$$Register);
 8964   %}
 8965   ins_pipe(ialu_reg_reg); // XXX
 8966 %}
 8967 
 8968 // Convert oop pointer into compressed form
 8969 instruct encodeHeapOop(rRegN dst, rRegP src, rFlagsReg cr) %{
        // EncodeP for values whose type is not known to be NotNull. The oop is
        // first copied into the destination (if it lives elsewhere) and then
        // compressed in place; the flags register is declared killed.
 8970   predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
 8971   match(Set dst (EncodeP src));
 8972   effect(KILL cr);
 8973   format %{ "encode_heap_oop $dst,$src" %}
 8974   ins_encode %{
 8975     const Register oop_reg    = $src$$Register;
 8976     const Register narrow_reg = $dst$$Register;
 8977     if (oop_reg != narrow_reg) {
 8978       __ movq(narrow_reg, oop_reg);
 8979     }
 8980     __ encode_heap_oop(narrow_reg);
 8981   %}
 8982   ins_pipe(ialu_reg_long);
 8983 %}
 8984 
 8985 instruct encodeHeapOop_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
        // EncodeP for values whose type is NotNull: delegates to the
        // two-register encode_heap_oop_not_null macro. The flags register is
        // declared killed.
 8986   predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
 8987   match(Set dst (EncodeP src));
 8988   effect(KILL cr);
 8989   format %{ "encode_heap_oop_not_null $dst,$src" %}
 8990   ins_encode %{
 8991     __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
 8992   %}
 8993   ins_pipe(ialu_reg_long);
 8994 %}
 8995 
 8996 instruct decodeHeapOop(rRegP dst, rRegN src, rFlagsReg cr) %{
        // DecodeN for values whose type is neither NotNull nor Constant. The
        // narrow value is first copied into the destination (if it lives
        // elsewhere) and then decoded in place; the flags are declared killed.
 8997   predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
 8998             n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
 8999   match(Set dst (DecodeN src));
 9000   effect(KILL cr);
 9001   format %{ "decode_heap_oop $dst,$src" %}
 9002   ins_encode %{
 9003     const Register narrow_reg = $src$$Register;
 9004     const Register oop_reg    = $dst$$Register;
 9005     if (narrow_reg != oop_reg) {
 9006       __ movq(oop_reg, narrow_reg);
 9007     }
 9008     __ decode_heap_oop(oop_reg);
 9009   %}
 9010   ins_pipe(ialu_reg_long);
 9011 %}
 9012 
 9013 instruct decodeHeapOop_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
        // DecodeN for values whose type is NotNull or Constant. Uses the
        // one-register macro form when source and destination coincide and the
        // two-register form otherwise; the flags are declared killed.
 9014   predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
 9015             n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
 9016   match(Set dst (DecodeN src));
 9017   effect(KILL cr);
 9018   format %{ "decode_heap_oop_not_null $dst,$src" %}
 9019   ins_encode %{
 9020     const Register narrow_reg = $src$$Register;
 9021     const Register oop_reg    = $dst$$Register;
 9022     if (narrow_reg == oop_reg) {
 9023       __ decode_heap_oop_not_null(oop_reg);
 9024     } else {
 9025       __ decode_heap_oop_not_null(oop_reg, narrow_reg);
 9026     }
 9027   %}
 9028   ins_pipe(ialu_reg_long);
 9029 %}
 9030 
 9031 instruct encodeKlass_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
        // Matches EncodePKlass and delegates to the
        // encode_and_move_klass_not_null macro. dst is declared TEMP and the
        // flags register is declared killed.
 9032   match(Set dst (EncodePKlass src));
 9033   effect(TEMP dst, KILL cr);
 9034   format %{ "encode_and_move_klass_not_null $dst,$src" %}
 9035   ins_encode %{
 9036     __ encode_and_move_klass_not_null($dst$$Register, $src$$Register);
 9037   %}
 9038   ins_pipe(ialu_reg_long);
 9039 %}
 9040 
 9041 instruct decodeKlass_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
        // Matches DecodeNKlass and delegates to the
        // decode_and_move_klass_not_null macro. dst is declared TEMP and the
        // flags register is declared killed.
 9042   match(Set dst (DecodeNKlass src));
 9043   effect(TEMP dst, KILL cr);
 9044   format %{ "decode_and_move_klass_not_null $dst,$src" %}
 9045   ins_encode %{
 9046     __ decode_and_move_klass_not_null($dst$$Register, $src$$Register);
 9047   %}
 9048   ins_pipe(ialu_reg_long);
 9049 %}
 9050 
 9051 //----------Conditional Move---------------------------------------------------
 9052 // Jump
 9053 // dummy instruction for generating temp registers
 9054 instruct jumpXtnd_offset(rRegL switch_val, immI2 shift, rRegI dest) %{
        // Table jump whose index is (switch_val << shift). The rule is disabled
        // by predicate(false). The encoding loads the table address (the
        // constant-table address of this node) into the temporary dest and
        // jumps indirectly through [dest + switch_val * scale].
 9055   match(Jump (LShiftL switch_val shift));
 9056   ins_cost(350);
 9057   predicate(false);
 9058   effect(TEMP dest);
 9059
 9060   format %{ "leaq    $dest, [$constantaddress]\n\t"
 9061             "jmp     [$dest + $switch_val << $shift]\n\t" %}
 9062   ins_encode %{
 9063     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
 9064     // to do that and the compiler is using that register as one it can allocate.
 9065     // So we build it all by hand.
 9066     // Address index(noreg, switch_reg, (Address::ScaleFactor)$shift$$constant);
 9067     // ArrayAddress dispatch(table, index);
 9068     Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant);
 9069     __ lea($dest$$Register, $constantaddress);
 9070     __ jmp(dispatch);
 9071   %}
 9072   ins_pipe(pipe_jmp);
 9073 %}
 9074 
 9075 instruct jumpXtnd_addr(rRegL switch_val, immI2 shift, immL32 offset, rRegI dest) %{
        // Table jump whose index is (switch_val << shift) + offset. The
        // encoding loads the table address into the temporary dest and jumps
        // indirectly through [dest + switch_val * scale + offset]; the offset
        // constant is narrowed to int for the displacement.
 9076   match(Jump (AddL (LShiftL switch_val shift) offset));
 9077   ins_cost(350);
 9078   effect(TEMP dest);
 9079
 9080   format %{ "leaq    $dest, [$constantaddress]\n\t"
 9081             "jmp     [$dest + $switch_val << $shift + $offset]\n\t" %}
 9082   ins_encode %{
 9083     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
 9084     // to do that and the compiler is using that register as one it can allocate.
 9085     // So we build it all by hand.
 9086     // Address index(noreg, switch_reg, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
 9087     // ArrayAddress dispatch(table, index);
 9088     Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
 9089     __ lea($dest$$Register, $constantaddress);
 9090     __ jmp(dispatch);
 9091   %}
 9092   ins_pipe(pipe_jmp);
 9093 %}
 9094 
 9095 instruct jumpXtnd(rRegL switch_val, rRegI dest) %{
        // Table jump on an unscaled index. The encoding loads the table address
        // into the temporary dest and jumps indirectly through
        // [dest + switch_val] (scale factor times_1).
 9096   match(Jump switch_val);
 9097   ins_cost(350);
 9098   effect(TEMP dest);
 9099
 9100   format %{ "leaq    $dest, [$constantaddress]\n\t"
 9101             "jmp     [$dest + $switch_val]\n\t" %}
 9102   ins_encode %{
 9103     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
 9104     // to do that and the compiler is using that register as one it can allocate.
 9105     // So we build it all by hand.
 9106     // Address index(noreg, switch_reg, Address::times_1);
 9107     // ArrayAddress dispatch(table, index);
 9108     Address dispatch($dest$$Register, $switch_val$$Register, Address::times_1);
 9109     __ lea($dest$$Register, $constantaddress);
 9110     __ jmp(dispatch);
 9111   %}
 9112   ins_pipe(pipe_jmp);
 9113 %}
 9114 
 9115 // Conditional move
 9116 instruct cmovI_imm_01(rRegI dst, immI_1 src, rFlagsReg cr, cmpOp cop)
 9117 %{
        // CMoveI selecting between the constants 1 and 0 (signed compare
        // flags): the predicate requires the second data input to be the int
        // constant 0, and the result is materialised with a single setb on the
        // negated condition instead of a conditional move.
 9118   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
 9119   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 9120
 9121   ins_cost(100); // XXX
 9122   format %{ "setbn$cop $dst\t# signed, int" %}
 9123   ins_encode %{
 9124     const Assembler::Condition negated =
            MacroAssembler::negate_condition((Assembler::Condition)($cop$$cmpcode));
 9125     __ setb(negated, $dst$$Register);
 9126   %}
 9127   ins_pipe(ialu_reg);
 9128 %}
 9129 
 9130 instruct cmovI_reg(rRegI dst, rRegI src, rFlagsReg cr, cmpOp cop)
 9131 %{
        // Two-operand int conditional move on signed compare flags: dst is both
        // an input and the result, and src is moved in when the condition
        // holds. Selected only when UseAPX is off.
 9132   predicate(!UseAPX);
 9133   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 9134
 9135   ins_cost(200); // XXX
 9136   format %{ "cmovl$cop $dst, $src\t# signed, int" %}
 9137   ins_encode %{
 9138     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9139   %}
 9140   ins_pipe(pipe_cmov_reg);
 9141 %}
 9142 
 9143 instruct cmovI_reg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr, cmpOp cop)
 9144 %{
        // Three-operand int conditional move on signed compare flags: emits
        // ecmovl with a destination separate from both sources. Selected only
        // when UseAPX is on.
 9145   predicate(UseAPX);
 9146   match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
 9147
 9148   ins_cost(200);
 9149   format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, int ndd" %}
 9150   ins_encode %{
 9151     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9152   %}
 9153   ins_pipe(pipe_cmov_reg);
 9154 %}
 9155 
 9156 instruct cmovI_imm_01U(rRegI dst, immI_1 src, rFlagsRegU cr, cmpOpU cop)
 9157 %{
        // CMoveI selecting between the constants 1 and 0 (unsigned compare
        // flags): the predicate requires the second data input to be the int
        // constant 0, and the result is materialised with a single setb on the
        // negated condition instead of a conditional move.
 9158   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
 9159   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 9160
 9161   ins_cost(100); // XXX
 9162   format %{ "setbn$cop $dst\t# unsigned, int" %}
 9163   ins_encode %{
 9164     const Assembler::Condition negated =
            MacroAssembler::negate_condition((Assembler::Condition)($cop$$cmpcode));
 9165     __ setb(negated, $dst$$Register);
 9166   %}
 9167   ins_pipe(ialu_reg);
 9168 %}
 9169 
 9170 instruct cmovI_regU(cmpOpU cop, rFlagsRegU cr, rRegI dst, rRegI src) %{
        // Two-operand int conditional move on unsigned compare flags: dst is
        // both an input and the result. Selected only when UseAPX is off. Also
        // used as the expansion target of cmovI_regUCF.
 9171   predicate(!UseAPX);
 9172   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 9173
 9174   ins_cost(200); // XXX
 9175   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
 9176   ins_encode %{
 9177     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9178   %}
 9179   ins_pipe(pipe_cmov_reg);
 9180 %}
 9181 
 9182 instruct cmovI_regU_ndd(rRegI dst, cmpOpU cop, rFlagsRegU cr, rRegI src1, rRegI src2) %{
        // Three-operand int conditional move on unsigned compare flags: emits
        // ecmovl with a destination separate from both sources. Selected only
        // when UseAPX is on.
 9183   predicate(UseAPX);
 9184   match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
 9185
 9186   ins_cost(200);
 9187   format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
 9188   ins_encode %{
 9189     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9190   %}
 9191   ins_pipe(pipe_cmov_reg);
 9192 %}
 9193 
 9194 instruct cmovI_imm_01UCF(rRegI dst, immI_1 src, rFlagsRegUCF cr, cmpOpUCF cop)
 9195 %{
        // CMoveI selecting between the constants 1 and 0 for the UCF flags
        // operand class: the predicate requires the second data input to be the
        // int constant 0, and the result is materialised with a single setb on
        // the negated condition instead of a conditional move.
 9196   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
 9197   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 9198
 9199   ins_cost(100); // XXX
 9200   format %{ "setbn$cop $dst\t# unsigned, int" %}
 9201   ins_encode %{
 9202     const Assembler::Condition negated =
            MacroAssembler::negate_condition((Assembler::Condition)($cop$$cmpcode));
 9203     __ setb(negated, $dst$$Register);
 9204   %}
 9205   ins_pipe(ialu_reg);
 9206 %}
 9207 
 9208 instruct cmovI_imm_01UCFE(rRegI dst, immI_1 src, rFlagsRegUCFE cr, cmpOpUCFE cop)
 9209 %{
        // CMoveI selecting between the constants 1 and 0 for the UCFE flags
        // operand class: the predicate requires the second data input to be the
        // int constant 0, and the result is materialised with a single setb on
        // the negated condition instead of a conditional move.
 9210   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
 9211   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 9212
 9213   ins_cost(100); // XXX
 9214   format %{ "setbn$cop $dst\t# signed, unsigned, int" %}
 9215   ins_encode %{
 9216     const Assembler::Condition negated =
            MacroAssembler::negate_condition((Assembler::Condition)($cop$$cmpcode));
 9217     __ setb(negated, $dst$$Register);
 9218   %}
 9219   ins_pipe(ialu_reg);
 9220 %}
 9221 
 9222 instruct cmovI_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
        // Two-operand int conditional move for the UCF flags operand class. It
        // has no encoding of its own: it expands to cmovI_regU with the same
        // operands.
        // NOTE(review): cmovI_regU carries predicate(!UseAPX) but this rule has
        // no predicate - confirm whether it should also be limited to !UseAPX.
 9223   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 9224
 9225   ins_cost(200);
 9226   expand %{
 9227     cmovI_regU(cop, cr, dst, src);
 9228   %}
 9229 %}
 9230 
 9231 instruct cmovI_regUCFE_ndd(rRegI dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegI src1, rRegI src2) %{
        // Three-operand int conditional move for the UCFE flags operand class:
        // emits ecmovl with a destination separate from both sources.
        // NOTE(review): the other rules that emit ecmovl here (cmovI_reg_ndd,
        // cmovI_regU_ndd) are guarded by predicate(UseAPX); this one has no
        // predicate - confirm that the UCFE operand class implies it.
 9232   match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
 9233
 9234   ins_cost(200);
 9235   format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, unsigned, int ndd" %}
 9236   ins_encode %{
 9237     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9238   %}
 9239   ins_pipe(pipe_cmov_reg);
 9240 %}
 9241 
 9242 instruct cmovI_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
        // CMoveI for a cmpOpUCF2 test that is BoolTest::ne. Two conditional
        // moves are emitted back to back: src is copied into dst if the parity
        // condition holds, and again if the notEqual condition holds.
 9243   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9244   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 9245
 9246   ins_cost(200); // XXX
 9247   format %{ "cmovpl  $dst, $src\n\t"
 9248             "cmovnel $dst, $src" %}
 9249   ins_encode %{
          const Register target = $dst$$Register;
          const Register source = $src$$Register;
 9250     __ cmovl(Assembler::parity, target, source);
 9251     __ cmovl(Assembler::notEqual, target, source);
 9252   %}
 9253   ins_pipe(pipe_cmov_reg);
 9254 %}
 9255 
 9256 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 9257 // inputs of the CMove
 9258 instruct cmovI_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
        // CMoveI for a cmpOpUCF2 test that is BoolTest::eq. The data inputs in
        // the match rule are swapped (src dst) relative to cmovI_regUCF2_ne, so
        // the same parity / notEqual pair of conditional moves is emitted.
        // NOTE(review): dst appears as a match input and is also declared TEMP,
        // which cmovI_regUCF2_ne does not do - confirm this effect is intended.
 9259   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9260   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 9261   effect(TEMP dst);
 9262
 9263   ins_cost(200); // XXX
 9264   format %{ "cmovpl  $dst, $src\n\t"
 9265             "cmovnel $dst, $src" %}
 9266   ins_encode %{
 9267     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 9268     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 9269   %}
 9270   ins_pipe(pipe_cmov_reg);
 9271 %}
 9272 
 9273 // Conditional move
 9274 instruct cmovI_mem(cmpOp cop, rFlagsReg cr, rRegI dst, memory src) %{
        // Int conditional move (cmpOp / rFlagsReg) whose second value input is a
        // LoadI from the memory operand src. The load is matched as part of the
        // rule, and the emitted cmovl reads straight from $src$$Address.
 9275   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 9276 
 9277   ins_cost(250); // XXX
 9278   format %{ "cmovl$cop $dst, $src\t# signed, int" %}
 9279   ins_encode %{
 9280     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 9281   %}
 9282   ins_pipe(pipe_cmov_mem);
 9283 %}
 9284 
 9285 // Conditional move
 9286 instruct cmovI_memU(cmpOpU cop, rFlagsRegU cr, rRegI dst, memory src)
 9287 %{
        // Int conditional move for a cmpOpU condition on rFlagsRegU flags, with
        // the second value input loaded (LoadI) from memory operand src. Emits
        // cmovl with a memory source; cmovI_memUCF expands to this rule.
 9288   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 9289 
 9290   ins_cost(250); // XXX
 9291   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
 9292   ins_encode %{
 9293     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 9294   %}
 9295   ins_pipe(pipe_cmov_mem);
 9296 %}
 9297 
 9298 instruct cmovI_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, memory src) %{
        // Int conditional move with a memory (LoadI) source for a cmpOpUCF
        // condition. No encoding of its own: the expand forwards the operands
        // unchanged to cmovI_memU.
 9299   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 9300 
 9301   ins_cost(250);
 9302   expand %{
 9303     cmovI_memU(cop, cr, dst, src);
 9304   %}
 9305 %}
 9306 
 9307 instruct cmovI_memUCFE(cmpOpUCFE cop, rFlagsRegUCFE cr, rRegI dst, memory src) %{
        // Int conditional move with a memory (LoadI) source for a cmpOpUCFE
        // condition. Unlike cmovI_memUCF this rule is not an expand; it emits
        // cmovl with a memory source itself.
        // NOTE(review): the format text says "# unsigned, int" while the other
        // UCFE rules in this file print "signed, unsigned" - possibly a
        // copy/paste leftover; confirm before changing the string.
 9308   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 9309 
 9310   ins_cost(250); // XXX
 9311   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
 9312   ins_encode %{
 9313     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 9314   %}
 9315   ins_pipe(pipe_cmov_mem);
 9316 %}
 9317 
 9318 // Conditional move
 9319 instruct cmovN_reg(rRegN dst, rRegN src, rFlagsReg cr, cmpOp cop)
 9320 %{
        // Compressed-pointer conditional move (cmpOp / rFlagsReg), two-operand
        // form: dst is both the result and the first value input. Only selected
        // when UseAPX is off. Emits cmovl, the same instruction the int rules
        // use, with the condition code from $cop$$cmpcode.
 9321   predicate(!UseAPX);
 9322   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 9323 
 9324   ins_cost(200); // XXX
 9325   format %{ "cmovl$cop $dst, $src\t# signed, compressed ptr" %}
 9326   ins_encode %{
 9327     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9328   %}
 9329   ins_pipe(pipe_cmov_reg);
 9330 %}
 9331 
 9332 // Conditional move ndd
 9333 instruct cmovN_reg_ndd(rRegN dst, rRegN src1, rRegN src2, rFlagsReg cr, cmpOp cop)
 9334 %{
        // Compressed-pointer conditional move (cmpOp / rFlagsReg) in
        // three-operand form: dst is separate from the value inputs src1 and
        // src2. Only selected when UseAPX is on; emits ecmovl.
 9335   predicate(UseAPX);
 9336   match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
 9337 
 9338   ins_cost(200);
 9339   format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, compressed ptr ndd" %}
 9340   ins_encode %{
 9341     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9342   %}
 9343   ins_pipe(pipe_cmov_reg);
 9344 %}
 9345 
 9346 // Conditional move
 9347 instruct cmovN_regU(cmpOpU cop, rFlagsRegU cr, rRegN dst, rRegN src)
 9348 %{
        // Compressed-pointer conditional move for a cmpOpU condition on
        // rFlagsRegU flags, two-operand form (dst is also the first value
        // input). Only selected when UseAPX is off. cmovN_regUCF expands to
        // this rule.
 9349   predicate(!UseAPX);
 9350   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 9351 
 9352   ins_cost(200); // XXX
 9353   format %{ "cmovl$cop $dst, $src\t# unsigned, compressed ptr" %}
 9354   ins_encode %{
 9355     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9356   %}
 9357   ins_pipe(pipe_cmov_reg);
 9358 %}
 9359 
 9360 instruct cmovN_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
        // Compressed-pointer conditional move for a cmpOpUCF condition. No
        // encoding of its own: the expand forwards the operands unchanged to
        // cmovN_regU.
 9361   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 9362 
 9363   ins_cost(200);
 9364   expand %{
 9365     cmovN_regU(cop, cr, dst, src);
 9366   %}
 9367 %}
 9368 
 9369 // Conditional move ndd
 9370 instruct cmovN_regU_ndd(rRegN dst, cmpOpU cop, rFlagsRegU cr, rRegN src1, rRegN src2)
 9371 %{
        // Compressed-pointer conditional move for a cmpOpU condition in
        // three-operand form (dst separate from src1 and src2). Only selected
        // when UseAPX is on; emits ecmovl.
 9372   predicate(UseAPX);
 9373   match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
 9374 
 9375   ins_cost(200);
 9376   format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, compressed ptr ndd" %}
 9377   ins_encode %{
 9378     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9379   %}
 9380   ins_pipe(pipe_cmov_reg);
 9381 %}
 9382 
 9383 instruct cmovN_regUCFE_ndd(rRegN dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegN src1, rRegN src2) %{
        // Compressed-pointer conditional move for a cmpOpUCFE condition in
        // three-operand form (dst separate from src1 and src2); emits ecmovl.
        // NOTE(review): carries no predicate(UseAPX), unlike cmovN_reg_ndd and
        // cmovN_regU_ndd - presumably implied by the UCFE operand classes;
        // confirm.
 9384   match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
 9385 
 9386   ins_cost(200);
 9387   format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, unsigned, compressed ptr ndd" %}
 9388   ins_encode %{
 9389     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9390   %}
 9391   ins_pipe(pipe_cmov_reg);
 9392 %}
 9393 
 9394 instruct cmovN_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
        // Compressed-pointer conditional move for a cmpOpUCF2 condition,
        // restricted by the predicate to BoolTest::ne. The encoding ignores
        // $cop and emits two fixed cmovl dst <- src, on parity and then on
        // notEqual, so src is taken when either flag condition holds.
        // NOTE(review): parity presumably signals an unordered floating-point
        // compare - confirm.
 9395   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9396   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 9397 
 9398   ins_cost(200); // XXX
 9399   format %{ "cmovpl  $dst, $src\n\t"
 9400             "cmovnel $dst, $src" %}
 9401   ins_encode %{
 9402     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 9403     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 9404   %}
 9405   ins_pipe(pipe_cmov_reg);
 9406 %}
 9407 
 9408 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 9409 // inputs of the CMove
 9410 instruct cmovN_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
        // Compressed-pointer conditional move for a cmpOpUCF2 condition with
        // BoolTest::eq. The value inputs appear as (src dst) in the match rule,
        // swapped relative to cmovN_regUCF2_ne, while the emitted sequence is
        // the same parity/notEqual cmovl pair.
 9411   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9412   match(Set dst (CMoveN (Binary cop cr) (Binary src dst)));
 9413 
 9414   ins_cost(200); // XXX
 9415   format %{ "cmovpl  $dst, $src\n\t"
 9416             "cmovnel $dst, $src" %}
 9417   ins_encode %{
 9418     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 9419     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 9420   %}
 9421   ins_pipe(pipe_cmov_reg);
 9422 %}
 9423 
 9424 // Conditional move
 9425 instruct cmovP_reg(rRegP dst, rRegP src, rFlagsReg cr, cmpOp cop)
 9426 %{
        // Pointer conditional move (cmpOp / rFlagsReg), two-operand form: dst is
        // both the result and the first value input. Only selected when UseAPX
        // is off; emits cmovq with the condition code from $cop$$cmpcode.
 9427   predicate(!UseAPX);
 9428   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 9429 
 9430   ins_cost(200); // XXX
 9431   format %{ "cmovq$cop $dst, $src\t# signed, ptr" %}
 9432   ins_encode %{
 9433     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9434   %}
 9435   ins_pipe(pipe_cmov_reg);  // XXX
 9436 %}
 9437 
 9438 // Conditional move ndd
 9439 instruct cmovP_reg_ndd(rRegP dst, rRegP src1, rRegP src2, rFlagsReg cr, cmpOp cop)
 9440 %{
        // Pointer conditional move (cmpOp / rFlagsReg) in three-operand form:
        // dst is separate from the value inputs src1 and src2. Only selected
        // when UseAPX is on; emits ecmovq.
 9441   predicate(UseAPX);
 9442   match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
 9443 
 9444   ins_cost(200);
 9445   format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, ptr ndd" %}
 9446   ins_encode %{
 9447     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9448   %}
 9449   ins_pipe(pipe_cmov_reg);
 9450 %}
 9451 
 9452 // Conditional move
 9453 instruct cmovP_regU(cmpOpU cop, rFlagsRegU cr, rRegP dst, rRegP src)
 9454 %{
        // Pointer conditional move for a cmpOpU condition on rFlagsRegU flags,
        // two-operand form (dst is also the first value input). Only selected
        // when UseAPX is off. cmovP_regUCF expands to this rule.
 9455   predicate(!UseAPX);
 9456   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 9457 
 9458   ins_cost(200); // XXX
 9459   format %{ "cmovq$cop $dst, $src\t# unsigned, ptr" %}
 9460   ins_encode %{
 9461     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9462   %}
 9463   ins_pipe(pipe_cmov_reg); // XXX
 9464 %}
 9465 
 9466 // Conditional move ndd
 9467 instruct cmovP_regU_ndd(rRegP dst, cmpOpU cop, rFlagsRegU cr, rRegP src1, rRegP src2)
 9468 %{
        // Pointer conditional move for a cmpOpU condition in three-operand form
        // (dst separate from src1 and src2). Only selected when UseAPX is on;
        // emits ecmovq.
 9469   predicate(UseAPX);
 9470   match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
 9471 
 9472   ins_cost(200);
 9473   format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, ptr ndd" %}
 9474   ins_encode %{
 9475     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9476   %}
 9477   ins_pipe(pipe_cmov_reg);
 9478 %}
 9479 
 9480 instruct cmovP_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
        // Pointer conditional move for a cmpOpUCF condition. No encoding of its
        // own: the expand forwards the operands unchanged to cmovP_regU.
 9481   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 9482 
 9483   ins_cost(200);
 9484   expand %{
 9485     cmovP_regU(cop, cr, dst, src);
 9486   %}
 9487 %}
 9488 
 9489 instruct cmovP_regUCFE_ndd(rRegP dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegP src1, rRegP src2) %{
        // Pointer conditional move for a cmpOpUCFE condition in three-operand
        // form (dst separate from src1 and src2); emits ecmovq.
        // NOTE(review): carries no predicate(UseAPX), unlike cmovP_reg_ndd and
        // cmovP_regU_ndd - presumably implied by the UCFE operand classes;
        // confirm.
 9490   match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
 9491 
 9492   ins_cost(200);
 9493   format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, unsigned, ptr ndd" %}
 9494   ins_encode %{
 9495     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9496   %}
 9497   ins_pipe(pipe_cmov_reg);
 9498 %}
 9499 
 9500 instruct cmovP_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
        // Pointer conditional move for a cmpOpUCF2 condition, restricted by the
        // predicate to BoolTest::ne. The encoding ignores $cop and emits two
        // fixed cmovq dst <- src, on parity and then on notEqual, so src is
        // taken when either flag condition holds.
        // NOTE(review): parity presumably signals an unordered floating-point
        // compare - confirm.
 9501   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9502   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 9503 
 9504   ins_cost(200); // XXX
 9505   format %{ "cmovpq  $dst, $src\n\t"
 9506             "cmovneq $dst, $src" %}
 9507   ins_encode %{
 9508     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 9509     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 9510   %}
 9511   ins_pipe(pipe_cmov_reg);
 9512 %}
 9513 
 9514 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 9515 // inputs of the CMove
 9516 instruct cmovP_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
        // Pointer conditional move for a cmpOpUCF2 condition with BoolTest::eq.
        // The value inputs appear as (src dst) in the match rule, swapped
        // relative to cmovP_regUCF2_ne, while the emitted sequence is the same
        // parity/notEqual cmovq pair.
 9517   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9518   match(Set dst (CMoveP (Binary cop cr) (Binary src dst)));
 9519 
 9520   ins_cost(200); // XXX
 9521   format %{ "cmovpq  $dst, $src\n\t"
 9522             "cmovneq $dst, $src" %}
 9523   ins_encode %{
 9524     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 9525     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 9526   %}
 9527   ins_pipe(pipe_cmov_reg);
 9528 %}
 9529 
 9530 instruct cmovL_imm_01(rRegL dst, immL1 src, rFlagsReg cr, cmpOp cop)
 9531 %{
        // Long conditional move specialised for constant value inputs. src is an
        // immL1 operand and the predicate requires the second value input
        // (n->in(2)->in(2), matched here as dst) to be the long constant 0.
        // No cmov is emitted: a single setb on the negated condition writes the
        // result into dst. Cost 100, below the 200 of the register forms.
        // NOTE(review): immL1 is presumably the constant 1 - confirm.
 9532   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
 9533   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 9534 
 9535   ins_cost(100); // XXX
 9536   format %{ "setbn$cop $dst\t# signed, long" %}
 9537   ins_encode %{
 9538     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9539     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9540   %}
 9541   ins_pipe(ialu_reg);
 9542 %}
 9543 
 9544 instruct cmovL_reg(cmpOp cop, rFlagsReg cr, rRegL dst, rRegL src)
 9545 %{
        // Long conditional move (cmpOp / rFlagsReg), two-operand form: dst is
        // both the result and the first value input. Only selected when UseAPX
        // is off; emits cmovq with the condition code from $cop$$cmpcode.
 9546   predicate(!UseAPX);
 9547   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 9548 
 9549   ins_cost(200); // XXX
 9550   format %{ "cmovq$cop $dst, $src\t# signed, long" %}
 9551   ins_encode %{
 9552     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9553   %}
 9554   ins_pipe(pipe_cmov_reg);  // XXX
 9555 %}
 9556 
 9557 instruct cmovL_reg_ndd(rRegL dst, cmpOp cop, rFlagsReg cr, rRegL src1, rRegL src2)
 9558 %{
        // Long conditional move (cmpOp / rFlagsReg) in three-operand form: dst
        // is separate from the value inputs src1 and src2. Only selected when
        // UseAPX is on; emits ecmovq.
 9559   predicate(UseAPX);
 9560   match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
 9561 
 9562   ins_cost(200);
 9563   format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, long ndd" %}
 9564   ins_encode %{
 9565     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9566   %}
 9567   ins_pipe(pipe_cmov_reg);
 9568 %}
 9569 
 9570 instruct cmovL_mem(cmpOp cop, rFlagsReg cr, rRegL dst, memory src)
 9571 %{
        // Long conditional move (cmpOp / rFlagsReg) whose second value input is
        // a LoadL from the memory operand src; the emitted cmovq reads straight
        // from $src$$Address. No UseAPX predicate on this rule.
        // NOTE(review): cost is 200, equal to the register form and lower than
        // the 250 used by cmovI_mem - confirm this is intended.
 9572   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
 9573 
 9574   ins_cost(200); // XXX
 9575   format %{ "cmovq$cop $dst, $src\t# signed, long" %}
 9576   ins_encode %{
 9577     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 9578   %}
 9579   ins_pipe(pipe_cmov_mem);  // XXX
 9580 %}
 9581 
 9582 instruct cmovL_imm_01U(rRegL dst, immL1 src, rFlagsRegU cr, cmpOpU cop)
 9583 %{
        // cmpOpU / rFlagsRegU counterpart of cmovL_imm_01: src is an immL1
        // operand and the predicate requires the second value input
        // (n->in(2)->in(2), matched as dst) to be the long constant 0. A single
        // setb on the negated condition produces the result in dst.
        // NOTE(review): immL1 is presumably the constant 1 - confirm.
 9584   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
 9585   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 9586 
 9587   ins_cost(100); // XXX
 9588   format %{ "setbn$cop $dst\t# unsigned, long" %}
 9589   ins_encode %{
 9590     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9591     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9592   %}
 9593   ins_pipe(ialu_reg);
 9594 %}
 9595 
 9596 instruct cmovL_regU(cmpOpU cop, rFlagsRegU cr, rRegL dst, rRegL src)
 9597 %{
        // Long conditional move for a cmpOpU condition on rFlagsRegU flags,
        // two-operand form (dst is also the first value input). Only selected
        // when UseAPX is off. cmovL_regUCF expands to this rule.
 9598   predicate(!UseAPX);
 9599   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 9600 
 9601   ins_cost(200); // XXX
 9602   format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
 9603   ins_encode %{
 9604     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9605   %}
 9606   ins_pipe(pipe_cmov_reg); // XXX
 9607 %}
 9608 
 9609 instruct cmovL_regU_ndd(rRegL dst, cmpOpU cop, rFlagsRegU cr, rRegL src1, rRegL src2)
 9610 %{
        // Long conditional move for a cmpOpU condition in three-operand form
        // (dst separate from src1 and src2). Only selected when UseAPX is on;
        // emits ecmovq.
 9611   predicate(UseAPX);
 9612   match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
 9613 
 9614   ins_cost(200);
 9615   format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
 9616   ins_encode %{
 9617     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9618   %}
 9619   ins_pipe(pipe_cmov_reg);
 9620 %}
 9621 
 9622 instruct cmovL_imm_01UCF(rRegL dst, immL1 src, rFlagsRegUCF cr, cmpOpUCF cop)
 9623 %{
        // cmpOpUCF / rFlagsRegUCF counterpart of cmovL_imm_01U. Unlike the other
        // UCF rules nearby it is not an expand: it carries its own encoding, a
        // single setb on the negated condition. The predicate requires the
        // second value input (matched as dst) to be the long constant 0.
        // NOTE(review): immL1 is presumably the constant 1 - confirm.
 9624   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
 9625   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 9626 
 9627   ins_cost(100); // XXX
 9628   format %{ "setbn$cop $dst\t# unsigned, long" %}
 9629   ins_encode %{
 9630     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9631     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9632   %}
 9633   ins_pipe(ialu_reg);
 9634 %}
 9635 
 9636 instruct cmovL_imm_01UCFE(rRegL dst, immL1 src, rFlagsRegUCFE cr, cmpOpUCFE cop)
 9637 %{
        // cmpOpUCFE / rFlagsRegUCFE counterpart of cmovL_imm_01: src is an immL1
        // operand and the predicate requires the second value input (matched as
        // dst) to be the long constant 0. A single setb on the negated
        // condition produces the result in dst.
        // NOTE(review): immL1 is presumably the constant 1 - confirm.
 9638   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
 9639   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 9640 
 9641   ins_cost(100); // XXX
 9642   format %{ "setbn$cop $dst\t# signed, unsigned, long" %}
 9643   ins_encode %{
 9644     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9645     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9646   %}
 9647   ins_pipe(ialu_reg);
 9648 %}
 9649 
 9650 instruct cmovL_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
        // Long conditional move for a cmpOpUCF condition. No encoding of its
        // own: the expand forwards the operands unchanged to cmovL_regU.
 9651   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 9652 
 9653   ins_cost(200);
 9654   expand %{
 9655     cmovL_regU(cop, cr, dst, src);
 9656   %}
 9657 %}
 9658 
 9659 instruct cmovL_regUCFE_ndd(rRegL dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegL src1, rRegL src2)
 9660 %{
        // Long conditional move for a cmpOpUCFE condition in three-operand form
        // (dst separate from src1 and src2); emits ecmovq.
        // NOTE(review): carries no predicate(UseAPX), unlike cmovL_reg_ndd and
        // cmovL_regU_ndd - presumably implied by the UCFE operand classes;
        // confirm.
 9661   match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
 9662 
 9663   ins_cost(200);
 9664   format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, unsigned, long ndd" %}
 9665   ins_encode %{
 9666     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9667   %}
 9668   ins_pipe(pipe_cmov_reg);
 9669 %}
 9670 
 9671 instruct cmovL_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
        // Long conditional move for a cmpOpUCF2 condition, restricted by the
        // predicate to BoolTest::ne. The encoding ignores $cop and emits two
        // fixed cmovq dst <- src, on parity and then on notEqual, so src is
        // taken when either flag condition holds.
        // NOTE(review): parity presumably signals an unordered floating-point
        // compare - confirm.
 9672   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9673   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 9674 
 9675   ins_cost(200); // XXX
 9676   format %{ "cmovpq  $dst, $src\n\t"
 9677             "cmovneq $dst, $src" %}
 9678   ins_encode %{
 9679     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 9680     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 9681   %}
 9682   ins_pipe(pipe_cmov_reg);
 9683 %}
 9684 
 9685 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 9686 // inputs of the CMove
 9687 instruct cmovL_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
        // Long conditional move for a cmpOpUCF2 condition with BoolTest::eq.
        // The value inputs appear as (src dst) in the match rule, swapped
        // relative to cmovL_regUCF2_ne, while the emitted sequence is the same
        // parity/notEqual cmovq pair.
 9688   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9689   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 9690 
 9691   ins_cost(200); // XXX
 9692   format %{ "cmovpq  $dst, $src\n\t"
 9693             "cmovneq $dst, $src" %}
 9694   ins_encode %{
 9695     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 9696     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 9697   %}
 9698   ins_pipe(pipe_cmov_reg);
 9699 %}
 9700 
 9701 instruct cmovL_memU(cmpOpU cop, rFlagsRegU cr, rRegL dst, memory src)
 9702 %{
        // Long conditional move for a cmpOpU condition, with the second value
        // input loaded (LoadL) from memory operand src. Emits cmovq with a
        // memory source; cmovL_memUCF expands to this rule.
        // NOTE(review): cost is 200 here versus 250 on cmovI_memU - confirm
        // this is intended.
 9703   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
 9704 
 9705   ins_cost(200); // XXX
 9706   format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
 9707   ins_encode %{
 9708     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 9709   %}
 9710   ins_pipe(pipe_cmov_mem); // XXX
 9711 %}
 9712 
 9713 instruct cmovL_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, memory src) %{
        // Long conditional move with a memory (LoadL) source for a cmpOpUCF
        // condition. No encoding of its own: the expand forwards the operands
        // unchanged to cmovL_memU.
 9714   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
 9715 
 9716   ins_cost(200);
 9717   expand %{
 9718     cmovL_memU(cop, cr, dst, src);
 9719   %}
 9720 %}
 9721 
 9722 instruct cmovL_memUCFE(cmpOpUCFE cop, rFlagsRegUCFE cr, rRegL dst, memory src) %{
        // Long conditional move with a memory (LoadL) source for a cmpOpUCFE
        // condition. Unlike cmovL_memUCF this rule is not an expand; it emits
        // cmovq with a memory source itself.
        // NOTE(review): the format text says "# unsigned, long" while the other
        // UCFE rules in this file print "signed, unsigned" - possibly a
        // copy/paste leftover; confirm before changing the string.
        // NOTE(review): cost is 200 here versus 250 on cmovI_memUCFE - confirm
        // this is intended.
 9723   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
 9724 
 9725   ins_cost(200); // XXX
 9726   format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
 9727   ins_encode %{
 9728     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 9729   %}
 9730   ins_pipe(pipe_cmov_mem); // XXX
 9731 %}
 9732 
 9733 instruct cmovF_reg(cmpOp cop, rFlagsReg cr, regF dst, regF src)
 9734 %{
        // Float conditional move (cmpOp / rFlagsReg) between XMM registers.
        // No cmov instruction is used: the encoding is a short branch (jccb)
        // over a movflt dst <- src, bound to a local label. The branch uses
        // $cop$$cmpcode ^ 1, so the move is skipped when the inverted
        // condition holds.
 9735   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 9736 
 9737   ins_cost(200); // XXX
 9738   format %{ "jn$cop    skip\t# signed cmove float\n\t"
 9739             "movss     $dst, $src\n"
 9740     "skip:" %}
 9741   ins_encode %{
 9742     Label Lskip;
 9743     // Invert sense of branch from sense of CMOV
 9744     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9745     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 9746     __ bind(Lskip);
 9747   %}
 9748   ins_pipe(pipe_slow);
 9749 %}
 9750 
 9751 instruct cmovF_regU(cmpOpU cop, rFlagsRegU cr, regF dst, regF src)
 9752 %{
        // Float conditional move for a cmpOpU condition on rFlagsRegU flags.
        // Encoded as a short branch (jccb) on the inverted condition
        // ($cop$$cmpcode ^ 1) that skips a movflt dst <- src. cmovF_regUCF
        // expands to this rule.
 9753   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 9754 
 9755   ins_cost(200); // XXX
 9756   format %{ "jn$cop    skip\t# unsigned cmove float\n\t"
 9757             "movss     $dst, $src\n"
 9758     "skip:" %}
 9759   ins_encode %{
 9760     Label Lskip;
 9761     // Invert sense of branch from sense of CMOV
 9762     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9763     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 9764     __ bind(Lskip);
 9765   %}
 9766   ins_pipe(pipe_slow);
 9767 %}
 9768 
 9769 instruct cmovF_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regF dst, regF src) %{
        // Float conditional move for a cmpOpUCF condition. No encoding of its
        // own: the expand forwards the operands unchanged to cmovF_regU.
 9770   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 9771 
 9772   ins_cost(200);
 9773   expand %{
 9774     cmovF_regU(cop, cr, dst, src);
 9775   %}
 9776 %}
 9777 
 9778 instruct cmovF_regUCFE(cmpOpUCFE cop, rFlagsRegUCFE cr, regF dst, regF src)
 9779 %{
        // Float conditional move for a cmpOpUCFE condition on rFlagsRegUCFE
        // flags. Carries its own encoding (it is not an expand): a short branch
        // (jccb) on the inverted condition ($cop$$cmpcode ^ 1) that skips a
        // movflt dst <- src.
 9780   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 9781 
 9782   ins_cost(200); // XXX
 9783   format %{ "jn$cop    skip\t# signed, unsigned cmove float\n\t"
 9784             "movss     $dst, $src\n"
 9785     "skip:" %}
 9786   ins_encode %{
 9787     Label Lskip;
 9788     // Invert sense of branch from sense of CMOV
 9789     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9790     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 9791     __ bind(Lskip);
 9792   %}
 9793   ins_pipe(pipe_slow);
 9794 %}
 9795 
 9796 instruct cmovD_reg(cmpOp cop, rFlagsReg cr, regD dst, regD src)
 9797 %{
        // Double conditional move (cmpOp / rFlagsReg) between XMM registers.
        // No cmov instruction is used: the encoding is a short branch (jccb)
        // over a movdbl dst <- src, bound to a local label. The branch uses
        // $cop$$cmpcode ^ 1, so the move is skipped when the inverted
        // condition holds.
 9798   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 9799 
 9800   ins_cost(200); // XXX
 9801   format %{ "jn$cop    skip\t# signed cmove double\n\t"
 9802             "movsd     $dst, $src\n"
 9803     "skip:" %}
 9804   ins_encode %{
 9805     Label Lskip;
 9806     // Invert sense of branch from sense of CMOV
 9807     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9808     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 9809     __ bind(Lskip);
 9810   %}
 9811   ins_pipe(pipe_slow);
 9812 %}
 9813 
 9814 instruct cmovD_regU(cmpOpU cop, rFlagsRegU cr, regD dst, regD src)
 9815 %{
        // Double conditional move for a cmpOpU condition on rFlagsRegU flags.
        // Encoded as a short branch (jccb) on the inverted condition
        // ($cop$$cmpcode ^ 1) that skips a movdbl dst <- src. cmovD_regUCF
        // expands to this rule.
 9816   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 9817 
 9818   ins_cost(200); // XXX
 9819   format %{ "jn$cop    skip\t# unsigned cmove double\n\t"
 9820             "movsd     $dst, $src\n"
 9821     "skip:" %}
 9822   ins_encode %{
 9823     Label Lskip;
 9824     // Invert sense of branch from sense of CMOV
 9825     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9826     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 9827     __ bind(Lskip);
 9828   %}
 9829   ins_pipe(pipe_slow);
 9830 %}
 9831 
 9832 instruct cmovD_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regD dst, regD src) %{
        // Double conditional move for a cmpOpUCF condition. No encoding of its
        // own: the expand forwards the operands unchanged to cmovD_regU.
 9833   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 9834 
 9835   ins_cost(200);
 9836   expand %{
 9837     cmovD_regU(cop, cr, dst, src);
 9838   %}
 9839 %}
 9840 
 9841 instruct cmovD_regUCFE(cmpOpUCFE cop, rFlagsRegUCFE cr, regD dst, regD src)
 9842 %{
        // Double conditional move for a cmpOpUCFE condition on rFlagsRegUCFE
        // flags. Carries its own encoding (it is not an expand): a short branch
        // (jccb) on the inverted condition ($cop$$cmpcode ^ 1) that skips a
        // movdbl dst <- src.
 9843   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 9844 
 9845   ins_cost(200); // XXX
 9846   format %{ "jn$cop    skip\t# signed, unsigned cmove double\n\t"
 9847             "movsd     $dst, $src\n"
 9848     "skip:" %}
 9849   ins_encode %{
 9850     Label Lskip;
 9851     // Invert sense of branch from sense of CMOV
 9852     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9853     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 9854     __ bind(Lskip);
 9855   %}
 9856   ins_pipe(pipe_slow);
 9857 %}
 9858 
 9859 //----------Arithmetic Instructions--------------------------------------------
 9860 //----------Addition Instructions----------------------------------------------
 9861 
 9862 instruct addI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
 9863 %{
        // Int add, register + register, two-operand form: dst is both the result
        // and the first addend. Only selected when UseAPX is off. Emits addl.
        // The flags register cr is declared killed, and the node is tagged with
        // the PD::Flag_sets_* markers for overflow, sign, zero, carry and parity.
 9864   predicate(!UseAPX);
 9865   match(Set dst (AddI dst src));
 9866   effect(KILL cr);
 9867   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
 9868   format %{ "addl    $dst, $src\t# int" %}
 9869   ins_encode %{
 9870     __ addl($dst$$Register, $src$$Register);
 9871   %}
 9872   ins_pipe(ialu_reg_reg);
 9873 %}
 9874 
 9875 instruct addI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
 9876 %{
        // Int add, register + register, three-operand form: dst is separate
        // from the addends src1 and src2. Only selected when UseAPX is on.
        // Emits eaddl; cr is declared killed. Besides the PD::Flag_sets_*
        // markers, the node is tagged ndd_demotable for both operand 1 and
        // operand 2.
        // NOTE(review): the demotable tags presumably let a later pass fall
        // back to a two-operand add when dst coincides with an input, and the
        // trailing `false` is presumably the no-flags selector - confirm both
        // against the assembler and the demotion code.
 9877   predicate(UseAPX);
 9878   match(Set dst (AddI src1 src2));
 9879   effect(KILL cr);
 9880   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
 9881 
 9882   format %{ "eaddl    $dst, $src1, $src2\t# int ndd" %}
 9883   ins_encode %{
 9884     __ eaddl($dst$$Register, $src1$$Register, $src2$$Register, false);
 9885   %}
 9886   ins_pipe(ialu_reg_reg);
 9887 %}
 9888 
 9889 instruct addI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
 9890 %{
        // Int add, register + immediate (immI), two-operand form with dst as
        // both result and first addend. Only selected when UseAPX is off.
        // Emits addl with $src$$constant; cr is declared killed and the
        // PD::Flag_sets_* markers are attached.
 9891   predicate(!UseAPX);
 9892   match(Set dst (AddI dst src));
 9893   effect(KILL cr);
 9894   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
 9895 
 9896   format %{ "addl    $dst, $src\t# int" %}
 9897   ins_encode %{
 9898     __ addl($dst$$Register, $src$$constant);
 9899   %}
 9900   ins_pipe( ialu_reg );
 9901 %}
 9902 
 9903 instruct addI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
 9904 %{
        // Int add, register + immediate (immI), three-operand form: dst is
        // separate from the register addend src1. Only selected when UseAPX is
        // on. Emits eaddl with $src2$$constant; cr is declared killed. Only
        // operand 1 is tagged ndd_demotable (operand 2 is the immediate).
        // NOTE(review): trailing `false` is presumably the no-flags selector -
        // confirm against the assembler.
 9905   predicate(UseAPX);
 9906   match(Set dst (AddI src1 src2));
 9907   effect(KILL cr);
 9908   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
 9909 
 9910   format %{ "eaddl    $dst, $src1, $src2\t# int ndd" %}
 9911   ins_encode %{
 9912     __ eaddl($dst$$Register, $src1$$Register, $src2$$constant, false);
 9913   %}
 9914   ins_pipe( ialu_reg );
 9915 %}
 9916 
 9917 instruct addI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
 9918 %{
        // Int add of a register and a value loaded (LoadI) from memory operand
        // src; the load is matched as part of the rule and addl reads directly
        // from $src$$Address. dst is both result and first addend. No UseAPX
        // predicate on this rule. cr is declared killed; cost 150.
 9919   match(Set dst (AddI dst (LoadI src)));
 9920   effect(KILL cr);
 9921   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
 9922 
 9923   ins_cost(150); // XXX
 9924   format %{ "addl    $dst, $src\t# int" %}
 9925   ins_encode %{
 9926     __ addl($dst$$Register, $src$$Address);
 9927   %}
 9928   ins_pipe(ialu_reg_mem);
 9929 %}
 9930 
 9931 instruct addI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
 9932 %{
        // Int read-modify-write add: matches StoreI to dst of
        // (LoadI dst) + src, all on the same memory operand, and emits a single
        // addl with a memory destination and register source. cr is declared
        // killed; cost 150.
 9933   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 9934   effect(KILL cr);
 9935   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
 9936 
 9937   ins_cost(150); // XXX
 9938   format %{ "addl    $dst, $src\t# int" %}
 9939   ins_encode %{
 9940     __ addl($dst$$Address, $src$$Register);
 9941   %}
 9942   ins_pipe(ialu_mem_reg);
 9943 %}
 9944 
 9945 instruct addI_mem_imm(memory dst, immI src, rFlagsReg cr)
 9946 %{
        // Int read-modify-write add of an immediate: matches StoreI to dst of
        // (LoadI dst) + src (immI) on the same memory operand and emits a
        // single addl with a memory destination and $src$$constant. cr is
        // declared killed; cost 125, below the 150 of addI_mem_rReg.
 9947   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 9948   effect(KILL cr);
 9949   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
 9950 
 9951 
 9952   ins_cost(125); // XXX
 9953   format %{ "addl    $dst, $src\t# int" %}
 9954   ins_encode %{
 9955     __ addl($dst$$Address, $src$$constant);
 9956   %}
 9957   ins_pipe(ialu_mem_imm);
 9958 %}
 9959 
 9960 instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
 9961 %{
        // Int increment of a register: matches AddI of dst with an immI_1
        // operand and emits incrementl(dst); the src operand is not referenced
        // by the encoding. Only selected when UseIncDec is on and UseAPX is
        // off. cr is declared killed.
        // NOTE(review): immI_1 is presumably the constant 1. Unlike the addI_*
        // rules there is no flag(PD::Flag_sets_*) list here - presumably
        // deliberate; confirm.
 9962   predicate(!UseAPX && UseIncDec);
 9963   match(Set dst (AddI dst src));
 9964   effect(KILL cr);
 9965 
 9966   format %{ "incl    $dst\t# int" %}
 9967   ins_encode %{
 9968     __ incrementl($dst$$Register);
 9969   %}
 9970   ins_pipe(ialu_reg);
 9971 %}
 9972 
 9973 instruct incI_rReg_ndd(rRegI dst, rRegI src, immI_1 val, rFlagsReg cr)
 9974 %{
        // Int increment in two-register form: matches AddI of src with an
        // immI_1 operand and emits eincl(dst, src, false), so dst is separate
        // from src. Only selected when both UseAPX and UseIncDec are on. cr is
        // declared killed and operand 1 is tagged ndd_demotable.
        // NOTE(review): immI_1 is presumably the constant 1, and the trailing
        // `false` presumably the no-flags selector - confirm.
 9975   predicate(UseAPX && UseIncDec);
 9976   match(Set dst (AddI src val));
 9977   effect(KILL cr);
 9978   flag(PD::Flag_ndd_demotable_opr1);
 9979 
 9980   format %{ "eincl    $dst, $src\t# int ndd" %}
 9981   ins_encode %{
 9982     __ eincl($dst$$Register, $src$$Register, false);
 9983   %}
 9984   ins_pipe(ialu_reg);
 9985 %}
 9986 
 9987 instruct incI_mem(memory dst, immI_1 src, rFlagsReg cr)
 9988 %{
        // Int read-modify-write increment: matches StoreI to dst of
        // (LoadI dst) + src (immI_1) on the same memory operand and emits
        // incrementl on $dst$$Address. Only selected when UseIncDec is on;
        // there is no UseAPX condition. cr is declared killed; cost 125.
        // NOTE(review): immI_1 is presumably the constant 1 - confirm.
 9989   predicate(UseIncDec);
 9990   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 9991   effect(KILL cr);
 9992 
 9993   ins_cost(125); // XXX
 9994   format %{ "incl    $dst\t# int" %}
 9995   ins_encode %{
 9996     __ incrementl($dst$$Address);
 9997   %}
 9998   ins_pipe(ialu_mem_imm);
 9999 %}
10000 
10001 // XXX why does that use AddI
10002 instruct decI_rReg(rRegI dst, immI_M1 src, rFlagsReg cr)
10003 %{
        // Int decrement of a register, expressed in the match rule as AddI of
        // dst with an immI_M1 operand; emits decrementl(dst). The src operand
        // is not referenced by the encoding. Only selected when UseIncDec is on
        // and UseAPX is off. cr is declared killed.
        // NOTE(review): immI_M1 is presumably the constant -1, which would
        // explain why the rule matches AddI - confirm.
10004   predicate(!UseAPX && UseIncDec);
10005   match(Set dst (AddI dst src));
10006   effect(KILL cr);
10007 
10008   format %{ "decl    $dst\t# int" %}
10009   ins_encode %{
10010     __ decrementl($dst$$Register);
10011   %}
10012   ins_pipe(ialu_reg);
10013 %}
10014 
10015 instruct decI_rReg_ndd(rRegI dst, rRegI src, immI_M1 val, rFlagsReg cr)
10016 %{
        // Int decrement in two-register form: matches AddI of src with an
        // immI_M1 operand and emits edecl(dst, src, false), so dst is separate
        // from src. Only selected when both UseAPX and UseIncDec are on. cr is
        // declared killed and operand 1 is tagged ndd_demotable.
        // NOTE(review): immI_M1 is presumably the constant -1, and the trailing
        // `false` presumably the no-flags selector - confirm.
10017   predicate(UseAPX && UseIncDec);
10018   match(Set dst (AddI src val));
10019   effect(KILL cr);
10020   flag(PD::Flag_ndd_demotable_opr1);
10021 
10022   format %{ "edecl    $dst, $src\t# int ndd" %}
10023   ins_encode %{
10024     __ edecl($dst$$Register, $src$$Register, false);
10025   %}
10026   ins_pipe(ialu_reg);
10027 %}
10028 
10029 // XXX why does that use AddI
10030 instruct decI_mem(memory dst, immI_M1 src, rFlagsReg cr)
10031 %{
        // Int read-modify-write decrement: matches StoreI to dst of
        // (LoadI dst) + src (immI_M1) on the same memory operand and emits
        // decrementl on $dst$$Address. Only selected when UseIncDec is on;
        // there is no UseAPX condition. cr is declared killed; cost 125.
        // NOTE(review): immI_M1 is presumably the constant -1 - confirm.
10032   predicate(UseIncDec);
10033   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
10034   effect(KILL cr);
10035 
10036   ins_cost(125); // XXX
10037   format %{ "decl    $dst\t# int" %}
10038   ins_encode %{
10039     __ decrementl($dst$$Address);
10040   %}
10041   ins_pipe(ialu_mem_imm);
10042 %}
10043 
10044 instruct leaI_rReg_immI2_immI(rRegI dst, rRegI index, immI2 scale, immI disp)
10045 %{
        // Computes (index << scale) + disp with one leal using an address that
        // has no base register (noreg), a scaled index and a displacement.
        // Only selected when VM_Version::supports_fast_2op_lea() is true. The
        // rule has no flags operand and declares no KILL effect.
        // NOTE(review): immI2 presumably limits scale to the shift counts an
        // Address::ScaleFactor can represent - confirm.
10046   predicate(VM_Version::supports_fast_2op_lea());
10047   match(Set dst (AddI (LShiftI index scale) disp));
10048 
10049   format %{ "leal $dst, [$index << $scale + $disp]\t# int" %}
10050   ins_encode %{
10051     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10052     __ leal($dst$$Register, Address(noreg, $index$$Register, scale, $disp$$constant));
10053   %}
10054   ins_pipe(ialu_reg_reg);
10055 %}
10056 
10057 instruct leaI_rReg_rReg_immI(rRegI dst, rRegI base, rRegI index, immI disp)
10058 %{
        // Computes (base + index) + disp with one leal: base register, index
        // register at Address::times_1, and a displacement. Only selected when
        // VM_Version::supports_fast_3op_lea() is true. The rule has no flags
        // operand and declares no KILL effect.
10059   predicate(VM_Version::supports_fast_3op_lea());
10060   match(Set dst (AddI (AddI base index) disp));
10061 
10062   format %{ "leal $dst, [$base + $index + $disp]\t# int" %}
10063   ins_encode %{
10064     __ leal($dst$$Register, Address($base$$Register, $index$$Register, Address::times_1, $disp$$constant));
10065   %}
10066   ins_pipe(ialu_reg_reg);
10067 %}
10068 
10069 instruct leaI_rReg_rReg_immI2(rRegI dst, no_rbp_r13_RegI base, rRegI index, immI2 scale)
10070 %{
        // Computes base + (index << scale) with one leal using a base register
        // and a scaled index, with no displacement. Only selected when
        // VM_Version::supports_fast_2op_lea() is true. The rule has no flags
        // operand and declares no KILL effect.
        // NOTE(review): base uses the no_rbp_r13_RegI operand class, presumably
        // because rbp/r13 as a base cannot be encoded without a displacement -
        // confirm.
10071   predicate(VM_Version::supports_fast_2op_lea());
10072   match(Set dst (AddI base (LShiftI index scale)));
10073 
10074   format %{ "leal $dst, [$base + $index << $scale]\t# int" %}
10075   ins_encode %{
10076     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10077     __ leal($dst$$Register, Address($base$$Register, $index$$Register, scale));
10078   %}
10079   ins_pipe(ialu_reg_reg);
10080 %}
10081 
10082 instruct leaI_rReg_rReg_immI2_immI(rRegI dst, rRegI base, rRegI index, immI2 scale, immI disp)
10083 %{
        // Computes (base + (index << scale)) + disp with one leal using the full
        // base + scaled-index + displacement address form. Only selected when
        // VM_Version::supports_fast_3op_lea() is true. The rule has no flags
        // operand and declares no KILL effect.
10084   predicate(VM_Version::supports_fast_3op_lea());
10085   match(Set dst (AddI (AddI base (LShiftI index scale)) disp));
10086 
10087   format %{ "leal $dst, [$base + $index << $scale + $disp]\t# int" %}
10088   ins_encode %{
10089     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
10090     __ leal($dst$$Register, Address($base$$Register, $index$$Register, scale, $disp$$constant));
10091   %}
10092   ins_pipe(ialu_reg_reg);
10093 %}
10094 
10095 instruct addL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
10096 %{
        // Long add, register + register, two-operand form: dst is both the
        // result and the first addend. Only selected when UseAPX is off. Emits
        // addq. cr is declared killed, and the node is tagged with the
        // PD::Flag_sets_* markers for overflow, sign, zero, carry and parity.
10097   predicate(!UseAPX);
10098   match(Set dst (AddL dst src));
10099   effect(KILL cr);
10100   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10101 
10102   format %{ "addq    $dst, $src\t# long" %}
10103   ins_encode %{
10104     __ addq($dst$$Register, $src$$Register);
10105   %}
10106   ins_pipe(ialu_reg_reg);
10107 %}
10108 
10109 instruct addL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
10110 %{
        // Long add, register + register, three-operand form: dst is separate
        // from the addends src1 and src2. Only selected when UseAPX is on.
        // Emits eaddq; cr is declared killed. Besides the PD::Flag_sets_*
        // markers, the node is tagged ndd_demotable for both operand 1 and
        // operand 2.
        // NOTE(review): trailing `false` is presumably the no-flags selector -
        // confirm against the assembler.
10111   predicate(UseAPX);
10112   match(Set dst (AddL src1 src2));
10113   effect(KILL cr);
10114   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
10115 
10116   format %{ "eaddq    $dst, $src1, $src2\t# long ndd" %}
10117   ins_encode %{
10118     __ eaddq($dst$$Register, $src1$$Register, $src2$$Register, false);
10119   %}
10120   ins_pipe(ialu_reg_reg);
10121 %}
10122 
10123 instruct addL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
10124 %{
        // Long add, register + immediate, two-operand form with dst as both
        // result and first addend. The immediate operand class is immL32. Only
        // selected when UseAPX is off. Emits addq with $src$$constant; cr is
        // declared killed and the PD::Flag_sets_* markers are attached.
10125   predicate(!UseAPX);
10126   match(Set dst (AddL dst src));
10127   effect(KILL cr);
10128   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10129 
10130   format %{ "addq    $dst, $src\t# long" %}
10131   ins_encode %{
10132     __ addq($dst$$Register, $src$$constant);
10133   %}
10134   ins_pipe( ialu_reg );
10135 %}
10136 
// Three-operand ("ndd") long add of a register and an immL32 constant
// (dst = src1 + src2), used when UseAPX is on. Only operand 1 is tagged demotable.
instruct addL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (AddL src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "eaddq    $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    const Register sum = $dst$$Register;
    const Register lhs = $src1$$Register;
    __ eaddq(sum, lhs, $src2$$constant, false);
  %}
  ins_pipe( ialu_reg );
%}
10150 
// Long add with a memory source: folds the LoadL into the add (dst += [src]).
// Carries no UseAPX predicate; the flags register is declared killed.
instruct addL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
%{
  match(Set dst (AddL dst (LoadL src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  ins_cost(150); // XXX
  format %{ "addq    $dst, $src\t# long" %}
  ins_encode %{
    const Register accum = $dst$$Register;  // both input and result
    __ addq(accum, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
10164 
// Read-modify-write long add on memory: matches a StoreL of (LoadL dst) + src
// back to the same address and emits a single addq with a memory destination.
instruct addL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (AddL (LoadL dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  ins_cost(150); // XXX
  format %{ "addq    $dst, $src\t# long" %}
  ins_encode %{
    const Register addend = $src$$Register;
    __ addq($dst$$Address, addend);
  %}
  ins_pipe(ialu_mem_reg);
%}
10178 
// Read-modify-write long add of an immL32 constant on memory: matches a StoreL
// of (LoadL dst) + src back to the same address; emits addq [mem], imm.
instruct addL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (AddL (LoadL dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  ins_cost(125); // XXX
  format %{ "addq    $dst, $src\t# long" %}
  ins_encode %{
    const Address slot = $dst$$Address;  // location that is both read and written
    __ addq(slot, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
10192 
// Long increment in place: matches AddL of dst with the immL1 operand.
// Requires UseIncDec and is only used when UseAPX is off.
instruct incL_rReg(rRegL dst, immL1 src, rFlagsReg cr)
%{
  predicate(!UseAPX && UseIncDec);
  match(Set dst (AddL dst src));
  effect(KILL cr);

  format %{ "incq    $dst\t# long" %}
  ins_encode %{
    const Register counter = $dst$$Register;
    __ incrementq(counter);
  %}
  ins_pipe(ialu_reg);
%}
10205 
// Three-operand ("ndd") long increment (dst = src + 1), used when both UseAPX
// and UseIncDec are on. Operand 1 is tagged demotable.
//
// src is a long register: the rule matches AddL, produces an rRegL and emits
// the 64-bit eincq, the same shape as decL_rReg_ndd. It was previously
// declared as rRegI, which does not agree with the AddL input.
instruct incL_rReg_ndd(rRegL dst, rRegL src, immL1 val, rFlagsReg cr)
%{
  predicate(UseAPX && UseIncDec);
  match(Set dst (AddL src val));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "eincq    $dst, $src\t# long ndd" %}
  ins_encode %{
    // NOTE(review): trailing 'false' is presumably the assembler's no_flags switch - confirm.
    __ eincq($dst$$Register, $src$$Register, false);
  %}
  ins_pipe(ialu_reg);
%}
10219 
// Read-modify-write long increment on memory: matches a StoreL of
// (LoadL dst) + immL1 back to the same address. Requires UseIncDec.
instruct incL_mem(memory dst, immL1 src, rFlagsReg cr)
%{
  predicate(UseIncDec);
  match(Set dst (StoreL dst (AddL (LoadL dst) src)));
  effect(KILL cr);

  ins_cost(125); // XXX
  format %{ "incq    $dst\t# long" %}
  ins_encode %{
    const Address slot = $dst$$Address;  // location that is both read and written
    __ incrementq(slot);
  %}
  ins_pipe(ialu_mem_imm);
%}
10233 
10234 // XXX why does that use AddL
// Long decrement in place: matched as AddL of dst with the immL_M1 operand.
// Requires UseIncDec and is only used when UseAPX is off.
instruct decL_rReg(rRegL dst, immL_M1 src, rFlagsReg cr)
%{
  predicate(!UseAPX && UseIncDec);
  match(Set dst (AddL dst src));
  effect(KILL cr);

  format %{ "decq    $dst\t# long" %}
  ins_encode %{
    const Register counter = $dst$$Register;
    __ decrementq(counter);
  %}
  ins_pipe(ialu_reg);
%}
10247 
// Three-operand ("ndd") long decrement: matched as AddL of src with the
// immL_M1 operand, used when both UseAPX and UseIncDec are on.
instruct decL_rReg_ndd(rRegL dst, rRegL src, immL_M1 val, rFlagsReg cr)
%{
  predicate(UseAPX && UseIncDec);
  match(Set dst (AddL src val));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "edecq    $dst, $src\t# long ndd" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register input  = $src$$Register;
    __ edecq(result, input, false);
  %}
  ins_pipe(ialu_reg);
%}
10261 
10262 // XXX why does that use AddL
// Read-modify-write long decrement on memory: matches a StoreL of
// (LoadL dst) + immL_M1 back to the same address. Requires UseIncDec.
instruct decL_mem(memory dst, immL_M1 src, rFlagsReg cr)
%{
  predicate(UseIncDec);
  match(Set dst (StoreL dst (AddL (LoadL dst) src)));
  effect(KILL cr);

  ins_cost(125); // XXX
  format %{ "decq    $dst\t# long" %}
  ins_encode %{
    const Address slot = $dst$$Address;  // location that is both read and written
    __ decrementq(slot);
  %}
  ins_pipe(ialu_mem_imm);
%}
10276 
// Long (index << scale) + disp folded into one leaq with no base register.
// Selected only when VM_Version::supports_fast_2op_lea() holds.
instruct leaL_rReg_immI2_immL32(rRegL dst, rRegL index, immI2 scale, immL32 disp)
%{
  predicate(VM_Version::supports_fast_2op_lea());
  match(Set dst (AddL (LShiftL index scale) disp));

  format %{ "leaq $dst, [$index << $scale + $disp]\t# long" %}
  ins_encode %{
    // The shift-count immediate is reused as the addressing-mode scale factor.
    const Address::ScaleFactor factor = static_cast<Address::ScaleFactor>($scale$$constant);
    const Address effective_addr(noreg, $index$$Register, factor, $disp$$constant);
    __ leaq($dst$$Register, effective_addr);
  %}
  ins_pipe(ialu_reg_reg);
%}
10289 
// Long base + index + disp folded into one leaq (scale fixed at times_1).
// Selected only when VM_Version::supports_fast_3op_lea() holds.
instruct leaL_rReg_rReg_immL32(rRegL dst, rRegL base, rRegL index, immL32 disp)
%{
  predicate(VM_Version::supports_fast_3op_lea());
  match(Set dst (AddL (AddL base index) disp));

  format %{ "leaq $dst, [$base + $index + $disp]\t# long" %}
  ins_encode %{
    const Address effective_addr($base$$Register, $index$$Register, Address::times_1, $disp$$constant);
    __ leaq($dst$$Register, effective_addr);
  %}
  ins_pipe(ialu_reg_reg);
%}
10301 
// Long base + (index << scale) folded into one leaq without displacement.
// Selected only when VM_Version::supports_fast_2op_lea() holds.
// NOTE(review): base uses the no_rbp_r13_RegL class - presumably because rbp/r13
// as a base cannot be encoded without a displacement; confirm.
instruct leaL_rReg_rReg_immI2(rRegL dst, no_rbp_r13_RegL base, rRegL index, immI2 scale)
%{
  predicate(VM_Version::supports_fast_2op_lea());
  match(Set dst (AddL base (LShiftL index scale)));

  format %{ "leaq $dst, [$base + $index << $scale]\t# long" %}
  ins_encode %{
    // The shift-count immediate is reused as the addressing-mode scale factor.
    const Address::ScaleFactor factor = static_cast<Address::ScaleFactor>($scale$$constant);
    const Address effective_addr($base$$Register, $index$$Register, factor);
    __ leaq($dst$$Register, effective_addr);
  %}
  ins_pipe(ialu_reg_reg);
%}
10314 
// Long base + (index << scale) + disp folded into one leaq.
// Selected only when VM_Version::supports_fast_3op_lea() holds.
instruct leaL_rReg_rReg_immI2_immL32(rRegL dst, rRegL base, rRegL index, immI2 scale, immL32 disp)
%{
  predicate(VM_Version::supports_fast_3op_lea());
  match(Set dst (AddL (AddL base (LShiftL index scale)) disp));

  format %{ "leaq $dst, [$base + $index << $scale + $disp]\t# long" %}
  ins_encode %{
    // The shift-count immediate is reused as the addressing-mode scale factor.
    const Address::ScaleFactor factor = static_cast<Address::ScaleFactor>($scale$$constant);
    const Address effective_addr($base$$Register, $index$$Register, factor, $disp$$constant);
    __ leaq($dst$$Register, effective_addr);
  %}
  ins_pipe(ialu_reg_reg);
%}
10327 
// Pointer plus long register offset, in place (dst += src). Emits addq;
// the flags register is declared killed.
instruct addP_rReg(rRegP dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (AddP dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "addq    $dst, $src\t# ptr" %}
  ins_encode %{
    const Register pointer = $dst$$Register;  // both input and result
    const Register offset  = $src$$Register;
    __ addq(pointer, offset);
  %}
  ins_pipe(ialu_reg_reg);
%}
10340 
// Pointer plus immL32 constant offset, in place (dst += src). Emits addq with
// an immediate; the flags register is declared killed.
instruct addP_rReg_imm(rRegP dst, immL32 src, rFlagsReg cr)
%{
  match(Set dst (AddP dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "addq    $dst, $src\t# ptr" %}
  ins_encode %{
    const Register pointer = $dst$$Register;  // both input and result
    __ addq(pointer, $src$$constant);
  %}
  ins_pipe( ialu_reg );
%}
10353 
10354 // XXX addP mem ops ????
10355 
// CheckCastPP on a pointer register: matched in place and emitted as nothing
// (size 0, empty encoding, empty pipeline class).
instruct checkCastPP(rRegP dst)
%{
  match(Set dst (CheckCastPP dst));

  format %{ "# checkcastPP of $dst" %}
  size(0);
  ins_encode(/* empty encoding */);
  ins_pipe(empty);
%}
10365 
// CastPP on a pointer register: matched in place and emitted as nothing
// (size 0, empty encoding, empty pipeline class).
instruct castPP(rRegP dst)
%{
  match(Set dst (CastPP dst));

  format %{ "# castPP of $dst" %}
  size(0);
  ins_encode(/* empty encoding */);
  ins_pipe(empty);
%}
10375 
// CastII with verification disabled (VerifyConstraintCasts == 0): a zero-size,
// zero-cost rule that emits no code.
instruct castII(rRegI dst)
%{
  predicate(VerifyConstraintCasts == 0);
  match(Set dst (CastII dst));

  ins_cost(0);
  size(0);
  format %{ "# castII of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(empty);
%}
10387 
// CastII with verification enabled (VerifyConstraintCasts > 0): emits a call
// to verify_int_in_range for dst against this node's int type. Kills flags.
instruct castII_checked(rRegI dst, rFlagsReg cr)
%{
  predicate(VerifyConstraintCasts > 0);
  match(Set dst (CastII dst));

  effect(KILL cr);
  format %{ "# cast_checked_II $dst" %}
  ins_encode %{
    const TypeInt* expected_range = bottom_type()->is_int();
    __ verify_int_in_range(_idx, expected_range, $dst$$Register);
  %}
  ins_pipe(pipe_slow);
%}
10400 
// CastLL with verification disabled (VerifyConstraintCasts == 0): a zero-size,
// zero-cost rule that emits no code.
instruct castLL(rRegL dst)
%{
  predicate(VerifyConstraintCasts == 0);
  match(Set dst (CastLL dst));

  ins_cost(0);
  size(0);
  format %{ "# castLL of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(empty);
%}
10412 
// CastLL with verification enabled, for nodes where castLL_is_imm32(n) holds:
// verify_long_in_range is called with noreg in place of a temp register.
instruct castLL_checked_L32(rRegL dst, rFlagsReg cr)
%{
  predicate(VerifyConstraintCasts > 0 && castLL_is_imm32(n));
  match(Set dst (CastLL dst));

  effect(KILL cr);
  format %{ "# cast_checked_LL $dst" %}
  ins_encode %{
    const TypeLong* expected_range = bottom_type()->is_long();
    __ verify_long_in_range(_idx, expected_range, $dst$$Register, noreg);
  %}
  ins_pipe(pipe_slow);
%}
10425 
// CastLL with verification enabled, for nodes where castLL_is_imm32(n) does
// not hold: verify_long_in_range additionally receives a TEMP long register.
instruct castLL_checked(rRegL dst, rRegL tmp, rFlagsReg cr)
%{
  predicate(VerifyConstraintCasts > 0 && !castLL_is_imm32(n));
  match(Set dst (CastLL dst));

  effect(KILL cr, TEMP tmp);
  format %{ "# cast_checked_LL $dst\tusing $tmp as TEMP" %}
  ins_encode %{
    const TypeLong* expected_range = bottom_type()->is_long();
    const Register  scratch        = $tmp$$Register;
    __ verify_long_in_range(_idx, expected_range, $dst$$Register, scratch);
  %}
  ins_pipe(pipe_slow);
%}
10438 
// CastFF on a float register: zero-size, zero-cost rule that emits no code.
instruct castFF(regF dst)
%{
  match(Set dst (CastFF dst));

  ins_cost(0);
  size(0);
  format %{ "# castFF of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(empty);
%}
10449 
// CastHH, using the regF operand class for the value: zero-size, zero-cost
// rule that emits no code.
instruct castHH(regF dst)
%{
  match(Set dst (CastHH dst));

  ins_cost(0);
  size(0);
  format %{ "# castHH of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(empty);
%}
10460 
// CastDD on a double register: zero-size, zero-cost rule that emits no code.
instruct castDD(regD dst)
%{
  match(Set dst (CastDD dst));

  ins_cost(0);
  size(0);
  format %{ "# castDD of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(empty);
%}
10471 
10472 // XXX No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
// Pointer compare-and-swap (strong and weak forms) producing a success value
// in res. oldval uses the rax_RegP operand class and is declared killed along
// with the flags. Applies only when the node's barrier_data() is 0.
instruct compareAndSwapP(rRegI res,
                         memory mem_ptr,
                         rax_RegP oldval, rRegP newval,
                         rFlagsReg cr)
%{
  predicate(n->as_LoadStore()->barrier_data() == 0);
  match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);

  format %{ "cmpxchgq $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
            "setcc $res \t# emits sete + movzbl or setzue for APX" %}
  ins_encode %{
    const Register replacement = $newval$$Register;
    const Register success     = $res$$Register;
    __ lock();  // lock prefix for the cmpxchg emitted next
    __ cmpxchgq(replacement, $mem_ptr$$Address);
    __ setcc(Assembler::equal, success);
  %}
  ins_pipe( pipe_cmpxchg );
%}
10493 
// Long compare-and-swap (strong and weak forms) producing a success value in
// res. oldval uses the rax_RegL operand class and is declared killed along
// with the flags.
instruct compareAndSwapL(rRegI res,
                         memory mem_ptr,
                         rax_RegL oldval, rRegL newval,
                         rFlagsReg cr)
%{
  match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);

  format %{ "cmpxchgq $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
            "setcc $res \t# emits sete + movzbl or setzue for APX" %}
  ins_encode %{
    const Register replacement = $newval$$Register;
    const Register success     = $res$$Register;
    __ lock();  // lock prefix for the cmpxchg emitted next
    __ cmpxchgq(replacement, $mem_ptr$$Address);
    __ setcc(Assembler::equal, success);
  %}
  ins_pipe( pipe_cmpxchg );
%}
10513 
// Int compare-and-swap (strong and weak forms) producing a success value in
// res. oldval uses the rax_RegI operand class and is declared killed along
// with the flags.
instruct compareAndSwapI(rRegI res,
                         memory mem_ptr,
                         rax_RegI oldval, rRegI newval,
                         rFlagsReg cr)
%{
  match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);

  format %{ "cmpxchgl $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
            "setcc $res \t# emits sete + movzbl or setzue for APX" %}
  ins_encode %{
    const Register replacement = $newval$$Register;
    const Register success     = $res$$Register;
    __ lock();  // lock prefix for the cmpxchg emitted next
    __ cmpxchgl(replacement, $mem_ptr$$Address);
    __ setcc(Assembler::equal, success);
  %}
  ins_pipe( pipe_cmpxchg );
%}
10533 
// Byte compare-and-swap (strong and weak forms) producing a success value in
// res. oldval uses the rax_RegI operand class and is declared killed along
// with the flags.
instruct compareAndSwapB(rRegI res,
                         memory mem_ptr,
                         rax_RegI oldval, rRegI newval,
                         rFlagsReg cr)
%{
  match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);

  format %{ "cmpxchgb $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
            "setcc $res \t# emits sete + movzbl or setzue for APX" %}
  ins_encode %{
    const Register replacement = $newval$$Register;
    const Register success     = $res$$Register;
    __ lock();  // lock prefix for the cmpxchg emitted next
    __ cmpxchgb(replacement, $mem_ptr$$Address);
    __ setcc(Assembler::equal, success);
  %}
  ins_pipe( pipe_cmpxchg );
%}
10553 
// Short compare-and-swap (strong and weak forms) producing a success value in
// res. oldval uses the rax_RegI operand class and is declared killed along
// with the flags.
instruct compareAndSwapS(rRegI res,
                         memory mem_ptr,
                         rax_RegI oldval, rRegI newval,
                         rFlagsReg cr)
%{
  match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);

  format %{ "cmpxchgw $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
            "setcc $res \t# emits sete + movzbl or setzue for APX" %}
  ins_encode %{
    const Register replacement = $newval$$Register;
    const Register success     = $res$$Register;
    __ lock();  // lock prefix for the cmpxchg emitted next
    __ cmpxchgw(replacement, $mem_ptr$$Address);
    __ setcc(Assembler::equal, success);
  %}
  ins_pipe( pipe_cmpxchg );
%}
10573 
// Narrow-oop compare-and-swap (strong and weak forms) producing a success
// value in res; uses the 32-bit cmpxchgl. oldval uses the rax_RegN operand
// class and is declared killed. Applies only when barrier_data() is 0.
instruct compareAndSwapN(rRegI res,
                          memory mem_ptr,
                          rax_RegN oldval, rRegN newval,
                          rFlagsReg cr) %{
  predicate(n->as_LoadStore()->barrier_data() == 0);
  match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapN mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);

  format %{ "cmpxchgl $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
            "setcc $res \t# emits sete + movzbl or setzue for APX" %}
  ins_encode %{
    const Register replacement = $newval$$Register;
    const Register success     = $res$$Register;
    __ lock();  // lock prefix for the cmpxchg emitted next
    __ cmpxchgl(replacement, $mem_ptr$$Address);
    __ setcc(Assembler::equal, success);
  %}
  ins_pipe( pipe_cmpxchg );
%}
10593 
// Byte compare-and-exchange: the node's result is defined in oldval (operand
// class rax_RegI), so there is no separate result operand. Kills flags.
instruct compareAndExchangeB(
                         memory mem_ptr,
                         rax_RegI oldval, rRegI newval,
                         rFlagsReg cr)
%{
  match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
  effect(KILL cr);

  format %{ "cmpxchgb $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
  ins_encode %{
    const Register replacement = $newval$$Register;
    __ lock();  // lock prefix for the cmpxchg emitted next
    __ cmpxchgb(replacement, $mem_ptr$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}
10610 
// Short compare-and-exchange: the node's result is defined in oldval (operand
// class rax_RegI), so there is no separate result operand. Kills flags.
instruct compareAndExchangeS(
                         memory mem_ptr,
                         rax_RegI oldval, rRegI newval,
                         rFlagsReg cr)
%{
  match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
  effect(KILL cr);

  format %{ "cmpxchgw $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
  ins_encode %{
    const Register replacement = $newval$$Register;
    __ lock();  // lock prefix for the cmpxchg emitted next
    __ cmpxchgw(replacement, $mem_ptr$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}
10627 
// Int compare-and-exchange: the node's result is defined in oldval (operand
// class rax_RegI), so there is no separate result operand. Kills flags.
instruct compareAndExchangeI(
                         memory mem_ptr,
                         rax_RegI oldval, rRegI newval,
                         rFlagsReg cr)
%{
  match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
  effect(KILL cr);

  format %{ "cmpxchgl $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
  ins_encode %{
    const Register replacement = $newval$$Register;
    __ lock();  // lock prefix for the cmpxchg emitted next
    __ cmpxchgl(replacement, $mem_ptr$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}
10644 
// Long compare-and-exchange: the node's result is defined in oldval (operand
// class rax_RegL), so there is no separate result operand. Kills flags.
instruct compareAndExchangeL(
                         memory mem_ptr,
                         rax_RegL oldval, rRegL newval,
                         rFlagsReg cr)
%{
  match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
  effect(KILL cr);

  format %{ "cmpxchgq $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
  ins_encode %{
    const Register replacement = $newval$$Register;
    __ lock();  // lock prefix for the cmpxchg emitted next
    __ cmpxchgq(replacement, $mem_ptr$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}
10661 
// Narrow-oop compare-and-exchange via the 32-bit cmpxchgl: the node's result
// is defined in oldval (operand class rax_RegN). Applies only when
// barrier_data() is 0. Kills flags.
instruct compareAndExchangeN(
                          memory mem_ptr,
                          rax_RegN oldval, rRegN newval,
                          rFlagsReg cr) %{
  predicate(n->as_LoadStore()->barrier_data() == 0);
  match(Set oldval (CompareAndExchangeN mem_ptr (Binary oldval newval)));
  effect(KILL cr);

  format %{ "cmpxchgl $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
  ins_encode %{
    const Register replacement = $newval$$Register;
    __ lock();  // lock prefix for the cmpxchg emitted next
    __ cmpxchgl(replacement, $mem_ptr$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}
10678 
// Pointer compare-and-exchange: the node's result is defined in oldval
// (operand class rax_RegP). Applies only when barrier_data() is 0. Kills flags.
instruct compareAndExchangeP(
                         memory mem_ptr,
                         rax_RegP oldval, rRegP newval,
                         rFlagsReg cr)
%{
  predicate(n->as_LoadStore()->barrier_data() == 0);
  match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
  effect(KILL cr);

  format %{ "cmpxchgq $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
  ins_encode %{
    const Register replacement = $newval$$Register;
    __ lock();  // lock prefix for the cmpxchg emitted next
    __ cmpxchgq(replacement, $mem_ptr$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}
10696 
// GetAndAddB whose result is unused (result_not_used()): a locked addb of a
// register into memory is emitted instead of xadd. Kills flags.
instruct xaddB_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddB mem add));
  effect(KILL cr);
  format %{ "addb_lock   $mem, $add" %}
  ins_encode %{
    const Register delta = $add$$Register;
    __ lock();
    __ addb($mem$$Address, delta);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10708 
// GetAndAddB with a constant addend whose result is unused: a locked addb of
// an immediate into memory is emitted instead of xadd. Kills flags.
instruct xaddB_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddB mem add));
  effect(KILL cr);
  format %{ "addb_lock   $mem, $add" %}
  ins_encode %{
    const Address target = $mem$$Address;
    __ lock();
    __ addb(target, $add$$constant);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10720 
// GetAndAddB whose result is used: locked xaddb, with newval serving as both
// the addend and the result, followed by narrow_subword_type(..., T_BYTE).
instruct xaddB(memory mem, rRegI newval, rFlagsReg cr) %{
  predicate(!n->as_LoadStore()->result_not_used());
  match(Set newval (GetAndAddB mem newval));
  effect(KILL cr);
  format %{ "xaddb_lock  $mem, $newval\t# $newval -> byte" %}
  ins_encode %{
    const Register value = $newval$$Register;  // addend in, result out
    __ lock();
    __ xaddb($mem$$Address, value);
    __ narrow_subword_type(value, T_BYTE);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10733 
// GetAndAddS whose result is unused (result_not_used()): a locked addw of a
// register into memory is emitted instead of xadd. Kills flags.
instruct xaddS_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddS mem add));
  effect(KILL cr);
  format %{ "addw_lock   $mem, $add" %}
  ins_encode %{
    const Register delta = $add$$Register;
    __ lock();
    __ addw($mem$$Address, delta);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10745 
// GetAndAddS with a constant addend whose result is unused: a locked addw of
// an immediate into memory. Additionally gated on UseStoreImmI16. Kills flags.
instruct xaddS_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
  predicate(UseStoreImmI16 && n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddS mem add));
  effect(KILL cr);
  format %{ "addw_lock   $mem, $add" %}
  ins_encode %{
    const Address target = $mem$$Address;
    __ lock();
    __ addw(target, $add$$constant);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10757 
// GetAndAddS whose result is used: locked xaddw, with newval serving as both
// the addend and the result, followed by narrow_subword_type(..., T_SHORT).
instruct xaddS(memory mem, rRegI newval, rFlagsReg cr) %{
  predicate(!n->as_LoadStore()->result_not_used());
  match(Set newval (GetAndAddS mem newval));
  effect(KILL cr);
  format %{ "xaddw_lock  $mem, $newval\t# $newval -> short" %}
  ins_encode %{
    const Register value = $newval$$Register;  // addend in, result out
    __ lock();
    __ xaddw($mem$$Address, value);
    __ narrow_subword_type(value, T_SHORT);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10770 
// GetAndAddI whose result is unused (result_not_used()): a locked addl of a
// register into memory is emitted instead of xadd. Kills flags.
instruct xaddI_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem add));
  effect(KILL cr);
  format %{ "addl_lock   $mem, $add" %}
  ins_encode %{
    const Register delta = $add$$Register;
    __ lock();
    __ addl($mem$$Address, delta);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10782 
// GetAndAddI with a constant addend whose result is unused: a locked addl of
// an immediate into memory is emitted instead of xadd. Kills flags.
instruct xaddI_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem add));
  effect(KILL cr);
  format %{ "addl_lock   $mem, $add" %}
  ins_encode %{
    const Address target = $mem$$Address;
    __ lock();
    __ addl(target, $add$$constant);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10794 
// GetAndAddI whose result is used: locked xaddl, with newval serving as both
// the addend and the result. Kills flags.
instruct xaddI(memory mem, rRegI newval, rFlagsReg cr) %{
  predicate(!n->as_LoadStore()->result_not_used());
  match(Set newval (GetAndAddI mem newval));
  effect(KILL cr);
  format %{ "xaddl_lock  $mem, $newval" %}
  ins_encode %{
    const Register value = $newval$$Register;  // addend in, result out
    __ lock();
    __ xaddl($mem$$Address, value);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10806 
// GetAndAddL whose result is unused (result_not_used()): a locked addq of a
// register into memory is emitted instead of xadd. Kills flags.
instruct xaddL_reg_no_res(memory mem, Universe dummy, rRegL add, rFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddL mem add));
  effect(KILL cr);
  format %{ "addq_lock   $mem, $add" %}
  ins_encode %{
    const Register delta = $add$$Register;
    __ lock();
    __ addq($mem$$Address, delta);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10818 
// GetAndAddL with an immL32 addend whose result is unused: a locked addq of
// an immediate into memory is emitted instead of xadd. Kills flags.
instruct xaddL_imm_no_res(memory mem, Universe dummy, immL32 add, rFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddL mem add));
  effect(KILL cr);
  format %{ "addq_lock   $mem, $add" %}
  ins_encode %{
    const Address target = $mem$$Address;
    __ lock();
    __ addq(target, $add$$constant);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10830 
// GetAndAddL whose result is used: locked xaddq, with newval serving as both
// the addend and the result. Kills flags.
instruct xaddL(memory mem, rRegL newval, rFlagsReg cr) %{
  predicate(!n->as_LoadStore()->result_not_used());
  match(Set newval (GetAndAddL mem newval));
  effect(KILL cr);
  format %{ "xaddq_lock  $mem, $newval" %}
  ins_encode %{
    const Register value = $newval$$Register;  // addend in, result out
    __ lock();
    __ xaddq($mem$$Address, value);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10842 
// GetAndSetB: exchanges newval with the byte at mem via xchgb, then applies
// narrow_subword_type(..., T_BYTE) to the value left in newval.
// No flags operand and no lock() call appear in this rule.
instruct xchgB( memory mem, rRegI newval) %{
  match(Set newval (GetAndSetB mem newval));
  format %{ "XCHGB  $newval,[$mem]\t# $newval -> byte" %}
  ins_encode %{
    const Register value = $newval$$Register;  // new value in, previous value out
    __ xchgb(value, $mem$$Address);
    __ narrow_subword_type(value, T_BYTE);
  %}
  ins_pipe( pipe_cmpxchg );
%}
10852 
// GetAndSetS: exchanges newval with the short at mem via xchgw, then applies
// narrow_subword_type(..., T_SHORT) to the value left in newval.
instruct xchgS( memory mem, rRegI newval) %{
  match(Set newval (GetAndSetS mem newval));
  format %{ "XCHGW  $newval,[$mem]\t# $newval -> short" %}
  ins_encode %{
    const Register value = $newval$$Register;  // new value in, previous value out
    __ xchgw(value, $mem$$Address);
    __ narrow_subword_type(value, T_SHORT);
  %}
  ins_pipe( pipe_cmpxchg );
%}
10862 
// GetAndSetI: exchanges newval with the int at mem via xchgl; newval is both
// the value to store and the rule's result.
instruct xchgI( memory mem, rRegI newval) %{
  match(Set newval (GetAndSetI mem newval));
  format %{ "XCHGL  $newval,[$mem]" %}
  ins_encode %{
    const Register value = $newval$$Register;  // new value in, previous value out
    __ xchgl(value, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}
10871 
// GetAndSetL: exchanges newval with the long at mem via the 64-bit xchgq;
// newval is both the value to store and the rule's result.
// The format text names XCHGQ to agree with the instruction actually emitted
// (it previously printed XCHGL).
instruct xchgL( memory mem, rRegL newval) %{
  match(Set newval (GetAndSetL mem newval));
  format %{ "XCHGQ  $newval,[$mem]" %}
  ins_encode %{
    __ xchgq($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}
10880 
// GetAndSetP: exchanges newval with the pointer at mem via xchgq. Applies only
// when the node's barrier_data() is 0.
instruct xchgP( memory mem, rRegP newval) %{
  predicate(n->as_LoadStore()->barrier_data() == 0);
  match(Set newval (GetAndSetP mem newval));
  format %{ "XCHGQ  $newval,[$mem]" %}
  ins_encode %{
    const Register value = $newval$$Register;  // new value in, previous value out
    __ xchgq(value, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}
10890 
// GetAndSetN: exchanges newval with the narrow oop at mem via the 32-bit
// xchgl. Applies only when the node's barrier_data() is 0.
// The format text now has the opening '[' around $mem that was missing,
// matching the other xchg rules.
instruct xchgN( memory mem, rRegN newval) %{
  predicate(n->as_LoadStore()->barrier_data() == 0);
  match(Set newval (GetAndSetN mem newval));
  format %{ "XCHGL  $newval,[$mem]" %}
  ins_encode %{
    __ xchgl($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}
10900 
10901 //----------Abs Instructions-------------------------------------------
10902 
10903 // Integer Absolute Instructions
// Branch-free int absolute value: dst = 0 - src, then src is conditionally
// moved over dst on the "less" condition of that subtraction.
// dst is zeroed before src is read, hence the TEMP dst effect
// (presumably to keep dst and src in different registers - confirm).
instruct absI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (AbsI src));
  effect(TEMP dst, KILL cr);
  format %{ "xorl    $dst, $dst\t# abs int\n\t"
            "subl    $dst, $src\n\t"
            "cmovll  $dst, $src" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register input  = $src$$Register;
    __ xorl(result, result);                    // result = 0
    __ subl(result, input);                     // result = 0 - input
    __ cmovl(Assembler::less, result, input);   // take input when 0 < input
  %}

  ins_pipe(ialu_reg_reg);
%}
10919 
10920 // Long Absolute Instructions
// Branch-free long absolute value: dst = 0 - src, then src is conditionally
// moved over dst on the "less" condition of that subtraction.
// dst is cleared with the 32-bit xorl (as the format text shows) before the
// 64-bit subq/cmovq; dst is declared TEMP because it is written before src is read.
instruct absL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (AbsL src));
  effect(TEMP dst, KILL cr);
  format %{ "xorl    $dst, $dst\t# abs long\n\t"
            "subq    $dst, $src\n\t"
            "cmovlq  $dst, $src" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register input  = $src$$Register;
    __ xorl(result, result);                    // result = 0
    __ subq(result, input);                     // result = 0 - input
    __ cmovq(Assembler::less, result, input);   // take input when 0 < input
  %}

  ins_pipe(ialu_reg_reg);
%}
10936 
10937 //----------Subtraction Instructions-------------------------------------------
10938 
10939 // Integer Subtraction Instructions
// Two-operand int subtract (dst -= src), used when UseAPX is off.
// Emits subl; the flags register is declared killed.
instruct subI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (SubI dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "subl    $dst, $src\t# int" %}
  ins_encode %{
    const Register minuend    = $dst$$Register;  // also receives the difference
    const Register subtrahend = $src$$Register;
    __ subl(minuend, subtrahend);
  %}
  ins_pipe(ialu_reg_reg);
%}
10953 
// Three-operand ("ndd") int subtract (dst = src1 - src2), used when UseAPX is
// on. Only operand 1 is tagged demotable.
instruct subI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (SubI src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    const Register difference = $dst$$Register;
    const Register minuend    = $src1$$Register;
    const Register subtrahend = $src2$$Register;
    __ esubl(difference, minuend, subtrahend, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
10967 
// Three-operand ("ndd") int subtract of an immI constant from a register
// (dst = src1 - src2), used when UseAPX is on. Operand 1 is tagged demotable.
instruct subI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (SubI src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    const Register difference = $dst$$Register;
    const Register minuend    = $src1$$Register;
    __ esubl(difference, minuend, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
10981 
// Int subtract with a memory source: folds the LoadI into the subtract
// (dst -= [src]). Carries no UseAPX predicate; kills flags.
instruct subI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
%{
  match(Set dst (SubI dst (LoadI src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  ins_cost(150);
  format %{ "subl    $dst, $src\t# int" %}
  ins_encode %{
    const Register minuend = $dst$$Register;  // also receives the difference
    __ subl(minuend, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
10995 
// Read-modify-write int subtract on memory: matches a StoreI of
// (LoadI dst) - src back to the same address; emits subl [mem], reg.
instruct subI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (SubI (LoadI dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  ins_cost(150);
  format %{ "subl    $dst, $src\t# int" %}
  ins_encode %{
    const Register subtrahend = $src$$Register;
    __ subl($dst$$Address, subtrahend);
  %}
  ins_pipe(ialu_mem_reg);
%}
11009 
// Two-operand long subtract (dst -= src), used when UseAPX is off.
// Emits subq; the flags register is declared killed.
instruct subL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (SubL dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "subq    $dst, $src\t# long" %}
  ins_encode %{
    const Register minuend    = $dst$$Register;  // also receives the difference
    const Register subtrahend = $src$$Register;
    __ subq(minuend, subtrahend);
  %}
  ins_pipe(ialu_reg_reg);
%}
11023 
// Three-operand ("ndd") long subtract (dst = src1 - src2), used when UseAPX
// is on. Only operand 1 is tagged demotable.
instruct subL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
%{
  predicate(UseAPX);
  match(Set dst (SubL src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    const Register difference = $dst$$Register;
    const Register minuend    = $src1$$Register;
    const Register subtrahend = $src2$$Register;
    __ esubq(difference, minuend, subtrahend, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
11037 
// Long subtract of an immL32 constant, three-operand (ndd) form:
// dst = src1 - src2.  Selected only when UseAPX is on.
instruct subL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (SubL src1 src2));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag,
       PD::Flag_sets_sign_flag,
       PD::Flag_sets_zero_flag,
       PD::Flag_sets_carry_flag,
       PD::Flag_sets_parity_flag,
       PD::Flag_ndd_demotable_opr1);

  format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{ __ esubq($dst$$Register, $src1$$Register, $src2$$constant, false); %}
  ins_pipe(ialu_reg_reg);
%}
11051 
// Long subtract with a memory source: dst = dst - [src].
// The LoadL is folded into the subq.
instruct subL_rReg_mem(rRegL dst, memory src, rFlagsReg cr) %{
  match(Set dst (SubL dst (LoadL src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag,
       PD::Flag_sets_sign_flag,
       PD::Flag_sets_zero_flag,
       PD::Flag_sets_carry_flag,
       PD::Flag_sets_parity_flag);

  ins_cost(150);
  format %{ "subq    $dst, $src\t# long" %}
  ins_encode %{ __ subq($dst$$Register, $src$$Address); %}
  ins_pipe(ialu_reg_mem);
%}
11065 
// Long subtract, read-modify-write on memory: [dst] = [dst] - src.
// The LoadL / SubL / StoreL on the same address are matched as one subq.
instruct subL_mem_rReg(memory dst, rRegL src, rFlagsReg cr) %{
  match(Set dst (StoreL dst (SubL (LoadL dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag,
       PD::Flag_sets_sign_flag,
       PD::Flag_sets_zero_flag,
       PD::Flag_sets_carry_flag,
       PD::Flag_sets_parity_flag);

  ins_cost(150);
  format %{ "subq    $dst, $src\t# long" %}
  ins_encode %{ __ subq($dst$$Address, $src$$Register); %}
  ins_pipe(ialu_mem_reg);
%}
11079 
// Subtract from a pointer: matches AddP of dst with (0 - src) and emits a
// single subq instead of a negate followed by an add.
// XXX hmpf???
// NOTE(review): src is an rRegI but is consumed by a 64-bit subq; confirm
// that the upper half of the register holds what this rule expects.
instruct subP_rReg(rRegP dst, rRegI src, immI_0 zero, rFlagsReg cr) %{
  match(Set dst (AddP dst (SubI zero src)));
  effect(KILL cr);

  format %{ "subq    $dst, $src\t# ptr - int" %}
  ins_encode %{ __ subq($dst$$Register, $src$$Register); %}
  ins_pipe(ialu_reg_reg);
%}
11093 
// Int negate expressed as (0 - dst), in place: dst = -dst.
// Selected only when UseAPX is off.
instruct negI_rReg(rRegI dst, immI_0 zero, rFlagsReg cr) %{
  predicate(!UseAPX);
  match(Set dst (SubI zero dst));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag,
       PD::Flag_sets_sign_flag,
       PD::Flag_sets_zero_flag,
       PD::Flag_sets_parity_flag);

  format %{ "negl    $dst\t# int" %}
  ins_encode %{ __ negl($dst$$Register); %}
  ins_pipe(ialu_reg);
%}
11107 
// Int negate expressed as (0 - src), ndd form: dst = -src.
// Selected only when UseAPX is on; src is the second match operand, hence
// the opr2 demotion flag.
instruct negI_rReg_ndd(rRegI dst, rRegI src, immI_0 zero, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (SubI zero src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag,
       PD::Flag_sets_sign_flag,
       PD::Flag_sets_zero_flag,
       PD::Flag_sets_parity_flag,
       PD::Flag_ndd_demotable_opr2);

  format %{ "enegl    $dst, $src\t# int ndd" %}
  ins_encode %{ __ enegl($dst$$Register, $src$$Register, false); %}
  ins_pipe(ialu_reg);
%}
11121 
// Int negate for an explicit NegI node, in place: dst = -dst.
// Selected only when UseAPX is off.
instruct negI_rReg_2(rRegI dst, rFlagsReg cr) %{
  predicate(!UseAPX);
  match(Set dst (NegI dst));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag,
       PD::Flag_sets_sign_flag,
       PD::Flag_sets_zero_flag,
       PD::Flag_sets_parity_flag);

  format %{ "negl    $dst\t# int" %}
  ins_encode %{ __ negl($dst$$Register); %}
  ins_pipe(ialu_reg);
%}
11135 
// Int negate for an explicit NegI node, ndd form: dst = -src.
// Selected only when UseAPX is on.
instruct negI_rReg_2_ndd(rRegI dst, rRegI src, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (NegI src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag,
       PD::Flag_sets_sign_flag,
       PD::Flag_sets_zero_flag,
       PD::Flag_sets_parity_flag,
       PD::Flag_ndd_demotable_opr1);

  format %{ "enegl    $dst, $src\t# int ndd" %}
  ins_encode %{ __ enegl($dst$$Register, $src$$Register, false); %}
  ins_pipe(ialu_reg);
%}
11149 
// Int negate, read-modify-write on memory: [dst] = 0 - [dst].
// NOTE(review): the pipe class is ialu_reg although the destination is a
// memory operand; confirm this is intended.
instruct negI_mem(memory dst, immI_0 zero, rFlagsReg cr) %{
  match(Set dst (StoreI dst (SubI zero (LoadI dst))));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag,
       PD::Flag_sets_sign_flag,
       PD::Flag_sets_zero_flag,
       PD::Flag_sets_parity_flag);

  format %{ "negl    $dst\t# int" %}
  ins_encode %{ __ negl($dst$$Address); %}
  ins_pipe(ialu_reg);
%}
11162 
// Long negate expressed as (0 - dst), in place: dst = -dst.
// Selected only when UseAPX is off.
instruct negL_rReg(rRegL dst, immL0 zero, rFlagsReg cr) %{
  predicate(!UseAPX);
  match(Set dst (SubL zero dst));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag,
       PD::Flag_sets_sign_flag,
       PD::Flag_sets_zero_flag,
       PD::Flag_sets_parity_flag);

  format %{ "negq    $dst\t# long" %}
  ins_encode %{ __ negq($dst$$Register); %}
  ins_pipe(ialu_reg);
%}
11176 
// Long negate expressed as (0 - src), ndd form: dst = -src.
// Selected only when UseAPX is on; src is the second match operand, hence
// the opr2 demotion flag.
instruct negL_rReg_ndd(rRegL dst, rRegL src, immL0 zero, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (SubL zero src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag,
       PD::Flag_sets_sign_flag,
       PD::Flag_sets_zero_flag,
       PD::Flag_sets_parity_flag,
       PD::Flag_ndd_demotable_opr2);

  format %{ "enegq    $dst, $src\t# long ndd" %}
  ins_encode %{ __ enegq($dst$$Register, $src$$Register, false); %}
  ins_pipe(ialu_reg);
%}
11190 
// Long negate for an explicit NegL node, in place: dst = -dst.
// Selected only when UseAPX is off.
instruct negL_rReg_2(rRegL dst, rFlagsReg cr)
%{
  predicate(!UseAPX);
  match(Set dst (NegL dst));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);

  // The operand is an rRegL and the instruction is negq, so the disassembly
  // annotation is "long" (it previously read "int").
  format %{ "negq    $dst\t# long" %}
  ins_encode %{
    __ negq($dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}
11204 
// Long negate for an explicit NegL node, ndd form: dst = -src.
// Selected only when UseAPX is on.
instruct negL_rReg_2_ndd(rRegL dst, rRegL src, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (NegL src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag,
       PD::Flag_sets_sign_flag,
       PD::Flag_sets_zero_flag,
       PD::Flag_sets_parity_flag,
       PD::Flag_ndd_demotable_opr1);

  format %{ "enegq    $dst, $src\t# long ndd" %}
  ins_encode %{ __ enegq($dst$$Register, $src$$Register, false); %}
  ins_pipe(ialu_reg);
%}
11218 
// Long negate, read-modify-write on memory: [dst] = 0 - [dst].
// NOTE(review): the pipe class is ialu_reg although the destination is a
// memory operand; confirm this is intended.
instruct negL_mem(memory dst, immL0 zero, rFlagsReg cr) %{
  match(Set dst (StoreL dst (SubL zero (LoadL dst))));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag,
       PD::Flag_sets_sign_flag,
       PD::Flag_sets_zero_flag,
       PD::Flag_sets_parity_flag);

  format %{ "negq    $dst\t# long" %}
  ins_encode %{ __ negq($dst$$Address); %}
  ins_pipe(ialu_reg);
%}
11231 
11232 //----------Multiplication/Division Instructions-------------------------------
11233 // Integer Multiplication Instructions
11234 // Multiply Register
11235 
// Int multiply, two-operand form: dst = dst * src.
// Selected only when UseAPX is off.
instruct mulI_rReg(rRegI dst, rRegI src, rFlagsReg cr) %{
  predicate(!UseAPX);
  match(Set dst (MulI dst src));
  effect(KILL cr);

  ins_cost(300);
  format %{ "imull   $dst, $src\t# int" %}
  ins_encode %{ __ imull($dst$$Register, $src$$Register); %}
  ins_pipe(ialu_reg_reg_alu0);
%}
11249 
// Int multiply, three-operand (ndd) form: dst = src1 * src2.
// Selected only when UseAPX is on; both sources carry a demotion flag.
instruct mulI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (MulI src1 src2));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1,
       PD::Flag_ndd_demotable_opr2);

  ins_cost(300);
  format %{ "eimull   $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{ __ eimull($dst$$Register, $src1$$Register, $src2$$Register, false); %}
  ins_pipe(ialu_reg_reg_alu0);
%}
11264 
// Int multiply by an immediate: dst = src * imm.
// Uses the three-operand imull, so dst is independent of src; no UseAPX
// predicate is attached.
instruct mulI_rReg_imm(rRegI dst, rRegI src, immI imm, rFlagsReg cr) %{
  match(Set dst (MulI src imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "imull   $dst, $src, $imm\t# int" %}
  ins_encode %{ __ imull($dst$$Register, $src$$Register, $imm$$constant); %}
  ins_pipe(ialu_reg_reg_alu0);
%}
11277 
// Int multiply with a memory source: dst = dst * [src].
// The LoadI is folded into the imull.
instruct mulI_mem(rRegI dst, memory src, rFlagsReg cr) %{
  match(Set dst (MulI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(350);
  format %{ "imull   $dst, $src\t# int" %}
  ins_encode %{ __ imull($dst$$Register, $src$$Address); %}
  ins_pipe(ialu_reg_mem_alu0);
%}
11290 
// Int multiply of a loaded value by an immediate: dst = [src] * imm.
instruct mulI_mem_imm(rRegI dst, memory src, immI imm, rFlagsReg cr) %{
  match(Set dst (MulI (LoadI src) imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "imull   $dst, $src, $imm\t# int" %}
  ins_encode %{ __ imull($dst$$Register, $src$$Address, $imm$$constant); %}
  ins_pipe(ialu_reg_mem_alu0);
%}
11303 
// MulAddS2I: dst = dst * src1 + src2 * src3.
// Expanded into two multiplies and an add; src2 receives the second product
// and is therefore declared killed alongside the flags.
instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, rFlagsReg cr) %{
  match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
  effect(KILL cr, KILL src2);

  expand %{
    mulI_rReg(dst, src1, cr);
    mulI_rReg(src2, src3, cr);
    addI_rReg(dst, src2, cr);
  %}
%}
11313 
// Long multiply, two-operand form: dst = dst * src.
// Selected only when UseAPX is off.
instruct mulL_rReg(rRegL dst, rRegL src, rFlagsReg cr) %{
  predicate(!UseAPX);
  match(Set dst (MulL dst src));
  effect(KILL cr);

  ins_cost(300);
  format %{ "imulq   $dst, $src\t# long" %}
  ins_encode %{ __ imulq($dst$$Register, $src$$Register); %}
  ins_pipe(ialu_reg_reg_alu0);
%}
11327 
// Long multiply, three-operand (ndd) form: dst = src1 * src2.
// Selected only when UseAPX is on; both sources carry a demotion flag.
instruct mulL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (MulL src1 src2));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1,
       PD::Flag_ndd_demotable_opr2);

  ins_cost(300);
  format %{ "eimulq   $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{ __ eimulq($dst$$Register, $src1$$Register, $src2$$Register, false); %}
  ins_pipe(ialu_reg_reg_alu0);
%}
11342 
// Long multiply by an immL32 constant: dst = src * imm.
// Uses the three-operand imulq, so dst is independent of src.
instruct mulL_rReg_imm(rRegL dst, rRegL src, immL32 imm, rFlagsReg cr) %{
  match(Set dst (MulL src imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "imulq   $dst, $src, $imm\t# long" %}
  ins_encode %{ __ imulq($dst$$Register, $src$$Register, $imm$$constant); %}
  ins_pipe(ialu_reg_reg_alu0);
%}
11355 
// Long multiply with a memory source: dst = dst * [src].
// The LoadL is folded into the imulq.
instruct mulL_mem(rRegL dst, memory src, rFlagsReg cr) %{
  match(Set dst (MulL dst (LoadL src)));
  effect(KILL cr);

  ins_cost(350);
  format %{ "imulq   $dst, $src\t# long" %}
  ins_encode %{ __ imulq($dst$$Register, $src$$Address); %}
  ins_pipe(ialu_reg_mem_alu0);
%}
11368 
11369 
// Long multiply of a loaded value by an immL32 constant: dst = [src] * imm.
instruct mulL_mem_imm(rRegL dst, memory src, immL32 imm, rFlagsReg cr) %{
  match(Set dst (MulL (LoadL src) imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "imulq   $dst, $src, $imm\t# long" %}
  ins_encode %{ __ imulq($dst$$Register, $src$$Address, $imm$$constant); %}
  ins_pipe(ialu_reg_mem_alu0);
%}
11382 
// Signed multiply-high (MulHiL): one-operand imulq with rax as the implicit
// multiplicand.  The result is taken from rdx (dst is an rdx_RegL); rax is
// both consumed and overwritten, hence USE_KILL.
instruct mulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr) %{
  match(Set dst (MulHiL src rax));
  effect(USE_KILL rax, KILL cr);

  ins_cost(300);
  format %{ "imulq   RDX:RAX, RAX, $src\t# mulhi" %}
  ins_encode %{ __ imulq($src$$Register); %}
  ins_pipe(ialu_reg_reg_alu0);
%}
11395 
// Unsigned multiply-high (UMulHiL): one-operand mulq with rax as the
// implicit multiplicand.  The result is taken from rdx (dst is an
// rdx_RegL); rax is both consumed and overwritten, hence USE_KILL.
instruct umulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr) %{
  match(Set dst (UMulHiL src rax));
  effect(USE_KILL rax, KILL cr);

  ins_cost(300);
  format %{ "mulq   RDX:RAX, RAX, $src\t# umulhi" %}
  ins_encode %{ __ mulq($src$$Register); %}
  ins_pipe(ialu_reg_reg_alu0);
%}
11408 
// Signed int divide: rax = rax / div, with rdx clobbered.
// The divisor may not live in rax or rdx (no_rax_rdx_RegI).  Per the format
// text, the sequence skips the idivl when rax is 0x80000000 and div is -1;
// the code itself comes from the cdql_enc encoding class defined elsewhere.
instruct divI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div, rFlagsReg cr) %{
  match(Set rax (DivI rax div));
  effect(KILL rdx, KILL cr);

  ins_cost(30*100+10*100); // XXX
  format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
            "jne,s   normal\n\t"
            "xorl    rdx, rdx\n\t"
            "cmpl    $div, -1\n\t"
            "je,s    done\n"
    "normal: cdql\n\t"
            "idivl   $div\n"
    "done:"        %}
  ins_encode(cdql_enc(div));
  ins_pipe(ialu_reg_reg_alu0);
%}
11427 
// Signed long divide: rax = rax / div, with rdx clobbered.
// The divisor may not live in rax or rdx (no_rax_rdx_RegL).  Per the format
// text, the sequence skips the idivq when rax is 0x8000000000000000 and div
// is -1; the code itself comes from the cdqq_enc encoding class defined
// elsewhere.
instruct divL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div, rFlagsReg cr) %{
  match(Set rax (DivL rax div));
  effect(KILL rdx, KILL cr);

  ins_cost(30*100+10*100); // XXX
  format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
            "cmpq    rax, rdx\n\t"
            "jne,s   normal\n\t"
            "xorl    rdx, rdx\n\t"
            "cmpq    $div, -1\n\t"
            "je,s    done\n"
    "normal: cdqq\n\t"
            "idivq   $div\n"
    "done:"        %}
  ins_encode(cdqq_enc(div));
  ins_pipe(ialu_reg_reg_alu0);
%}
11447 
// Unsigned int divide: rax = rax / div (UDivI), with rdx clobbered.
// The work is delegated to the udivI macro-assembler routine.
instruct udivI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div, rFlagsReg cr) %{
  match(Set rax (UDivI rax div));
  effect(KILL rdx, KILL cr);

  ins_cost(300);
  format %{ "udivl $rax,$rax,$div\t# UDivI\n" %}
  ins_encode %{ __ udivI($rax$$Register, $div$$Register, $rdx$$Register); %}
  ins_pipe(ialu_reg_reg_alu0);
%}
11460 
// Unsigned long divide: rax = rax / div (UDivL), with rdx clobbered.
// The work is delegated to the udivL macro-assembler routine.
instruct udivL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div, rFlagsReg cr) %{
  match(Set rax (UDivL rax div));
  effect(KILL rdx, KILL cr);

  ins_cost(300);
  format %{ "udivq $rax,$rax,$div\t# UDivL\n" %}
  ins_encode %{ __ udivL($rax$$Register, $div$$Register, $rdx$$Register); %}
  ins_pipe(ialu_reg_reg_alu0);
%}
11473 
// Integer DIVMOD with register divisor, producing both quotient and
// remainder from one node.  Unlike the plain divide, rdx is not declared
// killed here; only the flags are.  Encoding is shared with divI_rReg
// (cdql_enc).
instruct divModI_rReg_divmod(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div, rFlagsReg cr) %{
  match(DivModI rax div);
  effect(KILL cr);

  ins_cost(30*100+10*100); // XXX
  format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
            "jne,s   normal\n\t"
            "xorl    rdx, rdx\n\t"
            "cmpl    $div, -1\n\t"
            "je,s    done\n"
    "normal: cdql\n\t"
            "idivl   $div\n"
    "done:"        %}
  ins_encode(cdql_enc(div));
  ins_pipe(pipe_slow);
%}
11493 
// Long DIVMOD with register divisor, producing both quotient and remainder
// from one node.  Unlike the plain divide, rdx is not declared killed here;
// only the flags are.  Encoding is shared with divL_rReg (cdqq_enc).
instruct divModL_rReg_divmod(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div, rFlagsReg cr) %{
  match(DivModL rax div);
  effect(KILL cr);

  ins_cost(30*100+10*100); // XXX
  format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
            "cmpq    rax, rdx\n\t"
            "jne,s   normal\n\t"
            "xorl    rdx, rdx\n\t"
            "cmpq    $div, -1\n\t"
            "je,s    done\n"
    "normal: cdqq\n\t"
            "idivq   $div\n"
    "done:"        %}
  ins_encode(cdqq_enc(div));
  ins_pipe(pipe_slow);
%}
11514 
// Unsigned integer DIVMOD with register divisor, producing both quotient
// and remainder.  Needs one scratch register (tmp) outside rax/rdx; the work
// is delegated to the udivmodI macro-assembler routine.
instruct udivModI_rReg_divmod(rax_RegI rax, no_rax_rdx_RegI tmp, rdx_RegI rdx,
                              no_rax_rdx_RegI div, rFlagsReg cr) %{
  match(UDivModI rax div);
  effect(TEMP tmp, KILL cr);

  ins_cost(300);
  format %{ "udivl $rax,$rax,$div\t# begin UDivModI\n\t"
            "umodl $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModI\n"
          %}
  ins_encode %{ __ udivmodI($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register); %}
  ins_pipe(pipe_slow);
%}
11531 
// Unsigned long DIVMOD with register divisor, producing both quotient and
// remainder.  Needs one scratch register (tmp) outside rax/rdx; the work is
// delegated to the udivmodL macro-assembler routine.
instruct udivModL_rReg_divmod(rax_RegL rax, no_rax_rdx_RegL tmp, rdx_RegL rdx,
                              no_rax_rdx_RegL div, rFlagsReg cr) %{
  match(UDivModL rax div);
  effect(TEMP tmp, KILL cr);

  ins_cost(300);
  format %{ "udivq $rax,$rax,$div\t# begin UDivModL\n\t"
            "umodq $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModL\n"
          %}
  ins_encode %{ __ udivmodL($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register); %}
  ins_pipe(pipe_slow);
%}
11548 
// Signed int remainder: rdx = rax % div, with rax clobbered.
// Uses the same cdql_enc sequence as the int divide; here the value of
// interest is the one left in rdx.
instruct modI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div, rFlagsReg cr) %{
  match(Set rdx (ModI rax div));
  effect(KILL rax, KILL cr);

  ins_cost(300); // XXX
  format %{ "cmpl    rax, 0x80000000\t# irem\n\t"
            "jne,s   normal\n\t"
            "xorl    rdx, rdx\n\t"
            "cmpl    $div, -1\n\t"
            "je,s    done\n"
    "normal: cdql\n\t"
            "idivl   $div\n"
    "done:"        %}
  ins_encode(cdql_enc(div));
  ins_pipe(ialu_reg_reg_alu0);
%}
11567 
// Signed long remainder: rdx = rax % div, with rax clobbered.
// Uses the same cdqq_enc sequence as the long divide; here the value of
// interest is the one left in rdx.
instruct modL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div, rFlagsReg cr) %{
  match(Set rdx (ModL rax div));
  effect(KILL rax, KILL cr);

  ins_cost(300); // XXX
  format %{ "movq    rdx, 0x8000000000000000\t# lrem\n\t"
            "cmpq    rax, rdx\n\t"
            "jne,s   normal\n\t"
            "xorl    rdx, rdx\n\t"
            "cmpq    $div, -1\n\t"
            "je,s    done\n"
    "normal: cdqq\n\t"
            "idivq   $div\n"
    "done:"        %}
  ins_encode(cdqq_enc(div));
  ins_pipe(ialu_reg_reg_alu0);
%}
11587 
// Unsigned int remainder: rdx = rax % div (UModI), with rax clobbered.
// The work is delegated to the umodI macro-assembler routine.
instruct umodI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div, rFlagsReg cr) %{
  match(Set rdx (UModI rax div));
  effect(KILL rax, KILL cr);

  ins_cost(300);
  format %{ "umodl $rdx,$rax,$div\t# UModI\n" %}
  ins_encode %{ __ umodI($rax$$Register, $div$$Register, $rdx$$Register); %}
  ins_pipe(ialu_reg_reg_alu0);
%}
11600 
// Unsigned long remainder: rdx = rax % div (UModL), with rax clobbered.
// The work is delegated to the umodL macro-assembler routine.
instruct umodL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div, rFlagsReg cr) %{
  match(Set rdx (UModL rax div));
  effect(KILL rax, KILL cr);

  ins_cost(300);
  format %{ "umodq $rdx,$rax,$div\t# UModL\n" %}
  ins_encode %{ __ umodL($rax$$Register, $div$$Register, $rdx$$Register); %}
  ins_pipe(ialu_reg_reg_alu0);
%}
11613 
// Integer Shift Instructions
// Int shift left by a small constant (immI2 operand; presumably the
// "one, two, three" counts -- confirm against the operand definition),
// in place: dst <<= shift.  Selected only when UseAPX is off.
instruct salI_rReg_immI2(rRegI dst, immI2 shift, rFlagsReg cr) %{
  predicate(!UseAPX);
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  format %{ "sall    $dst, $shift" %}
  ins_encode %{ __ sall($dst$$Register, $shift$$constant); %}
  ins_pipe(ialu_reg);
%}
11628 
// Int shift left by a small constant (immI2 operand), ndd form:
// dst = src << shift.  Selected only when UseAPX is on.
instruct salI_rReg_immI2_ndd(rRegI dst, rRegI src, immI2 shift, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (LShiftI src shift));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "esall    $dst, $src, $shift\t# int(ndd)" %}
  ins_encode %{ __ esall($dst$$Register, $src$$Register, $shift$$constant, false); %}
  ins_pipe(ialu_reg);
%}
11643 
// Int shift left by an 8-bit immediate, in place: dst <<= shift.
// Selected only when UseAPX is off.
instruct salI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr) %{
  predicate(!UseAPX);
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  format %{ "sall    $dst, $shift" %}
  ins_encode %{ __ sall($dst$$Register, $shift$$constant); %}
  ins_pipe(ialu_reg);
%}
11657 
// Int shift left by an 8-bit immediate, ndd form: dst = src << shift.
// Selected only when UseAPX is on.
instruct salI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (LShiftI src shift));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "esall    $dst, $src, $shift\t# int (ndd)" %}
  ins_encode %{ __ esall($dst$$Register, $src$$Register, $shift$$constant, false); %}
  ins_pipe(ialu_reg);
%}
11672 
// Int shift left by an 8-bit immediate, read-modify-write on memory:
// [dst] = [dst] << shift.
instruct salI_mem_imm(memory dst, immI8 shift, rFlagsReg cr) %{
  match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "sall    $dst, $shift" %}
  ins_encode %{ __ sall($dst$$Address, $shift$$constant); %}
  ins_pipe(ialu_mem_imm);
%}
11685 
// Int shift left by a variable count, in place: dst <<= shift.
// The count is pinned to rcx (rcx_RegI) and is implicit in the emitted
// sall.  Used only when BMI2 is unavailable.
instruct salI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr) %{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  format %{ "sall    $dst, $shift" %}
  ins_encode %{ __ sall($dst$$Register); %}
  ins_pipe(ialu_reg_reg);
%}
11699 
// Int shift left by a variable count, read-modify-write on memory:
// [dst] = [dst] << shift.  The count is pinned to rcx (rcx_RegI) and is
// implicit in the emitted sall.  Used only when BMI2 is unavailable.
instruct salI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr) %{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "sall    $dst, $shift" %}
  ins_encode %{ __ sall($dst$$Address); %}
  ins_pipe(ialu_mem_reg);
%}
11713 
// Int shift left by a variable count using shlxl: dst = src << shift.
// Requires BMI2.  The count may be in any int register, and the rule has no
// flags operand and declares no flags effect.
instruct salI_rReg_rReg(rRegI dst, rRegI src, rRegI shift) %{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (LShiftI src shift));

  format %{ "shlxl   $dst, $src, $shift" %}
  ins_encode %{ __ shlxl($dst$$Register, $src$$Register, $shift$$Register); %}
  ins_pipe(ialu_reg_reg);
%}
11725 
// Int shift left of a loaded value by a variable count using shlxl:
// dst = [src] << shift.  Requires BMI2; the LoadI is folded into the shift
// and no flags effect is declared.
instruct salI_mem_rReg(rRegI dst, memory src, rRegI shift) %{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (LShiftI (LoadI src) shift));

  ins_cost(175);
  format %{ "shlxl   $dst, $src, $shift" %}
  ins_encode %{ __ shlxl($dst$$Register, $src$$Address, $shift$$Register); %}
  ins_pipe(ialu_reg_mem);
%}
11737 
// Int arithmetic shift right by an 8-bit immediate, in place:
// dst >>= shift.  Selected only when UseAPX is off.
// NOTE(review): the pipe class is ialu_mem_imm although no memory operand is
// involved; salI_rReg_imm and shrI_rReg_imm use ialu_reg -- confirm intent.
instruct sarI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr) %{
  predicate(!UseAPX);
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  format %{ "sarl    $dst, $shift" %}
  ins_encode %{ __ sarl($dst$$Register, $shift$$constant); %}
  ins_pipe(ialu_mem_imm);
%}
11751 
// Int arithmetic shift right by an 8-bit immediate, ndd form:
// dst = src >> shift.  Selected only when UseAPX is on.
// NOTE(review): the pipe class is ialu_mem_imm although no memory operand is
// involved; the sal/shr ndd siblings use ialu_reg -- confirm intent.
instruct sarI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (RShiftI src shift));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "esarl    $dst, $src, $shift\t# int (ndd)" %}
  ins_encode %{ __ esarl($dst$$Register, $src$$Register, $shift$$constant, false); %}
  ins_pipe(ialu_mem_imm);
%}
11766 
// Int arithmetic shift right by an 8-bit immediate, read-modify-write on
// memory: [dst] = [dst] >> shift.
instruct sarI_mem_imm(memory dst, immI8 shift, rFlagsReg cr) %{
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "sarl    $dst, $shift" %}
  ins_encode %{ __ sarl($dst$$Address, $shift$$constant); %}
  ins_pipe(ialu_mem_imm);
%}
11779 
// Int arithmetic shift right by a variable count, in place: dst >>= shift.
// The count is pinned to rcx (rcx_RegI) and is implicit in the emitted
// sarl.  Used only when BMI2 is unavailable.
instruct sarI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr) %{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  format %{ "sarl    $dst, $shift" %}
  ins_encode %{ __ sarl($dst$$Register); %}
  ins_pipe(ialu_reg_reg);
%}
11793 
// Int arithmetic shift right by a variable count, read-modify-write on
// memory: [dst] = [dst] >> shift.  The count is pinned to rcx (rcx_RegI) and
// is implicit in the emitted sarl.  Used only when BMI2 is unavailable.
instruct sarI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr) %{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "sarl    $dst, $shift" %}
  ins_encode %{ __ sarl($dst$$Address); %}
  ins_pipe(ialu_mem_reg);
%}
11807 
// Int arithmetic shift right by a variable count using sarxl:
// dst = src >> shift.  Requires BMI2.  The count may be in any int register,
// and the rule has no flags operand and declares no flags effect.
instruct sarI_rReg_rReg(rRegI dst, rRegI src, rRegI shift) %{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (RShiftI src shift));

  format %{ "sarxl   $dst, $src, $shift" %}
  ins_encode %{ __ sarxl($dst$$Register, $src$$Register, $shift$$Register); %}
  ins_pipe(ialu_reg_reg);
%}
11819 
// Int arithmetic shift right of a loaded value by a variable count using
// sarxl: dst = [src] >> shift.  Requires BMI2; the LoadI is folded into the
// shift and no flags effect is declared.
instruct sarI_mem_rReg(rRegI dst, memory src, rRegI shift) %{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (RShiftI (LoadI src) shift));

  ins_cost(175);
  format %{ "sarxl   $dst, $src, $shift" %}
  ins_encode %{ __ sarxl($dst$$Register, $src$$Address, $shift$$Register); %}
  ins_pipe(ialu_reg_mem);
%}
11831 
// Int logical (unsigned) shift right by an 8-bit immediate, in place:
// dst >>>= shift.  Selected only when UseAPX is off.
instruct shrI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr) %{
  predicate(!UseAPX);
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  format %{ "shrl    $dst, $shift" %}
  ins_encode %{ __ shrl($dst$$Register, $shift$$constant); %}
  ins_pipe(ialu_reg);
%}
11845 
// Int logical (unsigned) shift right by an 8-bit immediate, ndd form:
// dst = src >>> shift.  Selected only when UseAPX is on.
instruct shrI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (URShiftI src shift));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "eshrl    $dst, $src, $shift\t # int (ndd)" %}
  ins_encode %{ __ eshrl($dst$$Register, $src$$Register, $shift$$constant, false); %}
  ins_pipe(ialu_reg);
%}
11860 
// Int logical (unsigned) shift right by an 8-bit immediate,
// read-modify-write on memory: [dst] = [dst] >>> shift.
instruct shrI_mem_imm(memory dst, immI8 shift, rFlagsReg cr) %{
  match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "shrl    $dst, $shift" %}
  ins_encode %{ __ shrl($dst$$Address, $shift$$constant); %}
  ins_pipe(ialu_mem_imm);
%}
11873 
// Int logical (unsigned) shift right by a variable count, in place:
// dst >>>= shift.  The count is pinned to rcx (rcx_RegI) and is implicit in
// the emitted shrl.  Used only when BMI2 is unavailable.
instruct shrI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr) %{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  format %{ "shrl    $dst, $shift" %}
  ins_encode %{ __ shrl($dst$$Register); %}
  ins_pipe(ialu_reg_reg);
%}
11887 
// Int logical (unsigned) shift right by a variable count, read-modify-write
// on memory: [dst] = [dst] >>> shift.  The count is pinned to rcx
// (rcx_RegI) and is implicit in the emitted shrl.  Used only when BMI2 is
// unavailable.
instruct shrI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr) %{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "shrl    $dst, $shift" %}
  ins_encode %{ __ shrl($dst$$Address); %}
  ins_pipe(ialu_mem_reg);
%}
11901 
// Int logical (unsigned) shift right by a variable count using shrxl:
// dst = src >>> shift.  Requires BMI2.  The count may be in any int
// register, and the rule has no flags operand and declares no flags effect.
instruct shrI_rReg_rReg(rRegI dst, rRegI src, rRegI shift) %{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (URShiftI src shift));

  format %{ "shrxl   $dst, $src, $shift" %}
  ins_encode %{ __ shrxl($dst$$Register, $src$$Register, $shift$$Register); %}
  ins_pipe(ialu_reg_reg);
%}
11913 
// Int logical (unsigned) shift right of a loaded value by a variable count
// using shrxl: dst = [src] >>> shift.  Requires BMI2; the LoadI is folded
// into the shift and no flags effect is declared.
instruct shrI_mem_rReg(rRegI dst, memory src, rRegI shift) %{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (URShiftI (LoadI src) shift));

  ins_cost(175);
  format %{ "shrxl   $dst, $src, $shift" %}
  ins_encode %{ __ shrxl($dst$$Register, $src$$Address, $shift$$Register); %}
  ins_pipe(ialu_reg_mem);
%}
11925 
// Long Shift Instructions
// Long shift left by a small constant (immI2 operand; presumably the
// "one, two, three" counts -- confirm against the operand definition),
// in place: dst <<= shift.  Selected only when UseAPX is off.
instruct salL_rReg_immI2(rRegL dst, immI2 shift, rFlagsReg cr) %{
  predicate(!UseAPX);
  match(Set dst (LShiftL dst shift));
  effect(KILL cr);

  format %{ "salq    $dst, $shift" %}
  ins_encode %{ __ salq($dst$$Register, $shift$$constant); %}
  ins_pipe(ialu_reg);
%}
11940 
// Long shift left by a small constant (immI2 operand), ndd form:
// dst = src << shift.  Selected only when UseAPX is on.
instruct salL_rReg_immI2_ndd(rRegL dst, rRegL src, immI2 shift, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (LShiftL src shift));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "esalq    $dst, $src, $shift\t# long (ndd)" %}
  ins_encode %{ __ esalq($dst$$Register, $src$$Register, $shift$$constant, false); %}
  ins_pipe(ialu_reg);
%}
11955 
// Long shift left by an 8-bit immediate, in place: dst <<= shift.
// Selected only when UseAPX is off.
instruct salL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr) %{
  predicate(!UseAPX);
  match(Set dst (LShiftL dst shift));
  effect(KILL cr);

  format %{ "salq    $dst, $shift" %}
  ins_encode %{ __ salq($dst$$Register, $shift$$constant); %}
  ins_pipe(ialu_reg);
%}
11969 
// Long shift left by an 8-bit immediate, ndd form: dst = src << shift.
// Selected only when UseAPX is on.
instruct salL_rReg_imm_ndd(rRegL dst, rRegL src, immI8 shift, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (LShiftL src shift));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "esalq    $dst, $src, $shift\t# long (ndd)" %}
  ins_encode %{ __ esalq($dst$$Register, $src$$Register, $shift$$constant, false); %}
  ins_pipe(ialu_reg);
%}
11984 
// Long shift left by an 8-bit immediate, read-modify-write on memory:
// [dst] = [dst] << shift.
instruct salL_mem_imm(memory dst, immI8 shift, rFlagsReg cr) %{
  match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
  effect(KILL cr);

  format %{ "salq    $dst, $shift" %}
  ins_encode %{ __ salq($dst$$Address, $shift$$constant); %}
  ins_pipe(ialu_mem_imm);
%}
11997 
// Long shift left by a variable count, in place: dst <<= shift.
// The count is pinned to rcx (rcx_RegI) and is implicit in the emitted
// salq.  Used only when BMI2 is unavailable.
instruct salL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr) %{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (LShiftL dst shift));
  effect(KILL cr);

  format %{ "salq    $dst, $shift" %}
  ins_encode %{ __ salq($dst$$Register); %}
  ins_pipe(ialu_reg_reg);
%}
12011 
// Long shift left by a variable count, read-modify-write on memory:
// [dst] = [dst] << shift.  The count is pinned to rcx (rcx_RegI) and is
// implicit in the emitted salq.  Used only when BMI2 is unavailable.
instruct salL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr) %{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
  effect(KILL cr);

  format %{ "salq    $dst, $shift" %}
  ins_encode %{ __ salq($dst$$Address); %}
  ins_pipe(ialu_mem_reg);
%}
12025 
// Long shift left by a variable count using shlxq: dst = src << shift.
// Requires BMI2.  The count is an int register (rRegI), and the rule has no
// flags operand and declares no flags effect.
instruct salL_rReg_rReg(rRegL dst, rRegL src, rRegI shift) %{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (LShiftL src shift));

  format %{ "shlxq   $dst, $src, $shift" %}
  ins_encode %{ __ shlxq($dst$$Register, $src$$Register, $shift$$Register); %}
  ins_pipe(ialu_reg_reg);
%}
12037 
12038 instruct salL_mem_rReg(rRegL dst, memory src, rRegI shift)
12039 %{
        // Shift Left by variable via shlxq with the shifted value read from memory:
        // the LoadL of src is folded into the match. Selected when
        // VM_Version::supports_bmi2() is true; declared cost is 175.
12040   predicate(VM_Version::supports_bmi2());
12041   match(Set dst (LShiftL (LoadL src) shift));
12042   ins_cost(175);
12043   format %{ "shlxq   $dst, $src, $shift" %}
12044   ins_encode %{
12045     __ shlxq($dst$$Register, $src$$Address, $shift$$Register);
12046   %}
12047   ins_pipe(ialu_reg_mem);
12048 %}
12049 
12050 // Arithmetic Shift Right by immediate, two-operand form.
      // Selected when UseAPX is not set. The count operand is a full immI; the
      // encoding masks it to its low six bits (& 0x3F) before passing it to sarq.
      // cr is declared killed.
12051 instruct sarL_rReg_imm(rRegL dst, immI shift, rFlagsReg cr)
12052 %{
12053   predicate(!UseAPX);
12054   match(Set dst (RShiftL dst shift));
12055   effect(KILL cr);
12056 
12057   format %{ "sarq    $dst, $shift" %}
12058   ins_encode %{
12059     __ sarq($dst$$Register, (unsigned char)($shift$$constant & 0x3F));
12060   %}
        // Register-only operation: use the register pipe class, matching the
        // shrL_rReg_imm rule, rather than the memory-immediate class.
12061   ins_pipe(ialu_reg);
12062 %}
12063 
12064 // Arithmetic Shift Right by immediate, three-operand (NDD) form.
      // Selected only when UseAPX is set. The count operand is a full immI; the
      // encoding masks it to its low six bits (& 0x3F) before passing it to esarq.
      // cr is declared killed.
      // NOTE(review): the trailing false passed to esarq is presumably the
      // assembler's no-flags selector - confirm against the assembler.
12065 instruct sarL_rReg_imm_ndd(rRegL dst, rRegL src, immI shift, rFlagsReg cr)
12066 %{
12067   predicate(UseAPX);
12068   match(Set dst (RShiftL src shift));
12069   effect(KILL cr);
12070   flag(PD::Flag_ndd_demotable_opr1);
12071 
12072   format %{ "esarq    $dst, $src, $shift\t# long (ndd)" %}
12073   ins_encode %{
12074     __ esarq($dst$$Register, $src$$Register, (unsigned char)($shift$$constant & 0x3F), false);
12075   %}
        // Register-only operation: use the register pipe class, matching the
        // shrL_rReg_imm_ndd rule, rather than the memory-immediate class.
12076   ins_pipe(ialu_reg);
12077 %}
12078 
12079 // Arithmetic Shift Right by immediate, applied directly to a long in memory.
      // Matches a StoreL to dst of ((LoadL dst) >> shift). The count operand is a
      // full immI that the encoding masks to its low six bits (& 0x3F).
      // No predicate is declared; cr is declared killed.
12080 instruct sarL_mem_imm(memory dst, immI shift, rFlagsReg cr)
12081 %{
12082   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
12083   effect(KILL cr);
12084 
12085   format %{ "sarq    $dst, $shift" %}
12086   ins_encode %{
12087     __ sarq($dst$$Address, (unsigned char)($shift$$constant & 0x3F));
12088   %}
12089   ins_pipe(ialu_mem_imm);
12090 %}
12091 
12092 // Arithmetic Shift Right by variable, two-operand form.
      // Selected only when VM_Version::supports_bmi2() is false. The count operand
      // is constrained to the rcx_RegI class and is not passed to sarq explicitly.
      // NOTE(review): presumably this sarq overload shifts by CL - confirm.
12093 instruct sarL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12094 %{
12095   predicate(!VM_Version::supports_bmi2());
12096   match(Set dst (RShiftL dst shift));
12097   effect(KILL cr);
12098 
12099   format %{ "sarq    $dst, $shift" %}
12100   ins_encode %{
12101     __ sarq($dst$$Register);
12102   %}
12103   ins_pipe(ialu_reg_reg);
12104 %}
12105 
12106 // Arithmetic Shift Right by variable, applied directly to a long in memory.
      // Selected only when VM_Version::supports_bmi2() is false. Matches a StoreL
      // to dst of ((LoadL dst) >> shift) with the count in the rcx_RegI class; the
      // count is not passed to sarq explicitly. cr is declared killed.
      // NOTE(review): presumably this sarq overload shifts by CL - confirm.
12107 instruct sarL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12108 %{
12109   predicate(!VM_Version::supports_bmi2());
12110   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
12111   effect(KILL cr);
12112 
12113   format %{ "sarq    $dst, $shift" %}
12114   ins_encode %{
12115     __ sarq($dst$$Address);
12116   %}
12117   ins_pipe(ialu_mem_reg);
12118 %}
12119 
12120 instruct sarL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12121 %{
        // Arithmetic Shift Right by variable via sarxq, selected when
        // VM_Version::supports_bmi2() is true. Uses separate dst and src registers
        // and takes the count in any rRegI; this rule declares no rFlagsReg operand
        // and no KILL effect.
12122   predicate(VM_Version::supports_bmi2());
12123   match(Set dst (RShiftL src shift));
12124 
12125   format %{ "sarxq   $dst, $src, $shift" %}
12126   ins_encode %{
12127     __ sarxq($dst$$Register, $src$$Register, $shift$$Register);
12128   %}
12129   ins_pipe(ialu_reg_reg);
12130 %}
12131 
12132 instruct sarL_mem_rReg(rRegL dst, memory src, rRegI shift)
12133 %{
        // Arithmetic Shift Right by variable via sarxq with the shifted value read
        // from memory: the LoadL of src is folded into the match. Selected when
        // VM_Version::supports_bmi2() is true; declared cost is 175.
12134   predicate(VM_Version::supports_bmi2());
12135   match(Set dst (RShiftL (LoadL src) shift));
12136   ins_cost(175);
12137   format %{ "sarxq   $dst, $src, $shift" %}
12138   ins_encode %{
12139     __ sarxq($dst$$Register, $src$$Address, $shift$$Register);
12140   %}
12141   ins_pipe(ialu_reg_mem);
12142 %}
12143 
12144 // Logical Shift Right by 8-bit immediate, two-operand form.
      // Selected when UseAPX is not set: matches URShiftL of dst by an immI8 and
      // emits shrq in place on dst. cr is declared killed.
12145 instruct shrL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
12146 %{
12147   predicate(!UseAPX);
12148   match(Set dst (URShiftL dst shift));
12149   effect(KILL cr);
12150 
12151   format %{ "shrq    $dst, $shift" %}
12152   ins_encode %{
12153     __ shrq($dst$$Register, $shift$$constant);
12154   %}
12155   ins_pipe(ialu_reg);
12156 %}
12157 
12158 // Logical Shift Right by 8-bit immediate, three-operand (NDD) form.
      // Selected only when UseAPX is set: matches URShiftL of src by an immI8 and
      // emits eshrq with separate dst and src registers. cr is declared killed.
      // NOTE(review): the trailing false passed to eshrq is presumably the
      // assembler's no-flags selector - confirm against the assembler.
12159 instruct shrL_rReg_imm_ndd(rRegL dst, rRegL src, immI8 shift, rFlagsReg cr)
12160 %{
12161   predicate(UseAPX);
12162   match(Set dst (URShiftL src shift));
12163   effect(KILL cr);
12164   flag(PD::Flag_ndd_demotable_opr1);
12165 
12166   format %{ "eshrq    $dst, $src, $shift\t# long (ndd)" %}
12167   ins_encode %{
12168     __ eshrq($dst$$Register, $src$$Register, $shift$$constant, false);
12169   %}
12170   ins_pipe(ialu_reg);
12171 %}
12172 
12173 // Logical Shift Right by 8-bit immediate, applied directly to a long in memory.
      // Matches a StoreL to dst of ((LoadL dst) >>> shift) and emits a single shrq
      // on the address. No predicate is declared; cr is declared killed.
12174 instruct shrL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12175 %{
12176   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
12177   effect(KILL cr);
12178 
12179   format %{ "shrq    $dst, $shift" %}
12180   ins_encode %{
12181     __ shrq($dst$$Address, $shift$$constant);
12182   %}
12183   ins_pipe(ialu_mem_imm);
12184 %}
12185 
12186 // Logical Shift Right by variable, two-operand form.
      // Selected only when VM_Version::supports_bmi2() is false. The count operand
      // is constrained to the rcx_RegI class and is not passed to shrq explicitly.
      // NOTE(review): presumably this shrq overload shifts by CL - confirm.
12187 instruct shrL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12188 %{
12189   predicate(!VM_Version::supports_bmi2());
12190   match(Set dst (URShiftL dst shift));
12191   effect(KILL cr);
12192 
12193   format %{ "shrq    $dst, $shift" %}
12194   ins_encode %{
12195     __ shrq($dst$$Register);
12196   %}
12197   ins_pipe(ialu_reg_reg);
12198 %}
12199 
12200 // Logical Shift Right by variable, applied directly to a long in memory.
      // Selected only when VM_Version::supports_bmi2() is false. Matches a StoreL
      // to dst of ((LoadL dst) >>> shift) with the count in the rcx_RegI class; the
      // count is not passed to shrq explicitly. cr is declared killed.
      // NOTE(review): presumably this shrq overload shifts by CL - confirm.
12201 instruct shrL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12202 %{
12203   predicate(!VM_Version::supports_bmi2());
12204   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
12205   effect(KILL cr);
12206 
12207   format %{ "shrq    $dst, $shift" %}
12208   ins_encode %{
12209     __ shrq($dst$$Address);
12210   %}
12211   ins_pipe(ialu_mem_reg);
12212 %}
12213 
12214 instruct shrL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12215 %{
        // Logical Shift Right by variable via shrxq, selected when
        // VM_Version::supports_bmi2() is true. Uses separate dst and src registers
        // and takes the count in any rRegI; this rule declares no rFlagsReg operand
        // and no KILL effect.
12216   predicate(VM_Version::supports_bmi2());
12217   match(Set dst (URShiftL src shift));
12218 
12219   format %{ "shrxq   $dst, $src, $shift" %}
12220   ins_encode %{
12221     __ shrxq($dst$$Register, $src$$Register, $shift$$Register);
12222   %}
12223   ins_pipe(ialu_reg_reg);
12224 %}
12225 
12226 instruct shrL_mem_rReg(rRegL dst, memory src, rRegI shift)
12227 %{
        // Logical Shift Right by variable via shrxq with the shifted value read
        // from memory: the LoadL of src is folded into the match. Selected when
        // VM_Version::supports_bmi2() is true; declared cost is 175.
12228   predicate(VM_Version::supports_bmi2());
12229   match(Set dst (URShiftL (LoadL src) shift));
12230   ins_cost(175);
12231   format %{ "shrxq   $dst, $src, $shift" %}
12232   ins_encode %{
12233     __ shrxq($dst$$Register, $src$$Address, $shift$$Register);
12234   %}
12235   ins_pipe(ialu_reg_mem);
12236 %}
12237 
12238 // Shift Left by 24, followed by Arithmetic Shift Right by 24.
12239 // This idiom is used by the compiler for the i2b bytecode.
      // The whole (RShiftI (LShiftI src 24) 24) tree is matched and replaced by a
      // single movsbl from src to dst. No flags operand is declared.
12240 instruct i2b(rRegI dst, rRegI src, immI_24 twentyfour)
12241 %{
12242   match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
12243 
12244   format %{ "movsbl  $dst, $src\t# i2b" %}
12245   ins_encode %{
12246     __ movsbl($dst$$Register, $src$$Register);
12247   %}
12248   ins_pipe(ialu_reg_reg);
12249 %}
12250 
12251 // Shift Left by 16, followed by Arithmetic Shift Right by 16.
12252 // This idiom is used by the compiler for the i2s bytecode.
      // The whole (RShiftI (LShiftI src 16) 16) tree is matched and replaced by a
      // single movswl from src to dst. No flags operand is declared.
12253 instruct i2s(rRegI dst, rRegI src, immI_16 sixteen)
12254 %{
12255   match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
12256 
12257   format %{ "movswl  $dst, $src\t# i2s" %}
12258   ins_encode %{
12259     __ movswl($dst$$Register, $src$$Register);
12260   %}
12261   ins_pipe(ialu_reg_reg);
12262 %}
12263 
12264 // ROL/ROR instructions
12265 
12266 // Rotate left by constant, two-operand form for int nodes.
      // Selected when VM_Version::supports_bmi2() is false and the node's bottom
      // type is T_INT. Emits roll in place on dst; cr is declared killed.
12267 instruct rolI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
12268 %{
12269   predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12270   match(Set dst (RotateLeft dst shift));
12271   effect(KILL cr);
12272   format %{ "roll    $dst, $shift" %}
12273   ins_encode %{
12274     __ roll($dst$$Register, $shift$$constant);
12275   %}
12276   ins_pipe(ialu_reg);
12277 %}
12278 
12279 instruct rolI_immI8(rRegI dst, rRegI src, immI8 shift)
12280 %{
        // Rotate left by constant for int nodes, selected when UseAPX is not set
        // and VM_Version::supports_bmi2() is true. The rule is encoded with rorxl:
        // the left count is reduced modulo 32 and the complementary right-rotate
        // count is emitted instead. No flags operand is declared.
12281   predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12282   match(Set dst (RotateLeft src shift));
12283   format %{ "rolxl   $dst, $src, $shift" %}
12284   ins_encode %{
          // Right-rotate amount equivalent to the requested left rotation.
12285     const int ror_count = 32 - ($shift$$constant & 31);
12286     __ rorxl($dst$$Register, $src$$Register, ror_count);
12287   %}
12288   ins_pipe(ialu_reg_reg);
12289 %}
12290 
12291 instruct rolI_mem_immI8(rRegI dst, memory src, immI8 shift)
12292 %{
        // Rotate left by constant for int nodes with the rotated value read from
        // memory (LoadI folded into the match). Selected when
        // VM_Version::supports_bmi2() is true; declared cost is 175. Encoded with
        // rorxl using the complementary right-rotate count.
12293   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12294   match(Set dst (RotateLeft (LoadI src) shift));
12295   ins_cost(175);
12296   format %{ "rolxl   $dst, $src, $shift" %}
12297   ins_encode %{
          // Right-rotate amount equivalent to the requested left rotation.
12298     const int ror_count = 32 - ($shift$$constant & 31);
12299     __ rorxl($dst$$Register, $src$$Address, ror_count);
12300   %}
12301   ins_pipe(ialu_reg_mem);
12302 %}
12303 
12304 // Rotate Left by variable, two-operand form for int nodes.
      // Selected when UseAPX is not set and the node's bottom type is T_INT. The
      // count operand is constrained to the rcx_RegI class and is not passed to
      // roll explicitly. cr is declared killed.
      // NOTE(review): presumably this roll overload rotates by CL - confirm.
12305 instruct rolI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12306 %{
12307   predicate(!UseAPX && n->bottom_type()->basic_type() == T_INT);
12308   match(Set dst (RotateLeft dst shift));
12309   effect(KILL cr);
12310   format %{ "roll    $dst, $shift" %}
12311   ins_encode %{
12312     __ roll($dst$$Register);
12313   %}
12314   ins_pipe(ialu_reg_reg);
12315 %}
12316 
12317 // Rotate Left by variable, three-operand (NDD) form for int nodes.
      // Selected when UseAPX is set and the node's bottom type is T_INT. The count
      // operand is constrained to the rcx_RegI class and is not passed to eroll
      // explicitly. cr is declared killed.
      // NOTE(review): the trailing false passed to eroll is presumably the
      // assembler's no-flags selector - confirm against the assembler.
12318 instruct rolI_rReg_Var_ndd(rRegI dst, rRegI src, rcx_RegI shift, rFlagsReg cr)
12319 %{
12320   predicate(UseAPX && n->bottom_type()->basic_type() == T_INT);
12321   match(Set dst (RotateLeft src shift));
12322   effect(KILL cr);
12323   flag(PD::Flag_ndd_demotable_opr1);
12324 
12325   format %{ "eroll    $dst, $src, $shift\t# rotate left (int ndd)" %}
12326   ins_encode %{
12327     __ eroll($dst$$Register, $src$$Register, false);
12328   %}
12329   ins_pipe(ialu_reg_reg);
12330 %}
12331 
12332 // Rotate Right by constant, two-operand form for int nodes.
      // Selected when VM_Version::supports_bmi2() is false and the node's bottom
      // type is T_INT. Emits rorl in place on dst; cr is declared killed.
12333 instruct rorI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
12334 %{
12335   predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12336   match(Set dst (RotateRight dst shift));
12337   effect(KILL cr);
12338   format %{ "rorl    $dst, $shift" %}
12339   ins_encode %{
12340     __ rorl($dst$$Register, $shift$$constant);
12341   %}
12342   ins_pipe(ialu_reg);
12343 %}
12344 
12345 // Rotate Right by constant via rorxl for int nodes.
      // Selected when UseAPX is not set and VM_Version::supports_bmi2() is true.
      // The immediate is passed to rorxl unchanged; separate dst and src registers
      // are used and no flags operand is declared.
12346 instruct rorI_immI8(rRegI dst, rRegI src, immI8 shift)
12347 %{
12348   predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12349   match(Set dst (RotateRight src shift));
12350   format %{ "rorxl   $dst, $src, $shift" %}
12351   ins_encode %{
12352     __ rorxl($dst$$Register, $src$$Register, $shift$$constant);
12353   %}
12354   ins_pipe(ialu_reg_reg);
12355 %}
12356 
12357 instruct rorI_mem_immI8(rRegI dst, memory src, immI8 shift)
12358 %{
        // Rotate Right by constant via rorxl for int nodes, with the rotated value
        // read from memory (LoadI folded into the match). Selected when
        // VM_Version::supports_bmi2() is true; declared cost is 175.
12359   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12360   match(Set dst (RotateRight (LoadI src) shift));
12361   ins_cost(175);
12362   format %{ "rorxl   $dst, $src, $shift" %}
12363   ins_encode %{
12364     __ rorxl($dst$$Register, $src$$Address, $shift$$constant);
12365   %}
12366   ins_pipe(ialu_reg_mem);
12367 %}
12368 
12369 // Rotate Right by variable, two-operand form for int nodes.
      // Selected when UseAPX is not set and the node's bottom type is T_INT. The
      // count operand is constrained to the rcx_RegI class and is not passed to
      // rorl explicitly. cr is declared killed.
      // NOTE(review): presumably this rorl overload rotates by CL - confirm.
12370 instruct rorI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12371 %{
12372   predicate(!UseAPX && n->bottom_type()->basic_type() == T_INT);
12373   match(Set dst (RotateRight dst shift));
12374   effect(KILL cr);
12375   format %{ "rorl    $dst, $shift" %}
12376   ins_encode %{
12377     __ rorl($dst$$Register);
12378   %}
12379   ins_pipe(ialu_reg_reg);
12380 %}
12381 
12382 // Rotate Right by variable, three-operand (NDD) form for int nodes.
      // Selected when UseAPX is set and the node's bottom type is T_INT. The count
      // operand is constrained to the rcx_RegI class and is not passed to erorl
      // explicitly. cr is declared killed.
      // NOTE(review): the trailing false passed to erorl is presumably the
      // assembler's no-flags selector - confirm against the assembler.
12383 instruct rorI_rReg_Var_ndd(rRegI dst, rRegI src, rcx_RegI shift, rFlagsReg cr)
12384 %{
12385   predicate(UseAPX && n->bottom_type()->basic_type() == T_INT);
12386   match(Set dst (RotateRight src shift));
12387   effect(KILL cr);
12388   flag(PD::Flag_ndd_demotable_opr1);
12389 
12390   format %{ "erorl    $dst, $src, $shift\t# rotate right(int ndd)" %}
12391   ins_encode %{
12392     __ erorl($dst$$Register, $src$$Register, false);
12393   %}
12394   ins_pipe(ialu_reg_reg);
12395 %}
12396 
12397 // Rotate Left by constant, two-operand form for long nodes.
      // Selected when VM_Version::supports_bmi2() is false and the node's bottom
      // type is T_LONG. Emits rolq in place on dst; cr is declared killed.
12398 instruct rolL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
12399 %{
12400   predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12401   match(Set dst (RotateLeft dst shift));
12402   effect(KILL cr);
12403   format %{ "rolq    $dst, $shift" %}
12404   ins_encode %{
12405     __ rolq($dst$$Register, $shift$$constant);
12406   %}
12407   ins_pipe(ialu_reg);
12408 %}
12409 
12410 instruct rolL_immI8(rRegL dst, rRegL src, immI8 shift)
12411 %{
        // Rotate left by constant for long nodes, selected when UseAPX is not set
        // and VM_Version::supports_bmi2() is true. The rule is encoded with rorxq:
        // the left count is reduced modulo 64 and the complementary right-rotate
        // count is emitted instead. No flags operand is declared.
12412   predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12413   match(Set dst (RotateLeft src shift));
12414   format %{ "rolxq   $dst, $src, $shift" %}
12415   ins_encode %{
          // Right-rotate amount equivalent to the requested left rotation.
12416     const int ror_count = 64 - ($shift$$constant & 63);
12417     __ rorxq($dst$$Register, $src$$Register, ror_count);
12418   %}
12419   ins_pipe(ialu_reg_reg);
12420 %}
12421 
12422 instruct rolL_mem_immI8(rRegL dst, memory src, immI8 shift)
12423 %{
        // Rotate left by constant for long nodes with the rotated value read from
        // memory (LoadL folded into the match). Selected when
        // VM_Version::supports_bmi2() is true; declared cost is 175. Encoded with
        // rorxq using the complementary right-rotate count.
12424   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12425   match(Set dst (RotateLeft (LoadL src) shift));
12426   ins_cost(175);
12427   format %{ "rolxq   $dst, $src, $shift" %}
12428   ins_encode %{
          // Right-rotate amount equivalent to the requested left rotation.
12429     const int ror_count = 64 - ($shift$$constant & 63);
12430     __ rorxq($dst$$Register, $src$$Address, ror_count);
12431   %}
12432   ins_pipe(ialu_reg_mem);
12433 %}
12434 
12435 // Rotate Left by variable, two-operand form for long nodes.
      // Selected when UseAPX is not set and the node's bottom type is T_LONG. The
      // count operand is constrained to the rcx_RegI class and is not passed to
      // rolq explicitly. cr is declared killed.
      // NOTE(review): presumably this rolq overload rotates by CL - confirm.
12436 instruct rolL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12437 %{
12438   predicate(!UseAPX && n->bottom_type()->basic_type() == T_LONG);
12439   match(Set dst (RotateLeft dst shift));
12440   effect(KILL cr);
12441 
12442   format %{ "rolq    $dst, $shift" %}
12443   ins_encode %{
12444     __ rolq($dst$$Register);
12445   %}
12446   ins_pipe(ialu_reg_reg);
12447 %}
12448 
12449 // Rotate Left by variable, three-operand (NDD) form for long nodes.
      // Selected when UseAPX is set and the node's bottom type is T_LONG. The
      // count operand is constrained to the rcx_RegI class and is not passed to
      // erolq explicitly. cr is declared killed.
      // NOTE(review): the trailing false passed to erolq is presumably the
      // assembler's no-flags selector - confirm against the assembler.
12450 instruct rolL_rReg_Var_ndd(rRegL dst, rRegL src, rcx_RegI shift, rFlagsReg cr)
12451 %{
12452   predicate(UseAPX && n->bottom_type()->basic_type() == T_LONG);
12453   match(Set dst (RotateLeft src shift));
12454   effect(KILL cr);
12455   flag(PD::Flag_ndd_demotable_opr1);
12456 
12457   format %{ "erolq    $dst, $src, $shift\t# rotate left(long ndd)" %}
12458   ins_encode %{
12459     __ erolq($dst$$Register, $src$$Register, false);
12460   %}
12461   ins_pipe(ialu_reg_reg);
12462 %}
12463 
12464 // Rotate Right by constant, two-operand form for long nodes.
      // Selected when VM_Version::supports_bmi2() is false and the node's bottom
      // type is T_LONG. Emits rorq in place on dst; cr is declared killed.
12465 instruct rorL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
12466 %{
12467   predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12468   match(Set dst (RotateRight dst shift));
12469   effect(KILL cr);
12470   format %{ "rorq    $dst, $shift" %}
12471   ins_encode %{
12472     __ rorq($dst$$Register, $shift$$constant);
12473   %}
12474   ins_pipe(ialu_reg);
12475 %}
12476 
12477 // Rotate Right by constant via rorxq for long nodes.
      // Selected when VM_Version::supports_bmi2() is true and the node's bottom
      // type is T_LONG. The immediate is passed to rorxq unchanged; no flags
      // operand is declared.
      // NOTE(review): unlike rorI_immI8, rolI_immI8 and rolL_immI8, this predicate
      // has no !UseAPX term - confirm whether that difference is intended.
12478 instruct rorL_immI8(rRegL dst, rRegL src, immI8 shift)
12479 %{
12480   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12481   match(Set dst (RotateRight src shift));
12482   format %{ "rorxq   $dst, $src, $shift" %}
12483   ins_encode %{
12484     __ rorxq($dst$$Register, $src$$Register, $shift$$constant);
12485   %}
12486   ins_pipe(ialu_reg_reg);
12487 %}
12488 
12489 instruct rorL_mem_immI8(rRegL dst, memory src, immI8 shift)
12490 %{
        // Rotate Right by constant via rorxq for long nodes, with the rotated value
        // read from memory (LoadL folded into the match). Selected when
        // VM_Version::supports_bmi2() is true; declared cost is 175.
12491   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12492   match(Set dst (RotateRight (LoadL src) shift));
12493   ins_cost(175);
12494   format %{ "rorxq   $dst, $src, $shift" %}
12495   ins_encode %{
12496     __ rorxq($dst$$Register, $src$$Address, $shift$$constant);
12497   %}
12498   ins_pipe(ialu_reg_mem);
12499 %}
12500 
12501 // Rotate Right by variable, two-operand form for long nodes.
      // Selected when UseAPX is not set and the node's bottom type is T_LONG. The
      // count operand is constrained to the rcx_RegI class and is not passed to
      // rorq explicitly. cr is declared killed.
      // NOTE(review): presumably this rorq overload rotates by CL - confirm.
12502 instruct rorL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12503 %{
12504   predicate(!UseAPX && n->bottom_type()->basic_type() == T_LONG);
12505   match(Set dst (RotateRight dst shift));
12506   effect(KILL cr);
12507   format %{ "rorq    $dst, $shift" %}
12508   ins_encode %{
12509     __ rorq($dst$$Register);
12510   %}
12511   ins_pipe(ialu_reg_reg);
12512 %}
12513 
12514 // Rotate Right by variable, three-operand (NDD) form for long nodes.
      // Selected when UseAPX is set and the node's bottom type is T_LONG. The
      // count operand is constrained to the rcx_RegI class and is not passed to
      // erorq explicitly. cr is declared killed.
      // NOTE(review): the trailing false passed to erorq is presumably the
      // assembler's no-flags selector - confirm against the assembler.
12515 instruct rorL_rReg_Var_ndd(rRegL dst, rRegL src, rcx_RegI shift, rFlagsReg cr)
12516 %{
12517   predicate(UseAPX && n->bottom_type()->basic_type() == T_LONG);
12518   match(Set dst (RotateRight src shift));
12519   effect(KILL cr);
12520   flag(PD::Flag_ndd_demotable_opr1);
12521 
12522   format %{ "erorq    $dst, $src, $shift\t# rotate right(long ndd)" %}
12523   ins_encode %{
12524     __ erorq($dst$$Register, $src$$Register, false);
12525   %}
12526   ins_pipe(ialu_reg_reg);
12527 %}
12528 
12529 //----------------------------- CompressBits/ExpandBits ------------------------
12530 
12531 instruct compressBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
        // CompressBits on long values with the mask in a register: emits pextq.
        // The predicate restricts the rule to nodes whose bottom type is a long.
        // No flags operand is declared; the rule uses the pipe_slow pipe class.
12532   predicate(n->bottom_type()->isa_long());
12533   match(Set dst (CompressBits src mask));
12534   format %{ "pextq  $dst, $src, $mask\t! parallel bit extract" %}
12535   ins_encode %{
12536     __ pextq($dst$$Register, $src$$Register, $mask$$Register);
12537   %}
12538   ins_pipe( pipe_slow );
12539 %}
12540 
12541 instruct expandBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
        // ExpandBits on long values with the mask in a register: emits pdepq.
        // The predicate restricts the rule to nodes whose bottom type is a long.
        // No flags operand is declared; the rule uses the pipe_slow pipe class.
12542   predicate(n->bottom_type()->isa_long());
12543   match(Set dst (ExpandBits src mask));
12544   format %{ "pdepq  $dst, $src, $mask\t! parallel bit deposit" %}
12545   ins_encode %{
12546     __ pdepq($dst$$Register, $src$$Register, $mask$$Register);
12547   %}
12548   ins_pipe( pipe_slow );
12549 %}
12550 
12551 instruct compressBitsL_mem(rRegL dst, rRegL src, memory mask) %{
        // CompressBits on long values with the mask read from memory (LoadL folded
        // into the match): emits pextq with an address operand for the mask.
        // The predicate restricts the rule to nodes whose bottom type is a long.
12552   predicate(n->bottom_type()->isa_long());
12553   match(Set dst (CompressBits src (LoadL mask)));
12554   format %{ "pextq  $dst, $src, $mask\t! parallel bit extract" %}
12555   ins_encode %{
12556     __ pextq($dst$$Register, $src$$Register, $mask$$Address);
12557   %}
12558   ins_pipe( pipe_slow );
12559 %}
12560 
12561 instruct expandBitsL_mem(rRegL dst, rRegL src, memory mask) %{
        // ExpandBits on long values with the mask read from memory (LoadL folded
        // into the match): emits pdepq with an address operand for the mask.
        // The predicate restricts the rule to nodes whose bottom type is a long.
12562   predicate(n->bottom_type()->isa_long());
12563   match(Set dst (ExpandBits src (LoadL mask)));
12564   format %{ "pdepq  $dst, $src, $mask\t! parallel bit deposit" %}
12565   ins_encode %{
12566     __ pdepq($dst$$Register, $src$$Register, $mask$$Address);
12567   %}
12568   ins_pipe( pipe_slow );
12569 %}
12570 
12571 
12572 // Logical Instructions
12573 
12574 // Integer Logical Instructions
12575 
12576 // And Instructions
12577 // And Register with Register, two-operand form.
      // Selected when UseAPX is not set: matches AndI of dst with src and emits
      // andl in place on dst. cr is declared killed, and the flag clause lists the
      // condition-flag properties this rule advertises.
12578 instruct andI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
12579 %{
12580   predicate(!UseAPX);
12581   match(Set dst (AndI dst src));
12582   effect(KILL cr);
12583   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12584 
12585   format %{ "andl    $dst, $src\t# int" %}
12586   ins_encode %{
12587     __ andl($dst$$Register, $src$$Register);
12588   %}
12589   ins_pipe(ialu_reg_reg);
12590 %}
12591 
12592 // And Register with Register using New Data Destination (NDD)
      // Selected only when UseAPX is set: matches AndI of src1 with src2 and emits
      // eandl with a separate dst. Both source operands carry an ndd_demotable
      // flag. cr is declared killed.
      // NOTE(review): the trailing false passed to eandl is presumably the
      // assembler's no-flags selector - confirm against the assembler.
12593 instruct andI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
12594 %{
12595   predicate(UseAPX);
12596   match(Set dst (AndI src1 src2));
12597   effect(KILL cr);
12598   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
12599 
12600   format %{ "eandl     $dst, $src1, $src2\t# int ndd" %}
12601   ins_encode %{
12602     __ eandl($dst$$Register, $src1$$Register, $src2$$Register, false);
12603 
12604   %}
12605   ins_pipe(ialu_reg_reg);
12606 %}
12607 
12608 // And Register with Immediate 255
      // The AndI with the immI_255 mask is replaced by a single movzbl from src to
      // dst. No predicate, flags operand or KILL effect is declared.
12609 instruct andI_rReg_imm255(rRegI dst, rRegI src, immI_255 mask)
12610 %{
12611   match(Set dst (AndI src mask));
12612 
12613   format %{ "movzbl  $dst, $src\t# int & 0xFF" %}
12614   ins_encode %{
12615     __ movzbl($dst$$Register, $src$$Register);
12616   %}
12617   ins_pipe(ialu_reg);
12618 %}
12619 
12620 // And Register with Immediate 255 and promote to long
      // Matches ConvI2L applied to (AndI src mask) with the immI_255 mask and
      // covers both nodes with a single movzbl into the long destination.
12621 instruct andI2L_rReg_imm255(rRegL dst, rRegI src, immI_255 mask)
12622 %{
12623   match(Set dst (ConvI2L (AndI src mask)));
12624 
12625   format %{ "movzbl  $dst, $src\t# int & 0xFF -> long" %}
12626   ins_encode %{
12627     __ movzbl($dst$$Register, $src$$Register);
12628   %}
12629   ins_pipe(ialu_reg);
12630 %}
12631 
12632 // And Register with Immediate 65535
      // The AndI with the immI_65535 mask is replaced by a single movzwl from src
      // to dst. No predicate, flags operand or KILL effect is declared.
12633 instruct andI_rReg_imm65535(rRegI dst, rRegI src, immI_65535 mask)
12634 %{
12635   match(Set dst (AndI src mask));
12636 
12637   format %{ "movzwl  $dst, $src\t# int & 0xFFFF" %}
12638   ins_encode %{
12639     __ movzwl($dst$$Register, $src$$Register);
12640   %}
12641   ins_pipe(ialu_reg);
12642 %}
12643 
12644 // And Register with Immediate 65535 and promote to long
      // Matches ConvI2L applied to (AndI src mask) with the immI_65535 mask and
      // covers both nodes with a single movzwl into the long destination.
12645 instruct andI2L_rReg_imm65535(rRegL dst, rRegI src, immI_65535 mask)
12646 %{
12647   match(Set dst (ConvI2L (AndI src mask)));
12648 
12649   format %{ "movzwl  $dst, $src\t# int & 0xFFFF -> long" %}
12650   ins_encode %{
12651     __ movzwl($dst$$Register, $src$$Register);
12652   %}
12653   ins_pipe(ialu_reg);
12654 %}
12655 
12656 // Int-to-long conversion of an AND with a (2^k - 1) mask, done in one bzhiq.
      // Selected when VM_Version::supports_bmi2() is true, with declared cost 125.
      // The number of low bits to keep is exact_log2(mask + 1); it is first loaded
      // into the TEMP register tmp and then used as the index operand of bzhiq.
      // cr is declared killed.
12657 instruct convI2LAndI_reg_immIbitmask(rRegL dst, rRegI src,  immI_Pow2M1 mask, rRegI tmp, rFlagsReg cr)
12658 %{
12659   predicate(VM_Version::supports_bmi2());
12660   ins_cost(125);
12661   effect(TEMP tmp, KILL cr);
12662   match(Set dst (ConvI2L (AndI src mask)));
12663   format %{ "bzhiq $dst, $src, $mask \t# using $tmp as TEMP, int &  immI_Pow2M1 -> long" %}
12664   ins_encode %{
          // Count of low-order bits selected by the mask.
          const int kept_bits = exact_log2($mask$$constant + 1);
12665     __ movl($tmp$$Register, kept_bits);
12666     __ bzhiq($dst$$Register, $src$$Register, $tmp$$Register);
12667   %}
12668   ins_pipe(ialu_reg_reg);
12669 %}
12670 
12671 // And Register with Immediate, two-operand form.
      // Selected when UseAPX is not set: matches AndI of dst with an immI and emits
      // andl in place on dst. cr is declared killed, and the flag clause lists the
      // condition-flag properties this rule advertises.
12672 instruct andI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
12673 %{
12674   predicate(!UseAPX);
12675   match(Set dst (AndI dst src));
12676   effect(KILL cr);
12677   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12678 
12679   format %{ "andl    $dst, $src\t# int" %}
12680   ins_encode %{
12681     __ andl($dst$$Register, $src$$constant);
12682   %}
12683   ins_pipe(ialu_reg);
12684 %}
12685 
12686 instruct andI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
12687 %{
        // And Register with Immediate, three-operand (NDD) form. Selected only when
        // UseAPX is set: matches AndI of register src1 with immediate src2 and
        // emits eandl with a separate dst. Only operand 1 (the register source)
        // carries an ndd_demotable flag. cr is declared killed.
        // NOTE(review): the trailing false passed to eandl is presumably the
        // assembler's no-flags selector - confirm against the assembler.
12688   predicate(UseAPX);
12689   match(Set dst (AndI src1 src2));
12690   effect(KILL cr);
12691   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
12692 
12693   format %{ "eandl    $dst, $src1, $src2\t# int ndd" %}
12694   ins_encode %{
12695     __ eandl($dst$$Register, $src1$$Register, $src2$$constant, false);
12696   %}
12697   ins_pipe(ialu_reg);
12698 %}
12699 
12700 // And Register with Memory
      // Matches AndI of dst with a LoadI of src and emits andl with an address
      // source, folding the load. No predicate is declared; declared cost is 150
      // and cr is declared killed.
12701 instruct andI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
12702 %{
12703   match(Set dst (AndI dst (LoadI src)));
12704   effect(KILL cr);
12705   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12706 
12707   ins_cost(150);
12708   format %{ "andl    $dst, $src\t# int" %}
12709   ins_encode %{
12710     __ andl($dst$$Register, $src$$Address);
12711   %}
12712   ins_pipe(ialu_reg_mem);
12713 %}
12714 
12715 // And Memory with Register, byte-sized.
      // Matches a StoreB to dst of ((LoadB dst) & src) and emits a single andb on
      // the address. Declared cost is 150; cr is declared killed.
12716 instruct andB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
12717 %{
12718   match(Set dst (StoreB dst (AndI (LoadB dst) src)));
12719   effect(KILL cr);
12720   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12721 
12722   ins_cost(150);
12723   format %{ "andb    $dst, $src\t# byte" %}
12724   ins_encode %{
12725     __ andb($dst$$Address, $src$$Register);
12726   %}
12727   ins_pipe(ialu_mem_reg);
12728 %}
12729 
12730 instruct andI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
12731 %{
        // And Memory with Register, int-sized. Matches a StoreI to dst of
        // ((LoadI dst) & src) and emits a single andl on the address.
        // Declared cost is 150; cr is declared killed.
12732   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
12733   effect(KILL cr);
12734   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12735 
12736   ins_cost(150);
12737   format %{ "andl    $dst, $src\t# int" %}
12738   ins_encode %{
12739     __ andl($dst$$Address, $src$$Register);
12740   %}
12741   ins_pipe(ialu_mem_reg);
12742 %}
12743 
12744 // And Memory with Immediate
      // Matches a StoreI to dst of ((LoadI dst) & src) with an immI source and
      // emits a single andl on the address. Declared cost is 125; cr is declared
      // killed.
12745 instruct andI_mem_imm(memory dst, immI src, rFlagsReg cr)
12746 %{
12747   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
12748   effect(KILL cr);
12749   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12750 
12751   ins_cost(125);
12752   format %{ "andl    $dst, $src\t# int" %}
12753   ins_encode %{
12754     __ andl($dst$$Address, $src$$constant);
12755   %}
12756   ins_pipe(ialu_mem_imm);
12757 %}
12758 
12759 // BMI1 instructions
12760 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, rFlagsReg cr) %{
        // And-not with a memory operand: matches (src1 ^ -1) & (LoadI src2) and
        // emits a single andnl. Selected when both VM_Version::supports_bmi1() and
        // VM_Version::supports_avx() hold. Declared cost is 125; cr is declared
        // killed. The flag clause here does not list the parity flag.
12761   match(Set dst (AndI (XorI src1 minus_1) (LoadI src2)));
12762   predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
12763   effect(KILL cr);
12764   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12765 
12766   ins_cost(125);
12767   format %{ "andnl  $dst, $src1, $src2" %}
12768 
12769   ins_encode %{
12770     __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
12771   %}
12772   ins_pipe(ialu_reg_mem);
12773 %}
12774 
12775 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, rFlagsReg cr) %{
        // And-not on registers: matches (src1 ^ -1) & src2 and emits a single
        // andnl. Selected when both VM_Version::supports_bmi1() and
        // VM_Version::supports_avx() hold. cr is declared killed.
12776   match(Set dst (AndI (XorI src1 minus_1) src2));
12777   predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
12778   effect(KILL cr);
12779   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12780 
12781   format %{ "andnl  $dst, $src1, $src2" %}
12782 
12783   ins_encode %{
12784     __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
12785   %}
12786   ins_pipe(ialu_reg);
12787 %}
12788 
12789 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, rFlagsReg cr) %{
        // Matches (0 - src) & src on registers and emits a single blsil.
        // Selected when both VM_Version::supports_bmi1() and
        // VM_Version::supports_avx() hold. cr is declared killed.
12790   match(Set dst (AndI (SubI imm_zero src) src));
12791   predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
12792   effect(KILL cr);
12793   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
12794 
12795   format %{ "blsil  $dst, $src" %}
12796 
12797   ins_encode %{
12798     __ blsil($dst$$Register, $src$$Register);
12799   %}
12800   ins_pipe(ialu_reg);
12801 %}
12802 
12803 instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, rFlagsReg cr) %{
        // Memory form of the blsi rule: matches (0 - LoadI src) & (LoadI src),
        // where both loads name the same memory operand, and emits a single blsil
        // with an address source. Selected when both VM_Version::supports_bmi1()
        // and VM_Version::supports_avx() hold. Declared cost is 125.
12804   match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
12805   predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
12806   effect(KILL cr);
12807   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
12808 
12809   ins_cost(125);
12810   format %{ "blsil  $dst, $src" %}
12811 
12812   ins_encode %{
12813     __ blsil($dst$$Register, $src$$Address);
12814   %}
12815   ins_pipe(ialu_reg_mem);
12816 %}
12817 
12818 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
12819 %{
        // Memory form of the blsmsk rule: matches ((LoadI src) + -1) ^ (LoadI src),
        // where both loads name the same memory operand, and emits a single
        // blsmskl with an address source. Selected when both
        // VM_Version::supports_bmi1() and VM_Version::supports_avx() hold.
        // Declared cost is 125; cr is declared killed. Unlike the and/blsi/blsr
        // rules, the flag clause lists Flag_clears_zero_flag.
12820   match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ) );
12821   predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
12822   effect(KILL cr);
12823   flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
12824 
12825   ins_cost(125);
12826   format %{ "blsmskl $dst, $src" %}
12827 
12828   ins_encode %{
12829     __ blsmskl($dst$$Register, $src$$Address);
12830   %}
12831   ins_pipe(ialu_reg_mem);
12832 %}
12833 
12834 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
12835 %{
        // Register form of the blsmsk rule: matches (src + -1) ^ src and emits a
        // single blsmskl. Selected when both VM_Version::supports_bmi1() and
        // VM_Version::supports_avx() hold. cr is declared killed. The flag clause
        // lists Flag_clears_zero_flag.
12836   match(Set dst (XorI (AddI src minus_1) src));
12837   predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
12838   effect(KILL cr);
12839   flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
12840 
12841   format %{ "blsmskl $dst, $src" %}
12842 
12843   ins_encode %{
12844     __ blsmskl($dst$$Register, $src$$Register);
12845   %}
12846 
12847   ins_pipe(ialu_reg);
12848 %}
12849 
12850 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
12851 %{
        // Register form of the blsr rule: matches (src + -1) & src and emits a
        // single blsrl. Selected when both VM_Version::supports_bmi1() and
        // VM_Version::supports_avx() hold. cr is declared killed.
12852   match(Set dst (AndI (AddI src minus_1) src) );
12853   predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
12854   effect(KILL cr);
12855   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
12856 
12857   format %{ "blsrl  $dst, $src" %}
12858 
12859   ins_encode %{
12860     __ blsrl($dst$$Register, $src$$Register);
12861   %}
12862 
        // No memory operand is involved, so use the register pipe class, as the
        // blsiI_rReg_rReg and blsmskI_rReg_rReg rules do.
12863   ins_pipe(ialu_reg);
12864 %}
12865 
12866 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
12867 %{
        // Memory form of the blsr rule: matches ((LoadI src) + -1) & (LoadI src),
        // where both loads name the same memory operand, and emits a single blsrl
        // with an address source. Selected when both VM_Version::supports_bmi1()
        // and VM_Version::supports_avx() hold. Declared cost is 125; cr is
        // declared killed.
12868   match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ) );
12869   predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
12870   effect(KILL cr);
12871   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
12872 
12873   ins_cost(125);
12874   format %{ "blsrl  $dst, $src" %}
12875 
12876   ins_encode %{
12877     __ blsrl($dst$$Register, $src$$Address);
12878   %}
12879 
        // The source is read from memory, so use the register-memory pipe class,
        // as the blsiI_rReg_mem and blsmskI_rReg_mem rules do.
12880   ins_pipe(ialu_reg_mem);
12881 %}
12882 
12883 // Or Instructions
12884 // Or Register with Register, two-operand form.
      // Selected when UseAPX is not set: matches OrI of dst with src and emits orl
      // in place on dst. cr is declared killed, and the flag clause lists the
      // condition-flag properties this rule advertises.
12885 instruct orI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
12886 %{
12887   predicate(!UseAPX);
12888   match(Set dst (OrI dst src));
12889   effect(KILL cr);
12890   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12891 
12892   format %{ "orl     $dst, $src\t# int" %}
12893   ins_encode %{
12894     __ orl($dst$$Register, $src$$Register);
12895   %}
12896   ins_pipe(ialu_reg_reg);
12897 %}
12898 
12899 // Or Register with Register using New Data Destination (NDD)
      // Selected only when UseAPX is set: matches OrI of src1 with src2 and emits
      // eorl with a separate dst. Both source operands carry an ndd_demotable
      // flag. cr is declared killed.
      // NOTE(review): the trailing false passed to eorl is presumably the
      // assembler's no-flags selector - confirm against the assembler.
12900 instruct orI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
12901 %{
12902   predicate(UseAPX);
12903   match(Set dst (OrI src1 src2));
12904   effect(KILL cr);
12905   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
12906 
12907   format %{ "eorl     $dst, $src1, $src2\t# int ndd" %}
12908   ins_encode %{
12909     __ eorl($dst$$Register, $src1$$Register, $src2$$Register, false);
12910   %}
12911   ins_pipe(ialu_reg_reg);
12912 %}
12913 
12914 // Or Register with Immediate, two-operand form.
      // Selected when UseAPX is not set: matches OrI of dst with an immI and emits
      // orl in place on dst. cr is declared killed, and the flag clause lists the
      // condition-flag properties this rule advertises.
12915 instruct orI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
12916 %{
12917   predicate(!UseAPX);
12918   match(Set dst (OrI dst src));
12919   effect(KILL cr);
12920   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12921 
12922   format %{ "orl     $dst, $src\t# int" %}
12923   ins_encode %{
12924     __ orl($dst$$Register, $src$$constant);
12925   %}
12926   ins_pipe(ialu_reg);
12927 %}
12928 
12929 instruct orI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
12930 %{
        // Or Register with Immediate, three-operand (NDD) form. Selected only when
        // UseAPX is set: matches OrI of register src1 with immediate src2 and
        // emits eorl with a separate dst. Only operand 1 (the register source)
        // carries an ndd_demotable flag. cr is declared killed.
        // NOTE(review): the trailing false passed to eorl is presumably the
        // assembler's no-flags selector - confirm against the assembler.
12931   predicate(UseAPX);
12932   match(Set dst (OrI src1 src2));
12933   effect(KILL cr);
12934   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
12935 
12936   format %{ "eorl     $dst, $src1, $src2\t# int ndd" %}
12937   ins_encode %{
12938     __ eorl($dst$$Register, $src1$$Register, $src2$$constant, false);
12939   %}
12940   ins_pipe(ialu_reg);
12941 %}
12942 
// Commuted variant of the NDD register/immediate OR: the constant is the
// first input (src1) and the register the second (src2). The emitted
// instruction still takes the register first and the constant second.
// Requires UseAPX.
instruct orI_rReg_imm_rReg_ndd(rRegI dst, immI src1, rRegI src2, rFlagsReg cr)
%{
  match(Set dst (OrI src1 src2));
  predicate(UseAPX);
  effect(KILL cr);
  // NOTE(review): the only register source here is operand 2 (src2), yet the
  // list names Flag_ndd_demotable_opr1 - confirm this is the intended tag.
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "eorl     $dst, $src2, $src1\t# int ndd" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rreg = $src2$$Register;
    __ eorl(Rdst, Rreg, $src1$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
12956 
// 32-bit OR of a register with an int loaded from memory: dst |= [src].
// The LoadI is folded into the instruction's memory operand.
instruct orI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
%{
  match(Set dst (OrI dst (LoadI src)));
  ins_cost(150);
  // Condition codes are overwritten by the OR.
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "orl     $dst, $src\t# int" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ orl(Rdst, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
12971 
// Byte-wide read-modify-write OR: matches a StoreB of (LoadB dst | src) back
// to the same address and emits a single orb on the memory operand.
instruct orB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (StoreB dst (OrI (LoadB dst) src)));
  ins_cost(150);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "orb    $dst, $src\t# byte" %}
  ins_encode %{
    Register Rsrc = $src$$Register;
    __ orb($dst$$Address, Rsrc);
  %}
  ins_pipe(ialu_mem_reg);
%}
12986 
// Int-wide read-modify-write OR: matches a StoreI of (LoadI dst | src) back
// to the same address and emits a single orl on the memory operand.
instruct orI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  ins_cost(150);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "orl     $dst, $src\t# int" %}
  ins_encode %{
    Register Rsrc = $src$$Register;
    __ orl($dst$$Address, Rsrc);
  %}
  ins_pipe(ialu_mem_reg);
%}
13000 
// Int-wide read-modify-write OR with a constant: matches a StoreI of
// (LoadI dst | src) to the same address. Costed at 125, below the 150 of
// the memory/register rule.
instruct orI_mem_imm(memory dst, immI src, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  ins_cost(125);
  // Condition codes are overwritten by the OR.
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "orl     $dst, $src\t# int" %}
  ins_encode %{
    // Memory destination, immediate source.
    __ orl($dst$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
13015 
13016 // Xor Instructions
// 32-bit XOR of two registers, legacy two-operand form: dst ^= src.
// Used only when UseAPX is off.
instruct xorI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (XorI dst src));
  predicate(!UseAPX);
  // Condition codes are overwritten; the flag list names the affected bits.
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "xorl    $dst, $src\t# int" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ xorl(Rdst, Rsrc);
  %}
  ins_pipe(ialu_reg_reg);
%}
13031 
// 32-bit XOR of two registers in the three-operand New Data Destination (NDD)
// form: dst receives src1 ^ src2. Only eligible when UseAPX is enabled.
instruct xorI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
%{
  match(Set dst (XorI src1 src2));
  predicate(UseAPX);
  // Flags are clobbered; operands 1 and 2 are both tagged NDD-demotable.
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  format %{ "exorl    $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rlhs = $src1$$Register;
    Register Rrhs = $src2$$Register;
    __ exorl(Rdst, Rlhs, Rrhs, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
13046 
// XOR of an int register with the constant -1, emitted as an in-place notl.
// This rule declares no flags operand and no KILL effect. Legacy (!UseAPX)
// form.
instruct xorI_rReg_im1(rRegI dst, immI_M1 imm)
%{
  match(Set dst (XorI dst imm));
  predicate(!UseAPX);

  format %{ "notl    $dst" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ notl(Rdst);
  %}
  ins_pipe(ialu_reg);
%}
13059 
// NDD counterpart of the int XOR-with-minus-one rule: emits enotl with a
// separate destination and source. Requires UseAPX. No flags operand is
// declared; operand 1 is tagged NDD-demotable.
instruct xorI_rReg_im1_ndd(rRegI dst, rRegI src, immI_M1 imm)
%{
  predicate(UseAPX);
  match(Set dst (XorI src imm));
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "enotl    $dst, $src" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ enotl(Rdst, Rsrc);
  %}
  ins_pipe(ialu_reg);
%}
13072 
// 32-bit XOR of a register with a general immediate, legacy two-operand
// form: dst ^= src.
instruct xorI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
%{
  match(Set dst (XorI dst src));
  // The constant -1 is explicitly excluded so that xorI_rReg_im1 is chosen
  // for it regardless of relative costs. Also restricted to !UseAPX.
  predicate(!UseAPX && n->in(2)->bottom_type()->is_int()->get_con() != -1);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "xorl    $dst, $src\t# int" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ xorl(Rdst, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
13088 
// 32-bit XOR of a register with a general immediate in the three-operand
// NDD form: dst receives src1 ^ src2.
instruct xorI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
%{
  match(Set dst (XorI src1 src2));
  // The constant -1 is explicitly excluded so that xorI_rReg_im1_ndd is
  // chosen for it regardless of relative costs. Also requires UseAPX.
  predicate(UseAPX && n->in(2)->bottom_type()->is_int()->get_con() != -1);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "exorl    $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src1$$Register;
    __ exorl(Rdst, Rsrc, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
13103 
// 32-bit XOR of a register with an int loaded from memory: dst ^= [src].
// The LoadI is folded into the instruction's memory operand.
instruct xorI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
%{
  match(Set dst (XorI dst (LoadI src)));
  ins_cost(150);
  // Condition codes are overwritten by the XOR.
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "xorl    $dst, $src\t# int" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ xorl(Rdst, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
13118 
// Byte-wide read-modify-write XOR: matches a StoreB of (LoadB dst ^ src) back
// to the same address and emits a single xorb on the memory operand.
instruct xorB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (StoreB dst (XorI (LoadB dst) src)));
  ins_cost(150);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "xorb    $dst, $src\t# byte" %}
  ins_encode %{
    Register Rsrc = $src$$Register;
    __ xorb($dst$$Address, Rsrc);
  %}
  ins_pipe(ialu_mem_reg);
%}
13133 
// Int-wide read-modify-write XOR: matches a StoreI of (LoadI dst ^ src) back
// to the same address and emits a single xorl on the memory operand.
instruct xorI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  ins_cost(150);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "xorl    $dst, $src\t# int" %}
  ins_encode %{
    Register Rsrc = $src$$Register;
    __ xorl($dst$$Address, Rsrc);
  %}
  ins_pipe(ialu_mem_reg);
%}
13147 
// Int-wide read-modify-write XOR with a constant: matches a StoreI of
// (LoadI dst ^ src) to the same address. Costed at 125, below the 150 of
// the memory/register rule.
instruct xorI_mem_imm(memory dst, immI src, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  ins_cost(125);
  // Condition codes are overwritten by the XOR.
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "xorl    $dst, $src\t# int" %}
  ins_encode %{
    // Memory destination, immediate source.
    __ xorl($dst$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
13162 
13163 
13164 // Long Logical Instructions
13165 
13166 // And Instructions
// 64-bit AND of two registers, legacy two-operand form: dst &= src.
// Used only when UseAPX is off.
instruct andL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (AndL dst src));
  predicate(!UseAPX);
  // Condition codes are overwritten; the flag list names the affected bits.
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "andq    $dst, $src\t# long" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ andq(Rdst, Rsrc);
  %}
  ins_pipe(ialu_reg_reg);
%}
13181 
// 64-bit AND of two registers in the three-operand New Data Destination (NDD)
// form: dst receives src1 & src2. Only eligible when UseAPX is enabled.
instruct andL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
%{
  match(Set dst (AndL src1 src2));
  predicate(UseAPX);
  // Flags are clobbered; operands 1 and 2 are both tagged NDD-demotable.
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  format %{ "eandq     $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rlhs = $src1$$Register;
    Register Rrhs = $src2$$Register;
    __ eandq(Rdst, Rlhs, Rrhs, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
13197 
// Long AND with the constant 255, implemented as a byte zero-extension
// (movzbl) from src into dst. No flags operand is declared and there is no
// UseAPX predicate.
instruct andL_rReg_imm255(rRegL dst, rRegL src, immL_255 mask)
%{
  match(Set dst (AndL src mask));

  format %{ "movzbl  $dst, $src\t# long & 0xFF" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    // The 32-bit movzbl suffices: it also zeroes the upper 32 bits of the
    // destination, so no REX.W form is needed.
    __ movzbl(Rdst, Rsrc);
  %}
  ins_pipe(ialu_reg);
%}
13210 
// Long AND with the constant 65535, implemented as a 16-bit zero-extension
// (movzwl) from src into dst. No flags operand is declared and there is no
// UseAPX predicate.
instruct andL_rReg_imm65535(rRegL dst, rRegL src, immL_65535 mask)
%{
  match(Set dst (AndL src mask));

  format %{ "movzwl  $dst, $src\t# long & 0xFFFF" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    // The 32-bit movzwl suffices: it also zeroes the upper 32 bits of the
    // destination, so no REX.W form is needed.
    __ movzwl(Rdst, Rsrc);
  %}
  ins_pipe(ialu_reg);
%}
13223 
// 64-bit AND of a register with an immL32 constant, legacy two-operand form:
// dst &= src. Used only when UseAPX is off.
instruct andL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
%{
  match(Set dst (AndL dst src));
  predicate(!UseAPX);
  // Condition codes are overwritten; the flag list names the affected bits.
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "andq    $dst, $src\t# long" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ andq(Rdst, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
13238 
// 64-bit AND of a register with an immL32 constant in the three-operand NDD
// form: dst receives src1 & src2. Requires UseAPX.
instruct andL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
%{
  match(Set dst (AndL src1 src2));
  predicate(UseAPX);
  // Flags are clobbered; operand 1 (the register source) is tagged as
  // NDD-demotable in the flag list.
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "eandq    $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src1$$Register;
    __ eandq(Rdst, Rsrc, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
13252 
// 64-bit AND of a register with a long loaded from memory: dst &= [src].
// The LoadL is folded into the instruction's memory operand.
instruct andL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
%{
  match(Set dst (AndL dst (LoadL src)));
  ins_cost(150);
  // Condition codes are overwritten by the AND.
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "andq    $dst, $src\t# long" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ andq(Rdst, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
13267 
// Long read-modify-write AND: matches a StoreL of (LoadL dst & src) back to
// the same address and emits a single andq on the memory operand.
instruct andL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (AndL (LoadL dst) src)));
  ins_cost(150);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "andq    $dst, $src\t# long" %}
  ins_encode %{
    Register Rsrc = $src$$Register;
    __ andq($dst$$Address, Rsrc);
  %}
  ins_pipe(ialu_mem_reg);
%}
13282 
// Long read-modify-write AND with an immL32 constant: matches a StoreL of
// (LoadL dst & src) to the same address. Costed at 125, below the 150 of
// the memory/register rule.
instruct andL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (AndL (LoadL dst) src)));
  ins_cost(125);
  // Condition codes are overwritten by the AND.
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "andq    $dst, $src\t# long" %}
  ins_encode %{
    // Memory destination, immediate source.
    __ andq($dst$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
13297 
// Clears a single bit of a long in memory with btrq. Matches a StoreL of
// (LoadL dst & con) where con is an immL_NotPow2 constant, i.e. ~con is a
// power of two; the bit index passed to btrq is log2(~con).
instruct btrL_mem_imm(memory dst, immL_NotPow2 con, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (AndL (LoadL dst) con)));
  // Only masks whose cleared bit has index above 30 are taken here; per the
  // original rationale, plain AND/OR already handles 8/32-bit values well
  // enough, so this rule is meant for genuinely 64-bit immediates.
  predicate(log2i_graceful(~n->in(3)->in(2)->get_long()) > 30);
  ins_cost(125);
  effect(KILL cr);

  format %{ "btrq    $dst, log2(not($con))\t# long" %}
  ins_encode %{
    // Invert the mask to recover the single set bit, then take its index.
    const int bit_index = log2i_exact((julong)~$con$$constant);
    __ btrq($dst$$Address, bit_index);
  %}
  ins_pipe(ialu_mem_imm);
%}
13314 
13315 // BMI1 instructions
// ANDN with a memory operand: matches (src1 ^ -1) & LoadL(src2), i.e.
// ~src1 & [src2], and emits a single andnq. Gated on
// VM_Version::supports_bmi1() && VM_Version::supports_avx().
instruct andnL_rReg_rReg_mem(rRegL dst, rRegL src1, memory src2, immL_M1 minus_1, rFlagsReg cr) %{
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  match(Set dst (AndL (XorL src1 minus_1) (LoadL src2)));
  ins_cost(125);
  // Flags are clobbered; note the flag list has no parity entry here.
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "andnq  $dst, $src1, $src2" %}

  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rinv = $src1$$Register;
    __ andnq(Rdst, Rinv, $src2$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
13330 
// ANDN on registers only: matches (src1 ^ -1) & src2, i.e. ~src1 & src2, and
// emits a single andnq. Gated on
// VM_Version::supports_bmi1() && VM_Version::supports_avx().
instruct andnL_rReg_rReg_rReg(rRegL dst, rRegL src1, rRegL src2, immL_M1 minus_1, rFlagsReg cr) %{
  match(Set dst (AndL (XorL src1 minus_1) src2));
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  // Flags are clobbered; note the flag list has no parity entry here.
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "andnq  $dst, $src1, $src2" %}

  ins_encode %{
    __ andnq($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  // This form has no memory operand, so it uses the register/register pipe
  // class like the other register-only logical rules, rather than the
  // reg/mem class of its memory-operand sibling.
  ins_pipe(ialu_reg_reg);
%}
13344 
// BLSI on a register: matches (0 - src) & src and emits a single blsiq.
// Gated on VM_Version::supports_bmi1() && VM_Version::supports_avx().
instruct blsiL_rReg_rReg(rRegL dst, rRegL src, immL0 imm_zero, rFlagsReg cr) %{
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  match(Set dst (AndL (SubL imm_zero src) src));
  // Flags are clobbered; the flag list covers sign, zero and overflow only.
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);

  format %{ "blsiq  $dst, $src" %}

  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ blsiq(Rdst, Rsrc);
  %}
  ins_pipe(ialu_reg);
%}
13358 
// BLSI with a memory source: matches (0 - LoadL(src)) & LoadL(src) and emits
// a single blsiq reading the value from memory. Gated on
// VM_Version::supports_bmi1() && VM_Version::supports_avx().
instruct blsiL_rReg_mem(rRegL dst, memory src, immL0 imm_zero, rFlagsReg cr) %{
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
  ins_cost(125);
  // Flags are clobbered; the flag list covers sign, zero and overflow only.
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);

  format %{ "blsiq  $dst, $src" %}

  ins_encode %{
    Register Rdst = $dst$$Register;
    __ blsiq(Rdst, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
13373 
// BLSMSK with a memory source: matches (LoadL(src) + -1) ^ LoadL(src) and
// emits a single blsmskq reading the value from memory. Gated on
// VM_Version::supports_bmi1() && VM_Version::supports_avx().
instruct blsmskL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
%{
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ) );
  ins_cost(125);
  // Flags are clobbered; unlike the neighbouring BMI1 rules, the zero flag
  // is listed as cleared rather than set.
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);

  format %{ "blsmskq $dst, $src" %}

  ins_encode %{
    Register Rdst = $dst$$Register;
    __ blsmskq(Rdst, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
13389 
// BLSMSK on a register: matches (src + -1) ^ src and emits a single blsmskq.
// Gated on VM_Version::supports_bmi1() && VM_Version::supports_avx().
instruct blsmskL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
%{
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  match(Set dst (XorL (AddL src minus_1) src));
  // Flags are clobbered; unlike the neighbouring BMI1 rules, the zero flag
  // is listed as cleared rather than set.
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);

  format %{ "blsmskq $dst, $src" %}

  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ blsmskq(Rdst, Rsrc);
  %}

  ins_pipe(ialu_reg);
%}
13405 
// BLSR on a register: matches (src + -1) & src and emits a single blsrq.
// Gated on VM_Version::supports_bmi1() && VM_Version::supports_avx().
instruct blsrL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
%{
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  match(Set dst (AndL (AddL src minus_1) src) );
  // Flags are clobbered; the flag list covers sign, zero and overflow only.
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);

  format %{ "blsrq  $dst, $src" %}

  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ blsrq(Rdst, Rsrc);
  %}

  ins_pipe(ialu_reg);
%}
13421 
// BLSR with a memory source: matches (LoadL(src) + -1) & LoadL(src) and emits
// a single blsrq reading the value from memory. Gated on
// VM_Version::supports_bmi1() && VM_Version::supports_avx().
instruct blsrL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
%{
  match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src)) );
  predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
  // Flags are clobbered; the flag list covers sign, zero and overflow only.
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);

  ins_cost(125);
  format %{ "blsrq  $dst, $src" %}

  ins_encode %{
    __ blsrq($dst$$Register, $src$$Address);
  %}

  // Memory-source form: use the reg/mem pipe class, matching the other
  // BMI1 memory rules (blsiL_rReg_mem, blsmskL_rReg_mem), not the
  // register-only class.
  ins_pipe(ialu_reg_mem);
%}
13438 
13439 // Or Instructions
// 64-bit OR of two registers, legacy two-operand form: dst |= src.
// Used only when UseAPX is off.
instruct orL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (OrL dst src));
  predicate(!UseAPX);
  // Condition codes are overwritten; the flag list names the affected bits.
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "orq     $dst, $src\t# long" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ orq(Rdst, Rsrc);
  %}
  ins_pipe(ialu_reg_reg);
%}
13454 
// 64-bit OR of two registers in the three-operand New Data Destination (NDD)
// form: dst receives src1 | src2. Only eligible when UseAPX is enabled.
instruct orL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
%{
  match(Set dst (OrL src1 src2));
  predicate(UseAPX);
  // Flags are clobbered; operands 1 and 2 are both tagged NDD-demotable.
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rlhs = $src1$$Register;
    Register Rrhs = $src2$$Register;
    __ eorq(Rdst, Rlhs, Rrhs, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
13470 
// 64-bit OR of a long register with a pointer reinterpreted as a long
// (CastP2X), legacy two-operand form. The pointer operand is any_RegP so
// that R15 (the TLS register) can be matched without spilling.
instruct orL_rReg_castP2X(rRegL dst, any_RegP src, rFlagsReg cr) %{
  match(Set dst (OrL dst (CastP2X src)));
  predicate(!UseAPX);
  // Condition codes are overwritten by the OR.
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "orq     $dst, $src\t# long" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rptr = $src$$Register;
    __ orq(Rdst, Rptr);
  %}
  ins_pipe(ialu_reg_reg);
%}
13484 
// NDD counterpart of the OR-with-CastP2X rule: dst receives
// src1 | CastP2X(src2) via the three-operand eorq. Requires UseAPX.
// NOTE(review): src1 is declared any_RegP although it is the plain (long)
// first input of OrL, and no NDD-demotable flag is listed, unlike the other
// NDD rules - confirm both are intentional.
instruct orL_rReg_castP2X_ndd(rRegL dst, any_RegP src1, any_RegP src2, rFlagsReg cr) %{
  match(Set dst (OrL src1 (CastP2X src2)));
  predicate(UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rlhs = $src1$$Register;
    Register Rptr = $src2$$Register;
    __ eorq(Rdst, Rlhs, Rptr, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
13497 
// 64-bit OR of a register with an immL32 constant, legacy two-operand form:
// dst |= src. Used only when UseAPX is off.
instruct orL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
%{
  match(Set dst (OrL dst src));
  predicate(!UseAPX);
  // Condition codes are overwritten; the flag list names the affected bits.
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "orq     $dst, $src\t# long" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ orq(Rdst, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
13512 
// 64-bit OR of a register with an immL32 constant in the three-operand NDD
// form: dst receives src1 | src2. Requires UseAPX.
instruct orL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
%{
  match(Set dst (OrL src1 src2));
  predicate(UseAPX);
  // Flags are clobbered; operand 1 (the register source) is tagged as
  // NDD-demotable in the flag list.
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src1$$Register;
    __ eorq(Rdst, Rsrc, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
13526 
// Commuted variant of the NDD long register/immediate OR: the constant is
// the first input (src1) and the register the second (src2). The emitted
// instruction still takes the register first and the constant second.
// Requires UseAPX.
instruct orL_rReg_imm_rReg_ndd(rRegL dst, immL32 src1, rRegL src2, rFlagsReg cr)
%{
  match(Set dst (OrL src1 src2));
  predicate(UseAPX);
  effect(KILL cr);
  // NOTE(review): the only register source here is operand 2 (src2), yet the
  // list names Flag_ndd_demotable_opr1 - confirm this is the intended tag.
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "eorq     $dst, $src2, $src1\t# long ndd" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rreg = $src2$$Register;
    __ eorq(Rdst, Rreg, $src1$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
13540 
// 64-bit OR of a register with a long loaded from memory: dst |= [src].
// The LoadL is folded into the instruction's memory operand.
instruct orL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
%{
  match(Set dst (OrL dst (LoadL src)));
  ins_cost(150);
  // Condition codes are overwritten by the OR.
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "orq     $dst, $src\t# long" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ orq(Rdst, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
13555 
// Long read-modify-write OR: matches a StoreL of (LoadL dst | src) back to
// the same address and emits a single orq on the memory operand.
instruct orL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (OrL (LoadL dst) src)));
  ins_cost(150);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "orq     $dst, $src\t# long" %}
  ins_encode %{
    Register Rsrc = $src$$Register;
    __ orq($dst$$Address, Rsrc);
  %}
  ins_pipe(ialu_mem_reg);
%}
13570 
// Long read-modify-write OR with an immL32 constant: matches a StoreL of
// (LoadL dst | src) to the same address. Costed at 125, below the 150 of
// the memory/register rule.
instruct orL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (OrL (LoadL dst) src)));
  ins_cost(125);
  // Condition codes are overwritten by the OR.
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "orq     $dst, $src\t# long" %}
  ins_encode %{
    // Memory destination, immediate source.
    __ orq($dst$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
13585 
// Sets a single bit of a long in memory with btsq. Matches a StoreL of
// (LoadL dst | con) where con is an immL_Pow2 constant; the bit index passed
// to btsq is log2(con).
instruct btsL_mem_imm(memory dst, immL_Pow2 con, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (OrL (LoadL dst) con)));
  // Only powers of two with bit index above 31 are taken here; per the
  // original rationale, plain AND/OR already handles 8/32-bit values well
  // enough, so this rule is meant for genuinely 64-bit immediates.
  predicate(log2i_graceful(n->in(3)->in(2)->get_long()) > 31);
  ins_cost(125);
  effect(KILL cr);

  format %{ "btsq    $dst, log2($con)\t# long" %}
  ins_encode %{
    // The constant has exactly one set bit; use its index as the bit number.
    const int bit_index = log2i_exact((julong)$con$$constant);
    __ btsq($dst$$Address, bit_index);
  %}
  ins_pipe(ialu_mem_imm);
%}
13602 
13603 // Xor Instructions
// 64-bit XOR of two registers, legacy two-operand form: dst ^= src.
// Used only when UseAPX is off.
instruct xorL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (XorL dst src));
  predicate(!UseAPX);
  // Condition codes are overwritten; the flag list names the affected bits.
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "xorq    $dst, $src\t# long" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ xorq(Rdst, Rsrc);
  %}
  ins_pipe(ialu_reg_reg);
%}
13618 
// 64-bit XOR of two registers in the three-operand New Data Destination (NDD)
// form: dst receives src1 ^ src2. Only eligible when UseAPX is enabled.
instruct xorL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
%{
  match(Set dst (XorL src1 src2));
  predicate(UseAPX);
  // Flags are clobbered; operands 1 and 2 are both tagged NDD-demotable.
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  format %{ "exorq    $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rlhs = $src1$$Register;
    Register Rrhs = $src2$$Register;
    __ exorq(Rdst, Rlhs, Rrhs, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
13633 
// XOR of a long register with the constant -1, emitted as an in-place notq.
// This rule declares no flags operand and no KILL effect. Legacy (!UseAPX)
// form.
instruct xorL_rReg_im1(rRegL dst, immL_M1 imm)
%{
  match(Set dst (XorL dst imm));
  predicate(!UseAPX);

  format %{ "notq   $dst" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ notq(Rdst);
  %}
  ins_pipe(ialu_reg);
%}
13646 
// NDD counterpart of the long XOR-with-minus-one rule: emits enotq with a
// separate destination and source. Requires UseAPX. No flags operand is
// declared; operand 1 is tagged NDD-demotable.
instruct xorL_rReg_im1_ndd(rRegL dst,rRegL src, immL_M1 imm)
%{
  match(Set dst (XorL src imm));
  predicate(UseAPX);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "enotq   $dst, $src" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ enotq(Rdst, Rsrc);
  %}
  ins_pipe(ialu_reg);
%}
13659 
// 64-bit XOR of a register with a general immL32 constant, legacy
// two-operand form: dst ^= src.
instruct xorL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
%{
  match(Set dst (XorL dst src));
  // The constant -1 is explicitly excluded so that xorL_rReg_im1 is chosen
  // for it regardless of relative costs. Also restricted to !UseAPX.
  predicate(!UseAPX && n->in(2)->bottom_type()->is_long()->get_con() != -1L);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "xorq    $dst, $src\t# long" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ xorq(Rdst, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
13675 
// 64-bit XOR of a register with a general immL32 constant in the
// three-operand NDD form: dst receives src1 ^ src2. (The rule name lacks the
// _ndd suffix its siblings carry, but the predicate requires UseAPX.)
instruct xorL_rReg_rReg_imm(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
%{
  match(Set dst (XorL src1 src2));
  // The constant -1 is explicitly excluded so that xorL_rReg_im1_ndd is
  // chosen for it regardless of relative costs.
  predicate(UseAPX && n->in(2)->bottom_type()->is_long()->get_con() != -1L);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "exorq    $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src1$$Register;
    __ exorq(Rdst, Rsrc, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
13690 
// 64-bit XOR of a register with a long loaded from memory: dst ^= [src].
// The LoadL is folded into the instruction's memory operand.
instruct xorL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
%{
  match(Set dst (XorL dst (LoadL src)));
  ins_cost(150);
  // Condition codes are overwritten by the XOR.
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "xorq    $dst, $src\t# long" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ xorq(Rdst, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
13705 
// Long read-modify-write XOR: matches a StoreL of (LoadL dst ^ src) back to
// the same address and emits a single xorq on the memory operand.
instruct xorL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (XorL (LoadL dst) src)));
  ins_cost(150);
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "xorq    $dst, $src\t# long" %}
  ins_encode %{
    Register Rsrc = $src$$Register;
    __ xorq($dst$$Address, Rsrc);
  %}
  ins_pipe(ialu_mem_reg);
%}
13720 
// Long read-modify-write XOR with an immL32 constant: matches a StoreL of
// (LoadL dst ^ src) to the same address. Costed at 125, below the 150 of
// the memory/register rule.
instruct xorL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (XorL (LoadL dst) src)));
  ins_cost(125);
  // Condition codes are overwritten by the XOR.
  effect(KILL cr);
  flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);

  format %{ "xorq    $dst, $src\t# long" %}
  ins_encode %{
    // Memory destination, immediate source.
    __ xorq($dst$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
13735 
// Materializes CmpLTMask(p, q) in dst with a compare, a conditional set on
// "less", and a negate: the set leaves 0 or 1 in dst, and negating turns
// that into 0 or -1 (all bits set).
instruct cmpLTMask(rRegI dst, rRegI p, rRegI q, rFlagsReg cr)
%{
  match(Set dst (CmpLTMask p q));
  // The compare and the negate both overwrite the condition codes.
  effect(KILL cr);

  ins_cost(400);
  // Every fragment but the last ends with "\n\t" so that each emitted
  // instruction is printed on its own line in the listing.
  format %{ "cmpl    $p, $q\t# cmpLTMask\n\t"
            "setcc   $dst \t# emits setlt + movzbl or setzul for APX\n\t"
            "negl    $dst" %}
  ins_encode %{
    __ cmpl($p$$Register, $q$$Register);
    // Produces 1 in dst when p < q (signed), otherwise 0.
    __ setcc(Assembler::less, $dst$$Register);
    // 1 -> -1, 0 -> 0.
    __ negl($dst$$Register);
  %}
  ins_pipe(pipe_slow);
%}
13752 
// CmpLTMask against the constant zero, done in place with a single
// arithmetic right shift by 31: the sign bit of dst is replicated across
// the whole register.
instruct cmpLTMask0(rRegI dst, immI_0 zero, rFlagsReg cr)
%{
  match(Set dst (CmpLTMask dst zero));
  ins_cost(100);
  // The shift overwrites the condition codes.
  effect(KILL cr);

  format %{ "sarl    $dst, #31\t# cmpLTMask0" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ sarl(Rdst, 31);
  %}
  ins_pipe(ialu_reg);
%}
13765 
/* Fused form of ((CmpLTMask p q) & y) + (p - q), computed in place in p.
 * A short branch is used instead of building the mask: better to save a
 * register than avoid a branch. */
instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
%{
  match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
  ins_cost(300);
  effect(KILL cr);
  format %{ "subl    $p,$q\t# cadd_cmpLTMask\n\t"
            "jge     done\n\t"
            "addl    $p,$y\n"
            "done:   " %}
  ins_encode %{
    Label skip_add;
    // p -= q; the flags from this subtraction drive the branch below.
    __ subl($p$$Register, $q$$Register);
    // When the original p was >= q the mask would be zero: nothing to add.
    __ jccb(Assembler::greaterEqual, skip_add);
    __ addl($p$$Register, $y$$Register);
    __ bind(skip_add);
  %}
  ins_pipe(pipe_cmplt);
%}
13788 
/* Fused form of (CmpLTMask p q) & y, computed in place in y: y is kept when
 * p < q and zeroed otherwise. A short branch is used instead of building the
 * mask: better to save a register than avoid a branch. */
instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
%{
  match(Set y (AndI (CmpLTMask p q) y));
  ins_cost(300);
  effect(KILL cr);

  format %{ "cmpl    $p, $q\t# and_cmpLTMask\n\t"
            "jlt     done\n\t"
            "xorl    $y, $y\n"
            "done:   " %}
  ins_encode %{
    Label keep_y;
    __ cmpl($p$$Register, $q$$Register);
    // p < q: the mask would be all ones, so y is left untouched.
    __ jccb(Assembler::less, keep_y);
    // Otherwise the mask would be zero: clear y.
    __ xorl($y$$Register, $y$$Register);
    __ bind(keep_y);
  %}
  ins_pipe(pipe_cmplt);
%}
13813 
13814 
13815 //---------- FP Instructions------------------------------------------------
13816 
// Really expensive, avoid
// Float compare into the unsigned flags register: ucomiss followed by the
// shared emit_cmpfp_fixup sequence (per the format, a flags rewrite via
// pushfq/popfq that is taken only on the parity/NaN path).
instruct cmpF_cc_reg(rFlagsRegU cr, regF src1, regF src2) %{
  match(Set cr (CmpF src1 src2));

  ins_cost(500);
  format %{ "ucomiss $src1, $src2\n\t"
            "jnp,s   exit\n\t"
            "pushfq\t# saw NaN, set CF\n\t"
            "andq    [rsp], #0xffffff2b\n\t"
            "popfq\n"
    "exit:" %}
  ins_encode %{
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;
    __ ucomiss(lhs, rhs);
    emit_cmpfp_fixup(masm);
  %}
  ins_pipe(pipe_slow);
%}
13835 
// Float register/register compare producing rFlagsRegUCF: a bare ucomiss
// with no flag fixup afterwards.
instruct cmpF_cc_regCF(rFlagsRegUCF cr, regF src1, regF src2)
%{
  match(Set cr (CmpF src1 src2));

  ins_cost(100);
  format %{ "ucomiss $src1, $src2" %}
  ins_encode %{
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;
    __ ucomiss(lhs, rhs);
  %}
  ins_pipe(pipe_slow);
%}
13846 
// Float register/register compare producing rFlagsRegUCFE, encoded with
// evucomxss.
instruct cmpF_cc_regCFE(rFlagsRegUCFE cr, regF src1, regF src2)
%{
  match(Set cr (CmpF src1 src2));

  ins_cost(100);
  format %{ "evucomxss $src1, $src2" %}
  ins_encode %{
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;
    __ evucomxss(lhs, rhs);
  %}
  ins_pipe(pipe_slow);
%}
13857 
// Float compare with the right-hand side loaded from memory (LoadF folded
// into the ucomiss memory operand); result in rFlagsRegUCF.
instruct cmpF_cc_memCF(rFlagsRegUCF cr, regF src1, memory src2)
%{
  match(Set cr (CmpF src1 (LoadF src2)));

  ins_cost(100);
  format %{ "ucomiss $src1, $src2" %}
  ins_encode %{
    XMMRegister lhs = $src1$$XMMRegister;
    __ ucomiss(lhs, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
13868 
// Float compare with the right-hand side loaded from memory (LoadF folded
// into the evucomxss memory operand); result in rFlagsRegUCFE.
instruct cmpF_cc_memCFE(rFlagsRegUCFE cr, regF src1, memory src2)
%{
  match(Set cr (CmpF src1 (LoadF src2)));

  ins_cost(100);
  format %{ "evucomxss $src1, $src2" %}
  ins_encode %{
    XMMRegister lhs = $src1$$XMMRegister;
    __ evucomxss(lhs, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
13879 
// Float compare against an immediate; the constant is read through
// $constantaddress. Result in rFlagsRegUCF.
instruct cmpF_cc_immCF(rFlagsRegUCF cr, regF src, immF con)
%{
  match(Set cr (CmpF src con));

  ins_cost(100);
  format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    XMMRegister lhs = $src$$XMMRegister;
    __ ucomiss(lhs, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
13890 
// Float compare against an immediate via evucomxss; the constant is read
// through $constantaddress. Result in rFlagsRegUCFE.
instruct cmpF_cc_immCFE(rFlagsRegUCFE cr, regF src, immF con)
%{
  match(Set cr (CmpF src con));

  ins_cost(100);
  format %{ "evucomxss $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    XMMRegister lhs = $src$$XMMRegister;
    __ evucomxss(lhs, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
13901 
// Really expensive, avoid
// Double compare into the unsigned flags register: ucomisd followed by the
// shared emit_cmpfp_fixup sequence (per the format, a flags rewrite via
// pushfq/popfq that is taken only on the parity/NaN path).
instruct cmpD_cc_reg(rFlagsRegU cr, regD src1, regD src2) %{
  match(Set cr (CmpD src1 src2));

  ins_cost(500);
  format %{ "ucomisd $src1, $src2\n\t"
            "jnp,s   exit\n\t"
            "pushfq\t# saw NaN, set CF\n\t"
            "andq    [rsp], #0xffffff2b\n\t"
            "popfq\n"
    "exit:" %}
  ins_encode %{
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;
    __ ucomisd(lhs, rhs);
    emit_cmpfp_fixup(masm);
  %}
  ins_pipe(pipe_slow);
%}
13920 
// Double register/register compare producing rFlagsRegUCF: a bare ucomisd
// with no flag fixup afterwards.
instruct cmpD_cc_regCF(rFlagsRegUCF cr, regD src1, regD src2) %{
  match(Set cr (CmpD src1 src2));

  ins_cost(100);
  // Listing text mirrors the float variant; it shows only the emitted
  // instruction and its operands.
  format %{ "ucomisd $src1, $src2" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
13931 
// Double register/register compare producing rFlagsRegUCFE, encoded with
// evucomxsd.
instruct cmpD_cc_regCFE(rFlagsRegUCFE cr, regD src1, regD src2) %{
  match(Set cr (CmpD src1 src2));

  ins_cost(100);
  // Listing text mirrors the float variant; it shows only the emitted
  // instruction and its operands.
  format %{ "evucomxsd $src1, $src2" %}
  ins_encode %{
    __ evucomxsd($src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
13942 
// Double compare with the right-hand side loaded from memory (LoadD folded
// into the ucomisd memory operand); result in rFlagsRegUCF.
instruct cmpD_cc_memCF(rFlagsRegUCF cr, regD src1, memory src2)
%{
  match(Set cr (CmpD src1 (LoadD src2)));

  ins_cost(100);
  format %{ "ucomisd $src1, $src2" %}
  ins_encode %{
    XMMRegister lhs = $src1$$XMMRegister;
    __ ucomisd(lhs, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
13953 
// Double compare with the right-hand side loaded from memory (LoadD folded
// into the evucomxsd memory operand); result in rFlagsRegUCFE.
instruct cmpD_cc_memCFE(rFlagsRegUCFE cr, regD src1, memory src2)
%{
  match(Set cr (CmpD src1 (LoadD src2)));

  ins_cost(100);
  format %{ "evucomxsd $src1, $src2" %}
  ins_encode %{
    XMMRegister lhs = $src1$$XMMRegister;
    __ evucomxsd(lhs, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
13964 
// Double compare against an immediate; the constant is read through
// $constantaddress. Result in rFlagsRegUCF.
instruct cmpD_cc_immCF(rFlagsRegUCF cr, regD src, immD con)
%{
  match(Set cr (CmpD src con));
  ins_cost(100);
  format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    XMMRegister lhs = $src$$XMMRegister;
    __ ucomisd(lhs, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
13974 
// Double compare against an immediate via evucomxsd; the constant is read
// through $constantaddress. Result in rFlagsRegUCFE.
instruct cmpD_cc_immCFE(rFlagsRegUCFE cr, regD src, immD con)
%{
  match(Set cr (CmpD src con));

  ins_cost(100);
  format %{ "evucomxsd $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    XMMRegister lhs = $src$$XMMRegister;
    __ evucomxsd(lhs, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
13985 
// Compare into -1,0,1
// Three-way float compare (CmpF3), register/register form: ucomiss sets the
// flags and the shared emit_cmpfp3 helper turns them into the int in $dst.
instruct cmpF_reg(rRegI dst, regF src1, regF src2, rFlagsReg cr) %{
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr);

  ins_cost(275);
  format %{ "ucomiss $src1, $src2\n\t"
            "movl    $dst, #-1\n\t"
            "jp,s    done\n\t"
            "jb,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n"
    "done:" %}
  ins_encode %{
    Register result = $dst$$Register;
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp3(masm, result);
  %}
  ins_pipe(pipe_slow);
%}
14006 
// Compare into -1,0,1
// Three-way float compare (CmpF3) with the right-hand side loaded from
// memory; emit_cmpfp3 converts the ucomiss flags into the int in $dst.
instruct cmpF_mem(rRegI dst, regF src1, memory src2, rFlagsReg cr) %{
  match(Set dst (CmpF3 src1 (LoadF src2)));
  effect(KILL cr);

  ins_cost(275);
  format %{ "ucomiss $src1, $src2\n\t"
            "movl    $dst, #-1\n\t"
            "jp,s    done\n\t"
            "jb,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n"
    "done:" %}
  ins_encode %{
    Register result = $dst$$Register;
    __ ucomiss($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp3(masm, result);
  %}
  ins_pipe(pipe_slow);
%}
14027 
// Compare into -1,0,1
// Three-way float compare (CmpF3) against an immediate read through
// $constantaddress; emit_cmpfp3 converts the flags into the int in $dst.
instruct cmpF_imm(rRegI dst, regF src, immF con, rFlagsReg cr)
%{
  match(Set dst (CmpF3 src con));
  effect(KILL cr);

  ins_cost(275);
  format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con\n\t"
            "movl    $dst, #-1\n\t"
            "jp,s    done\n\t"
            "jb,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n"
    "done:" %}
  ins_encode %{
    Register result = $dst$$Register;
    __ ucomiss($src$$XMMRegister, $constantaddress($con));
    emit_cmpfp3(masm, result);
  %}
  ins_pipe(pipe_slow);
%}
14047 
// Compare into -1,0,1
// Three-way double compare (CmpD3), register/register form: ucomisd sets the
// flags and the shared emit_cmpfp3 helper turns them into the int in $dst.
instruct cmpD_reg(rRegI dst, regD src1, regD src2, rFlagsReg cr) %{
  match(Set dst (CmpD3 src1 src2));
  effect(KILL cr);

  ins_cost(275);
  format %{ "ucomisd $src1, $src2\n\t"
            "movl    $dst, #-1\n\t"
            "jp,s    done\n\t"
            "jb,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n"
    "done:" %}
  ins_encode %{
    Register result = $dst$$Register;
    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp3(masm, result);
  %}
  ins_pipe(pipe_slow);
%}
14068 
// Compare into -1,0,1
// Three-way double compare (CmpD3) with the right-hand side loaded from
// memory; emit_cmpfp3 converts the ucomisd flags into the int in $dst.
instruct cmpD_mem(rRegI dst, regD src1, memory src2, rFlagsReg cr) %{
  match(Set dst (CmpD3 src1 (LoadD src2)));
  effect(KILL cr);

  ins_cost(275);
  format %{ "ucomisd $src1, $src2\n\t"
            "movl    $dst, #-1\n\t"
            "jp,s    done\n\t"
            "jb,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n"
    "done:" %}
  ins_encode %{
    Register result = $dst$$Register;
    __ ucomisd($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp3(masm, result);
  %}
  ins_pipe(pipe_slow);
%}
14089 
// Compare into -1,0,1
// Three-way double compare (CmpD3) against an immediate read through
// $constantaddress; emit_cmpfp3 converts the flags into the int in $dst.
instruct cmpD_imm(rRegI dst, regD src, immD con, rFlagsReg cr)
%{
  match(Set dst (CmpD3 src con));
  effect(KILL cr);

  ins_cost(275);
  format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con\n\t"
            "movl    $dst, #-1\n\t"
            "jp,s    done\n\t"
            "jb,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n"
    "done:" %}
  ins_encode %{
    Register result = $dst$$Register;
    __ ucomisd($src$$XMMRegister, $constantaddress($con));
    emit_cmpfp3(masm, result);
  %}
  ins_pipe(pipe_slow);
%}
14109 
14110 //----------Arithmetic Conversion Instructions---------------------------------
14111 
// float -> double, register to register (cvtss2sd).
instruct convF2D_reg_reg(regD dst, regF src) %{
  match(Set dst (ConvF2D src));

  format %{ "cvtss2sd $dst, $src" %}
  ins_encode %{
    XMMRegister to   = $dst$$XMMRegister;
    XMMRegister from = $src$$XMMRegister;
    __ cvtss2sd(to, from);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14122 
// float -> double with the source loaded from memory (cvtss2sd with a memory
// operand). Selected only when UseAVX == 0.
instruct convF2D_reg_mem(regD dst, memory src) %{
  predicate(UseAVX == 0);
  match(Set dst (ConvF2D (LoadF src)));

  format %{ "cvtss2sd $dst, $src" %}
  ins_encode %{
    XMMRegister to = $dst$$XMMRegister;
    __ cvtss2sd(to, $src$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14134 
// double -> float, register to register (cvtsd2ss).
instruct convD2F_reg_reg(regF dst, regD src) %{
  match(Set dst (ConvD2F src));

  format %{ "cvtsd2ss $dst, $src" %}
  ins_encode %{
    XMMRegister to   = $dst$$XMMRegister;
    XMMRegister from = $src$$XMMRegister;
    __ cvtsd2ss(to, from);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14145 
// double -> float with the source loaded from memory (cvtsd2ss with a memory
// operand). Selected only when UseAVX == 0.
instruct convD2F_reg_mem(regF dst, memory src) %{
  predicate(UseAVX == 0);
  match(Set dst (ConvD2F (LoadD src)));

  format %{ "cvtsd2ss $dst, $src" %}
  ins_encode %{
    XMMRegister to = $dst$$XMMRegister;
    __ cvtsd2ss(to, $src$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14157 
// XXX do mem variants
// float -> int for targets without AVX10.2: delegates to the macro-assembler
// convertF2I helper (T_INT from T_FLOAT). Flags are clobbered.
instruct convF2I_reg_reg(rRegI dst, regF src, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2());
  match(Set dst (ConvF2I src));
  effect(KILL cr);
  format %{ "convert_f2i $dst, $src" %}
  ins_encode %{
    Register    to   = $dst$$Register;
    XMMRegister from = $src$$XMMRegister;
    __ convertF2I(T_INT, T_FLOAT, to, from);
  %}
  ins_pipe(pipe_slow);
%}
14170 
// float -> int when AVX10.2 is supported: a single evcvttss2sisl, with no
// flags effect declared.
instruct convF2I_reg_reg_avx10_2(rRegI dst, regF src) %{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (ConvF2I src));
  format %{ "evcvttss2sisl $dst, $src" %}
  ins_encode %{
    Register    to   = $dst$$Register;
    XMMRegister from = $src$$XMMRegister;
    __ evcvttss2sisl(to, from);
  %}
  ins_pipe(pipe_slow);
%}
14181 
// float -> int when AVX10.2 is supported, with the float loaded from memory
// (LoadF folded into evcvttss2sisl).
instruct convF2I_reg_mem_avx10_2(rRegI dst, memory src) %{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (ConvF2I (LoadF src)));
  format %{ "evcvttss2sisl $dst, $src" %}
  ins_encode %{
    Register to = $dst$$Register;
    __ evcvttss2sisl(to, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
14192 
// float -> long for targets without AVX10.2: delegates to the macro-assembler
// convertF2I helper (T_LONG from T_FLOAT). Flags are clobbered.
instruct convF2L_reg_reg(rRegL dst, regF src, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2());
  match(Set dst (ConvF2L src));
  effect(KILL cr);
  format %{ "convert_f2l $dst, $src"%}
  ins_encode %{
    Register    to   = $dst$$Register;
    XMMRegister from = $src$$XMMRegister;
    __ convertF2I(T_LONG, T_FLOAT, to, from);
  %}
  ins_pipe(pipe_slow);
%}
14204 
// float -> long when AVX10.2 is supported: a single evcvttss2sisq.
instruct convF2L_reg_reg_avx10_2(rRegL dst, regF src) %{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (ConvF2L src));
  format %{ "evcvttss2sisq $dst, $src" %}
  ins_encode %{
    Register    to   = $dst$$Register;
    XMMRegister from = $src$$XMMRegister;
    __ evcvttss2sisq(to, from);
  %}
  ins_pipe(pipe_slow);
%}
14215 
// float -> long when AVX10.2 is supported, with the float loaded from memory
// (LoadF folded into evcvttss2sisq).
instruct convF2L_reg_mem_avx10_2(rRegL dst, memory src) %{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (ConvF2L (LoadF src)));
  format %{ "evcvttss2sisq $dst, $src" %}
  ins_encode %{
    Register to = $dst$$Register;
    __ evcvttss2sisq(to, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
14226 
// double -> int for targets without AVX10.2: delegates to the macro-assembler
// convertF2I helper (T_INT from T_DOUBLE). Flags are clobbered.
instruct convD2I_reg_reg(rRegI dst, regD src, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2());
  match(Set dst (ConvD2I src));
  effect(KILL cr);
  format %{ "convert_d2i $dst, $src"%}
  ins_encode %{
    Register    to   = $dst$$Register;
    XMMRegister from = $src$$XMMRegister;
    __ convertF2I(T_INT, T_DOUBLE, to, from);
  %}
  ins_pipe(pipe_slow);
%}
14238 
// double -> int when AVX10.2 is supported: a single evcvttsd2sisl.
instruct convD2I_reg_reg_avx10_2(rRegI dst, regD src) %{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (ConvD2I src));
  format %{ "evcvttsd2sisl $dst, $src" %}
  ins_encode %{
    Register    to   = $dst$$Register;
    XMMRegister from = $src$$XMMRegister;
    __ evcvttsd2sisl(to, from);
  %}
  ins_pipe(pipe_slow);
%}
14249 
// double -> int when AVX10.2 is supported, with the double loaded from
// memory (LoadD folded into evcvttsd2sisl).
instruct convD2I_reg_mem_avx10_2(rRegI dst, memory src) %{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (ConvD2I (LoadD src)));
  format %{ "evcvttsd2sisl $dst, $src" %}
  ins_encode %{
    Register to = $dst$$Register;
    __ evcvttsd2sisl(to, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
14260 
// double -> long for targets without AVX10.2: delegates to the
// macro-assembler convertF2I helper (T_LONG from T_DOUBLE). Flags are
// clobbered.
instruct convD2L_reg_reg(rRegL dst, regD src, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2());
  match(Set dst (ConvD2L src));
  effect(KILL cr);
  format %{ "convert_d2l $dst, $src"%}
  ins_encode %{
    Register    to   = $dst$$Register;
    XMMRegister from = $src$$XMMRegister;
    __ convertF2I(T_LONG, T_DOUBLE, to, from);
  %}
  ins_pipe(pipe_slow);
%}
14272 
// double -> long when AVX10.2 is supported: a single evcvttsd2sisq.
instruct convD2L_reg_reg_avx10_2(rRegL dst, regD src) %{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (ConvD2L src));
  format %{ "evcvttsd2sisq $dst, $src" %}
  ins_encode %{
    Register    to   = $dst$$Register;
    XMMRegister from = $src$$XMMRegister;
    __ evcvttsd2sisq(to, from);
  %}
  ins_pipe(pipe_slow);
%}
14283 
// double -> long when AVX10.2 is supported, with the double loaded from
// memory (LoadD folded into evcvttsd2sisq).
instruct convD2L_reg_mem_avx10_2(rRegL dst, memory src) %{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (ConvD2L (LoadD src)));
  format %{ "evcvttsd2sisq $dst, $src" %}
  ins_encode %{
    Register to = $dst$$Register;
    __ evcvttsd2sisq(to, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
14294 
// RoundD: delegates to the macro-assembler round_double helper. $dst, $rtmp
// and $rcx are all declared TEMP, and the flags are killed.
instruct round_double_reg(rRegL dst, regD src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr) %{
  match(Set dst (RoundD src));
  effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
  format %{ "round_double $dst,$src \t! using $rtmp and $rcx as TEMP"%}
  ins_encode %{
    Register    result   = $dst$$Register;
    XMMRegister value    = $src$$XMMRegister;
    Register    scratch  = $rtmp$$Register;
    Register    rcx_temp = $rcx$$Register;
    __ round_double(result, value, scratch, rcx_temp);
  %}
  ins_pipe(pipe_slow);
%}
14305 
// RoundF: delegates to the macro-assembler round_float helper. $dst, $rtmp
// and $rcx are all declared TEMP, and the flags are killed.
instruct round_float_reg(rRegI dst, regF src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr) %{
  match(Set dst (RoundF src));
  effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
  format %{ "round_float $dst,$src" %}
  ins_encode %{
    Register    result   = $dst$$Register;
    XMMRegister value    = $src$$XMMRegister;
    Register    scratch  = $rtmp$$Register;
    Register    rcx_temp = $rcx$$Register;
    __ round_float(result, value, scratch, rcx_temp);
  %}
  ins_pipe(pipe_slow);
%}
14316 
// int -> float from a general register (cvtsi2ssl); used when UseXmmI2F is
// off. With AVX enabled the destination is zeroed first.
instruct convI2F_reg_reg(vlRegF dst, rRegI src) %{
  predicate(!UseXmmI2F);
  match(Set dst (ConvI2F src));

  format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
  ins_encode %{
    XMMRegister to = $dst$$XMMRegister;
    if (UseAVX > 0) {
      // NOTE(review): presumably breaks the dependency on the old contents
      // of dst before the partial-register convert -- confirm.
      __ pxor(to, to);
    }
    __ cvtsi2ssl(to, $src$$Register);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14331 
// int -> float with the int loaded from memory (cvtsi2ssl with a memory
// operand). Selected only when UseAVX == 0.
instruct convI2F_reg_mem(regF dst, memory src) %{
  predicate(UseAVX == 0);
  match(Set dst (ConvI2F (LoadI src)));

  format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
  ins_encode %{
    XMMRegister to = $dst$$XMMRegister;
    __ cvtsi2ssl(to, $src$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14343 
// int -> double from a general register (cvtsi2sdl); used when UseXmmI2D is
// off. With AVX enabled the destination is zeroed first.
instruct convI2D_reg_reg(vlRegD dst, rRegI src) %{
  predicate(!UseXmmI2D);
  match(Set dst (ConvI2D src));

  format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
  ins_encode %{
    XMMRegister to = $dst$$XMMRegister;
    if (UseAVX > 0) {
      // NOTE(review): presumably breaks the dependency on the old contents
      // of dst before the partial-register convert -- confirm.
      __ pxor(to, to);
    }
    __ cvtsi2sdl(to, $src$$Register);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14358 
// int -> double with the int loaded from memory (cvtsi2sdl with a memory
// operand). Selected only when UseAVX == 0.
instruct convI2D_reg_mem(regD dst, memory src) %{
  predicate(UseAVX == 0);
  match(Set dst (ConvI2D (LoadI src)));

  format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
  ins_encode %{
    XMMRegister to = $dst$$XMMRegister;
    __ cvtsi2sdl(to, $src$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14370 
// int -> float through the XMM unit, used when UseXmmI2F is on: move the int
// into $dst with movdl, then convert in place with cvtdq2ps.
instruct convXI2F_reg(regF dst, rRegI src) %{
  predicate(UseXmmI2F);
  match(Set dst (ConvI2F src));

  format %{ "movdl $dst, $src\n\t"
            "cvtdq2psl $dst, $dst\t# i2f" %}
  ins_encode %{
    XMMRegister to = $dst$$XMMRegister;
    __ movdl(to, $src$$Register);
    __ cvtdq2ps(to, to);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14384 
// int -> double through the XMM unit, used when UseXmmI2D is on: move the
// int into $dst with movdl, then convert in place with cvtdq2pd.
instruct convXI2D_reg(regD dst, rRegI src) %{
  predicate(UseXmmI2D);
  match(Set dst (ConvI2D src));

  format %{ "movdl $dst, $src\n\t"
            "cvtdq2pdl $dst, $dst\t# i2d" %}
  ins_encode %{
    XMMRegister to = $dst$$XMMRegister;
    __ movdl(to, $src$$Register);
    __ cvtdq2pd(to, to);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14398 
// long -> float from a general register (cvtsi2ssq). With AVX enabled the
// destination is zeroed first.
instruct convL2F_reg_reg(vlRegF dst, rRegL src) %{
  match(Set dst (ConvL2F src));

  format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
  ins_encode %{
    XMMRegister to = $dst$$XMMRegister;
    if (UseAVX > 0) {
      // NOTE(review): presumably breaks the dependency on the old contents
      // of dst before the partial-register convert -- confirm.
      __ pxor(to, to);
    }
    __ cvtsi2ssq(to, $src$$Register);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14412 
// long -> float with the long loaded from memory (cvtsi2ssq with a memory
// operand). Selected only when UseAVX == 0.
instruct convL2F_reg_mem(regF dst, memory src) %{
  predicate(UseAVX == 0);
  match(Set dst (ConvL2F (LoadL src)));

  format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
  ins_encode %{
    XMMRegister to = $dst$$XMMRegister;
    __ cvtsi2ssq(to, $src$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14424 
// long -> double from a general register (cvtsi2sdq). With AVX enabled the
// destination is zeroed first.
instruct convL2D_reg_reg(vlRegD dst, rRegL src) %{
  match(Set dst (ConvL2D src));

  format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
  ins_encode %{
    XMMRegister to = $dst$$XMMRegister;
    if (UseAVX > 0) {
      // NOTE(review): presumably breaks the dependency on the old contents
      // of dst before the partial-register convert -- confirm.
      __ pxor(to, to);
    }
    __ cvtsi2sdq(to, $src$$Register);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14438 
// long -> double with the long loaded from memory (cvtsi2sdq with a memory
// operand). Selected only when UseAVX == 0.
instruct convL2D_reg_mem(regD dst, memory src) %{
  predicate(UseAVX == 0);
  match(Set dst (ConvL2D (LoadL src)));

  format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
  ins_encode %{
    XMMRegister to = $dst$$XMMRegister;
    __ cvtsi2sdq(to, $src$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14450 
// int -> long, sign-extending register move (movslq).
instruct convI2L_reg_reg(rRegL dst, rRegI src) %{
  match(Set dst (ConvI2L src));

  ins_cost(125);
  format %{ "movslq  $dst, $src\t# i2l" %}
  ins_encode %{
    Register wide   = $dst$$Register;
    Register narrow = $src$$Register;
    __ movslq(wide, narrow);
  %}
  ins_pipe(ialu_reg_reg);
%}
14462 
// Zero-extend convert int to long
// Matches (ConvI2L src) & 0xFFFFFFFF (immL_32bits mask) and implements it as
// a 32-bit register move. The move is omitted when the allocator assigned
// $dst and $src to the same register.
instruct convI2L_reg_reg_zex(rRegL dst, rRegI src, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L src) mask));

  // Single-instruction listing: no trailing line break after the text.
  format %{ "movl    $dst, $src\t# i2l zero-extend" %}
  ins_encode %{
    // NOTE(review): skipping the move relies on the upper 32 bits of $src
    // already being zero when dst == src -- confirm against the int
    // register invariants.
    if ($dst$$reg != $src$$reg) {
      __ movl($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg);
%}
14476 
// Zero-extend convert int to long
// Matches (ConvI2L (LoadI src)) & 0xFFFFFFFF (immL_32bits mask) and
// implements it as a single 32-bit load into $dst.
instruct convI2L_reg_mem_zex(rRegL dst, memory src, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L (LoadI src)) mask));

  // Single-instruction listing: no trailing line break after the text.
  format %{ "movl    $dst, $src\t# i2l zero-extend" %}
  ins_encode %{
    __ movl($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
14488 
// long & 0xFFFFFFFF (immL_32bits mask), implemented as a 32-bit register
// move from $src into $dst.
instruct zerox_long_reg_reg(rRegL dst, rRegL src, immL_32bits mask) %{
  match(Set dst (AndL src mask));

  format %{ "movl    $dst, $src\t# zero-extend long" %}
  ins_encode %{
    Register to   = $dst$$Register;
    Register from = $src$$Register;
    __ movl(to, from);
  %}
  ins_pipe(ialu_reg_reg);
%}
14499 
// long -> int: a 32-bit register move of $src into $dst.
instruct convL2I_reg_reg(rRegI dst, rRegL src) %{
  match(Set dst (ConvL2I src));

  format %{ "movl    $dst, $src\t# l2i" %}
  ins_encode %{
    Register to   = $dst$$Register;
    Register from = $src$$Register;
    __ movl(to, from);
  %}
  ins_pipe(ialu_reg_reg);
%}
14510 
14511 
// MoveF2I, stack slot -> int register: the raw float bits are loaded from
// the rsp-relative slot with a 32-bit movl.
instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src)
%{
  match(Set dst (MoveF2I src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "movl    $dst, $src\t# MoveF2I_stack_reg" %}
  ins_encode %{
    const Address slot(rsp, $src$$disp);
    __ movl($dst$$Register, slot);
  %}
  ins_pipe(ialu_reg_mem);
%}
14523 
// MoveI2F, stack slot -> float register: the int bits are loaded from the
// rsp-relative slot with movflt.
instruct MoveI2F_stack_reg(regF dst, stackSlotI src)
%{
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "movss   $dst, $src\t# MoveI2F_stack_reg" %}
  ins_encode %{
    const Address slot(rsp, $src$$disp);
    __ movflt($dst$$XMMRegister, slot);
  %}
  ins_pipe(pipe_slow);
%}
14535 
// MoveD2L, stack slot -> long register: the raw double bits are loaded from
// the rsp-relative slot with a 64-bit movq.
instruct MoveD2L_stack_reg(rRegL dst, stackSlotD src)
%{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "movq    $dst, $src\t# MoveD2L_stack_reg" %}
  ins_encode %{
    const Address slot(rsp, $src$$disp);
    __ movq($dst$$Register, slot);
  %}
  ins_pipe(ialu_reg_mem);
%}
14547 
// MoveL2D, stack slot -> double register, variant selected when
// UseXmmLoadAndClearUpper is off. The load goes through movdbl; the format
// lists it as movlpd.
instruct MoveL2D_stack_reg_partial(regD dst, stackSlotL src)
%{
  predicate(!UseXmmLoadAndClearUpper);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "movlpd  $dst, $src\t# MoveL2D_stack_reg" %}
  ins_encode %{
    const Address slot(rsp, $src$$disp);
    __ movdbl($dst$$XMMRegister, slot);
  %}
  ins_pipe(pipe_slow);
%}
14560 
// MoveL2D, stack slot -> double register, variant selected when
// UseXmmLoadAndClearUpper is on. The load goes through movdbl; the format
// lists it as movsd.
instruct MoveL2D_stack_reg(regD dst, stackSlotL src)
%{
  predicate(UseXmmLoadAndClearUpper);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "movsd   $dst, $src\t# MoveL2D_stack_reg" %}
  ins_encode %{
    const Address slot(rsp, $src$$disp);
    __ movdbl($dst$$XMMRegister, slot);
  %}
  ins_pipe(pipe_slow);
%}
14573 
14574 
// MoveF2I, float register -> int stack slot: the float is stored to the
// rsp-relative slot with movflt.
instruct MoveF2I_reg_stack(stackSlotI dst, regF src)
%{
  match(Set dst (MoveF2I src));
  effect(DEF dst, USE src);

  ins_cost(95); // XXX
  format %{ "movss   $dst, $src\t# MoveF2I_reg_stack" %}
  ins_encode %{
    const Address slot(rsp, $dst$$disp);
    __ movflt(slot, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
14586 
// MoveI2F, int register -> float stack slot: the int is stored to the
// rsp-relative slot with a 32-bit movl.
instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src)
%{
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);

  ins_cost(100);
  format %{ "movl    $dst, $src\t# MoveI2F_reg_stack" %}
  ins_encode %{
    const Address slot(rsp, $dst$$disp);
    __ movl(slot, $src$$Register);
  %}
  ins_pipe( ialu_mem_reg );
%}
14598 
// MoveD2L, double register -> long stack slot: the double is stored to the
// rsp-relative slot with movdbl.
instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(95); // XXX
  // The listing tag names this rule, so it cannot be confused with the
  // opposite-direction MoveL2D_reg_stack rule.
  format %{ "movsd   $dst, $src\t# MoveD2L_reg_stack" %}
  ins_encode %{
    __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
14610 
// MoveL2D, long register -> double stack slot: the long is stored to the
// rsp-relative slot with a 64-bit movq.
instruct MoveL2D_reg_stack(stackSlotD dst, rRegL src)
%{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(100);
  format %{ "movq    $dst, $src\t# MoveL2D_reg_stack" %}
  ins_encode %{
    const Address slot(rsp, $dst$$disp);
    __ movq(slot, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
14622 
// MoveF2I, XMM register -> general register (movdl).
instruct MoveF2I_reg_reg(rRegI dst, regF src)
%{
  match(Set dst (MoveF2I src));
  effect(DEF dst, USE src);
  ins_cost(85);
  format %{ "movd    $dst,$src\t# MoveF2I" %}
  ins_encode %{
    Register    gpr = $dst$$Register;
    XMMRegister xmm = $src$$XMMRegister;
    __ movdl(gpr, xmm);
  %}
  ins_pipe( pipe_slow );
%}
14633 
// MoveD2L, XMM register -> general register (movdq).
instruct MoveD2L_reg_reg(rRegL dst, regD src)
%{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);
  ins_cost(85);
  format %{ "movd    $dst,$src\t# MoveD2L" %}
  ins_encode %{
    Register    gpr = $dst$$Register;
    XMMRegister xmm = $src$$XMMRegister;
    __ movdq(gpr, xmm);
  %}
  ins_pipe( pipe_slow );
%}
14644 
// MoveI2F, general register -> XMM register (movdl).
instruct MoveI2F_reg_reg(regF dst, rRegI src)
%{
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);
  ins_cost(100);
  format %{ "movd    $dst,$src\t# MoveI2F" %}
  ins_encode %{
    XMMRegister xmm = $dst$$XMMRegister;
    Register    gpr = $src$$Register;
    __ movdl(xmm, gpr);
  %}
  ins_pipe( pipe_slow );
%}
14655 
// MoveL2D, general register -> XMM register (movdq).
instruct MoveL2D_reg_reg(regD dst, rRegL src)
%{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);
  ins_cost(100);
  format %{ "movd    $dst,$src\t# MoveL2D" %}
  ins_encode %{
    XMMRegister xmm = $dst$$XMMRegister;
    Register    gpr = $src$$Register;
    __ movdq(xmm, gpr);
  %}
  ins_pipe( pipe_slow );
%}
14666 
// Fast clearing of an array
// Small non-constant length ClearArray for non-AVX512 targets.
// Operands are pinned: count in rcx, base in rdi, zero in rax. $cnt and
// $base are consumed (USE_KILL); $tmp, $zero and the flags are clobbered.
// The format template only documents the sequence; the code itself comes
// from the macro-assembler clear_mem helper.
instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
                  Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);

  format %{ $$template
    $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
    $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
    $$emit$$"jg      LARGE\n\t"
    $$emit$$"dec     rcx\n\t"
    $$emit$$"js      DONE\t# Zero length\n\t"
    $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec     rcx\n\t"
    $$emit$$"jge     LOOP\n\t"
    $$emit$$"jmp     DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
       $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
       $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
    } else if (UseXMMForObjInit) {
       $$emit$$"mov     rdi,rax\n\t"
       $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
       $$emit$$"jmpq    L_zero_64_bytes\n\t"
       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
       $$emit$$"vmovdqu ymm0,(rax)\n\t"
       $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
       $$emit$$"add     0x40,rax\n\t"
       $$emit$$"# L_zero_64_bytes:\n\t"
       $$emit$$"sub     0x8,rcx\n\t"
       $$emit$$"jge     L_loop\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jl      L_tail\n\t"
       $$emit$$"vmovdqu ymm0,(rax)\n\t"
       $$emit$$"add     0x20,rax\n\t"
       $$emit$$"sub     0x4,rcx\n\t"
       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jle     L_end\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
       $$emit$$"vmovq   xmm0,(rax)\n\t"
       $$emit$$"add     0x8,rax\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"jge     L_sloop\n\t"
       $$emit$$"# L_end:\n\t"
    } else {
       $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    Register    base_reg = $base$$Register;
    Register    cnt_reg  = $cnt$$Register;
    Register    zero_reg = $zero$$Register;
    XMMRegister xmm_tmp  = $tmp$$XMMRegister;
    // 'false' is the helper's flag argument for this non-large variant; no
    // mask register is supplied (knoreg).
    __ clear_mem(base_reg, cnt_reg, zero_reg,
                 xmm_tmp, false, knoreg);
  %}
  ins_pipe(pipe_slow);
%}
14727 
// Small non-constant length ClearArray for AVX512 targets.
// Same operand pinning as the non-AVX512 rule (count in rcx, base in rdi,
// zero in rax), plus a legacy-class XMM temp and a mask-register temp that
// is handed to the macro-assembler clear_mem helper.
instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
                       Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
  match(Set dummy (ClearArray cnt base));
  ins_cost(125);
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);

  format %{ $$template
    $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
    $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
    $$emit$$"jg      LARGE\n\t"
    $$emit$$"dec     rcx\n\t"
    $$emit$$"js      DONE\t# Zero length\n\t"
    $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec     rcx\n\t"
    $$emit$$"jge     LOOP\n\t"
    $$emit$$"jmp     DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
       $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
       $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
    } else if (UseXMMForObjInit) {
       $$emit$$"mov     rdi,rax\n\t"
       $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
       $$emit$$"jmpq    L_zero_64_bytes\n\t"
       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
       $$emit$$"vmovdqu ymm0,(rax)\n\t"
       $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
       $$emit$$"add     0x40,rax\n\t"
       $$emit$$"# L_zero_64_bytes:\n\t"
       $$emit$$"sub     0x8,rcx\n\t"
       $$emit$$"jge     L_loop\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jl      L_tail\n\t"
       $$emit$$"vmovdqu ymm0,(rax)\n\t"
       $$emit$$"add     0x20,rax\n\t"
       $$emit$$"sub     0x4,rcx\n\t"
       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jle     L_end\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
       $$emit$$"vmovq   xmm0,(rax)\n\t"
       $$emit$$"add     0x8,rax\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"jge     L_sloop\n\t"
       $$emit$$"# L_end:\n\t"
    } else {
       $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    Register    base_reg = $base$$Register;
    Register    cnt_reg  = $cnt$$Register;
    Register    zero_reg = $zero$$Register;
    XMMRegister xmm_tmp  = $tmp$$XMMRegister;
    // 'false' is the helper's flag argument for this non-large variant.
    __ clear_mem(base_reg, cnt_reg, zero_reg,
                 xmm_tmp, false, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
14788 
// ClearArray with a run-time length, "large" variant, selected when
// UseAVX <= 2 and ClearArrayNode::is_large() holds.  cnt (rcx_RegL) and base
// (rdi_RegP) are consumed (USE_KILL); zero (rax_RegI) and the flags are
// killed; one XMM temporary is reserved.  The encoding calls clear_mem()
// with its boolean argument set to true and knoreg as the opmask register.
instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
                        Universe dummy, rFlagsReg cr) %{
  predicate((UseAVX <= 2) && ((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base,
         TEMP tmp,
         KILL zero, KILL cr);

  // The listing below is selected by UseFastStosb / UseXMMForObjInit.
  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
      $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
      $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq    L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
      $$emit$$"add     0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub     0x8,rcx\n\t"
      $$emit$$"jge     L_loop\n\t"
      $$emit$$"add     0x4,rcx\n\t"
      $$emit$$"jl      L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add     0x20,rax\n\t"
      $$emit$$"sub     0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add     0x4,rcx\n\t"
      $$emit$$"jle     L_end\n\t"
      $$emit$$"dec     rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq   xmm0,(rax)\n\t"
      $$emit$$"add     0x8,rax\n\t"
      $$emit$$"dec     rcx\n\t"
      $$emit$$"jge     L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
      $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    Register    base_reg = $base$$Register;
    Register    cnt_reg  = $cnt$$Register;
    Register    zero_reg = $zero$$Register;
    XMMRegister xmm_tmp  = $tmp$$XMMRegister;
    // No opmask operand exists on this variant, hence knoreg.
    __ clear_mem(base_reg, cnt_reg, zero_reg, xmm_tmp, true, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
14839 
// ClearArray with a run-time length, "large" variant, selected when
// UseAVX > 2 and ClearArrayNode::is_large() holds.  Same operand shape as the
// UseAVX <= 2 form, except that the vector temporary is a legRegD and an
// additional kReg temporary is reserved and handed to clear_mem().
instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp,
                             rax_RegI zero, Universe dummy, rFlagsReg cr) %{
  predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base,
         TEMP tmp, TEMP ktmp,
         KILL zero, KILL cr);

  // The listing below is selected by UseFastStosb / UseXMMForObjInit.
  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
      $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
      $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq    L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
      $$emit$$"add     0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub     0x8,rcx\n\t"
      $$emit$$"jge     L_loop\n\t"
      $$emit$$"add     0x4,rcx\n\t"
      $$emit$$"jl      L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add     0x20,rax\n\t"
      $$emit$$"sub     0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add     0x4,rcx\n\t"
      $$emit$$"jle     L_end\n\t"
      $$emit$$"dec     rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq   xmm0,(rax)\n\t"
      $$emit$$"add     0x8,rax\n\t"
      $$emit$$"dec     rcx\n\t"
      $$emit$$"jge     L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
      $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    Register    base_reg = $base$$Register;
    Register    cnt_reg  = $cnt$$Register;
    Register    zero_reg = $zero$$Register;
    XMMRegister xmm_tmp  = $tmp$$XMMRegister;
    KRegister   kmask    = $ktmp$$KRegister;
    __ clear_mem(base_reg, cnt_reg, zero_reg, xmm_tmp, true, kmask);
  %}
  ins_pipe( pipe_slow );
%}
14890 
// ClearArray with a compile-time constant length (immL cnt), non-large
// variant.  Selected only when MaxVectorSize >= 32 and
// VM_Version::supports_avx512vl() holds; carries ins_cost(100).  base is a
// plain rRegP input; tmp, zero and ktmp are temporaries and the flags are
// killed.
instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rRegI zero, kReg ktmp,
                     Universe dummy, rFlagsReg cr) %{
  predicate(!((ClearArrayNode*)n)->is_large() &&
            (MaxVectorSize >= 32) &&
            VM_Version::supports_avx512vl());
  match(Set dummy (ClearArray cnt base));
  ins_cost(100);
  effect(TEMP tmp, TEMP zero, TEMP ktmp,
         KILL cr);

  format %{ "clear_mem_imm $base , $cnt  \n\t" %}
  ins_encode %{
    Register    base_reg = $base$$Register;
    Register    zero_reg = $zero$$Register;
    XMMRegister xmm_tmp  = $tmp$$XMMRegister;
    KRegister   kmask    = $ktmp$$KRegister;
    // The constant count is passed straight through to the immediate-count
    // overload of clear_mem().
    __ clear_mem(base_reg, $cnt$$constant, zero_reg, xmm_tmp, kmask);
  %}
  ins_pipe( pipe_slow );
%}
14904 
// StrComp for encoding StrIntrinsicNode::LL, used when
// VM_Version::supports_avx512vlbw() is false.  Both string pointers and both
// counts sit in fixed registers and are clobbered (USE_KILL); the result is
// the rax_RegI operand.  knoreg is passed as the opmask register.
instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                         rax_RegI result, legRegD tmp1, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() &&
            ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1,
         USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
         KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    Register    lhs     = $str1$$Register;
    Register    rhs     = $str2$$Register;
    Register    lhs_len = $cnt1$$Register;
    Register    rhs_len = $cnt2$$Register;
    XMMRegister vtmp    = $tmp1$$XMMRegister;
    __ string_compare(lhs, rhs, lhs_len, rhs_len, $result$$Register,
                      vtmp, StrIntrinsicNode::LL, knoreg);
  %}
  ins_pipe(pipe_slow);
%}
14920 
// StrComp for encoding StrIntrinsicNode::LL, used when
// VM_Version::supports_avx512vlbw() is true.  Identical operand layout to the
// non-EVEX form plus a kReg temporary that is forwarded to string_compare().
instruct string_compareL_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                              rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() &&
            ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp,
         USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
         KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    Register    lhs     = $str1$$Register;
    Register    rhs     = $str2$$Register;
    Register    lhs_len = $cnt1$$Register;
    Register    rhs_len = $cnt2$$Register;
    XMMRegister vtmp    = $tmp1$$XMMRegister;
    KRegister   kmask   = $ktmp$$KRegister;
    __ string_compare(lhs, rhs, lhs_len, rhs_len, $result$$Register,
                      vtmp, StrIntrinsicNode::LL, kmask);
  %}
  ins_pipe(pipe_slow);
%}
14936 
// StrComp for encoding StrIntrinsicNode::UU, used when
// VM_Version::supports_avx512vlbw() is false.  Inputs are pinned to fixed
// registers and clobbered (USE_KILL); knoreg is passed as the opmask register.
instruct string_compareU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                         rax_RegI result, legRegD tmp1, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() &&
            ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1,
         USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
         KILL cr);

  format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    Register    lhs     = $str1$$Register;
    Register    rhs     = $str2$$Register;
    Register    lhs_len = $cnt1$$Register;
    Register    rhs_len = $cnt2$$Register;
    XMMRegister vtmp    = $tmp1$$XMMRegister;
    __ string_compare(lhs, rhs, lhs_len, rhs_len, $result$$Register,
                      vtmp, StrIntrinsicNode::UU, knoreg);
  %}
  ins_pipe(pipe_slow);
%}
14952 
// StrComp for encoding StrIntrinsicNode::UU, used when
// VM_Version::supports_avx512vlbw() is true.  Adds a kReg temporary that is
// forwarded to string_compare().
instruct string_compareU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                              rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() &&
            ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp,
         USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
         KILL cr);

  format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    Register    lhs     = $str1$$Register;
    Register    rhs     = $str2$$Register;
    Register    lhs_len = $cnt1$$Register;
    Register    rhs_len = $cnt2$$Register;
    XMMRegister vtmp    = $tmp1$$XMMRegister;
    KRegister   kmask   = $ktmp$$KRegister;
    __ string_compare(lhs, rhs, lhs_len, rhs_len, $result$$Register,
                      vtmp, StrIntrinsicNode::UU, kmask);
  %}
  ins_pipe(pipe_slow);
%}
14968 
// StrComp for encoding StrIntrinsicNode::LU, used when
// VM_Version::supports_avx512vlbw() is false.  Operands are passed to
// string_compare() in declaration order; knoreg is the opmask register.
instruct string_compareLU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                          rax_RegI result, legRegD tmp1, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() &&
            ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1,
         USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
         KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    Register    lhs     = $str1$$Register;
    Register    rhs     = $str2$$Register;
    Register    lhs_len = $cnt1$$Register;
    Register    rhs_len = $cnt2$$Register;
    XMMRegister vtmp    = $tmp1$$XMMRegister;
    __ string_compare(lhs, rhs, lhs_len, rhs_len, $result$$Register,
                      vtmp, StrIntrinsicNode::LU, knoreg);
  %}
  ins_pipe(pipe_slow);
%}
14984 
// StrComp for encoding StrIntrinsicNode::LU, used when
// VM_Version::supports_avx512vlbw() is true.  Adds a kReg temporary that is
// forwarded to string_compare().
instruct string_compareLU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                               rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() &&
            ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp,
         USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
         KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    Register    lhs     = $str1$$Register;
    Register    rhs     = $str2$$Register;
    Register    lhs_len = $cnt1$$Register;
    Register    rhs_len = $cnt2$$Register;
    XMMRegister vtmp    = $tmp1$$XMMRegister;
    KRegister   kmask   = $ktmp$$KRegister;
    __ string_compare(lhs, rhs, lhs_len, rhs_len, $result$$Register,
                      vtmp, StrIntrinsicNode::LU, kmask);
  %}
  ins_pipe(pipe_slow);
%}
15000 
// StrComp for encoding StrIntrinsicNode::UL, used when
// VM_Version::supports_avx512vlbw() is false.  Note the register assignment
// differs from the other StrComp forms (str1 in rsi, cnt1 in rdx, str2 in rdi,
// cnt2 in rcx) and that string_compare() receives the second string and its
// count first.
instruct string_compareUL(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
                          rax_RegI result, legRegD tmp1, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() &&
            ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1,
         USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
         KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    // Deliberately swapped: str2/cnt2 go in the first-string positions.
    Register    first     = $str2$$Register;
    Register    second    = $str1$$Register;
    Register    first_len = $cnt2$$Register;
    Register    secnd_len = $cnt1$$Register;
    XMMRegister vtmp      = $tmp1$$XMMRegister;
    __ string_compare(first, second, first_len, secnd_len, $result$$Register,
                      vtmp, StrIntrinsicNode::UL, knoreg);
  %}
  ins_pipe(pipe_slow);
%}
15016 
// StrComp for encoding StrIntrinsicNode::UL, used when
// VM_Version::supports_avx512vlbw() is true.  As in the non-EVEX UL form,
// string_compare() receives the second string and its count first; a kReg
// temporary is forwarded as the opmask register.
instruct string_compareUL_evex(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
                               rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() &&
            ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp,
         USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
         KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    // Deliberately swapped: str2/cnt2 go in the first-string positions.
    Register    first     = $str2$$Register;
    Register    second    = $str1$$Register;
    Register    first_len = $cnt2$$Register;
    Register    secnd_len = $cnt1$$Register;
    XMMRegister vtmp      = $tmp1$$XMMRegister;
    KRegister   kmask     = $ktmp$$KRegister;
    __ string_compare(first, second, first_len, secnd_len, $result$$Register,
                      vtmp, StrIntrinsicNode::UL, kmask);
  %}
  ins_pipe(pipe_slow);
%}
15032 
// StrIndexOf where the substring length is a compile-time constant
// (immI int_cnt2), encoding StrIntrinsicNode::LL, enabled by
// UseSSE42Intrinsics.  Lengths of 16 or more are routed to
// string_indexofC8(); shorter ones to string_indexof().
instruct string_indexof_conL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
                             rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp,
                             rFlagsReg cr) %{
  predicate(UseSSE42Intrinsics &&
            (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP tmp_vec,
         USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
         KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    const int needle_len = (int)$int_cnt2$$constant;
    if (needle_len < 16) {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        needle_len, $result$$Register,
                        $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
    } else {
      // Constant substrings with size >= 16 elements don't need to be
      // loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          needle_len, $result$$Register,
                          $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
    }
  %}
  ins_pipe(pipe_slow);
%}
15061 
// StrIndexOf where the substring length is a compile-time constant
// (immI int_cnt2), encoding StrIntrinsicNode::UU, enabled by
// UseSSE42Intrinsics.  Lengths of 8 or more are routed to
// string_indexofC8(); shorter ones to string_indexof().
instruct string_indexof_conU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
                             rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp,
                             rFlagsReg cr) %{
  predicate(UseSSE42Intrinsics &&
            (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP tmp_vec,
         USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
         KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    const int needle_len = (int)$int_cnt2$$constant;
    if (needle_len < 8) {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        needle_len, $result$$Register,
                        $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
    } else {
      // Constant substrings with size >= 8 elements don't need to be
      // loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          needle_len, $result$$Register,
                          $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
    }
  %}
  ins_pipe(pipe_slow);
%}
15090 
// StrIndexOf where the substring length is a compile-time constant
// (immI int_cnt2), encoding StrIntrinsicNode::UL, enabled by
// UseSSE42Intrinsics.  Lengths of 8 or more are routed to
// string_indexofC8(); shorter ones to string_indexof().
instruct string_indexof_conUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
                              rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp,
                              rFlagsReg cr) %{
  predicate(UseSSE42Intrinsics &&
            (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP tmp_vec,
         USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
         KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    const int needle_len = (int)$int_cnt2$$constant;
    if (needle_len < 8) {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        needle_len, $result$$Register,
                        $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
    } else {
      // Constant substrings with size >= 8 elements don't need to be
      // loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          needle_len, $result$$Register,
                          $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
    }
  %}
  ins_pipe(pipe_slow);
%}
15119 
// StrIndexOf with a run-time substring length (cnt2 in rax_RegI), encoding
// StrIntrinsicNode::LL, enabled by UseSSE42Intrinsics.  string_indexof() is
// called with -1 in the constant-length position.
instruct string_indexofL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
                         rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr) %{
  predicate(UseSSE42Intrinsics &&
            (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp_vec,
         USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
         KILL tmp, KILL cr);

  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
  ins_encode %{
    Register    haystack     = $str1$$Register;
    Register    needle       = $str2$$Register;
    Register    haystack_len = $cnt1$$Register;
    Register    needle_len   = $cnt2$$Register;
    XMMRegister vtmp         = $tmp_vec$$XMMRegister;
    __ string_indexof(haystack, needle, haystack_len, needle_len,
                      (-1), $result$$Register,
                      vtmp, $tmp$$Register, StrIntrinsicNode::LL);
  %}
  ins_pipe(pipe_slow);
%}
15136 
// StrIndexOf with a run-time substring length (cnt2 in rax_RegI), encoding
// StrIntrinsicNode::UU, enabled by UseSSE42Intrinsics.  string_indexof() is
// called with -1 in the constant-length position.
instruct string_indexofU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
                         rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr) %{
  predicate(UseSSE42Intrinsics &&
            (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp_vec,
         USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
         KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
  ins_encode %{
    Register    haystack     = $str1$$Register;
    Register    needle       = $str2$$Register;
    Register    haystack_len = $cnt1$$Register;
    Register    needle_len   = $cnt2$$Register;
    XMMRegister vtmp         = $tmp_vec$$XMMRegister;
    __ string_indexof(haystack, needle, haystack_len, needle_len,
                      (-1), $result$$Register,
                      vtmp, $tmp$$Register, StrIntrinsicNode::UU);
  %}
  ins_pipe(pipe_slow);
%}
15153 
// StrIndexOf with a run-time substring length (cnt2 in rax_RegI), encoding
// StrIntrinsicNode::UL, enabled by UseSSE42Intrinsics.  string_indexof() is
// called with -1 in the constant-length position.
instruct string_indexofUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
                          rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr) %{
  predicate(UseSSE42Intrinsics &&
            (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp_vec,
         USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
         KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
  ins_encode %{
    Register    haystack     = $str1$$Register;
    Register    needle       = $str2$$Register;
    Register    haystack_len = $cnt1$$Register;
    Register    needle_len   = $cnt2$$Register;
    XMMRegister vtmp         = $tmp_vec$$XMMRegister;
    __ string_indexof(haystack, needle, haystack_len, needle_len,
                      (-1), $result$$Register,
                      vtmp, $tmp$$Register, StrIntrinsicNode::UL);
  %}
  ins_pipe(pipe_slow);
%}
15170 
// StrIndexOfChar for encoding StrIntrinsicNode::U, enabled by
// UseSSE42Intrinsics.  The string, its count and the searched character are
// consumed (USE_KILL); three vector temporaries and one integer temporary
// (rcx_RegI) are reserved.  Delegates to string_indexof_char().
instruct string_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
                             rbx_RegI result,
                             legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3,
                             rcx_RegI tmp, rFlagsReg cr) %{
  predicate(UseSSE42Intrinsics &&
            (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
  effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3,
         USE_KILL str1, USE_KILL cnt1, USE_KILL ch,
         TEMP tmp, KILL cr);

  format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
  ins_encode %{
    XMMRegister v1 = $tmp_vec1$$XMMRegister;
    XMMRegister v2 = $tmp_vec2$$XMMRegister;
    XMMRegister v3 = $tmp_vec3$$XMMRegister;
    __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register,
                           $result$$Register, v1, v2, v3, $tmp$$Register);
  %}
  ins_pipe(pipe_slow);
%}
15184 
// StrIndexOfChar for encoding StrIntrinsicNode::L, enabled by
// UseSSE42Intrinsics.  Same operand layout as the U-encoded form; delegates
// to stringL_indexof_char().
instruct stringL_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
                              rbx_RegI result,
                              legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3,
                              rcx_RegI tmp, rFlagsReg cr) %{
  predicate(UseSSE42Intrinsics &&
            (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
  effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3,
         USE_KILL str1, USE_KILL cnt1, USE_KILL ch,
         TEMP tmp, KILL cr);

  format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
  ins_encode %{
    XMMRegister v1 = $tmp_vec1$$XMMRegister;
    XMMRegister v2 = $tmp_vec2$$XMMRegister;
    XMMRegister v3 = $tmp_vec3$$XMMRegister;
    __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register,
                            $result$$Register, v1, v2, v3, $tmp$$Register);
  %}
  ins_pipe(pipe_slow);
%}
15198 
// StrEquals, used when VM_Version::supports_avx512vlbw() is false.
// Both string pointers and the count are consumed (USE_KILL); rbx (tmp3) and
// the flags are killed; two vector temporaries are reserved.  Delegates to
// arrays_equals() with its first argument false and knoreg as opmask.
instruct string_equals(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
                       legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw());
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(TEMP tmp1, TEMP tmp2,
         USE_KILL str1, USE_KILL str2, USE_KILL cnt,
         KILL tmp3, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    XMMRegister vec_a = $tmp1$$XMMRegister;
    XMMRegister vec_b = $tmp2$$XMMRegister;
    __ arrays_equals(false, $str1$$Register, $str2$$Register,
                     $cnt$$Register, $result$$Register, $tmp3$$Register,
                     vec_a, vec_b, false /* char */, knoreg);
  %}
  ins_pipe(pipe_slow);
%}
15215 
// StrEquals, used when VM_Version::supports_avx512vlbw() is true.  Same as
// the non-EVEX form plus a kReg temporary forwarded to arrays_equals().
instruct string_equals_evex(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
                            legRegD tmp1, legRegD tmp2, kReg ktmp, rbx_RegI tmp3,
                            rFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw());
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp,
         USE_KILL str1, USE_KILL str2, USE_KILL cnt,
         KILL tmp3, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    XMMRegister vec_a = $tmp1$$XMMRegister;
    XMMRegister vec_b = $tmp2$$XMMRegister;
    KRegister   kmask = $ktmp$$KRegister;
    __ arrays_equals(false, $str1$$Register, $str2$$Register,
                     $cnt$$Register, $result$$Register, $tmp3$$Register,
                     vec_a, vec_b, false /* char */, kmask);
  %}
  ins_pipe(pipe_slow);
%}
15231 
// AryEq for encoding StrIntrinsicNode::LL, used when
// VM_Version::supports_avx512vlbw() is false.  Both array pointers are
// consumed (USE_KILL); rcx (tmp3), rbx (tmp4) and the flags are killed.
// Delegates to arrays_equals() with its first argument true, the char flag
// false, and knoreg as opmask.
instruct array_equalsB(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
                       legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4,
                       rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() &&
            ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2,
         USE_KILL ary1, USE_KILL ary2,
         KILL tmp3, KILL tmp4, KILL cr);

  format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    XMMRegister vec_a = $tmp1$$XMMRegister;
    XMMRegister vec_b = $tmp2$$XMMRegister;
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     vec_a, vec_b, false /* char */, knoreg);
  %}
  ins_pipe(pipe_slow);
%}
15248 
// AryEq for encoding StrIntrinsicNode::LL, used when
// VM_Version::supports_avx512vlbw() is true.  Same as the non-EVEX form plus
// a kReg temporary forwarded to arrays_equals().
instruct array_equalsB_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
                            legRegD tmp1, legRegD tmp2, kReg ktmp,
                            rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() &&
            ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp,
         USE_KILL ary1, USE_KILL ary2,
         KILL tmp3, KILL tmp4, KILL cr);

  format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    XMMRegister vec_a = $tmp1$$XMMRegister;
    XMMRegister vec_b = $tmp2$$XMMRegister;
    KRegister   kmask = $ktmp$$KRegister;
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     vec_a, vec_b, false /* char */, kmask);
  %}
  ins_pipe(pipe_slow);
%}
15264 
// AryEq for encoding StrIntrinsicNode::UU, used when
// VM_Version::supports_avx512vlbw() is false.  Differs from the LL form only
// in passing true for the char flag of arrays_equals().
instruct array_equalsC(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
                       legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4,
                       rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() &&
            ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2,
         USE_KILL ary1, USE_KILL ary2,
         KILL tmp3, KILL tmp4, KILL cr);

  format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    XMMRegister vec_a = $tmp1$$XMMRegister;
    XMMRegister vec_b = $tmp2$$XMMRegister;
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     vec_a, vec_b, true /* char */, knoreg);
  %}
  ins_pipe(pipe_slow);
%}
15280 
// AryEq for encoding StrIntrinsicNode::UU, used when
// VM_Version::supports_avx512vlbw() is true.  Passes true for the char flag
// and forwards a kReg temporary to arrays_equals().
instruct array_equalsC_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
                            legRegD tmp1, legRegD tmp2, kReg ktmp,
                            rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() &&
            ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp,
         USE_KILL ary1, USE_KILL ary2,
         KILL tmp3, KILL tmp4, KILL cr);

  format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    XMMRegister vec_a = $tmp1$$XMMRegister;
    XMMRegister vec_b = $tmp2$$XMMRegister;
    KRegister   kmask = $ktmp$$KRegister;
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     vec_a, vec_b, true /* char */, kmask);
  %}
  ins_pipe(pipe_slow);
%}
15296 
// VectorizedHashCode, enabled when UseAVX >= 2.  result appears on both
// sides of the match rule (it is an input and the output).  The array
// pointer and count are consumed (USE_KILL); thirteen vector temporaries and
// three integer temporaries are reserved.  The immU8 basic_type constant is
// cast to BasicType and handed to arrays_hashcode().
instruct arrays_hashcode(rdi_RegP ary1, rdx_RegI cnt1, rbx_RegI result, immU8 basic_type,
                         legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3,
                         legRegD tmp_vec4, legRegD tmp_vec5, legRegD tmp_vec6,
                         legRegD tmp_vec7, legRegD tmp_vec8, legRegD tmp_vec9,
                         legRegD tmp_vec10, legRegD tmp_vec11, legRegD tmp_vec12,
                         legRegD tmp_vec13,
                         rRegI tmp1, rRegI tmp2, rRegI tmp3, rFlagsReg cr) %{
  predicate(UseAVX >= 2);
  match(Set result (VectorizedHashCode (Binary ary1 cnt1) (Binary result basic_type)));
  effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, TEMP tmp_vec4,
         TEMP tmp_vec5, TEMP tmp_vec6, TEMP tmp_vec7, TEMP tmp_vec8,
         TEMP tmp_vec9, TEMP tmp_vec10, TEMP tmp_vec11, TEMP tmp_vec12,
         TEMP tmp_vec13,
         TEMP tmp1, TEMP tmp2, TEMP tmp3,
         USE_KILL ary1, USE_KILL cnt1,
         USE basic_type, KILL cr);

  format %{ "Array HashCode array[] $ary1,$cnt1,$result,$basic_type -> $result   // KILL all" %}
  ins_encode %{
    const BasicType elem_type = (BasicType)$basic_type$$constant;
    __ arrays_hashcode($ary1$$Register, $cnt1$$Register, $result$$Register,
                       $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
                       $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister,
                       $tmp_vec3$$XMMRegister, $tmp_vec4$$XMMRegister,
                       $tmp_vec5$$XMMRegister, $tmp_vec6$$XMMRegister,
                       $tmp_vec7$$XMMRegister, $tmp_vec8$$XMMRegister,
                       $tmp_vec9$$XMMRegister, $tmp_vec10$$XMMRegister,
                       $tmp_vec11$$XMMRegister, $tmp_vec12$$XMMRegister,
                       $tmp_vec13$$XMMRegister, elem_type);
  %}
  ins_pipe(pipe_slow);
%}
15322 
// CountPositives, used unless both VM_Version::supports_avx512vlbw() and
// VM_Version::supports_bmi2() hold.  The array pointer (rsi) and length
// (rcx) are consumed (USE_KILL); rbx (tmp3) and the flags are killed; two
// vector temporaries are reserved.  Delegates to count_positives() with
// knoreg for both opmask arguments.
instruct count_positives(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
                         legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
  match(Set result (CountPositives ary1 len));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);

  format %{ "countPositives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    // No opmask operands exist on this variant, hence knoreg twice.
    __ count_positives($ary1$$Register, $len$$Register,
                       $result$$Register, $tmp3$$Register,
                       $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
15338 
// CountPositives, used when both VM_Version::supports_avx512vlbw() and
// VM_Version::supports_bmi2() hold.  Same operand layout as the non-EVEX
// form plus two kReg temporaries, which are forwarded to count_positives().
instruct count_positives_evex(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
                              legRegD tmp1, legRegD tmp2, kReg ktmp1, kReg ktmp2, rbx_RegI tmp3, rFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
  match(Set result (CountPositives ary1 len));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);

  format %{ "countPositives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ count_positives($ary1$$Register, $len$$Register,
                       $result$$Register, $tmp3$$Register,
                       $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
15354 
// StrCompressedCopy (char[] to byte[] compression), used unless both
// VM_Version::supports_avx512vlbw() and VM_Version::supports_bmi2() hold.
// src, dst and len are consumed (USE_KILL); rcx (tmp5) and the flags are
// killed; four vector temporaries are reserved.  Delegates to
// char_array_compress() with knoreg for both opmask arguments.
instruct string_compress(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
                         legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
                         rcx_RegI tmp5, rax_RegI result, rFlagsReg cr)
%{
  predicate(!(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2()));
  match(Set result (StrCompressedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4,
         USE_KILL src, USE_KILL dst, USE_KILL len,
         KILL tmp5, KILL cr);

  format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
  ins_encode %{
    XMMRegister v1 = $tmp1$$XMMRegister;
    XMMRegister v2 = $tmp2$$XMMRegister;
    XMMRegister v3 = $tmp3$$XMMRegister;
    XMMRegister v4 = $tmp4$$XMMRegister;
    __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
                           v1, v2, v3, v4,
                           $tmp5$$Register, $result$$Register,
                           knoreg, knoreg);
  %}
  ins_pipe(pipe_slow);
%}
15372 
// StrCompressedCopy, used when both VM_Version::supports_avx512vlbw() and
// VM_Version::supports_bmi2() hold.  Same operand layout as the non-EVEX
// form plus two kReg temporaries forwarded to char_array_compress().
instruct string_compress_evex(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
                              legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
                              kReg ktmp1, kReg ktmp2,
                              rcx_RegI tmp5, rax_RegI result, rFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() &&
            VM_Version::supports_bmi2());
  match(Set result (StrCompressedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4,
         TEMP ktmp1, TEMP ktmp2,
         USE_KILL src, USE_KILL dst, USE_KILL len,
         KILL tmp5, KILL cr);

  format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
  ins_encode %{
    XMMRegister v1 = $tmp1$$XMMRegister;
    XMMRegister v2 = $tmp2$$XMMRegister;
    XMMRegister v3 = $tmp3$$XMMRegister;
    XMMRegister v4 = $tmp4$$XMMRegister;
    KRegister   k1 = $ktmp1$$KRegister;
    KRegister   k2 = $ktmp2$$KRegister;
    __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
                           v1, v2, v3, v4,
                           $tmp5$$Register, $result$$Register,
                           k1, k2);
  %}
  ins_pipe(pipe_slow);
%}
15389 // fast byte[] to char[] inflation
// Matches StrInflatedCopy when supports_avx512vlbw() or supports_bmi2() is
// false. The node produces no register value (result operand is the Universe
// dummy). Emits byte_array_inflate with knoreg as the opmask argument; src,
// dst and len are consumed (USE_KILL) and flags are killed.
15390 instruct string_inflate(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15391                         legRegD tmp1, rcx_RegI tmp2, rFlagsReg cr) %{
15392   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
15393   match(Set dummy (StrInflatedCopy src (Binary dst len)));
15394   effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
15395
15396   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
15397   ins_encode %{
15398     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
15399                           $tmp1$$XMMRegister, $tmp2$$Register, knoreg);
15400   %}
15401   ins_pipe( pipe_slow );
15402 %}
15403 
// StrInflatedCopy variant selected when both supports_avx512vlbw() and
// supports_bmi2() are true. Identical to string_inflate except that an opmask
// temporary (ktmp) is allocated and passed to byte_array_inflate.
15404 instruct string_inflate_evex(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15405                              legRegD tmp1, kReg ktmp, rcx_RegI tmp2, rFlagsReg cr) %{
15406   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
15407   match(Set dummy (StrInflatedCopy src (Binary dst len)));
15408   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
15409
15410   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
15411   ins_encode %{
15412     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
15413                           $tmp1$$XMMRegister, $tmp2$$Register, $ktmp$$KRegister);
15414   %}
15415   ins_pipe( pipe_slow );
15416 %}
15417 
15418 // encode char[] to byte[] in ISO_8859_1
// Matches EncodeISOArray nodes whose is_ascii() is false. Emits
// encode_iso_array with the trailing flag argument set to false. src, dst and
// len are consumed (USE_KILL); tmp5 and flags are killed.
15419 instruct encode_iso_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15420                           legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
15421                           rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
15422   predicate(!((EncodeISOArrayNode*)n)->is_ascii());
15423   match(Set result (EncodeISOArray src (Binary dst len)));
15424   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
15425
15426   format %{ "Encode iso array $src,$dst,$len -> $result    // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
15427   ins_encode %{
15428     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
15429                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
15430                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, false);
15431   %}
15432   ins_pipe( pipe_slow );
15433 %}
15434 
15435 // encode char[] to byte[] in ASCII
// Matches EncodeISOArray nodes whose is_ascii() is true. Uses the same
// encode_iso_array routine as the ISO_8859_1 rule, with the trailing flag
// argument set to true.
15436 instruct encode_ascii_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15437                             legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
15438                             rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
15439   predicate(((EncodeISOArrayNode*)n)->is_ascii());
15440   match(Set result (EncodeISOArray src (Binary dst len)));
15441   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
15442
15443   format %{ "Encode ascii array $src,$dst,$len -> $result    // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
15444   ins_encode %{
15445     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
15446                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
15447                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, true);
15448   %}
15449   ins_pipe( pipe_slow );
15450 %}
15451 
15452 //----------Overflow Math Instructions-----------------------------------------
15453 
// OverflowAddI, register + register: only the flags are the result. The addl
// is actually executed, so op1 is overwritten and therefore declared USE_KILL.
15454 instruct overflowAddI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
15455 %{
15456   match(Set cr (OverflowAddI op1 op2));
15457   effect(DEF cr, USE_KILL op1, USE op2);
15458
15459   format %{ "addl    $op1, $op2\t# overflow check int" %}
15460
15461   ins_encode %{
15462     __ addl($op1$$Register, $op2$$Register);
15463   %}
15464   ins_pipe(ialu_reg_reg);
15465 %}
15466 
// OverflowAddI, register + int immediate. As in the register form, the addl
// destroys op1 (USE_KILL) and only the flags are consumed by the matcher.
15467 instruct overflowAddI_rReg_imm(rFlagsReg cr, rax_RegI op1, immI op2)
15468 %{
15469   match(Set cr (OverflowAddI op1 op2));
15470   effect(DEF cr, USE_KILL op1, USE op2);
15471
15472   format %{ "addl    $op1, $op2\t# overflow check int" %}
15473
15474   ins_encode %{
15475     __ addl($op1$$Register, $op2$$constant);
15476   %}
15477   ins_pipe(ialu_reg_reg);
15478 %}
15479 
// OverflowAddL, register + register: 64-bit addq executed for its flags;
// op1 is overwritten (USE_KILL).
15480 instruct overflowAddL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
15481 %{
15482   match(Set cr (OverflowAddL op1 op2));
15483   effect(DEF cr, USE_KILL op1, USE op2);
15484
15485   format %{ "addq    $op1, $op2\t# overflow check long" %}
15486   ins_encode %{
15487     __ addq($op1$$Register, $op2$$Register);
15488   %}
15489   ins_pipe(ialu_reg_reg);
15490 %}
15491 
// OverflowAddL, register + immL32 constant: addq with an immediate, executed
// for its flags; op1 is overwritten (USE_KILL).
15492 instruct overflowAddL_rReg_imm(rFlagsReg cr, rax_RegL op1, immL32 op2)
15493 %{
15494   match(Set cr (OverflowAddL op1 op2));
15495   effect(DEF cr, USE_KILL op1, USE op2);
15496
15497   format %{ "addq    $op1, $op2\t# overflow check long" %}
15498   ins_encode %{
15499     __ addq($op1$$Register, $op2$$constant);
15500   %}
15501   ins_pipe(ialu_reg_reg);
15502 %}
15503 
// OverflowSubI, register - register: emitted as cmpl, which sets the flags
// without writing a register, so no operand is killed and no effect list is
// needed.
15504 instruct overflowSubI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
15505 %{
15506   match(Set cr (OverflowSubI op1 op2));
15507
15508   format %{ "cmpl    $op1, $op2\t# overflow check int" %}
15509   ins_encode %{
15510     __ cmpl($op1$$Register, $op2$$Register);
15511   %}
15512   ins_pipe(ialu_reg_reg);
15513 %}
15514 
// OverflowSubI, register - int immediate: emitted as cmpl with an immediate;
// no operand is modified.
15515 instruct overflowSubI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
15516 %{
15517   match(Set cr (OverflowSubI op1 op2));
15518
15519   format %{ "cmpl    $op1, $op2\t# overflow check int" %}
15520   ins_encode %{
15521     __ cmpl($op1$$Register, $op2$$constant);
15522   %}
15523   ins_pipe(ialu_reg_reg);
15524 %}
15525 
// OverflowSubL, register - register: emitted as cmpq; no operand is modified.
15526 instruct overflowSubL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
15527 %{
15528   match(Set cr (OverflowSubL op1 op2));
15529
15530   format %{ "cmpq    $op1, $op2\t# overflow check long" %}
15531   ins_encode %{
15532     __ cmpq($op1$$Register, $op2$$Register);
15533   %}
15534   ins_pipe(ialu_reg_reg);
15535 %}
15536 
// OverflowSubL, register - immL32 constant: emitted as cmpq with an
// immediate; no operand is modified.
15537 instruct overflowSubL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
15538 %{
15539   match(Set cr (OverflowSubL op1 op2));
15540
15541   format %{ "cmpq    $op1, $op2\t# overflow check long" %}
15542   ins_encode %{
15543     __ cmpq($op1$$Register, $op2$$constant);
15544   %}
15545   ins_pipe(ialu_reg_reg);
15546 %}
15547 
// OverflowSubI with a constant-zero left operand (0 - op2), i.e. negation.
// Emitted as negl, which overwrites op2 (USE_KILL); the zero operand is only
// part of the match pattern and is not used by the encoding.
15548 instruct overflowNegI_rReg(rFlagsReg cr, immI_0 zero, rax_RegI op2)
15549 %{
15550   match(Set cr (OverflowSubI zero op2));
15551   effect(DEF cr, USE_KILL op2);
15552
15553   format %{ "negl    $op2\t# overflow check int" %}
15554   ins_encode %{
15555     __ negl($op2$$Register);
15556   %}
15557   ins_pipe(ialu_reg_reg);
15558 %}
15559 
// OverflowSubL with a constant-zero left operand (0 - op2), i.e. negation.
// Emitted as negq, which overwrites op2 (USE_KILL).
15560 instruct overflowNegL_rReg(rFlagsReg cr, immL0 zero, rax_RegL op2)
15561 %{
15562   match(Set cr (OverflowSubL zero op2));
15563   effect(DEF cr, USE_KILL op2);
15564
15565   format %{ "negq    $op2\t# overflow check long" %}
15566   ins_encode %{
15567     __ negq($op2$$Register);
15568   %}
15569   ins_pipe(ialu_reg_reg);
15570 %}
15571 
// OverflowMulI, register * register: two-operand imull executed for its
// flags; the product lands in op1, hence USE_KILL.
15572 instruct overflowMulI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
15573 %{
15574   match(Set cr (OverflowMulI op1 op2));
15575   effect(DEF cr, USE_KILL op1, USE op2);
15576
15577   format %{ "imull    $op1, $op2\t# overflow check int" %}
15578   ins_encode %{
15579     __ imull($op1$$Register, $op2$$Register);
15580   %}
15581   ins_pipe(ialu_reg_reg_alu0);
15582 %}
15583 
// OverflowMulI, register * int immediate: uses the three-operand imull so the
// product goes to a scratch register (TEMP tmp) and op1 is left intact (USE).
15584 instruct overflowMulI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
15585 %{
15586   match(Set cr (OverflowMulI op1 op2));
15587   effect(DEF cr, TEMP tmp, USE op1, USE op2);
15588
15589   format %{ "imull    $tmp, $op1, $op2\t# overflow check int" %}
15590   ins_encode %{
15591     __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
15592   %}
15593   ins_pipe(ialu_reg_reg_alu0);
15594 %}
15595 
// OverflowMulL, register * register: two-operand imulq executed for its
// flags; the product lands in op1, hence USE_KILL.
15596 instruct overflowMulL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
15597 %{
15598   match(Set cr (OverflowMulL op1 op2));
15599   effect(DEF cr, USE_KILL op1, USE op2);
15600
15601   format %{ "imulq    $op1, $op2\t# overflow check long" %}
15602   ins_encode %{
15603     __ imulq($op1$$Register, $op2$$Register);
15604   %}
15605   ins_pipe(ialu_reg_reg_alu0);
15606 %}
15607 
// OverflowMulL, register * immL32 constant: three-operand imulq writes the
// product to a scratch register (TEMP tmp); op1 is left intact (USE).
15608 instruct overflowMulL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2, rRegL tmp)
15609 %{
15610   match(Set cr (OverflowMulL op1 op2));
15611   effect(DEF cr, TEMP tmp, USE op1, USE op2);
15612
15613   format %{ "imulq    $tmp, $op1, $op2\t# overflow check long" %}
15614   ins_encode %{
15615     __ imulq($tmp$$Register, $op1$$Register, $op2$$constant);
15616   %}
15617   ins_pipe(ialu_reg_reg_alu0);
15618 %}
15619 
15620 
15621 //----------Control Flow Instructions------------------------------------------
15622 // Signed compare Instructions
15623 
15624 // XXX more variants!!
// Signed int compare, register vs register: CmpI -> cmpl. The explicit effect
// list spells out DEF/USE for each operand; this rule is also instantiated by
// the MinI/MaxI expand rules further down in this file.
15625 instruct compI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
15626 %{
15627   match(Set cr (CmpI op1 op2));
15628   effect(DEF cr, USE op1, USE op2);
15629
15630   format %{ "cmpl    $op1, $op2" %}
15631   ins_encode %{
15632     __ cmpl($op1$$Register, $op2$$Register);
15633   %}
15634   ins_pipe(ialu_cr_reg_reg);
15635 %}
15636 
// Signed int compare, register vs int immediate: CmpI -> cmpl reg, imm.
15637 instruct compI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
15638 %{
15639   match(Set cr (CmpI op1 op2));
15640
15641   format %{ "cmpl    $op1, $op2" %}
15642   ins_encode %{
15643     __ cmpl($op1$$Register, $op2$$constant);
15644   %}
15645   ins_pipe(ialu_cr_reg_imm);
15646 %}
15647 
// Signed int compare, register vs memory: folds the LoadI into the cmpl's
// memory operand. Carries an explicit ins_cost of 500 (marked XXX by the
// original author).
15648 instruct compI_rReg_mem(rFlagsReg cr, rRegI op1, memory op2)
15649 %{
15650   match(Set cr (CmpI op1 (LoadI op2)));
15651
15652   ins_cost(500); // XXX
15653   format %{ "cmpl    $op1, $op2" %}
15654   ins_encode %{
15655     __ cmpl($op1$$Register, $op2$$Address);
15656   %}
15657   ins_pipe(ialu_cr_reg_mem);
15658 %}
15659 
// CmpI against the constant zero: emitted as testl src, src rather than a
// compare with an immediate. The zero operand exists only for matching.
15660 instruct testI_reg(rFlagsReg cr, rRegI src, immI_0 zero)
15661 %{
15662   match(Set cr (CmpI src zero));
15663
15664   format %{ "testl   $src, $src" %}
15665   ins_encode %{
15666     __ testl($src$$Register, $src$$Register);
15667   %}
15668   ins_pipe(ialu_cr_reg_imm);
15669 %}
15670 
// (src & con) compared with zero: folded into a single testl src, imm, so the
// AND result is never written to a register.
15671 instruct testI_reg_imm(rFlagsReg cr, rRegI src, immI con, immI_0 zero)
15672 %{
15673   match(Set cr (CmpI (AndI src con) zero));
15674
15675   format %{ "testl   $src, $con" %}
15676   ins_encode %{
15677     __ testl($src$$Register, $con$$constant);
15678   %}
15679   ins_pipe(ialu_cr_reg_imm);
15680 %}
15681 
// (src1 & src2) compared with zero: folded into testl src1, src2; neither
// source register is modified.
15682 instruct testI_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2, immI_0 zero)
15683 %{
15684   match(Set cr (CmpI (AndI src1 src2) zero));
15685
15686   format %{ "testl   $src1, $src2" %}
15687   ins_encode %{
15688     __ testl($src1$$Register, $src2$$Register);
15689   %}
15690   ins_pipe(ialu_cr_reg_imm);
15691 %}
15692 
// (src & LoadI(mem)) compared with zero: folds both the load and the AND into
// testl src, [mem].
15693 instruct testI_reg_mem(rFlagsReg cr, rRegI src, memory mem, immI_0 zero)
15694 %{
15695   match(Set cr (CmpI (AndI src (LoadI mem)) zero));
15696
15697   format %{ "testl   $src, $mem" %}
15698   ins_encode %{
15699     __ testl($src$$Register, $mem$$Address);
15700   %}
15701   ins_pipe(ialu_cr_reg_mem);
15702 %}
15703 
15704 // Unsigned compare Instructions; really, same as signed except they
15705 // produce an rFlagsRegU instead of rFlagsReg.
// Unsigned int compare, register vs register: same cmpl encoding as the
// signed rule, but the result operand is rFlagsRegU.
15706 instruct compU_rReg(rFlagsRegU cr, rRegI op1, rRegI op2)
15707 %{
15708   match(Set cr (CmpU op1 op2));
15709
15710   format %{ "cmpl    $op1, $op2\t# unsigned" %}
15711   ins_encode %{
15712     __ cmpl($op1$$Register, $op2$$Register);
15713   %}
15714   ins_pipe(ialu_cr_reg_reg);
15715 %}
15716 
// Unsigned int compare, register vs int immediate: cmpl reg, imm producing
// rFlagsRegU.
15717 instruct compU_rReg_imm(rFlagsRegU cr, rRegI op1, immI op2)
15718 %{
15719   match(Set cr (CmpU op1 op2));
15720
15721   format %{ "cmpl    $op1, $op2\t# unsigned" %}
15722   ins_encode %{
15723     __ cmpl($op1$$Register, $op2$$constant);
15724   %}
15725   ins_pipe(ialu_cr_reg_imm);
15726 %}
15727 
// Unsigned int compare, register vs memory: folds the LoadI into the cmpl's
// memory operand. Explicit ins_cost of 500 (marked XXX by the original author).
15728 instruct compU_rReg_mem(rFlagsRegU cr, rRegI op1, memory op2)
15729 %{
15730   match(Set cr (CmpU op1 (LoadI op2)));
15731
15732   ins_cost(500); // XXX
15733   format %{ "cmpl    $op1, $op2\t# unsigned" %}
15734   ins_encode %{
15735     __ cmpl($op1$$Register, $op2$$Address);
15736   %}
15737   ins_pipe(ialu_cr_reg_mem);
15738 %}
15739 
// CmpU against the constant zero: emitted as testl src, src, producing
// rFlagsRegU. The zero operand exists only for matching.
15740 instruct testU_reg(rFlagsRegU cr, rRegI src, immI_0 zero)
15741 %{
15742   match(Set cr (CmpU src zero));
15743
15744   format %{ "testl   $src, $src\t# unsigned" %}
15745   ins_encode %{
15746     __ testl($src$$Register, $src$$Register);
15747   %}
15748   ins_pipe(ialu_cr_reg_imm);
15749 %}
15750 
// Pointer compare, register vs register: CmpP -> 64-bit cmpq, result in
// rFlagsRegU.
15751 instruct compP_rReg(rFlagsRegU cr, rRegP op1, rRegP op2)
15752 %{
15753   match(Set cr (CmpP op1 op2));
15754
15755   format %{ "cmpq    $op1, $op2\t# ptr" %}
15756   ins_encode %{
15757     __ cmpq($op1$$Register, $op2$$Register);
15758   %}
15759   ins_pipe(ialu_cr_reg_reg);
15760 %}
15761 
// Pointer compare, register vs loaded pointer: folds the LoadP into cmpq's
// memory operand. Only applies when the load node carries no barrier data
// (barrier_data() == 0). Explicit ins_cost of 500 (marked XXX).
15762 instruct compP_rReg_mem(rFlagsRegU cr, rRegP op1, memory op2)
15763 %{
15764   match(Set cr (CmpP op1 (LoadP op2)));
15765   predicate(n->in(2)->as_Load()->barrier_data() == 0);
15766
15767   ins_cost(500); // XXX
15768   format %{ "cmpq    $op1, $op2\t# ptr" %}
15769   ins_encode %{
15770     __ cmpq($op1$$Register, $op2$$Address);
15771   %}
15772   ins_pipe(ialu_cr_reg_mem);
15773 %}
15774 
15775 // XXX this is generalized by compP_rReg_mem???
15776 // Compare raw pointer (used in out-of-heap check).
15777 // Only works because non-oop pointers must be raw pointers
15778 // and raw pointers have no anti-dependencies.
// Same match pattern and cmpq encoding as compP_rReg_mem, but the predicate
// additionally requires the load's address input (n->in(2)->in(2)) to have a
// raw-pointer type. No ins_cost is specified for this rule.
15779 instruct compP_mem_rReg(rFlagsRegU cr, rRegP op1, memory op2)
15780 %{
15781   predicate(n->in(2)->in(2)->bottom_type()->isa_rawptr() != nullptr &&
15782             n->in(2)->as_Load()->barrier_data() == 0);
15783   match(Set cr (CmpP op1 (LoadP op2)));
15784
15785   format %{ "cmpq    $op1, $op2\t# raw ptr" %}
15786   ins_encode %{
15787     __ cmpq($op1$$Register, $op2$$Address);
15788   %}
15789   ins_pipe(ialu_cr_reg_mem);
15790 %}
15791 
15792 // This will generate a signed flags result. This should be OK since
15793 // any compare to a zero should be eq/neq.
// Pointer null check on a register: CmpP against immP0 is emitted as
// testq src, src. Note the result operand is rFlagsReg (signed flags).
15794 instruct testP_reg(rFlagsReg cr, rRegP src, immP0 zero)
15795 %{
15796   match(Set cr (CmpP src zero));
15797
15798   format %{ "testq   $src, $src\t# ptr" %}
15799   ins_encode %{
15800     __ testq($src$$Register, $src$$Register);
15801   %}
15802   ins_pipe(ialu_cr_reg_imm);
15803 %}
15804 
15805 // This will generate a signed flags result. This should be OK since
15806 // any compare to a zero should be eq/neq.
// Pointer null check directly on memory: CmpP (LoadP op) against immP0 is
// emitted as testq [op], mask without loading into a register. Selected when
// compressed oops are disabled or the compressed-oops base is non-null, and
// only for loads without barrier data.
15807 instruct testP_mem(rFlagsReg cr, memory op, immP0 zero)
15808 %{
15809   predicate((!UseCompressedOops || (CompressedOops::base() != nullptr)) &&
15810             n->in(1)->as_Load()->barrier_data() == 0);
15811   match(Set cr (CmpP (LoadP op) zero));
15812
15813   ins_cost(500); // XXX
15814   format %{ "testq   $op, 0xffffffffffffffff\t# ptr" %}
15815   ins_encode %{
15816     // NOTE(review): the format string shows a 64-bit all-ones mask; presumably
15817     // the 32-bit immediate is sign-extended by testq - confirm in the assembler.
15816     __ testq($op$$Address, 0xFFFFFFFF);
15817   %}
15818   ins_pipe(ialu_cr_reg_imm);
15819 %}
15820 
// Pointer null check on memory for the case where compressed oops are enabled
// with a null base: compares the memory operand against r12, which the format
// string documents as holding zero (R12_heapbase==0). Only for loads without
// barrier data.
15821 instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero)
15822 %{
15823   predicate(UseCompressedOops && (CompressedOops::base() == nullptr) &&
15824             n->in(1)->as_Load()->barrier_data() == 0);
15825   match(Set cr (CmpP (LoadP mem) zero));
15826
15827   format %{ "cmpq    R12, $mem\t# ptr (R12_heapbase==0)" %}
15828   ins_encode %{
15829     __ cmpq(r12, $mem$$Address);
15830   %}
15831   ins_pipe(ialu_cr_reg_mem);
15832 %}
15833 
// Compressed-pointer compare, register vs register: CmpN -> 32-bit cmpl,
// result in rFlagsRegU.
15834 instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2)
15835 %{
15836   match(Set cr (CmpN op1 op2));
15837
15838   format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
15839   ins_encode %{ __ cmpl($op1$$Register, $op2$$Register); %}
15840   ins_pipe(ialu_cr_reg_reg);
15841 %}
15842 
// Compressed-pointer compare, register vs loaded narrow oop: folds the LoadN
// into cmpl's memory operand. Only for loads without barrier data.
15843 instruct compN_rReg_mem(rFlagsRegU cr, rRegN src, memory mem)
15844 %{
15845   predicate(n->in(2)->as_Load()->barrier_data() == 0);
15846   match(Set cr (CmpN src (LoadN mem)));
15847
15848   format %{ "cmpl    $src, $mem\t# compressed ptr" %}
15849   ins_encode %{
15850     __ cmpl($src$$Register, $mem$$Address);
15851   %}
15852   ins_pipe(ialu_cr_reg_mem);
15853 %}
15854 
// Compressed-pointer compare, register vs narrow-oop constant: delegates to
// cmp_narrow_oop, passing the constant cast to jobject.
15855 instruct compN_rReg_imm(rFlagsRegU cr, rRegN op1, immN op2) %{
15856   match(Set cr (CmpN op1 op2));
15857
15858   format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
15859   ins_encode %{
15860     __ cmp_narrow_oop($op1$$Register, (jobject)$op2$$constant);
15861   %}
15862   ins_pipe(ialu_cr_reg_imm);
15863 %}
15864 
// Compressed-pointer compare, narrow-oop constant vs loaded narrow oop:
// cmp_narrow_oop on the memory operand, so the loaded value is never placed in
// a register. Only for loads without barrier data.
15865 instruct compN_mem_imm(rFlagsRegU cr, memory mem, immN src)
15866 %{
15867   predicate(n->in(2)->as_Load()->barrier_data() == 0);
15868   match(Set cr (CmpN src (LoadN mem)));
15869
15870   format %{ "cmpl    $mem, $src\t# compressed ptr" %}
15871   ins_encode %{
15872     __ cmp_narrow_oop($mem$$Address, (jobject)$src$$constant);
15873   %}
15874   ins_pipe(ialu_cr_reg_mem);
15875 %}
15876 
// Compressed-klass compare, register vs narrow-klass constant: delegates to
// cmp_narrow_klass, passing the constant cast to Klass*.
15877 instruct compN_rReg_imm_klass(rFlagsRegU cr, rRegN op1, immNKlass op2) %{
15878   match(Set cr (CmpN op1 op2));
15879
15880   format %{ "cmpl    $op1, $op2\t# compressed klass ptr" %}
15881   ins_encode %{
15882     __ cmp_narrow_klass($op1$$Register, (Klass*)$op2$$constant);
15883   %}
15884   ins_pipe(ialu_cr_reg_imm);
15885 %}
15886 
// Compressed-klass compare, narrow-klass constant vs LoadNKlass from memory:
// cmp_narrow_klass on the memory operand. Disabled when
// UseCompactObjectHeaders is set (see predicate).
15887 instruct compN_mem_imm_klass(rFlagsRegU cr, memory mem, immNKlass src)
15888 %{
15889   predicate(!UseCompactObjectHeaders);
15890   match(Set cr (CmpN src (LoadNKlass mem)));
15891
15892   format %{ "cmpl    $mem, $src\t# compressed klass ptr" %}
15893   ins_encode %{
15894     __ cmp_narrow_klass($mem$$Address, (Klass*)$src$$constant);
15895   %}
15896   ins_pipe(ialu_cr_reg_mem);
15897 %}
15898 
// Compressed-pointer null check on a register: CmpN against immN0 is emitted
// as testl src, src.
15899 instruct testN_reg(rFlagsReg cr, rRegN src, immN0 zero) %{
15900   match(Set cr (CmpN src zero));
15901
15902   format %{ "testl   $src, $src\t# compressed ptr" %}
15903   ins_encode %{ __ testl($src$$Register, $src$$Register); %}
15904   ins_pipe(ialu_cr_reg_imm);
15905 %}
15906 
// Compressed-pointer null check directly on memory: CmpN (LoadN mem) against
// immN0. Selected when the compressed-oops base is non-null and the load has
// no barrier data. The check must be a testl with an all-ones mask (ZF set iff
// the loaded narrow oop is zero), matching the format string; a cmpl against
// -1 would instead set ZF only when the value equals 0xFFFFFFFF.
15907 instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero)
15908 %{
15909   predicate(CompressedOops::base() != nullptr &&
15910             n->in(1)->as_Load()->barrier_data() == 0);
15911   match(Set cr (CmpN (LoadN mem) zero));
15912
15913   ins_cost(500); // XXX
15914   format %{ "testl   $mem, 0xffffffff\t# compressed ptr" %}
15915   ins_encode %{
15916     __ testl($mem$$Address, (int)0xFFFFFFFF);
15917   %}
15918   ins_pipe(ialu_cr_reg_mem);
15919 %}
15920 
// Compressed-pointer null check on memory for a null compressed-oops base:
// compares the memory operand against r12 with a 32-bit cmpl; the format
// string documents r12 as holding zero (R12_heapbase==0). Only for loads
// without barrier data.
15921 instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero)
15922 %{
15923   predicate(CompressedOops::base() == nullptr &&
15924             n->in(1)->as_Load()->barrier_data() == 0);
15925   match(Set cr (CmpN (LoadN mem) zero));
15926
15927   format %{ "cmpl    R12, $mem\t# compressed ptr (R12_heapbase==0)" %}
15928   ins_encode %{
15929     __ cmpl(r12, $mem$$Address);
15930   %}
15931   ins_pipe(ialu_cr_reg_mem);
15932 %}
15933 
15934 // Yanked all unsigned pointer compare operations.
15935 // Pointer compares are done with CmpP which is already unsigned.
15936 
// Signed long compare, register vs register: CmpL -> cmpq.
15937 instruct compL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
15938 %{
15939   match(Set cr (CmpL op1 op2));
15940
15941   format %{ "cmpq    $op1, $op2" %}
15942   ins_encode %{
15943     __ cmpq($op1$$Register, $op2$$Register);
15944   %}
15945   ins_pipe(ialu_cr_reg_reg);
15946 %}
15947 
// Signed long compare, register vs immL32 constant: cmpq reg, imm.
15948 instruct compL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
15949 %{
15950   match(Set cr (CmpL op1 op2));
15951
15952   format %{ "cmpq    $op1, $op2" %}
15953   ins_encode %{
15954     __ cmpq($op1$$Register, $op2$$constant);
15955   %}
15956   ins_pipe(ialu_cr_reg_imm);
15957 %}
15958 
// Signed long compare, register vs memory: folds the LoadL into cmpq's memory
// operand. Unlike the int register/memory compare, no ins_cost is specified.
15959 instruct compL_rReg_mem(rFlagsReg cr, rRegL op1, memory op2)
15960 %{
15961   match(Set cr (CmpL op1 (LoadL op2)));
15962
15963   format %{ "cmpq    $op1, $op2" %}
15964   ins_encode %{
15965     __ cmpq($op1$$Register, $op2$$Address);
15966   %}
15967   ins_pipe(ialu_cr_reg_mem);
15968 %}
15969 
// CmpL against the constant zero: emitted as testq src, src. The zero operand
// exists only for matching.
15970 instruct testL_reg(rFlagsReg cr, rRegL src, immL0 zero)
15971 %{
15972   match(Set cr (CmpL src zero));
15973
15974   format %{ "testq   $src, $src" %}
15975   ins_encode %{
15976     __ testq($src$$Register, $src$$Register);
15977   %}
15978   ins_pipe(ialu_cr_reg_imm);
15979 %}
15980 
// (src & con) compared with zero for longs: folded into testq src, imm with
// an immL32 constant; the AND result is not written to a register.
15981 instruct testL_reg_imm(rFlagsReg cr, rRegL src, immL32 con, immL0 zero)
15982 %{
15983   match(Set cr (CmpL (AndL src con) zero));
15984
15985   format %{ "testq   $src, $con\t# long" %}
15986   ins_encode %{
15987     __ testq($src$$Register, $con$$constant);
15988   %}
15989   ins_pipe(ialu_cr_reg_imm);
15990 %}
15991 
// (src1 & src2) compared with zero for longs: folded into testq src1, src2.
15992 instruct testL_reg_reg(rFlagsReg cr, rRegL src1, rRegL src2, immL0 zero)
15993 %{
15994   match(Set cr (CmpL (AndL src1 src2) zero));
15995
15996   format %{ "testq   $src1, $src2\t# long" %}
15997   ins_encode %{
15998     __ testq($src1$$Register, $src2$$Register);
15999   %}
16000   ins_pipe(ialu_cr_reg_imm);
16001 %}
16002 
// (src & LoadL(mem)) compared with zero: folds the load and the AND into
// testq src, [mem].
16003 instruct testL_reg_mem(rFlagsReg cr, rRegL src, memory mem, immL0 zero)
16004 %{
16005   match(Set cr (CmpL (AndL src (LoadL mem)) zero));
16006
16007   format %{ "testq   $src, $mem" %}
16008   ins_encode %{
16009     __ testq($src$$Register, $mem$$Address);
16010   %}
16011   ins_pipe(ialu_cr_reg_mem);
16012 %}
16013 
// Same as testL_reg_mem, but the register side is a pointer reinterpreted as
// a long via CastP2X; the cast needs no code, so the encoding is the same
// testq src, [mem].
16014 instruct testL_reg_mem2(rFlagsReg cr, rRegP src, memory mem, immL0 zero)
16015 %{
16016   match(Set cr (CmpL (AndL (CastP2X src) (LoadL mem)) zero));
16017
16018   format %{ "testq   $src, $mem" %}
16019   ins_encode %{
16020     __ testq($src$$Register, $mem$$Address);
16021   %}
16022   ins_pipe(ialu_cr_reg_mem);
16023 %}
16024 
16025 // Manifest a CmpU result in an integer register.  Very painful.
16026 // This is the test to avoid.
// Three-way unsigned int compare (CmpU3) materialized in an integer register.
// dst is preset to -1 and kept when src1 is below src2 (unsigned); otherwise
// setcc(notZero) overwrites it from the compare flags (0 when equal, 1 when
// not equal). The flags register is clobbered (KILL flags).
// The format string is tagged CmpU3 and ends the setcc line with a newline so
// that "done:" is printed on its own line.
16027 instruct cmpU3_reg_reg(rRegI dst, rRegI src1, rRegI src2, rFlagsReg flags)
16028 %{
16029   match(Set dst (CmpU3 src1 src2));
16030   effect(KILL flags);
16031
16032   ins_cost(275); // XXX
16033   format %{ "cmpl    $src1, $src2\t# CmpU3\n\t"
16034             "movl    $dst, -1\n\t"
16035             "jb,u    done\n\t"
16036             "setcc   $dst \t# emits setne + movzbl or setzune for APX\n"
16037             "done:" %}
16038   ins_encode %{
16039     Label done;
16040     __ cmpl($src1$$Register, $src2$$Register);
16041     __ movl($dst$$Register, -1);
16042     __ jccb(Assembler::below, done);
16043     __ setcc(Assembler::notZero, $dst$$Register);
16044     __ bind(done);
16045   %}
16046   ins_pipe(pipe_slow);
16047 %}
16048 
16049 // Manifest a CmpL result in an integer register.  Very painful.
16050 // This is the test to avoid.
// Three-way signed long compare (CmpL3) materialized in an integer register.
// dst is preset to -1 and kept when src1 is less than src2 (signed); otherwise
// setcc(notZero) overwrites it from the compare flags (0 when equal, 1 when
// not equal). The flags register is clobbered (KILL flags).
// The format string ends the setcc line with a newline so that "done:" is
// printed on its own line.
16051 instruct cmpL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
16052 %{
16053   match(Set dst (CmpL3 src1 src2));
16054   effect(KILL flags);
16055
16056   ins_cost(275); // XXX
16057   format %{ "cmpq    $src1, $src2\t# CmpL3\n\t"
16058             "movl    $dst, -1\n\t"
16059             "jl,s    done\n\t"
16060             "setcc   $dst \t# emits setne + movzbl or setzune for APX\n"
16061             "done:" %}
16062   ins_encode %{
16063     Label done;
16064     __ cmpq($src1$$Register, $src2$$Register);
16065     __ movl($dst$$Register, -1);
16066     __ jccb(Assembler::less, done);
16067     __ setcc(Assembler::notZero, $dst$$Register);
16068     __ bind(done);
16069   %}
16070   ins_pipe(pipe_slow);
16071 %}
16072 
16073 // Manifest a CmpUL result in an integer register.  Very painful.
16074 // This is the test to avoid.
// Three-way unsigned long compare (CmpUL3) materialized in an integer
// register. dst is preset to -1 and kept when src1 is below src2 (unsigned);
// otherwise setcc(notZero) overwrites it from the compare flags (0 when equal,
// 1 when not equal). The flags register is clobbered (KILL flags).
// The format string is tagged CmpUL3 and ends the setcc line with a newline so
// that "done:" is printed on its own line.
16075 instruct cmpUL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
16076 %{
16077   match(Set dst (CmpUL3 src1 src2));
16078   effect(KILL flags);
16079
16080   ins_cost(275); // XXX
16081   format %{ "cmpq    $src1, $src2\t# CmpUL3\n\t"
16082             "movl    $dst, -1\n\t"
16083             "jb,u    done\n\t"
16084             "setcc   $dst \t# emits setne + movzbl or setzune for APX\n"
16085             "done:" %}
16086   ins_encode %{
16087     Label done;
16088     __ cmpq($src1$$Register, $src2$$Register);
16089     __ movl($dst$$Register, -1);
16090     __ jccb(Assembler::below, done);
16091     __ setcc(Assembler::notZero, $dst$$Register);
16092     __ bind(done);
16093   %}
16094   ins_pipe(pipe_slow);
16095 %}
16096 
16097 // Unsigned long compare Instructions; really, same as signed long except they
16098 // produce an rFlagsRegU instead of rFlagsReg.
// Unsigned long compare, register vs register: same cmpq encoding as the
// signed rule, but the result operand is rFlagsRegU.
16099 instruct compUL_rReg(rFlagsRegU cr, rRegL op1, rRegL op2)
16100 %{
16101   match(Set cr (CmpUL op1 op2));
16102
16103   format %{ "cmpq    $op1, $op2\t# unsigned" %}
16104   ins_encode %{
16105     __ cmpq($op1$$Register, $op2$$Register);
16106   %}
16107   ins_pipe(ialu_cr_reg_reg);
16108 %}
16109 
// Unsigned long compare, register vs immL32 constant: cmpq reg, imm producing
// rFlagsRegU.
16110 instruct compUL_rReg_imm(rFlagsRegU cr, rRegL op1, immL32 op2)
16111 %{
16112   match(Set cr (CmpUL op1 op2));
16113
16114   format %{ "cmpq    $op1, $op2\t# unsigned" %}
16115   ins_encode %{
16116     __ cmpq($op1$$Register, $op2$$constant);
16117   %}
16118   ins_pipe(ialu_cr_reg_imm);
16119 %}
16120 
// Unsigned long compare, register vs memory: folds the LoadL into cmpq's
// memory operand, producing rFlagsRegU.
16121 instruct compUL_rReg_mem(rFlagsRegU cr, rRegL op1, memory op2)
16122 %{
16123   match(Set cr (CmpUL op1 (LoadL op2)));
16124
16125   format %{ "cmpq    $op1, $op2\t# unsigned" %}
16126   ins_encode %{
16127     __ cmpq($op1$$Register, $op2$$Address);
16128   %}
16129   ins_pipe(ialu_cr_reg_mem);
16130 %}
16131 
// CmpUL against the constant zero: emitted as testq src, src, producing
// rFlagsRegU. The zero operand exists only for matching.
16132 instruct testUL_reg(rFlagsRegU cr, rRegL src, immL0 zero)
16133 %{
16134   match(Set cr (CmpUL src zero));
16135
16136   format %{ "testq   $src, $src\t# unsigned" %}
16137   ins_encode %{
16138     __ testq($src$$Register, $src$$Register);
16139   %}
16140   ins_pipe(ialu_cr_reg_imm);
16141 %}
16142 
// Compare of a signed byte loaded from memory against an immI8 constant:
// the LoadB and CmpI are folded into a single cmpb [mem], imm.
16143 instruct compB_mem_imm(rFlagsReg cr, memory mem, immI8 imm)
16144 %{
16145   match(Set cr (CmpI (LoadB mem) imm));
16146
16147   ins_cost(125);
16148   format %{ "cmpb    $mem, $imm" %}
16149   ins_encode %{ __ cmpb($mem$$Address, $imm$$constant); %}
16150   ins_pipe(ialu_cr_reg_mem);
16151 %}
16152 
// (LoadUB(mem) & imm) compared with zero: folded into testb [mem], imm. The
// mask operand is restricted to the immU7 operand class.
16153 instruct testUB_mem_imm(rFlagsReg cr, memory mem, immU7 imm, immI_0 zero)
16154 %{
16155   match(Set cr (CmpI (AndI (LoadUB mem) imm) zero));
16156
16157   ins_cost(125);
16158   format %{ "testb   $mem, $imm\t# ubyte" %}
16159   ins_encode %{ __ testb($mem$$Address, $imm$$constant); %}
16160   ins_pipe(ialu_cr_reg_mem);
16161 %}
16162 
// (LoadB(mem) & imm) compared with zero: folded into testb [mem], imm. The
// mask operand is restricted to the immI8 operand class.
16163 instruct testB_mem_imm(rFlagsReg cr, memory mem, immI8 imm, immI_0 zero)
16164 %{
16165   match(Set cr (CmpI (AndI (LoadB mem) imm) zero));
16166
16167   ins_cost(125);
16168   format %{ "testb   $mem, $imm\t# byte" %}
16169   ins_encode %{ __ testb($mem$$Address, $imm$$constant); %}
16170   ins_pipe(ialu_cr_reg_mem);
16171 %}
16172 
16173 //----------Max and Min--------------------------------------------------------
16174 // Min Instructions
16175 
// Helper with no match rule, instantiated from minI_rReg's expand (non-APX
// only): conditional move of src into dst when the preceding compare found
// "greater". dst is both read and written (USE_DEF).
16176 instruct cmovI_reg_g(rRegI dst, rRegI src, rFlagsReg cr)
16177 %{
16178   predicate(!UseAPX);
16179   effect(USE_DEF dst, USE src, USE cr);
16180
16181   format %{ "cmovlgt $dst, $src\t# min" %}
16182   ins_encode %{
16183     __ cmovl(Assembler::greater, $dst$$Register, $src$$Register);
16184   %}
16185   ins_pipe(pipe_cmov_reg);
16186 %}
16187 
// APX counterpart of cmovI_reg_g, instantiated from minI_rReg_ndd's expand:
// three-operand ecmovl on the "greater" condition with a separate destination
// (dst is DEF only; src1 and src2 are plain uses).
// NOTE(review): presumably dst receives src2 when the condition holds and src1
// otherwise - confirm against the ecmovl assembler definition.
16188 instruct cmovI_reg_g_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
16189 %{
16190   predicate(UseAPX);
16191   effect(DEF dst, USE src1, USE src2, USE cr);
16192
16193   format %{ "ecmovlgt $dst, $src1, $src2\t# min ndd" %}
16194   ins_encode %{
16195     __ ecmovl(Assembler::greater, $dst$$Register, $src1$$Register, $src2$$Register);
16196   %}
16197   ins_pipe(pipe_cmov_reg);
16198 %}
16199 
// MinI for non-APX targets, two-address form (dst = min(dst, src)). Has no
// encoding of its own: it expands into a compare of dst with src followed by
// a conditional move of src into dst on "greater".
16200 instruct minI_rReg(rRegI dst, rRegI src)
16201 %{
16202   predicate(!UseAPX);
16203   match(Set dst (MinI dst src));
16204
16205   ins_cost(200);
16206   expand %{
16207     rFlagsReg cr;
16208     compI_rReg(cr, dst, src);
16209     cmovI_reg_g(dst, src, cr);
16210   %}
16211 %}
16212 
// MinI for APX targets, three-operand form (dst = min(src1, src2)). Expands
// into a compare of src1 with src2 followed by the NDD conditional move on
// "greater". Tagged with Flag_ndd_demotable_opr1.
16213 instruct minI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2)
16214 %{
16215   predicate(UseAPX);
16216   match(Set dst (MinI src1 src2));
16217   effect(DEF dst, USE src1, USE src2);
16218   flag(PD::Flag_ndd_demotable_opr1);
16219
16220   ins_cost(200);
16221   expand %{
16222     rFlagsReg cr;
16223     compI_rReg(cr, src1, src2);
16224     cmovI_reg_g_ndd(dst, src1, src2, cr);
16225   %}
16226 %}
16227 
// Max Instructions
// Helper with no match rule, instantiated from maxI_rReg's expand (non-APX
// only): conditional move of src into dst when the preceding compare found
// "less". dst is both read and written (USE_DEF).
16228 instruct cmovI_reg_l(rRegI dst, rRegI src, rFlagsReg cr)
16229 %{
16230   predicate(!UseAPX);
16231   effect(USE_DEF dst, USE src, USE cr);
16232
16233   format %{ "cmovllt $dst, $src\t# max" %}
16234   ins_encode %{
16235     __ cmovl(Assembler::less, $dst$$Register, $src$$Register);
16236   %}
16237   ins_pipe(pipe_cmov_reg);
16238 %}
16239 
// APX counterpart of cmovI_reg_l, instantiated from maxI_rReg_ndd's expand:
// three-operand ecmovl on the "less" condition with a separate destination
// (dst is DEF only; src1 and src2 are plain uses).
// NOTE(review): presumably dst receives src2 when the condition holds and src1
// otherwise - confirm against the ecmovl assembler definition.
16240 instruct cmovI_reg_l_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
16241 %{
16242   predicate(UseAPX);
16243   effect(DEF dst, USE src1, USE src2, USE cr);
16244
16245   format %{ "ecmovllt $dst, $src1, $src2\t# max ndd" %}
16246   ins_encode %{
16247     __ ecmovl(Assembler::less, $dst$$Register, $src1$$Register, $src2$$Register);
16248   %}
16249   ins_pipe(pipe_cmov_reg);
16250 %}
16251 
// MaxI for non-APX targets, two-address form (dst = max(dst, src)). Has no
// encoding of its own: it expands into a compare of dst with src followed by
// a conditional move of src into dst on "less".
16252 instruct maxI_rReg(rRegI dst, rRegI src)
16253 %{
16254   predicate(!UseAPX);
16255   match(Set dst (MaxI dst src));
16256
16257   ins_cost(200);
16258   expand %{
16259     rFlagsReg cr;
16260     compI_rReg(cr, dst, src);
16261     cmovI_reg_l(dst, src, cr);
16262   %}
16263 %}
16264 
// MaxI for APX targets, three-operand form (dst = max(src1, src2)). Expands
// into a compare of src1 with src2 followed by the NDD conditional move on
// "less". Tagged with Flag_ndd_demotable_opr1.
16265 instruct maxI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2)
16266 %{
16267   predicate(UseAPX);
16268   match(Set dst (MaxI src1 src2));
16269   effect(DEF dst, USE src1, USE src2);
16270   flag(PD::Flag_ndd_demotable_opr1);
16271
16272   ins_cost(200);
16273   expand %{
16274     rFlagsReg cr;
16275     compI_rReg(cr, src1, src2);
16276     cmovI_reg_l_ndd(dst, src1, src2, cr);
16277   %}
16278 %}
16279 
16280 // ============================================================================
16281 // Branch Instructions
16282 
16283 // Jump Direct - Label defines a relative address from JMP+1
// Unconditional branch for Goto nodes. The jmp is emitted with the second
// argument false ("always long jump" per the inline comment), and the rule
// declares a fixed size of 5 bytes.
16284 instruct jmpDir(label labl)
16285 %{
16286   match(Goto);
16287   effect(USE labl);
16288
16289   ins_cost(300);
16290   format %{ "jmp     $labl" %}
16291   size(5);
16292   ins_encode %{
16293     Label* L = $labl$$label;
16294     __ jmp(*L, false); // Always long jump
16295   %}
16296   ins_pipe(pipe_jmp);
16297 %}
16298 
16299 // Jump Direct Conditional - Label defines a relative address from Jcc+1
// Conditional branch for If nodes on signed flags (cmpOp / rFlagsReg). The
// condition code comes from the cop operand; the jcc is always emitted in its
// long form, with a declared size of 6 bytes.
16300 instruct jmpCon(cmpOp cop, rFlagsReg cr, label labl)
16301 %{
16302   match(If cop cr);
16303   effect(USE labl);
16304
16305   ins_cost(300);
16306   format %{ "j$cop     $labl" %}
16307   size(6);
16308   ins_encode %{
16309     Label* L = $labl$$label;
16310     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16311   %}
16312   ins_pipe(pipe_jcc);
16313 %}
16314 
16315 // Jump Direct Conditional (counted loop end) - Label defines a relative address from Jcc+1
// Conditional back-branch for CountedLoopEnd nodes on signed flags. Encoding
// is the same long-form jcc as jmpCon; only the matched ideal node differs.
16316 instruct jmpLoopEnd(cmpOp cop, rFlagsReg cr, label labl)
16317 %{
16318   match(CountedLoopEnd cop cr);
16319   effect(USE labl);
16320
16321   ins_cost(300);
16322   format %{ "j$cop     $labl\t# loop end" %}
16323   size(6);
16324   ins_encode %{
16325     Label* L = $labl$$label;
16326     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16327   %}
16328   ins_pipe(pipe_jcc);
16329 %}
16330 
16331 // Jump Direct Conditional - using unsigned comparison
// Conditional branch for If nodes on unsigned flags (cmpOpU / rFlagsRegU).
// Long-form jcc with a declared size of 6 bytes.
16332 instruct jmpConU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
16333   match(If cop cmp);
16334   effect(USE labl);
16335
16336   ins_cost(300);
16337   format %{ "j$cop,u   $labl" %}
16338   size(6);
16339   ins_encode %{
16340     Label* L = $labl$$label;
16341     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16342   %}
16343   ins_pipe(pipe_jcc);
16344 %}
16345 
// Jump Direct Conditional for a cmpOpUCF condition evaluated on
// rFlagsRegUCF.  Emits a single long Jcc (size(6)); jmpConUCF_short is the
// short-offset replacement.
// NOTE(review): presumably these flags come from an unordered floating-point
// compare - confirm against the cmpOpUCF / rFlagsRegUCF operand definitions.
instruct jmpConUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "j$cop,u   $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}
16359 
// Jump Direct Conditional for cmpOpUCF2 conditions, which need two branch
// instructions.  Only notEqual and equal are handled; any other condition
// is a fatal error.
//   notEqual: branch to $labl if the parity flag is set, or if notEqual.
//   equal:    skip the branch if the parity flag is set, otherwise branch
//             to $labl if equal.
// No size() is declared: the two cases emit different sequences (the equal
// case uses a short jccb for the local skip).
instruct jmpConUCF2(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"jp,u    $labl\n\t"
      $$emit$$"j$cop,u   $labl"
    } else {
      $$emit$$"jp,u    done\n\t"
      $$emit$$"j$cop,u   $labl\n\t"
      $$emit$$"done:"
    }
  %}
  ins_encode %{
    Label* target = $labl$$label;
    switch ($cop$$cmpcode) {
      case Assembler::notEqual:
        // Both branches go to the same target.
        __ jcc(Assembler::parity, *target, false);
        __ jcc(Assembler::notEqual, *target, false);
        break;
      case Assembler::equal: {
        // Parity set: fall through past the conditional branch.
        Label skip;
        __ jccb(Assembler::parity, skip);
        __ jcc(Assembler::equal, *target, false);
        __ bind(skip);
        break;
      }
      default:
        ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
%}
16391 
// Jump Direct Conditional - using signed and unsigned comparison
// Condition is a cmpOpUCFE evaluated on rFlagsRegUCFE.  Emits a single long
// Jcc (size(6)); jmpConUCFE_short is the short-offset replacement.
instruct jmpConUCFE(cmpOpUCFE cop, rFlagsRegUCFE cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "j$cop,su   $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}
16406 
16407 // ============================================================================
16408 // The 2nd slow-half of a subtype check.  Scan the subklass's 2ndary
16409 // superklass array for an instance of the superklass.  Set a hidden
16410 // internal cache on a hit (cache is checked with exposed code in
16411 // gen_subtype_check()).  Return NZ for a miss or zero for a hit.  The
16412 // encoding ALSO sets flags.
16413 
// Linear-scan version of the partial subtype check, selected when the
// secondary supers hash table is not in use (!UseSecondarySupersTable).
// Operands are pinned: sub in rsi, super in rax, result in rdi; rcx and the
// flags are killed.  The scan is delegated to
// check_klass_subtype_slow_path_linear, which branches to 'miss' on failure;
// on a hit execution falls through and $result is cleared.
instruct partialSubtypeCheck(rdi_RegP result,
                             rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
                             rFlagsReg cr)
%{
  match(Set result (PartialSubtypeCheck sub super));
  predicate(!UseSecondarySupersTable);
  effect(KILL rcx, KILL cr);

  ins_cost(1100);  // slightly larger than partialSubtypeCheckVarSuper (1000)
  format %{ "movq    rdi, [$sub + in_bytes(Klass::secondary_supers_offset())]\n\t"
            "movl    rcx, [rdi + Array<Klass*>::length_offset_in_bytes()]\t# length to scan\n\t"
            "addq    rdi, Array<Klass*>::base_offset_in_bytes()\t# Skip to start of data; set NZ in case count is zero\n\t"
            "repne   scasq\t# Scan *rdi++ for a match with rax while rcx--\n\t"
            "jne,s   miss\t\t# Missed: rdi not-zero\n\t"
            "movq    [$sub + in_bytes(Klass::secondary_super_cache_offset())], $super\t# Hit: update cache\n\t"
            "xorq    $result, $result\t\t# Hit: rdi zero\n\t"
            "miss:\t" %}

  ins_encode %{
    Label miss;
    // NB: Callers may assume that, when $result is a valid register,
    // check_klass_subtype_slow_path_linear sets it to a nonzero
    // value.
    __ check_klass_subtype_slow_path_linear($sub$$Register, $super$$Register,
                                            $rcx$$Register, $result$$Register,
                                            nullptr, &miss,
                                            /*set_cond_codes:*/ true);
    // Hit path only: report success as zero.
    __ xorptr($result$$Register, $result$$Register);
    __ bind(miss);
  %}

  ins_pipe(pipe_slow);
%}
16447 
16448 // ============================================================================
16449 // Two versions of hashtable-based partialSubtypeCheck, both used when
16450 // we need to search for a super class in the secondary supers array.
16451 // The first is used when we don't know _a priori_ the class being
16452 // searched for. The second, far more common, is used when we do know:
16453 // this is used for instanceof, checkcast, and any case where C2 can
16454 // determine it by constant propagation.
16455 
// Hash-table based partial subtype check for a superclass that is only
// available in a register (selected when UseSecondarySupersTable is set).
// sub/super/result are pinned to rsi/rax/rdi; rdx, rcx, rbx and r11 are
// scratch registers and the flags are killed.  All work is delegated to
// lookup_secondary_supers_table_var.
instruct partialSubtypeCheckVarSuper(rsi_RegP sub, rax_RegP super, rdi_RegP result,
                                     rdx_RegL temp1, rcx_RegL temp2, rbx_RegP temp3, r11_RegL temp4,
                                     rFlagsReg cr)
%{
  predicate(UseSecondarySupersTable);
  match(Set result (PartialSubtypeCheck sub super));
  effect(KILL cr, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4);

  format %{ "partialSubtypeCheck $result, $sub, $super" %}
  ins_cost(1000);

  ins_encode %{
    const Register sub_klass   = $sub$$Register;
    const Register super_klass = $super$$Register;
    const Register result_reg  = $result$$Register;
    __ lookup_secondary_supers_table_var(sub_klass, super_klass,
                                         $temp1$$Register, $temp2$$Register,
                                         $temp3$$Register, $temp4$$Register,
                                         result_reg);
  %}

  ins_pipe(pipe_slow);
%}
16474 
// Hash-table based partial subtype check for a superclass that is a
// constant: the pattern supplies it both in a register (super_reg) and as an
// immediate (super_con).  The constant's hash slot is computed while
// emitting code; then either the table lookup is emitted inline
// (InlineSecondarySupersTest) or a call to the stub for that slot is emitted.
instruct partialSubtypeCheckConstSuper(rsi_RegP sub, rax_RegP super_reg, immP super_con, rdi_RegP result,
                                       rdx_RegL temp1, rcx_RegL temp2, rbx_RegP temp3, r11_RegL temp4,
                                       rFlagsReg cr)
%{
  predicate(UseSecondarySupersTable);
  match(Set result (PartialSubtypeCheck sub (Binary super_reg super_con)));
  effect(KILL cr, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4);

  format %{ "partialSubtypeCheck $result, $sub, $super_reg, $super_con" %}
  ins_cost(700);  // cheaper than partialSubtypeCheckVarSuper (1000)

  ins_encode %{
    const u1 slot = ((Klass*)$super_con$$constant)->hash_slot();
    if (!InlineSecondarySupersTest) {
      // Out-of-line: one stub exists per hash slot.
      __ call(RuntimeAddress(StubRoutines::lookup_secondary_supers_table_stub(slot)));
    } else {
      __ lookup_secondary_supers_table_const($sub$$Register, $super_reg$$Register,
                                             $temp1$$Register, $temp2$$Register,
                                             $temp3$$Register, $temp4$$Register,
                                             $result$$Register,
                                             slot);
    }
  %}

  ins_pipe(pipe_slow);
%}
16499 
16500 // ============================================================================
16501 // Branch Instructions -- short offset versions
16502 //
16503 // These instructions are used to replace jumps of a long offset (the default
16504 // match) with jumps of a shorter offset.  These instructions are all tagged
16505 // with the ins_short_branch attribute, which causes the ADLC to suppress the
16506 // match rules in general matching.  Instead, the ADLC generates a conversion
16507 // method in the MachNode which can be used to do in-place replacement of the
16508 // long variant with the shorter variant.  The compiler will determine if a
16509 // branch can be taken by the is_short_branch_offset() predicate in the machine
16510 // specific code section of the file.
16511 
// Jump Direct - Label defines a relative address from JMP+1
// Short-offset replacement for jmpDir: emits jmpb with a fixed size(2).
// Tagged ins_short_branch(1), so it is substituted for the long form rather
// than chosen by general matching (see the section comment above).
instruct jmpDir_short(label labl) %{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "jmp,s   $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jmpb(*L);
  %}
  ins_pipe(pipe_jmp);
  ins_short_branch(1);
%}
16527 
// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Short-offset replacement for jmpCon: emits jccb with a fixed size(2).
instruct jmpCon_short(cmpOp cop, rFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "j$cop,s   $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}
16543 
// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Short-offset replacement for jmpLoopEnd (matches CountedLoopEnd): emits
// jccb with a fixed size(2).
instruct jmpLoopEnd_short(cmpOp cop, rFlagsReg cr, label labl) %{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "j$cop,s   $labl\t# loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}
16559 
// Jump Direct Conditional - using unsigned comparison
// Short-offset replacement for jmpConU: emits jccb with a fixed size(2).
instruct jmpConU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "j$cop,us  $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}
16575 
// Short-offset replacement for jmpConUCF (cmpOpUCF condition on
// rFlagsRegUCF): emits jccb with a fixed size(2).
instruct jmpConUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "j$cop,us  $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}
16590 
// Short-offset replacement for jmpConUCF2.  Both cases emit exactly two
// short jccb instructions, hence the fixed size(4).
//   notEqual: branch to $labl if the parity flag is set, or if notEqual.
//   equal:    skip the branch if the parity flag is set, otherwise branch
//             to $labl if equal.
// Any other condition is a fatal error.
instruct jmpConUCF2_short(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"jp,u,s  $labl\n\t"
      $$emit$$"j$cop,u,s  $labl"
    } else {
      $$emit$$"jp,u,s  done\n\t"
      $$emit$$"j$cop,u,s  $labl\n\t"
      $$emit$$"done:"
    }
  %}
  size(4);
  ins_encode %{
    Label* target = $labl$$label;
    switch ($cop$$cmpcode) {
      case Assembler::notEqual:
        // Both branches go to the same target.
        __ jccb(Assembler::parity, *target);
        __ jccb(Assembler::notEqual, *target);
        break;
      case Assembler::equal: {
        // Parity set: fall through past the conditional branch.
        Label skip;
        __ jccb(Assembler::parity, skip);
        __ jccb(Assembler::equal, *target);
        __ bind(skip);
        break;
      }
      default:
        ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}
16624 
// Jump Direct Conditional - using signed and unsigned comparison
// Short-offset replacement for jmpConUCFE: emits jccb with a fixed size(2).
instruct jmpConUCFE_short(cmpOpUCFE cop, rFlagsRegUCFE cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "j$cop,sus  $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}
16640 
16641 // ============================================================================
16642 // inlined locking and unlocking
16643 
// Inlined lock fast path.  Matches FastLock and defines the condition flags
// (Set cr ...).  $box is pinned to rbx and is both read and destroyed
// (USE_KILL); rax and $tmp are scratch registers.  The work is delegated to
// MacroAssembler fast_lock, which is also handed r15_thread.
instruct cmpFastLock(rFlagsReg cr, rRegP object, rbx_RegP box, rax_RegI rax_reg, rRegP tmp) %{
  match(Set cr (FastLock object box));
  effect(TEMP rax_reg, TEMP tmp, USE_KILL box);
  ins_cost(300);
  format %{ "fastlock $object,$box\t! kills $box,$rax_reg,$tmp" %}
  ins_encode %{
    __ fast_lock($object$$Register, $box$$Register, $rax_reg$$Register, $tmp$$Register, r15_thread);
  %}
  ins_pipe(pipe_slow);
%}
16654 
// Inlined unlock fast path.  Matches FastUnlock and defines the condition
// flags (Set cr ...).  The second input is pinned to rax and is both read
// and destroyed (USE_KILL); $tmp is a scratch register.  The work is
// delegated to MacroAssembler fast_unlock, which is also handed r15_thread.
instruct cmpFastUnlock(rFlagsReg cr, rRegP object, rax_RegP rax_reg, rRegP tmp) %{
  match(Set cr (FastUnlock object rax_reg));
  effect(TEMP tmp, USE_KILL rax_reg);
  ins_cost(300);
  format %{ "fastunlock $object,$rax_reg\t! kills $rax_reg,$tmp" %}
  ins_encode %{
    __ fast_unlock($object$$Register, $rax_reg$$Register, $tmp$$Register, r15_thread);
  %}
  ins_pipe(pipe_slow);
%}
16665 
16666 
16667 // ============================================================================
16668 // Safepoint Instructions
// Safepoint poll: a testl that reads the word at [$poll].  The instruction
// is tagged with a poll_type relocation and clobbers the flags (KILL cr).
instruct safePoint_poll_tls(rFlagsReg cr, rRegP poll)
%{
  match(SafePoint poll);
  effect(KILL cr, USE poll);

  format %{ "testl   rax, [$poll]\t"
            "# Safepoint: poll for GC" %}
  ins_cost(125);
  ins_encode %{
    // The relocation must be recorded before the instruction it describes.
    __ relocate(relocInfo::poll_type);
    // Remember where the poll starts so the emitted bytes can be checked.
    address pre_pc = __ pc();
    __ testl(rax, Address($poll$$Register, 0));
    assert(nativeInstruction_at(pre_pc)->is_safepoint_poll(), "must emit test %%eax [reg]");
  %}
  ins_pipe(ialu_reg_mem);
%}
16685 
// MaskAll with a 64-bit general register source, producing a kReg mask.
// The mask length is the vector length of this node; the work is delegated
// to vector_maskall_operation.  This rule has no predicate.
instruct mask_all_evexL(kReg dst, rRegL src) %{
  match(Set dst (MaskAll src));
  format %{ "mask_all_evexL $dst, $src \t! mask all operation" %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
  %}
  ins_pipe( pipe_slow );
%}
16695 
// MaskAll with a 32-bit int source, for vectors with more than 32 elements.
// The int is first sign-extended into the 64-bit scratch register $tmp
// (movslq), which is then passed to vector_maskall_operation.
instruct mask_all_evexI_GT32(kReg dst, rRegI src, rRegL tmp) %{
  predicate(Matcher::vector_length(n) > 32);
  match(Set dst (MaskAll src));
  effect(TEMP tmp);
  format %{ "mask_all_evexI_GT32 $dst, $src \t! using $tmp as TEMP" %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    __ movslq($tmp$$Register, $src$$Register);
    __ vector_maskall_operation($dst$$KRegister, $tmp$$Register, mask_len);
  %}
  ins_pipe( pipe_slow );
%}
16708 
16709 // ============================================================================
16710 // Procedure Call/Return Instructions
// Call Java Static Instruction
// Matches CallStaticJava.  The encoding is the enc_class sequence
// clear_avx, Java_Static_Call, call_epilog, and the instruction requests
// ins_alignment(4).
// Note: If this code changes, the corresponding ret_addr_offset() and
//       compute_padding() functions will have to be adjusted.
instruct CallStaticJavaDirect(method meth) %{
  match(CallStaticJava);
  effect(USE meth);

  ins_cost(300);
  format %{ "call,static " %}
  opcode(0xE8); /* E8 cd */
  ins_encode(clear_avx, Java_Static_Call(meth), call_epilog);
  ins_pipe(pipe_slow);
  ins_alignment(4);
%}
16725 
// Call Java Dynamic Instruction
// Matches CallDynamicJava.  The encoding is the enc_class sequence
// clear_avx, Java_Dynamic_Call, call_epilog; per the format string the call
// is preceded by a load of Universe::non_oop_word() into rax.
// Note: If this code changes, the corresponding ret_addr_offset() and
//       compute_padding() functions will have to be adjusted.
instruct CallDynamicJavaDirect(method meth)
%{
  match(CallDynamicJava);
  effect(USE meth);

  ins_cost(300);
  format %{ "movq    rax, #Universe::non_oop_word()\n\t"
            "call,dynamic " %}
  ins_encode(clear_avx, Java_Dynamic_Call(meth), call_epilog);
  ins_pipe(pipe_slow);
  ins_alignment(4);
%}
16741 
// Call Runtime Instruction
// Matches CallRuntime.  Encoded as clear_avx followed by Java_To_Runtime.
instruct CallRuntimeDirect(method meth)
%{
  match(CallRuntime);
  effect(USE meth);

  ins_cost(300);
  format %{ "call,runtime " %}
  ins_encode(clear_avx, Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
16753 
// Call runtime without safepoint
// Matches CallLeaf.  Same encoding as CallRuntimeDirect: clear_avx followed
// by Java_To_Runtime.
instruct CallLeafDirect(method meth)
%{
  match(CallLeaf);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf,runtime " %}
  ins_encode(clear_avx, Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
16765 
// Call runtime without safepoint and with vector arguments
// Matches CallLeafVector.  Unlike the other runtime call rules here, the
// encoding does not include clear_avx.
// NOTE(review): presumably clear_avx is omitted so the vector argument
// registers are left intact - confirm against the clear_avx enc_class.
instruct CallLeafDirectVector(method meth)
%{
  match(CallLeafVector);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf,vector " %}
  ins_encode(Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
16777 
// Call runtime without safepoint
// Matches CallLeafNoFP.  Same encoding as CallLeafDirect: clear_avx followed
// by Java_To_Runtime.
instruct CallLeafNoFPDirect(method meth)
%{
  match(CallLeafNoFP);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf_nofp,runtime " %}
  ins_encode(clear_avx, Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
16789 
// Return Instruction
// Remove the return address & jump to it.
// NOTE(review): an earlier version of this comment said a nop is always
// emitted after the ret to leave room for safepoint patching.  The encoding
// below emits only `ret(0)`, so that remark looks stale - confirm.
instruct Ret()
%{
  match(Return);

  format %{ "ret" %}
  ins_encode %{
    __ ret(0);
  %}
  ins_pipe(pipe_jmp);
%}
16804 
// Tail Call; Jump from runtime stub to Java code.
// Also known as an 'interprocedural jump'.
// Target of jump will eventually return to caller.
// TailJump below removes the return address.
// Don't use rbp for 'jump_target' because a MachEpilogNode has already been
// emitted just above the TailCall which has reset rbp to the caller state.
// method_ptr is pinned to rbx; it is not referenced by the encoding, which
// only emits the indirect jump through $jump_target.
instruct TailCalljmpInd(no_rbp_RegP jump_target, rbx_RegP method_ptr)
%{
  match(TailCall jump_target method_ptr);

  ins_cost(300);
  format %{ "jmp     $jump_target\t# rbx holds method" %}
  ins_encode %{
    __ jmp($jump_target$$Register);
  %}
  ins_pipe(pipe_jmp);
%}
16822 
// Tail Jump; remove the return address; jump to target.
// TailCall above leaves the return address around.
// ex_oop is pinned to rax and is not referenced by the encoding.
// NOTE(review): the return address is popped into rdx, but rdx does not
// appear in any effect() here and jump_target is only constrained to
// no_rbp_RegP - confirm jump_target can never be allocated to rdx.
instruct tailjmpInd(no_rbp_RegP jump_target, rax_RegP ex_oop)
%{
  match(TailJump jump_target ex_oop);

  ins_cost(300);
  format %{ "popq    rdx\t# pop return address\n\t"
            "jmp     $jump_target" %}
  ins_encode %{
    __ popq(as_Register(RDX_enc));
    __ jmp($jump_target$$Register);
  %}
  ins_pipe(pipe_jmp);
%}
16838 
// Forward exception.
// Matches ForwardException and emits a jump (not a call) to
// StubRoutines::forward_exception_entry().  noreg is passed as the second
// argument of jump().
instruct ForwardExceptionjmp()
%{
  match(ForwardException);

  format %{ "jmp     forward_exception_stub" %}
  ins_encode %{
    __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()), noreg);
  %}
  ins_pipe(pipe_jmp);
%}
16850 
// Create exception oop: created by stack-crawling runtime code.
// Created exception is now available to this handler, and is setup
// just prior to jumping to this handler.  No code emitted.
// The rule only tells the allocator that the CreateEx result lives in rax
// (rax_RegP); size(0) and the empty ins_encode() mean zero bytes are emitted.
instruct CreateException(rax_RegP ex_oop)
%{
  match(Set ex_oop (CreateEx));

  size(0);
  // use the following format syntax
  format %{ "# exception oop is in rax; no code emitted" %}
  ins_encode();
  ins_pipe(empty);
%}
16864 
// Rethrow exception:
// The exception oop will come in the first argument position.
// Then JUMP (not call) to the rethrow stub code.
// The jump target is OptoRuntime::rethrow_stub(); noreg is passed as the
// second argument of jump().
instruct RethrowException()
%{
  match(Rethrow);

  // use the following format syntax
  format %{ "jmp     rethrow_stub" %}
  ins_encode %{
    __ jump(RuntimeAddress(OptoRuntime::rethrow_stub()), noreg);
  %}
  ins_pipe(pipe_jmp);
%}
16879 
16880 // ============================================================================
// This name is KNOWN by the ADLC and cannot be changed.
// The ADLC forces a 'TypeRawPtr::BOTTOM' output type
// for this guy.
// Matches ThreadLocal.  The result operand is pinned to r15 (r15_RegP), so
// no instruction is needed: size(0) with an empty encoding.
instruct tlsLoadP(r15_RegP dst) %{
  match(Set dst (ThreadLocal));
  effect(DEF dst);

  size(0);
  format %{ "# TLS is in R15" %}
  ins_encode( /*empty encoding*/ );
  ins_pipe(ialu_reg_reg);
%}
16893 
// Float add, non-AVX form (UseAVX == 0): dst = dst + src, both in XMM
// registers, via the two-operand addss.
instruct addF_reg(regF dst, regF src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddF dst src));

  format %{ "addss   $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ addss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
16905 
// Float add, non-AVX form (UseAVX == 0): dst = dst + [src].  The LoadF of
// the second operand is folded into addss as a memory operand.
instruct addF_mem(regF dst, memory src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddF dst (LoadF src)));

  format %{ "addss   $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ addss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
16917 
// Float add, non-AVX form (UseAVX == 0): dst = dst + con, where the float
// constant is read from the constant table ($constantaddress).
instruct addF_imm(regF dst, immF con) %{
  predicate(UseAVX == 0);
  match(Set dst (AddF dst con));
  format %{ "addss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ addss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
16928 
// Float add, AVX form (UseAVX > 0): dst = src1 + src2 via the three-operand
// vaddss, so dst need not be one of the inputs.
instruct addF_reg_reg(regF dst, regF src1, regF src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddF src1 src2));

  format %{ "vaddss  $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
16940 
// Float add, AVX form (UseAVX > 0): dst = src1 + [src2].  The LoadF of the
// second operand is folded into vaddss as a memory operand.
instruct addF_reg_mem(regF dst, regF src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddF src1 (LoadF src2)));

  format %{ "vaddss  $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
16952 
// Float add, AVX form (UseAVX > 0): dst = src + con, where the float
// constant is read from the constant table ($constantaddress).
instruct addF_reg_imm(regF dst, regF src, immF con) %{
  predicate(UseAVX > 0);
  match(Set dst (AddF src con));

  format %{ "vaddss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
16964 
// Double add, non-AVX form (UseAVX == 0): dst = dst + src, both in XMM
// registers, via the two-operand addsd.
instruct addD_reg(regD dst, regD src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddD dst src));

  format %{ "addsd   $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ addsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
16976 
// Double add, non-AVX form (UseAVX == 0): dst = dst + [src].  The LoadD of
// the second operand is folded into addsd as a memory operand.
instruct addD_mem(regD dst, memory src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddD dst (LoadD src)));

  format %{ "addsd   $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ addsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
16988 
// Double add, non-AVX form (UseAVX == 0): dst = dst + con, where the double
// constant is read from the constant table ($constantaddress).
instruct addD_imm(regD dst, immD con) %{
  predicate(UseAVX == 0);
  match(Set dst (AddD dst con));
  format %{ "addsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ addsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
16999 
// Double add, AVX form (UseAVX > 0): dst = src1 + src2 via the
// three-operand vaddsd, so dst need not be one of the inputs.
instruct addD_reg_reg(regD dst, regD src1, regD src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddD src1 src2));

  format %{ "vaddsd  $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
17011 
// Double add, AVX form (UseAVX > 0): dst = src1 + [src2].  The LoadD of the
// second operand is folded into vaddsd as a memory operand.
instruct addD_reg_mem(regD dst, regD src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddD src1 (LoadD src2)));

  format %{ "vaddsd  $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17023 
// Double add, AVX form (UseAVX > 0): dst = src + con, where the double
// constant is read from the constant table ($constantaddress).
instruct addD_reg_imm(regD dst, regD src, immD con) %{
  predicate(UseAVX > 0);
  match(Set dst (AddD src con));

  format %{ "vaddsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17035 
// Float subtract, non-AVX form (UseAVX == 0): dst = dst - src, both in XMM
// registers, via the two-operand subss.
instruct subF_reg(regF dst, regF src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubF dst src));

  format %{ "subss   $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ subss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
17047 
// Float subtract, non-AVX form (UseAVX == 0): dst = dst - [src].  The LoadF
// of the subtrahend is folded into subss as a memory operand.
instruct subF_mem(regF dst, memory src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubF dst (LoadF src)));

  format %{ "subss   $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ subss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17059 
// Float subtract, non-AVX form (UseAVX == 0): dst = dst - con, where the
// float constant is read from the constant table ($constantaddress).
instruct subF_imm(regF dst, immF con) %{
  predicate(UseAVX == 0);
  match(Set dst (SubF dst con));
  format %{ "subss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ subss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17070 
// Float subtract, AVX form (UseAVX > 0): dst = src1 - src2 via the
// three-operand vsubss, so dst need not be one of the inputs.
instruct subF_reg_reg(regF dst, regF src1, regF src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubF src1 src2));

  format %{ "vsubss  $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
17082 
// Float subtract, AVX form (UseAVX > 0): dst = src1 - [src2].  The LoadF of
// the subtrahend is folded into vsubss as a memory operand.
instruct subF_reg_mem(regF dst, regF src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubF src1 (LoadF src2)));

  format %{ "vsubss  $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17094 
// Float subtract, AVX form (UseAVX > 0): dst = src - con, where the float
// constant is read from the constant table ($constantaddress).
instruct subF_reg_imm(regF dst, regF src, immF con) %{
  predicate(UseAVX > 0);
  match(Set dst (SubF src con));

  format %{ "vsubss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17106 
// Double subtract, non-AVX form (UseAVX == 0): dst = dst - src, both in XMM
// registers, via the two-operand subsd.
instruct subD_reg(regD dst, regD src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubD dst src));

  format %{ "subsd   $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ subsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
17118 
// Double subtract, non-AVX form (UseAVX == 0): dst = dst - [src].  The
// LoadD of the subtrahend is folded into subsd as a memory operand.
instruct subD_mem(regD dst, memory src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubD dst (LoadD src)));

  format %{ "subsd   $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ subsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17130 
// Double subtract, non-AVX form (UseAVX == 0): dst = dst - con, where the
// double constant is read from the constant table ($constantaddress).
instruct subD_imm(regD dst, immD con) %{
  predicate(UseAVX == 0);
  match(Set dst (SubD dst con));
  format %{ "subsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ subsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17141 
// Double subtract, AVX form (UseAVX > 0): dst = src1 - src2 via the
// three-operand vsubsd, so dst need not be one of the inputs.
instruct subD_reg_reg(regD dst, regD src1, regD src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubD src1 src2));

  format %{ "vsubsd  $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
17153 
// Double subtract, AVX form (UseAVX > 0): dst = src1 - [src2].  The LoadD
// of the subtrahend is folded into vsubsd as a memory operand.
instruct subD_reg_mem(regD dst, regD src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubD src1 (LoadD src2)));

  format %{ "vsubsd  $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17165 
// Double subtract, AVX form (UseAVX > 0): dst = src - con, where the double
// constant is read from the constant table ($constantaddress).
instruct subD_reg_imm(regD dst, regD src, immD con) %{
  predicate(UseAVX > 0);
  match(Set dst (SubD src con));

  format %{ "vsubsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17177 
// Float multiply, non-AVX form (UseAVX == 0): dst = dst * src, both in XMM
// registers, via the two-operand mulss.
instruct mulF_reg(regF dst, regF src) %{
  predicate(UseAVX == 0);
  match(Set dst (MulF dst src));

  format %{ "mulss   $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ mulss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
17189 
// Float multiply, non-AVX form (UseAVX == 0): dst = dst * [src].  The LoadF
// of the second operand is folded into mulss as a memory operand.
instruct mulF_mem(regF dst, memory src) %{
  predicate(UseAVX == 0);
  match(Set dst (MulF dst (LoadF src)));

  format %{ "mulss   $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ mulss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17201 
// Float multiply, non-AVX form (UseAVX == 0): dst = dst * con, where the
// float constant is read from the constant table ($constantaddress).
instruct mulF_imm(regF dst, immF con) %{
  predicate(UseAVX == 0);
  match(Set dst (MulF dst con));
  format %{ "mulss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ mulss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17212 
// Float multiply, AVX form (UseAVX > 0): dst = src1 * src2 via the
// three-operand vmulss, so dst need not be one of the inputs.
instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulF src1 src2));

  format %{ "vmulss  $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
17224 
// Float multiply, AVX form (UseAVX > 0): dst = src1 * [src2].  The LoadF of
// the second operand is folded into vmulss as a memory operand.
instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulF src1 (LoadF src2)));

  format %{ "vmulss  $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17236 
// Float multiply, AVX form (UseAVX > 0): dst = src * con, where the float
// constant is read from the constant table ($constantaddress).
instruct mulF_reg_imm(regF dst, regF src, immF con) %{
  predicate(UseAVX > 0);
  match(Set dst (MulF src con));

  format %{ "vmulss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17248 
// Double multiply, non-AVX form (UseAVX == 0): dst = dst * src, both in XMM
// registers, via the two-operand mulsd.
instruct mulD_reg(regD dst, regD src) %{
  predicate(UseAVX == 0);
  match(Set dst (MulD dst src));

  format %{ "mulsd   $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ mulsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
17260 
// Double multiply, non-AVX form (UseAVX == 0): dst = dst * [src].  The
// LoadD of the second operand is folded into mulsd as a memory operand.
instruct mulD_mem(regD dst, memory src) %{
  predicate(UseAVX == 0);
  match(Set dst (MulD dst (LoadD src)));

  format %{ "mulsd   $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ mulsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17272 
// Double multiply, non-AVX form (UseAVX == 0): dst = dst * con, where the
// double constant is read from the constant table ($constantaddress).
instruct mulD_imm(regD dst, immD con) %{
  predicate(UseAVX == 0);
  match(Set dst (MulD dst con));
  format %{ "mulsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ mulsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17283 
17284 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{
        // Double multiply of two register operands, selected only when UseAVX > 0.
        // Emits vmulsd with a destination operand separate from both sources.
17285   predicate(UseAVX > 0);
17286   match(Set dst (MulD src1 src2));
17287 
17288   format %{ "vmulsd  $dst, $src1, $src2" %}
17289   ins_cost(150);
17290   ins_encode %{
17291     __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17292   %}
17293   ins_pipe(pipe_slow);
17294 %}
17295 
17296 instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{
        // Double multiply of a register by a double loaded from memory (UseAVX > 0).
        // The LoadD is folded into the vmulsd memory operand.
17297   predicate(UseAVX > 0);
17298   match(Set dst (MulD src1 (LoadD src2)));
17299 
17300   format %{ "vmulsd  $dst, $src1, $src2" %}
17301   ins_cost(150);
17302   ins_encode %{
17303     __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17304   %}
17305   ins_pipe(pipe_slow);
17306 %}
17307 
17308 instruct mulD_reg_imm(regD dst, regD src, immD con) %{
        // Double multiply of a register by a double constant (UseAVX > 0).
        // The constant is addressed through $constantaddress($con).
17309   predicate(UseAVX > 0);
17310   match(Set dst (MulD src con));
17311 
17312   format %{ "vmulsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
17313   ins_cost(150);
17314   ins_encode %{
17315     __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17316   %}
17317   ins_pipe(pipe_slow);
17318 %}
17319 
17320 instruct divF_reg(regF dst, regF src) %{
        // Float divide for UseAVX == 0: dst is the dividend and receives the
        // quotient (two-operand divss).
17321   predicate(UseAVX == 0);
17322   match(Set dst (DivF dst src));
17323 
17324   format %{ "divss   $dst, $src" %}
17325   ins_cost(150);
17326   ins_encode %{
17327     __ divss($dst$$XMMRegister, $src$$XMMRegister);
17328   %}
17329   ins_pipe(pipe_slow);
17330 %}
17331 
17332 instruct divF_mem(regF dst, memory src) %{
        // Float divide for UseAVX == 0 with the divisor loaded from memory.
        // dst is the dividend and receives the quotient.
17333   predicate(UseAVX == 0);
17334   match(Set dst (DivF dst (LoadF src)));
17335 
17336   format %{ "divss   $dst, $src" %}
17337   ins_cost(150);
17338   ins_encode %{
17339     __ divss($dst$$XMMRegister, $src$$Address);
17340   %}
17341   ins_pipe(pipe_slow);
17342 %}
17343 
17344 instruct divF_imm(regF dst, immF con) %{
        // Float divide for UseAVX == 0 by a float constant; dst is updated in place.
        // The constant is addressed through $constantaddress($con).
17345   predicate(UseAVX == 0);
17346   match(Set dst (DivF dst con));
17347   format %{ "divss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
17348   ins_cost(150);
17349   ins_encode %{
17350     __ divss($dst$$XMMRegister, $constantaddress($con));
17351   %}
17352   ins_pipe(pipe_slow);
17353 %}
17354 
17355 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{
        // Float divide src1 / src2 of two register operands (UseAVX > 0).
        // Emits vdivss with a destination operand separate from both sources.
17356   predicate(UseAVX > 0);
17357   match(Set dst (DivF src1 src2));
17358 
17359   format %{ "vdivss  $dst, $src1, $src2" %}
17360   ins_cost(150);
17361   ins_encode %{
17362     __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17363   %}
17364   ins_pipe(pipe_slow);
17365 %}
17366 
17367 instruct divF_reg_mem(regF dst, regF src1, memory src2) %{
        // Float divide of a register by a float loaded from memory (UseAVX > 0).
        // The LoadF is folded into the vdivss memory operand.
17368   predicate(UseAVX > 0);
17369   match(Set dst (DivF src1 (LoadF src2)));
17370 
17371   format %{ "vdivss  $dst, $src1, $src2" %}
17372   ins_cost(150);
17373   ins_encode %{
17374     __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17375   %}
17376   ins_pipe(pipe_slow);
17377 %}
17378 
17379 instruct divF_reg_imm(regF dst, regF src, immF con) %{
        // Float divide of a register by a float constant (UseAVX > 0).
        // The constant is addressed through $constantaddress($con).
17380   predicate(UseAVX > 0);
17381   match(Set dst (DivF src con));
17382 
17383   format %{ "vdivss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
17384   ins_cost(150);
17385   ins_encode %{
17386     __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17387   %}
17388   ins_pipe(pipe_slow);
17389 %}
17390 
17391 instruct divD_reg(regD dst, regD src) %{
        // Double divide for UseAVX == 0: dst is the dividend and receives the
        // quotient (two-operand divsd).
17392   predicate(UseAVX == 0);
17393   match(Set dst (DivD dst src));
17394 
17395   format %{ "divsd   $dst, $src" %}
17396   ins_cost(150);
17397   ins_encode %{
17398     __ divsd($dst$$XMMRegister, $src$$XMMRegister);
17399   %}
17400   ins_pipe(pipe_slow);
17401 %}
17402 
17403 instruct divD_mem(regD dst, memory src) %{
        // Double divide for UseAVX == 0 with the divisor loaded from memory.
        // dst is the dividend and receives the quotient.
17404   predicate(UseAVX == 0);
17405   match(Set dst (DivD dst (LoadD src)));
17406 
17407   format %{ "divsd   $dst, $src" %}
17408   ins_cost(150);
17409   ins_encode %{
17410     __ divsd($dst$$XMMRegister, $src$$Address);
17411   %}
17412   ins_pipe(pipe_slow);
17413 %}
17414 
17415 instruct divD_imm(regD dst, immD con) %{
        // Double divide for UseAVX == 0 by a double constant; dst is updated in place.
        // The constant is addressed through $constantaddress($con).
17416   predicate(UseAVX == 0);
17417   match(Set dst (DivD dst con));
17418   format %{ "divsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
17419   ins_cost(150);
17420   ins_encode %{
17421     __ divsd($dst$$XMMRegister, $constantaddress($con));
17422   %}
17423   ins_pipe(pipe_slow);
17424 %}
17425 
17426 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{
        // Double divide src1 / src2 of two register operands (UseAVX > 0).
        // Emits vdivsd with a destination operand separate from both sources.
17427   predicate(UseAVX > 0);
17428   match(Set dst (DivD src1 src2));
17429 
17430   format %{ "vdivsd  $dst, $src1, $src2" %}
17431   ins_cost(150);
17432   ins_encode %{
17433     __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
17434   %}
17435   ins_pipe(pipe_slow);
17436 %}
17437 
17438 instruct divD_reg_mem(regD dst, regD src1, memory src2) %{
        // Double divide of a register by a double loaded from memory (UseAVX > 0).
        // The LoadD is folded into the vdivsd memory operand.
17439   predicate(UseAVX > 0);
17440   match(Set dst (DivD src1 (LoadD src2)));
17441 
17442   format %{ "vdivsd  $dst, $src1, $src2" %}
17443   ins_cost(150);
17444   ins_encode %{
17445     __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
17446   %}
17447   ins_pipe(pipe_slow);
17448 %}
17449 
17450 instruct divD_reg_imm(regD dst, regD src, immD con) %{
        // Double divide of a register by a double constant (UseAVX > 0).
        // The constant is addressed through $constantaddress($con).
17451   predicate(UseAVX > 0);
17452   match(Set dst (DivD src con));
17453 
17454   format %{ "vdivsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
17455   ins_cost(150);
17456   ins_encode %{
17457     __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
17458   %}
17459   ins_pipe(pipe_slow);
17460 %}
17461 
17462 instruct absF_reg(regF dst) %{
        // Float absolute value for UseAVX == 0, computed in place:
        // ANDs dst with the constant located at float_signmask().
17463   predicate(UseAVX == 0);
17464   match(Set dst (AbsF dst));
17465   ins_cost(150);
17466   format %{ "andps   $dst, [0x7fffffff]\t# abs float by sign masking" %}
17467   ins_encode %{
17468     __ andps($dst$$XMMRegister, ExternalAddress(float_signmask()));
17469   %}
17470   ins_pipe(pipe_slow);
17471 %}
17472 
17473 instruct absF_reg_reg(vlRegF dst, vlRegF src) %{
        // Float absolute value for UseAVX > 0 with separate source and destination:
        // dst = src AND the constant located at float_signmask().
17474   predicate(UseAVX > 0);
17475   match(Set dst (AbsF src));
17476   ins_cost(150);
17477   format %{ "vandps  $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
17478   ins_encode %{
          // The scalar value is handled with the 128-bit encoding of vandps.
17479     int vlen_enc = Assembler::AVX_128bit;
17480     __ vandps($dst$$XMMRegister, $src$$XMMRegister,
17481               ExternalAddress(float_signmask()), vlen_enc);
17482   %}
17483   ins_pipe(pipe_slow);
17484 %}
17485 
17486 instruct absD_reg(regD dst) %{
        // Double absolute value for UseAVX == 0, computed in place:
        // ANDs dst with the constant located at double_signmask().
17487   predicate(UseAVX == 0);
17488   match(Set dst (AbsD dst));
17489   ins_cost(150);
17490   format %{ "andpd   $dst, [0x7fffffffffffffff]\t"
17491             "# abs double by sign masking" %}
17492   ins_encode %{
17493     __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask()));
17494   %}
17495   ins_pipe(pipe_slow);
17496 %}
17497 
17498 instruct absD_reg_reg(vlRegD dst, vlRegD src) %{
        // Double absolute value for UseAVX > 0 with separate source and destination:
        // dst = src AND the constant located at double_signmask().
17499   predicate(UseAVX > 0);
17500   match(Set dst (AbsD src));
17501   ins_cost(150);
17502   format %{ "vandpd  $dst, $src, [0x7fffffffffffffff]\t"
17503             "# abs double by sign masking" %}
17504   ins_encode %{
          // The scalar value is handled with the 128-bit encoding of vandpd.
17505     int vlen_enc = Assembler::AVX_128bit;
17506     __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
17507               ExternalAddress(double_signmask()), vlen_enc);
17508   %}
17509   ins_pipe(pipe_slow);
17510 %}
17511 
17512 instruct negF_reg(regF dst) %{
        // Float negation for UseAVX == 0, computed in place:
        // XORs dst with the constant located at float_signflip().
17513   predicate(UseAVX == 0);
17514   match(Set dst (NegF dst));
17515   ins_cost(150);
17516   format %{ "xorps   $dst, [0x80000000]\t# neg float by sign flipping" %}
17517   ins_encode %{
17518     __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip()));
17519   %}
17520   ins_pipe(pipe_slow);
17521 %}
17522 
17523 instruct negF_reg_reg(vlRegF dst, vlRegF src) %{
        // Float negation for UseAVX > 0 with separate source and destination.
        // Delegates to the vnegatess assembler helper, passing the constant
        // located at float_signflip().
17524   predicate(UseAVX > 0);
17525   match(Set dst (NegF src));
17526   ins_cost(150);
17527   format %{ "vnegatess  $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
17528   ins_encode %{
17529     __ vnegatess($dst$$XMMRegister, $src$$XMMRegister,
17530                  ExternalAddress(float_signflip()));
17531   %}
17532   ins_pipe(pipe_slow);
17533 %}
17534 
17535 instruct negD_reg(regD dst) %{
        // Double negation for UseAVX == 0, computed in place:
        // XORs dst with the constant located at double_signflip().
17536   predicate(UseAVX == 0);
17537   match(Set dst (NegD dst));
17538   ins_cost(150);
17539   format %{ "xorpd   $dst, [0x8000000000000000]\t"
17540             "# neg double by sign flipping" %}
17541   ins_encode %{
17542     __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip()));
17543   %}
17544   ins_pipe(pipe_slow);
17545 %}
17546 
17547 instruct negD_reg_reg(vlRegD dst, vlRegD src) %{
        // Double negation for UseAVX > 0 with separate source and destination.
        // Delegates to the vnegatesd assembler helper, passing the constant
        // located at double_signflip().
17548   predicate(UseAVX > 0);
17549   match(Set dst (NegD src));
17550   ins_cost(150);
17551   format %{ "vnegatesd  $dst, $src, [0x8000000000000000]\t"
17552             "# neg double by sign flipping" %}
17553   ins_encode %{
17554     __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister,
17555                  ExternalAddress(double_signflip()));
17556   %}
17557   ins_pipe(pipe_slow);
17558 %}
17559 
17560 // sqrtss instruction needs destination register to be pre initialized for best performance
17561 // Therefore only the instruct rule where the input is pre-loaded into dst register is defined below
17562 instruct sqrtF_reg(regF dst) %{
        // Float square root computed in place: dst is both the input of the
        // matched SqrtF node and the result, and is passed as both operands
        // of sqrtss. No predicate restricts this rule.
17563   match(Set dst (SqrtF dst));
17564   format %{ "sqrtss  $dst, $dst" %}
17565   ins_encode %{
17566     __ sqrtss($dst$$XMMRegister, $dst$$XMMRegister);
17567   %}
17568   ins_pipe(pipe_slow);
17569 %}
17570 
17571 // sqrtsd instruction needs destination register to be pre initialized for best performance
17572 // Therefore only the instruct rule where the input is pre-loaded into dst register is defined below
17573 instruct sqrtD_reg(regD dst) %{
        // Double square root computed in place: dst is both the input of the
        // matched SqrtD node and the result, and is passed as both operands
        // of sqrtsd. No predicate restricts this rule.
17574   match(Set dst (SqrtD dst));
17575   format %{ "sqrtsd  $dst, $dst" %}
17576   ins_encode %{
17577     __ sqrtsd($dst$$XMMRegister, $dst$$XMMRegister);
17578   %}
17579   ins_pipe(pipe_slow);
17580 %}
17581 
17582 instruct convF2HF_reg_reg(rRegI dst, vlRegF src, vlRegF tmp) %{
        // Float to half-float conversion (ConvF2HF) with the result placed in an
        // integer register. The work is delegated to the flt_to_flt16 assembler
        // helper, which receives $tmp as a scratch XMM register.
17583   effect(TEMP tmp);
17584   match(Set dst (ConvF2HF src));
17585   ins_cost(125);
17586   format %{ "vcvtps2ph $dst,$src \t using $tmp as TEMP"%}
17587   ins_encode %{
17588     __ flt_to_flt16($dst$$Register, $src$$XMMRegister, $tmp$$XMMRegister);
17589   %}
17590   ins_pipe( pipe_slow );
17591 %}
17592 
17593 instruct convF2HF_mem_reg(memory mem, regF src, kReg ktmp, rRegI rtmp) %{
        // Fused float to half-float conversion and store: matches a StoreC whose
        // value is ConvF2HF(src). Requires UseAVX > 2 and AVX512VL support.
17594   predicate((UseAVX > 2) && VM_Version::supports_avx512vl());
17595   effect(TEMP ktmp, TEMP rtmp);
17596   match(Set mem (StoreC mem (ConvF2HF src)));
17597   format %{ "evcvtps2ph $mem,$src \t using $ktmp and $rtmp as TEMP" %}
17598   ins_encode %{
          // Build an opmask with only bit 0 set (via $rtmp) and use it to
          // predicate the converting store.
          // NOTE(review): presumably so that only the lowest element is written
          // to memory - confirm against evcvtps2ph's masking semantics.
17599     __ movl($rtmp$$Register, 0x1);
17600     __ kmovwl($ktmp$$KRegister, $rtmp$$Register);
          // 0x04 is passed through as the instruction's immediate operand.
17601     __ evcvtps2ph($mem$$Address, $ktmp$$KRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
17602   %}
17603   ins_pipe( pipe_slow );
17604 %}
17605 
17606 instruct vconvF2HF(vec dst, vec src) %{
        // Vector float to half-float cast (VectorCastF2HF), register to register.
17607   match(Set dst (VectorCastF2HF src));
17608   format %{ "vector_conv_F2HF $dst $src" %}
17609   ins_encode %{
          // The vector length encoding is derived from the source operand.
17610     int vlen_enc = vector_length_encoding(this, $src);
          // 0x04 is passed through as the instruction's immediate operand.
17611     __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, vlen_enc);
17612   %}
17613   ins_pipe( pipe_slow );
17614 %}
17615 
17616 instruct vconvF2HF_mem_reg(memory mem, vec src) %{
        // Fused vector float to half-float cast and store: matches a StoreVector
        // whose value is VectorCastF2HF(src). Only applies when the store writes
        // at least 16 bytes.
17617   predicate(n->as_StoreVector()->memory_size() >= 16);
17618   match(Set mem (StoreVector mem (VectorCastF2HF src)));
17619   format %{ "vcvtps2ph $mem,$src" %}
17620   ins_encode %{
          // The vector length encoding is derived from the source operand.
17621     int vlen_enc = vector_length_encoding(this, $src);
17622     __ vcvtps2ph($mem$$Address, $src$$XMMRegister, 0x04, vlen_enc);
17623   %}
17624   ins_pipe( pipe_slow );
17625 %}
17626 
17627 instruct convHF2F_reg_reg(vlRegF dst, rRegI src) %{
        // Half-float to float conversion (ConvHF2F) from an integer register to
        // an XMM register, delegated to the flt16_to_flt assembler helper.
17628   match(Set dst (ConvHF2F src));
17629   format %{ "vcvtph2ps $dst,$src" %}
17630   ins_encode %{
17631     __ flt16_to_flt($dst$$XMMRegister, $src$$Register);
17632   %}
17633   ins_pipe( pipe_slow );
17634 %}
17635 
17636 instruct vconvHF2F_reg_mem(vec dst, memory mem) %{
        // Vector half-float to float cast with the input loaded from memory:
        // the LoadVector is folded into the vcvtph2ps memory operand.
17637   match(Set dst (VectorCastHF2F (LoadVector mem)));
17638   format %{ "vcvtph2ps $dst,$mem" %}
17639   ins_encode %{
          // The vector length encoding is derived from this (result) node.
17640     int vlen_enc = vector_length_encoding(this);
17641     __ vcvtph2ps($dst$$XMMRegister, $mem$$Address, vlen_enc);
17642   %}
17643   ins_pipe( pipe_slow );
17644 %}
17645 
17646 instruct vconvHF2F(vec dst, vec src) %{
        // Vector half-float to float cast (VectorCastHF2F), register to register.
17647   match(Set dst (VectorCastHF2F src));
17648   ins_cost(125);
17649   format %{ "vector_conv_HF2F $dst,$src" %}
17650   ins_encode %{
          // The vector length encoding is derived from this (result) node.
17651     int vlen_enc = vector_length_encoding(this);
17652     __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
17653   %}
17654   ins_pipe( pipe_slow );
17655 %}
17656 
17657 // ---------------------------------------- VectorReinterpret ------------------------------------
17658 instruct reinterpret_mask(kReg dst) %{
        // VectorReinterpret between predicate-mask types with the same number of
        // lanes. dst is both input and output and the encoding is empty, so no
        // instruction is emitted.
17659   predicate(n->bottom_type()->isa_pvectmask() &&
17660             Matcher::vector_length(n) == Matcher::vector_length(n->in(1))); // dst == src
17661   match(Set dst (VectorReinterpret dst));
17662   ins_cost(125);
17663   format %{ "vector_reinterpret $dst\t!" %}
17664   ins_encode %{
17665     // empty
17666   %}
17667   ins_pipe( pipe_slow );
17668 %}
17669 
17670 instruct reinterpret_mask_W2B(kReg dst, kReg src, vec xtmp) %{
        // VectorReinterpret of a T_SHORT predicate mask into a T_BYTE predicate
        // mask with a different lane count (UseAVX > 2). The mask is expanded
        // into the vector temporary $xtmp and re-read with byte granularity.
17671   predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
17672             n->bottom_type()->isa_pvectmask() &&
17673             n->in(1)->bottom_type()->isa_pvectmask() &&
17674             n->in(1)->bottom_type()->is_pvectmask()->element_basic_type() == T_SHORT &&
17675             n->bottom_type()->is_pvectmask()->element_basic_type() == T_BYTE); // short mask -> byte mask, lane counts differ
17676   match(Set dst (VectorReinterpret src));
17677   effect(TEMP xtmp);
17678   format %{ "vector_mask_reinterpret_W2B $dst $src\t!" %}
17679   ins_encode %{
           // Total size in bytes must agree between source and destination.
17680      int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_SHORT);
17681      int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
17682      assert(src_sz == dst_sz , "src and dst size mismatch");
17683      int vlen_enc = vector_length_encoding(src_sz);
           // Mask -> word lanes in $xtmp, then byte lanes of $xtmp -> mask.
17684      __  evpmovm2w($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
17685      __  evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
17686   %}
17687   ins_pipe( pipe_slow );
17688 %}
17689 
17690 instruct reinterpret_mask_D2B(kReg dst, kReg src, vec xtmp) %{
        // VectorReinterpret of a T_INT or T_FLOAT predicate mask into a T_BYTE
        // predicate mask with a different lane count (UseAVX > 2). The mask is
        // expanded into the vector temporary $xtmp and re-read with byte granularity.
17691   predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
17692             n->bottom_type()->isa_pvectmask() &&
17693             n->in(1)->bottom_type()->isa_pvectmask() &&
17694             (n->in(1)->bottom_type()->is_pvectmask()->element_basic_type() == T_INT ||
17695              n->in(1)->bottom_type()->is_pvectmask()->element_basic_type() == T_FLOAT) &&
17696             n->bottom_type()->is_pvectmask()->element_basic_type() == T_BYTE); // int/float mask -> byte mask, lane counts differ
17697   match(Set dst (VectorReinterpret src));
17698   effect(TEMP xtmp);
17699   format %{ "vector_mask_reinterpret_D2B $dst $src\t!" %}
17700   ins_encode %{
           // T_INT's element size is used for the source size in both the
           // T_INT and T_FLOAT cases accepted by the predicate.
17701      int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_INT);
17702      int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
17703      assert(src_sz == dst_sz , "src and dst size mismatch");
17704      int vlen_enc = vector_length_encoding(src_sz);
           // Mask -> dword lanes in $xtmp, then byte lanes of $xtmp -> mask.
17705      __  evpmovm2d($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
17706      __  evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
17707   %}
17708   ins_pipe( pipe_slow );
17709 %}
17710 
17711 instruct reinterpret_mask_Q2B(kReg dst, kReg src, vec xtmp) %{
        // VectorReinterpret of a T_LONG or T_DOUBLE predicate mask into a T_BYTE
        // predicate mask with a different lane count (UseAVX > 2). The mask is
        // expanded into the vector temporary $xtmp and re-read with byte granularity.
17712   predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
17713             n->bottom_type()->isa_pvectmask() &&
17714             n->in(1)->bottom_type()->isa_pvectmask() &&
17715             (n->in(1)->bottom_type()->is_pvectmask()->element_basic_type() == T_LONG ||
17716              n->in(1)->bottom_type()->is_pvectmask()->element_basic_type() == T_DOUBLE) &&
17717             n->bottom_type()->is_pvectmask()->element_basic_type() == T_BYTE); // long/double mask -> byte mask, lane counts differ
17718   match(Set dst (VectorReinterpret src));
17719   effect(TEMP xtmp);
17720   format %{ "vector_mask_reinterpret_Q2B $dst $src\t!" %}
17721   ins_encode %{
           // T_LONG's element size is used for the source size in both the
           // T_LONG and T_DOUBLE cases accepted by the predicate.
17722      int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_LONG);
17723      int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
17724      assert(src_sz == dst_sz , "src and dst size mismatch");
17725      int vlen_enc = vector_length_encoding(src_sz);
           // Mask -> qword lanes in $xtmp, then byte lanes of $xtmp -> mask.
17726      __  evpmovm2q($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
17727      __  evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
17728   %}
17729   ins_pipe( pipe_slow );
17730 %}
17731 
17732 instruct reinterpret(vec dst) %{
        // VectorReinterpret between non-mask vectors of equal size in bytes.
        // dst is both input and output and the encoding is empty, so no
        // instruction is emitted.
17733   predicate(!n->bottom_type()->isa_pvectmask() &&
17734             Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1))); // dst == src
17735   match(Set dst (VectorReinterpret dst));
17736   ins_cost(125);
17737   format %{ "vector_reinterpret $dst\t!" %}
17738   ins_encode %{
17739     // empty
17740   %}
17741   ins_pipe( pipe_slow );
17742 %}
17743 
17744 instruct reinterpret_expand(vec dst, vec src) %{
        // VectorReinterpret to a larger vector for UseAVX == 0. The asserts below
        // restrict it to a source of at most 8 bytes and a result of at most 16.
17745   predicate(UseAVX == 0 &&
17746             (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
17747   match(Set dst (VectorReinterpret src));
17748   ins_cost(125);
        // dst is written (mask load) before src is read (pand), hence TEMP dst.
17749   effect(TEMP dst);
17750   format %{ "vector_reinterpret_expand $dst,$src" %}
17751   ins_encode %{
17752     assert(Matcher::vector_length_in_bytes(this)       <= 16, "required");
17753     assert(Matcher::vector_length_in_bytes(this, $src) <=  8, "required");
17754 
          // Load the 32-bit or 64-bit mask constant matching the source size,
          // then AND it with src.
          // NOTE(review): presumably this zeroes the bytes above the source
          // width - confirm against the mask constants' definitions.
17755     int src_vlen_in_bytes = Matcher::vector_length_in_bytes(this, $src);
17756     if (src_vlen_in_bytes == 4) {
17757       __ movdqu($dst$$XMMRegister, ExternalAddress(vector_32_bit_mask()), noreg);
17758     } else {
17759       assert(src_vlen_in_bytes == 8, "");
17760       __ movdqu($dst$$XMMRegister, ExternalAddress(vector_64_bit_mask()), noreg);
17761     }
17762     __ pand($dst$$XMMRegister, $src$$XMMRegister);
17763   %}
17764   ins_pipe( pipe_slow );
17765 %}
17766 
17767 instruct vreinterpret_expand4(legVec dst, vec src) %{
        // VectorReinterpret of a 4-byte non-mask vector to a larger vector
        // (UseAVX > 0): dst = src AND the constant at vector_32_bit_mask().
17768   predicate(UseAVX > 0 &&
17769             !n->bottom_type()->isa_pvectmask() &&
17770             (Matcher::vector_length_in_bytes(n->in(1)) == 4) && // src
17771             (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
17772   match(Set dst (VectorReinterpret src));
17773   ins_cost(125);
17774   format %{ "vector_reinterpret_expand $dst,$src" %}
17775   ins_encode %{
          // NOTE(review): the literal 0 is the vector-length argument,
          // presumably Assembler::AVX_128bit - confirm.
17776     __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_32_bit_mask()), 0, noreg);
17777   %}
17778   ins_pipe( pipe_slow );
17779 %}
17780 
17781 
17782 instruct vreinterpret_expand(legVec dst, vec src) %{
        // VectorReinterpret of a non-mask vector larger than 4 bytes to an even
        // larger vector (UseAVX > 0). Implemented as a register move whose width
        // is chosen from the SOURCE vector size.
17783   predicate(UseAVX > 0 &&
17784             !n->bottom_type()->isa_pvectmask() &&
17785             (Matcher::vector_length_in_bytes(n->in(1)) > 4) && // src
17786             (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
17787   match(Set dst (VectorReinterpret src));
17788   ins_cost(125);
17789   format %{ "vector_reinterpret_expand $dst,$src\t!" %}
17790   ins_encode %{
          // Any other source size is treated as a VM error.
17791     switch (Matcher::vector_length_in_bytes(this, $src)) {
17792       case  8: __ movq   ($dst$$XMMRegister, $src$$XMMRegister); break;
17793       case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break;
17794       case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break;
17795       default: ShouldNotReachHere();
17796     }
17797   %}
17798   ins_pipe( pipe_slow );
17799 %}
17800 
17801 instruct reinterpret_shrink(vec dst, legVec src) %{
        // VectorReinterpret of a non-mask vector to a smaller vector.
        // Implemented as a register move whose width is chosen from the
        // RESULT vector size.
17802   predicate(!n->bottom_type()->isa_pvectmask() &&
17803             Matcher::vector_length_in_bytes(n->in(1)) > Matcher::vector_length_in_bytes(n)); // src > dst
17804   match(Set dst (VectorReinterpret src));
17805   ins_cost(125);
17806   format %{ "vector_reinterpret_shrink $dst,$src\t!" %}
17807   ins_encode %{
          // Any other result size is treated as a VM error.
17808     switch (Matcher::vector_length_in_bytes(this)) {
17809       case  4: __ movfltz($dst$$XMMRegister, $src$$XMMRegister); break;
17810       case  8: __ movq   ($dst$$XMMRegister, $src$$XMMRegister); break;
17811       case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break;
17812       case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break;
17813       default: ShouldNotReachHere();
17814     }
17815   %}
17816   ins_pipe( pipe_slow );
17817 %}
17818 
17819 // ----------------------------------------------------------------------------------------------------
17820 
17821 instruct roundD_reg(legRegD dst, legRegD src, immU8 rmode) %{
        // Scalar double rounding (RoundDoubleMode) of a register operand; the
        // rounding mode constant is passed to roundsd as its immediate.
17822   match(Set dst (RoundDoubleMode src rmode));
17823   format %{ "roundsd $dst,$src" %}
17824   ins_cost(150);
17825   ins_encode %{
17826     assert(UseSSE >= 4, "required");
          // Without AVX, clear dst first when it differs from src.
          // NOTE(review): presumably this breaks a dependency on the previous
          // contents of dst - confirm the intent.
17827     if ((UseAVX == 0) && ($dst$$XMMRegister != $src$$XMMRegister)) {
17828       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
17829     }
17830     __ roundsd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant);
17831   %}
17832   ins_pipe(pipe_slow);
17833 %}
17834 
17835 instruct roundD_imm(legRegD dst, immD con, immU8 rmode) %{
        // Scalar double rounding (RoundDoubleMode) of a double constant. The
        // constant is addressed through $constantaddress($con).
17836   match(Set dst (RoundDoubleMode con rmode));
17837   format %{ "roundsd $dst,[$constantaddress]\t# load from constant table: double=$con" %}
17838   ins_cost(150);
17839   ins_encode %{
17840     assert(UseSSE >= 4, "required");
17841     __ roundsd($dst$$XMMRegister, $constantaddress($con), $rmode$$constant, noreg);
17842   %}
17843   ins_pipe(pipe_slow);
17844 %}
17845 
17846 instruct vroundD_reg(legVec dst, legVec src, immU8 rmode) %{
        // Packed double rounding (RoundDoubleModeV) for vectors with fewer than
        // 8 elements, register source. Emits vroundpd.
17847   predicate(Matcher::vector_length(n) < 8);
17848   match(Set dst (RoundDoubleModeV src rmode));
17849   format %{ "vroundpd $dst,$src,$rmode\t! round packedD" %}
17850   ins_encode %{
17851     assert(UseAVX > 0, "required");
17852     int vlen_enc = vector_length_encoding(this);
17853     __ vroundpd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, vlen_enc);
17854   %}
17855   ins_pipe( pipe_slow );
17856 %}
17857 
17858 instruct vround8D_reg(vec dst, vec src, immU8 rmode) %{
        // Packed double rounding (RoundDoubleModeV) for 8-element vectors,
        // register source. Emits vrndscalepd with the 512-bit encoding.
17859   predicate(Matcher::vector_length(n) == 8);
17860   match(Set dst (RoundDoubleModeV src rmode));
17861   format %{ "vrndscalepd $dst,$src,$rmode\t! round packed8D" %}
17862   ins_encode %{
17863     assert(UseAVX > 2, "required");
17864     __ vrndscalepd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, Assembler::AVX_512bit);
17865   %}
17866   ins_pipe( pipe_slow );
17867 %}
17868 
17869 instruct vroundD_mem(legVec dst, memory mem, immU8 rmode) %{
        // Packed double rounding (RoundDoubleModeV) for vectors with fewer than
        // 8 elements, with the LoadVector folded into vroundpd's memory operand.
17870   predicate(Matcher::vector_length(n) < 8);
17871   match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
17872   format %{ "vroundpd $dst, $mem, $rmode\t! round packedD" %}
17873   ins_encode %{
17874     assert(UseAVX > 0, "required");
17875     int vlen_enc = vector_length_encoding(this);
17876     __ vroundpd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, vlen_enc);
17877   %}
17878   ins_pipe( pipe_slow );
17879 %}
17880 
17881 instruct vround8D_mem(vec dst, memory mem, immU8 rmode) %{
        // Packed double rounding (RoundDoubleModeV) for 8-element vectors, with
        // the LoadVector folded into vrndscalepd's memory operand (512-bit).
17882   predicate(Matcher::vector_length(n) == 8);
17883   match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
17884   format %{ "vrndscalepd $dst,$mem,$rmode\t! round packed8D" %}
17885   ins_encode %{
17886     assert(UseAVX > 2, "required");
17887     __ vrndscalepd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, Assembler::AVX_512bit);
17888   %}
17889   ins_pipe( pipe_slow );
17890 %}
17891 
17892 instruct onspinwait() %{
        // Matches the OnSpinWait node (no operands) and emits a single pause.
17893   match(OnSpinWait);
17894   ins_cost(200);
17895 
17896   format %{
17897     $$template
17898     $$emit$$"pause\t! membar_onspinwait"
17899   %}
17900   ins_encode %{
17901     __ pause();
17902   %}
17903   ins_pipe(pipe_slow);
17904 %}
17905 
17906 // a * b + c
17907 instruct fmaD_reg(regD a, regD b, regD c) %{
        // Double fused multiply-add: c = a * b + c. The addend register c is
        // also the result register, as expressed by the match rule.
17908   match(Set c (FmaD  c (Binary a b)));
17909   format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %}
17910   ins_cost(150);
17911   ins_encode %{
17912     assert(UseFMA, "Needs FMA instructions support.");
          // Argument order of the helper: destination, a, b, c.
17913     __ fmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
17914   %}
17915   ins_pipe( pipe_slow );
17916 %}
17917 
17918 // a * b + c
17919 instruct fmaF_reg(regF a, regF b, regF c) %{
        // Float fused multiply-add: c = a * b + c. The addend register c is
        // also the result register, as expressed by the match rule.
17920   match(Set c (FmaF  c (Binary a b)));
17921   format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %}
17922   ins_cost(150);
17923   ins_encode %{
17924     assert(UseFMA, "Needs FMA instructions support.");
          // Argument order of the helper: destination, a, b, c.
17925     __ fmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
17926   %}
17927   ins_pipe( pipe_slow );
17928 %}
17929 
17930 // ====================VECTOR INSTRUCTIONS=====================================
17931 
17932 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
17933 instruct MoveVec2Leg(legVec dst, vec src) %{
        // Placeholder move from a vec operand to a legVec operand. It must never
        // be encoded: reaching the encoding is treated as a VM error.
17934   match(Set dst src);
17935   format %{ "" %}
17936   ins_encode %{
17937     ShouldNotReachHere();
17938   %}
17939   ins_pipe( fpu_reg_reg );
17940 %}
17941 
17942 instruct MoveLeg2Vec(vec dst, legVec src) %{
        // Placeholder move from a legVec operand to a vec operand. It must never
        // be encoded: reaching the encoding is treated as a VM error.
17943   match(Set dst src);
17944   format %{ "" %}
17945   ins_encode %{
17946     ShouldNotReachHere();
17947   %}
17948   ins_pipe( fpu_reg_reg );
17949 %}
17950 
17951 // ============================================================================
17952 
17953 // Load vectors generic operand pattern
17954 instruct loadV(vec dst, memory mem) %{
        // Generic vector load (LoadVector). The element type and the vector size
        // in bytes of this node are handed to the load_vector assembler helper.
17955   match(Set dst (LoadVector mem));
17956   ins_cost(125);
17957   format %{ "load_vector $dst,$mem" %}
17958   ins_encode %{
17959     BasicType bt = Matcher::vector_element_basic_type(this);
17960     __ load_vector(bt, $dst$$XMMRegister, $mem$$Address, Matcher::vector_length_in_bytes(this));
17961   %}
17962   ins_pipe( pipe_slow );
17963 %}
17964 
17965 // Store vectors generic operand pattern.
17966 instruct storeV(memory mem, vec src) %{
        // Generic vector store (StoreVector). The store instruction is selected
        // from the size in bytes of the source vector (4, 8, 16, 32 or 64).
17967   match(Set mem (StoreVector mem src));
17968   ins_cost(145);
17969   format %{ "store_vector $mem,$src\n\t" %}
17970   ins_encode %{
          // Any other vector size is treated as a VM error.
17971     switch (Matcher::vector_length_in_bytes(this, $src)) {
17972       case  4: __ movdl    ($mem$$Address, $src$$XMMRegister); break;
17973       case  8: __ movq     ($mem$$Address, $src$$XMMRegister); break;
17974       case 16: __ movdqu   ($mem$$Address, $src$$XMMRegister); break;
17975       case 32: __ vmovdqu  ($mem$$Address, $src$$XMMRegister); break;
17976       case 64: __ evmovdqul($mem$$Address, $src$$XMMRegister, Assembler::AVX_512bit); break;
17977       default: ShouldNotReachHere();
17978     }
17979   %}
17980   ins_pipe( pipe_slow );
17981 %}
17982 
17983 // ---------------------------------------- Gather ------------------------------------
17984 
17985 // Gather BYTE, SHORT, INT, LONG, FLOAT, DOUBLE
17986 
17987 instruct gather(legVec dst, memory mem, legVec idx, rRegP tmp, legVec mask) %{
        // Unmasked gather (LoadVectorGather) of non-subword elements for vectors
        // of at most 32 bytes when AVX512VL is not available. Uses a vector
        // register as the gather mask.
17988   predicate(!VM_Version::supports_avx512vl() && !is_subword_type(Matcher::vector_element_basic_type(n)) &&
17989             Matcher::vector_length_in_bytes(n) <= 32);
17990   match(Set dst (LoadVectorGather mem idx));
17991   effect(TEMP dst, TEMP tmp, TEMP mask);
17992   format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $mask as TEMP" %}
17993   ins_encode %{
17994     int vlen_enc = vector_length_encoding(this);
17995     BasicType elem_bt = Matcher::vector_element_basic_type(this);
17996     assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
          // Comparing $mask with itself for equality sets every bit, so all
          // lanes are selected.
17997     __ vpcmpeqd($mask$$XMMRegister, $mask$$XMMRegister, $mask$$XMMRegister, vlen_enc);
          // Materialize the base address in $tmp for the gather helper.
17998     __ lea($tmp$$Register, $mem$$Address);
17999     __ vgather(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx$$XMMRegister, $mask$$XMMRegister, vlen_enc);
18000   %}
18001   ins_pipe( pipe_slow );
18002 %}
18003 
18004 
18005 instruct evgather(vec dst, memory mem, vec idx, rRegP tmp, kReg ktmp) %{
        // Unmasked gather (LoadVectorGather) of non-subword elements using an
        // opmask register. Applies when AVX512VL is supported or the vector is
        // 64 bytes wide.
18006   predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
18007             !is_subword_type(Matcher::vector_element_basic_type(n)));
18008   match(Set dst (LoadVectorGather mem idx));
18009   effect(TEMP dst, TEMP tmp, TEMP ktmp);
        // The format string references $ktmp so that the allocated opmask
        // register is printed rather than the literal text "ktmp".
18010   format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $ktmp as TEMP" %}
18011   ins_encode %{
18012     int vlen_enc = vector_length_encoding(this);
18013     BasicType elem_bt = Matcher::vector_element_basic_type(this);
          // XNOR of $ktmp with itself yields an all-ones mask, selecting every lane.
18014     __ kxnorwl($ktmp$$KRegister, $ktmp$$KRegister, $ktmp$$KRegister);
          // Materialize the base address in $tmp for the gather helper.
18015     __ lea($tmp$$Register, $mem$$Address);
18016     __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
18017   %}
18018   ins_pipe( pipe_slow );
18019 %}
18020 
18021 instruct evgather_masked(vec dst, memory mem, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
        // Masked gather (LoadVectorGatherMasked) of non-subword elements using
        // an opmask register. Applies when AVX512VL is supported or the vector
        // is 64 bytes wide.
18022   predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
18023             !is_subword_type(Matcher::vector_element_basic_type(n)));
18024   match(Set dst (LoadVectorGatherMasked mem (Binary idx mask)));
18025   effect(TEMP_DEF dst, TEMP tmp, TEMP ktmp);
        // The format string references $ktmp so that the allocated opmask
        // register is printed rather than the literal text "ktmp".
18026   format %{ "load_vector_gather_masked $dst, $mem, $idx, $mask\t! using $tmp and $ktmp as TEMP" %}
18027   ins_encode %{
18028     assert(UseAVX > 2, "sanity");
18029     int vlen_enc = vector_length_encoding(this);
18030     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18031     assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
18032     // Note: The gather instruction partially updates the opmask register used
18033     // for predication, hence the mask operand is first copied to a temporary.
18034     __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
          // Zero dst before the gather.
          // NOTE(review): presumably so that unselected lanes read as zero - confirm.
18035     __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
          // Materialize the base address in $tmp for the gather helper.
18036     __ lea($tmp$$Register, $mem$$Address);
18037     __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
18038   %}
18039   ins_pipe( pipe_slow );
18040 %}
18041 
18042 instruct vgather_subwordLE8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegI rtmp) %{
        // Unmasked gather (LoadVectorGather) of subword elements for vectors of
        // at most 8 bytes. Here the second input is a pointer register
        // (idx_base) rather than an index vector.
18043   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18044   match(Set dst (LoadVectorGather mem idx_base));
18045   effect(TEMP tmp, TEMP rtmp);
18046   format %{ "vector_gatherLE8 $dst, $mem, $idx_base\t! using $tmp and $rtmp as TEMP" %}
18047   ins_encode %{
18048     int vlen_enc = vector_length_encoding(this);
18049     BasicType elem_bt = Matcher::vector_element_basic_type(this);
          // Materialize the base address in $tmp for the gather helper.
18050     __ lea($tmp$$Register, $mem$$Address);
18051     __ vgather8b(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp$$Register, vlen_enc);
18052   %}
18053   ins_pipe( pipe_slow );
18054 %}
18055 
18056 instruct vgather_subwordGT8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegP idx_base_temp,
18057                              vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI length, rFlagsReg cr) %{
        // Unmasked gather (LoadVectorGather) of subword elements for vectors
        // larger than 8 bytes. Delegates to the vgather_subword assembler helper
        // and declares the flags register as killed.
18058   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18059   match(Set dst (LoadVectorGather mem idx_base));
18060   effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP length, KILL cr);
18061   format %{ "vector_gatherGT8 $dst, $mem, $idx_base\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp and $length as TEMP" %}
18062   ins_encode %{
18063     int vlen_enc = vector_length_encoding(this);
18064     int vector_len = Matcher::vector_length(this);
18065     BasicType elem_bt = Matcher::vector_element_basic_type(this);
          // Materialize the base address in $tmp for the gather helper.
18066     __ lea($tmp$$Register, $mem$$Address);
          // The helper is given a copy of the index base.
          // NOTE(review): presumably because the helper modifies that register
          // while $idx_base must stay intact - confirm.
18067     __ movptr($idx_base_temp$$Register, $idx_base$$Register);
          // noreg is passed for both mask-related arguments in this unmasked form.
18068     __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, noreg, $xtmp1$$XMMRegister,
18069                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, noreg, $length$$Register, vector_len, vlen_enc);
18070   %}
18071   ins_pipe( pipe_slow );
18072 %}
18073 
// Masked gather of subword elements into a result vector of at most 8 bytes,
// selected when AVX512BW is reported; the mask arrives in an opmask (kReg)
// register. Matches LoadVectorGatherMasked.
//   $mask_idx - TEMP, zeroed before the helper call
//   $tmp      - TEMP, effective address of $mem
//   $rtmp2    - TEMP, general-purpose copy of the opmask bits
//   $rtmp     - TEMP, scratch register for vgather8b_masked
// cr is declared KILL.
18074 instruct vgather_masked_subwordLE8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegL mask_idx, rRegP tmp, rRegI rtmp, rRegL rtmp2, rFlagsReg cr) %{
18075   predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18076   match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18077   effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
18078   format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
18079   ins_encode %{
18080     int vlen_enc = vector_length_encoding(this);
18081     BasicType elem_bt = Matcher::vector_element_basic_type(this);
          // Start the mask index at zero.
18082     __ xorq($mask_idx$$Register, $mask_idx$$Register);
18083     __ lea($tmp$$Register, $mem$$Address);
          // Copy the opmask bits into a general-purpose register for the helper.
18084     __ kmovql($rtmp2$$Register, $mask$$KRegister);
18085     __ vgather8b_masked(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc);
18086   %}
18087   ins_pipe( pipe_slow );
18088 %}
18089 
// Masked gather of subword elements into a result vector larger than 8 bytes,
// selected when AVX512BW is reported; the mask arrives in an opmask (kReg)
// register. Matches LoadVectorGatherMasked.
// dst is TEMP_DEF, cr is declared KILL, and all listed temporaries are either
// initialised below or passed straight to the vgather_subword helper.
18090 instruct vgather_masked_subwordGT8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegP tmp, rRegP idx_base_temp,
18091                                          vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegL rtmp2, rRegL mask_idx, rRegI length, rFlagsReg cr) %{
18092   predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18093   match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18094   effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
18095   format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
18096   ins_encode %{
18097     int vlen_enc = vector_length_encoding(this);
18098     int vector_len = Matcher::vector_length(this);
18099     BasicType elem_bt = Matcher::vector_element_basic_type(this);
          // Start the mask index at zero.
18100     __ xorq($mask_idx$$Register, $mask_idx$$Register);
18101     __ lea($tmp$$Register, $mem$$Address);
          // Hand the helper a copy of the index pointer instead of the input register.
18102     __ movptr($idx_base_temp$$Register, $idx_base$$Register);
          // Copy the opmask bits into a general-purpose register for the helper.
18103     __ kmovql($rtmp2$$Register, $mask$$KRegister);
18104     __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister,
18105                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc);
18106   %}
18107   ins_pipe( pipe_slow );
18108 %}
18109 
// Masked gather of subword elements into a result vector of at most 8 bytes,
// selected when AVX512VL+BW is not reported (predicate: !supports_avx512vlbw);
// the mask arrives in a vector register. Matches LoadVectorGatherMasked.
// The vector mask is turned into a bit mask in $rtmp2 before the helper call.
// $mask_idx doubles as scratch for the pext selector and is zeroed afterwards,
// so the order of the statements below matters.
18110 instruct vgather_masked_subwordLE8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegI mask_idx, rRegP tmp, rRegI rtmp, rRegI rtmp2, rFlagsReg cr) %{
18111   predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
18112   match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18113   effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
18114   format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
18115   ins_encode %{
18116     int vlen_enc = vector_length_encoding(this);
18117     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18118     __ lea($tmp$$Register, $mem$$Address);
          // One mask bit per byte of the vector mask.
18119     __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc);
18120     if (elem_bt == T_SHORT) {
            // Keep only the even-numbered bits so that one bit per 16-bit element remains.
18121       __ movl($mask_idx$$Register, 0x55555555);
18122       __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register);
18123     }
          // Start the mask index at zero (also discards the pext selector above).
18124     __ xorl($mask_idx$$Register, $mask_idx$$Register);
18125     __ vgather8b_masked(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc);
18126   %}
18127   ins_pipe( pipe_slow );
18128 %}
18129 
// Masked gather of subword elements into a result vector larger than 8 bytes,
// selected when AVX512VL+BW is not reported (predicate: !supports_avx512vlbw);
// the mask arrives in a vector register. Matches LoadVectorGatherMasked.
// The vector mask is turned into a bit mask in $rtmp2 before the helper call.
// $mask_idx doubles as scratch for the pext selector and is zeroed afterwards,
// so the order of the statements below matters.
18130 instruct vgather_masked_subwordGT8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegP tmp, rRegP idx_base_temp,
18131                                          vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI rtmp2, rRegI mask_idx, rRegI length, rFlagsReg cr) %{
18132   predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
18133   match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
18134   effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
18135   format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
18136   ins_encode %{
18137     int vlen_enc = vector_length_encoding(this);
18138     int vector_len = Matcher::vector_length(this);
18139     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18140     __ lea($tmp$$Register, $mem$$Address);
          // Hand the helper a copy of the index pointer instead of the input register.
18141     __ movptr($idx_base_temp$$Register, $idx_base$$Register);
          // One mask bit per byte of the vector mask.
18142     __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc);
18143     if (elem_bt == T_SHORT) {
            // Keep only the even-numbered bits so that one bit per 16-bit element remains.
18144       __ movl($mask_idx$$Register, 0x55555555);
18145       __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register);
18146     }
          // Start the mask index at zero (also discards the pext selector above).
18147     __ xorl($mask_idx$$Register, $mask_idx$$Register);
18148     __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister,
18149                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc);
18150   %}
18151   ins_pipe( pipe_slow );
18152 %}
18153 
18154 // ====================Scatter=======================================
18155 
18156 // Scatter INT, LONG, FLOAT, DOUBLE
18157 
// Unmasked scatter store (StoreVectorScatter) for T_INT, T_LONG, T_FLOAT and
// T_DOUBLE vectors of at least 16 bytes, available when UseAVX > 2.
//   $tmp  - TEMP, receives the effective address of $mem
//   $ktmp - TEMP opmask, loaded with all bits set so every lane is stored
// The format string names $ktmp rather than a fixed register, because the
// opmask temporary is chosen by the register allocator.
18158 instruct scatter(memory mem, vec src, vec idx, rRegP tmp, kReg ktmp) %{
18159   predicate(UseAVX > 2);
18160   match(Set mem (StoreVectorScatter mem (Binary src idx)));
18161   effect(TEMP tmp, TEMP ktmp);
18162   format %{ "store_vector_scatter $mem, $idx, $src\t! using $ktmp and $tmp as TEMP" %}
18163   ins_encode %{
18164     int vlen_enc = vector_length_encoding(this, $src);
18165     BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
18166
18167     assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
18168     assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
18169
          // No mask operand here: predicate the scatter with an all-ones opmask.
18170     __ kmovwl($ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), noreg);
18171     __ lea($tmp$$Register, $mem$$Address);
18172     __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
18173   %}
18174   ins_pipe( pipe_slow );
18175 %}
18176 
// Masked scatter store (StoreVectorScatterMasked) for T_INT, T_LONG, T_FLOAT
// and T_DOUBLE vectors of at least 16 bytes; the mask arrives in an opmask
// (kReg) register.
//   $tmp  - TEMP, receives the effective address of $mem
//   $ktmp - TEMP opmask, working copy of $mask
18177 instruct scatter_masked(memory mem, vec src, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
18178   match(Set mem (StoreVectorScatterMasked mem (Binary src (Binary idx mask))));
18179   effect(TEMP tmp, TEMP ktmp);
18180   format %{ "store_vector_scatter_masked $mem, $idx, $src, $mask\t!" %}
18181   ins_encode %{
18182     int vlen_enc = vector_length_encoding(this, $src);
18183     BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
18184     assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
18185     assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
18186     // Note: The scatter instruction partially updates the opmask register used
18187     // for predication, hence the mask operand is first moved to a temporary.
18188     __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
18189     __ lea($tmp$$Register, $mem$$Address);
18190     __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
18191   %}
18192   ins_pipe( pipe_slow );
18193 %}
18194 
18195 // ====================REPLICATE=======================================
18196 
18197 // Replicate byte scalar to be vector
// Broadcast a byte held in a general-purpose register to every lane of dst.
// Three code shapes are emitted, depending on the available ISA:
//   UseAVX < 2                          : movdl + unpack/shuffle sequence (8 or 16 bytes)
//   AVX2 without a usable EVEX form     : movdl + vpbroadcastb
//   64-byte vector, or AVX512VL+BW      : evpbroadcastb straight from the GPR
18198 instruct vReplB_reg(vec dst, rRegI src) %{
18199   predicate(Matcher::vector_element_basic_type(n) == T_BYTE);
18200   match(Set dst (Replicate src));
18201   format %{ "replicateB $dst,$src" %}
18202   ins_encode %{
18203     const uint num_bytes = Matcher::vector_length(this);
18204     if (UseAVX < 2) {
            // Legacy path: widen the byte step by step inside the low quadword.
18205       __ movdl($dst$$XMMRegister, $src$$Register);
18206       __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
18207       __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18208       if (num_bytes >= 16) {
18209         assert(num_bytes == 16, "");
18210         __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18211       }
18212     } else {
18213       const int enc = vector_length_encoding(this);
18214       if (num_bytes != 64 && !VM_Version::supports_avx512vlbw()) {
              // No EVEX broadcast-from-GPR available: go through the XMM register.
18215         __ movdl($dst$$XMMRegister, $src$$Register);
18216         __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, enc);
18217       } else { // AVX512VL for <512bit operands
18218         assert(VM_Version::supports_avx512bw(), "required"); // 512-bit byte vectors assume AVX512BW
18219         __ evpbroadcastb($dst$$XMMRegister, $src$$Register, enc);
18220       }
18221     }
18222   %}
18223   ins_pipe( pipe_slow );
18224 %}
18226 
// Broadcast a byte loaded from memory (Replicate (LoadB mem)) to every lane
// of dst. Only selected when UseAVX >= 2.
18227 instruct ReplB_mem(vec dst, memory mem) %{
18228   predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_BYTE);
18229   match(Set dst (Replicate (LoadB mem)));
18230   format %{ "replicateB $dst,$mem" %}
18231   ins_encode %{
          // The load is folded into the broadcast instruction.
18232     __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_length_encoding(this));
18233   %}
18234   ins_pipe( pipe_slow );
18235 %}
18237 
18238 // ====================ReplicateS=======================================
18239 
// Broadcast a short held in a general-purpose register to every lane of dst.
// Three code shapes are emitted, depending on the available ISA:
//   UseAVX < 2                          : movdl + pshuflw (+ punpcklqdq for 8 shorts)
//   32-short vector, or AVX512VL+BW     : evpbroadcastw straight from the GPR
//   otherwise (AVX2)                    : movdl + vpbroadcastw
18240 instruct vReplS_reg(vec dst, rRegI src) %{
18241   predicate(Matcher::vector_element_basic_type(n) == T_SHORT);
18242   match(Set dst (Replicate src));
18243   format %{ "replicateS $dst,$src" %}
18244   ins_encode %{
18245     const uint num_shorts = Matcher::vector_length(this);
18246     const int enc = vector_length_encoding(this);
18247     if (UseAVX < 2) {
            // Legacy path: replicate within the low quadword, then duplicate it.
18248       __ movdl($dst$$XMMRegister, $src$$Register);
18249       __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18250       if (num_shorts >= 8) {
18251         assert(num_shorts == 8, "");
18252         __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18253       }
18254     } else if (num_shorts == 32 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
18255       assert(VM_Version::supports_avx512bw(), "required"); // 512-bit short vectors assume AVX512BW
18256       __ evpbroadcastw($dst$$XMMRegister, $src$$Register, enc);
18257     } else {
            // No EVEX broadcast-from-GPR available: go through the XMM register.
18258       __ movdl($dst$$XMMRegister, $src$$Register);
18259       __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, enc);
18260     }
18261   %}
18262   ins_pipe( pipe_slow );
18263 %}
18267 
// Broadcast an immH constant to every 16-bit lane of dst.
// The constant is first materialised in the TEMP register $rtmp and then
// broadcast from there. The encoding asserts AVX512-FP16 support and a T_SHORT
// element type; this rule carries no predicate of its own.
18268 instruct ReplHF_imm(vec dst, immH con, rRegI rtmp) %{
18269   match(Set dst (Replicate con));
18270   effect(TEMP rtmp);
18271   format %{ "replicateHF $dst, $con \t! using $rtmp as TEMP" %}
18272   ins_encode %{
18273     int vlen_enc = vector_length_encoding(this);
18274     BasicType bt = Matcher::vector_element_basic_type(this);
18275     assert(VM_Version::supports_avx512_fp16() && bt == T_SHORT, "");
18276     __ movl($rtmp$$Register, $con$$constant);
18277     __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc);
18278   %}
18279   ins_pipe( pipe_slow );
18280 %}
18281 
// Broadcast the value held in a regF source to every 16-bit lane of dst.
// Selected only with AVX512-FP16 support and a T_SHORT element type.
// The value is moved from the XMM source into the TEMP general-purpose
// register $rtmp and broadcast from there.
18282 instruct ReplHF_reg(vec dst, regF src, rRegI rtmp) %{
18283   predicate(VM_Version::supports_avx512_fp16() && Matcher::vector_element_basic_type(n) == T_SHORT);
18284   match(Set dst (Replicate src));
18285   effect(TEMP rtmp);
18286   format %{ "replicateHF $dst, $src \t! using $rtmp as TEMP" %}
18287   ins_encode %{
18288     int vlen_enc = vector_length_encoding(this);
18289     __ evmovw($rtmp$$Register, $src$$XMMRegister);
18290     __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc);
18291   %}
18292   ins_pipe( pipe_slow );
18293 %}
18294 
// Broadcast a short loaded from memory (Replicate (LoadS mem)) to every lane
// of dst. Only selected when UseAVX >= 2.
18295 instruct ReplS_mem(vec dst, memory mem) %{
18296   predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_SHORT);
18297   match(Set dst (Replicate (LoadS mem)));
18298   format %{ "replicateS $dst,$mem" %}
18299   ins_encode %{
          // The load is folded into the broadcast instruction.
18300     __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_length_encoding(this));
18301   %}
18302   ins_pipe( pipe_slow );
18303 %}
18305 
18306 // ====================ReplicateI=======================================
18307 
// Broadcast an int held in a general-purpose register to every lane of dst.
//   16-int vector, or AVX512VL : evpbroadcastd straight from the GPR
//   AVX2                       : movdl + vpbroadcastd
//   otherwise                  : movdl + pshufd
18308 instruct ReplI_reg(vec dst, rRegI src) %{
18309   predicate(Matcher::vector_element_basic_type(n) == T_INT);
18310   match(Set dst (Replicate src));
18311   format %{ "replicateI $dst,$src" %}
18312   ins_encode %{
18313     const uint num_ints = Matcher::vector_length(this);
18314     const int enc = vector_length_encoding(this);
18315     if (num_ints == 16 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
18316       __ evpbroadcastd($dst$$XMMRegister, $src$$Register, enc);
18317     } else {
            // Both remaining paths start from the scalar in the low dword of dst.
18318       __ movdl($dst$$XMMRegister, $src$$Register);
18319       if (VM_Version::supports_avx2()) {
18320         __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, enc);
18321       } else {
18322         __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18323       }
18324     }
18325   %}
18326   ins_pipe( pipe_slow );
18327 %}
18327 
// Broadcast an int loaded from memory (Replicate (LoadI mem)) to every lane
// of dst.
//   AVX2      : vpbroadcastd from memory
//   AVX       : vbroadcastss from memory
//   otherwise : movdl + pshufd
18328 instruct ReplI_mem(vec dst, memory mem) %{
18329   predicate(Matcher::vector_element_basic_type(n) == T_INT);
18330   match(Set dst (Replicate (LoadI mem)));
18331   format %{ "replicateI $dst,$mem" %}
18332   ins_encode %{
18333     const int enc = vector_length_encoding(this);
18334     const bool has_avx2 = VM_Version::supports_avx2();
18335     if (has_avx2 || VM_Version::supports_avx()) {
            // A memory-source broadcast exists; prefer the integer form when available.
18336       if (has_avx2) {
18337         __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, enc);
18338       } else {
18339         __ vbroadcastss($dst$$XMMRegister, $mem$$Address, enc);
18340       }
18341     } else {
18342       __ movdl($dst$$XMMRegister, $mem$$Address);
18343       __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
18344     }
18345   %}
18346   ins_pipe( pipe_slow );
18347 %}
18345 
// Replicate an int immediate into a non-long integral vector by loading a
// pre-replicated constant through $constantaddress.
// The third argument of vreplicate_imm is a byte width (4 with AVX, 8 with
// SSE3 only, 16 otherwise) divided by the element size.
// NOTE(review): the $constantaddress argument repeats
// Matcher::vector_element_basic_type(this) instead of using the local bt
// declared below it; presumably the macro argument is expanded outside this
// encode body - confirm before refactoring it to use locals.
18346 instruct ReplI_imm(vec dst, immI con) %{
18347   predicate(Matcher::is_non_long_integral_vector(n));
18348   match(Set dst (Replicate con));
18349   format %{ "replicateI $dst,$con" %}
18350   ins_encode %{
18351     InternalAddress addr = $constantaddress(vreplicate_imm(Matcher::vector_element_basic_type(this), $con$$constant,
18352                                                            (VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 4 : 8) : 16) /
18353                                                                    type2aelembytes(Matcher::vector_element_basic_type(this))));
18354     BasicType bt = Matcher::vector_element_basic_type(this);
          // Note: vlen is the vector size in bytes, not an element count.
18355     int vlen = Matcher::vector_length_in_bytes(this);
18356     __ load_constant_vector(bt, $dst$$XMMRegister, addr, vlen);
18357   %}
18358   ins_pipe( pipe_slow );
18359 %}
18360 
18361 // Replicate scalar zero to be vector
// Replicate integer zero into a non-long integral vector by xor-ing dst with
// itself. The three-operand vpxor with an explicit vector length is used only
// on EVEX targets that lack AVX512VL; everything else uses plain pxor.
18362 instruct ReplI_zero(vec dst, immI_0 zero) %{
18363   predicate(Matcher::is_non_long_integral_vector(n));
18364   match(Set dst (Replicate zero));
18365   format %{ "replicateI $dst,$zero" %}
18366   ins_encode %{
18367     const int enc = vector_length_encoding(this);
18368     const bool evex_without_vl = VM_Version::supports_evex() && !VM_Version::supports_avx512vl();
18369     if (!evex_without_vl) {
18370       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
18371     } else {
18372       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, enc);
18373     }
18374   %}
18375   ins_pipe( fpu_reg_reg );
18376 %}
18376 
// Replicate the immediate -1 into a non-long integral vector: every bit of
// dst is set via the vallones helper.
18377 instruct ReplI_M1(vec dst, immI_M1 con) %{
18378   predicate(Matcher::is_non_long_integral_vector(n));
18379   match(Set dst (Replicate con));
18380   format %{ "vallones $dst" %}
18381   ins_encode %{
18382     __ vallones($dst$$XMMRegister, vector_length_encoding(this));
18383   %}
18384   ins_pipe( pipe_slow );
18385 %}
18387 
18388 // ====================ReplicateL=======================================
18389 
18390 // Replicate long (8 byte) scalar to be vector
// Broadcast a long held in a general-purpose register to every lane of dst.
//   8-long vector, or AVX512VL : evpbroadcastq straight from the GPR
//   AVX2                       : movdq + vpbroadcastq
//   otherwise                  : movdq + punpcklqdq
18391 instruct ReplL_reg(vec dst, rRegL src) %{
18392   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
18393   match(Set dst (Replicate src));
18394   format %{ "replicateL $dst,$src" %}
18395   ins_encode %{
18396     const int num_longs = Matcher::vector_length(this);
18397     const int enc = vector_length_encoding(this);
18398     if (num_longs == 8 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
18399       __ evpbroadcastq($dst$$XMMRegister, $src$$Register, enc);
18400     } else {
            // Both remaining paths start from the scalar in the low qword of dst.
18401       __ movdq($dst$$XMMRegister, $src$$Register);
18402       if (VM_Version::supports_avx2()) {
18403         __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, enc);
18404       } else {
18405         __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18406       }
18407     }
18408   %}
18409   ins_pipe( pipe_slow );
18410 %}
18410 
// Broadcast a long loaded from memory (Replicate (LoadL mem)) to every lane
// of dst.
//   AVX2      : vpbroadcastq from memory
//   SSE3      : movddup from memory
//   otherwise : movq + punpcklqdq
18411 instruct ReplL_mem(vec dst, memory mem) %{
18412   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
18413   match(Set dst (Replicate (LoadL mem)));
18414   format %{ "replicateL $dst,$mem" %}
18415   ins_encode %{
18416     const int enc = vector_length_encoding(this);
18417     if (VM_Version::supports_avx2()) {
18418       __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, enc);
18419     } else if (!VM_Version::supports_sse3()) {
            // No duplicating load available: load the qword, then copy it upwards.
18420       __ movq($dst$$XMMRegister, $mem$$Address);
18421       __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
18422     } else {
18423       __ movddup($dst$$XMMRegister, $mem$$Address);
18424     }
18425   %}
18426   ins_pipe( pipe_slow );
18427 %}
18428 
18429 // Replicate long (8 byte) scalar immediate to be vector by loading from const table.
// Replicate a long immediate into a T_LONG vector by loading a pre-replicated
// constant through $constantaddress. The third argument of vreplicate_imm is
// 1 when SSE3 is supported and 2 otherwise.
// NOTE(review): the $constantaddress argument is presumably expanded outside
// this encode body, so it should not reference encode-body locals - confirm.
18430 instruct ReplL_imm(vec dst, immL con) %{
18431   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
18432   match(Set dst (Replicate con));
18433   format %{ "replicateL $dst,$con" %}
18434   ins_encode %{
18435     InternalAddress addr = $constantaddress(vreplicate_imm(T_LONG, $con$$constant, VM_Version::supports_sse3() ? 1 : 2));
          // Note: vlen is the vector size in bytes, not an element count.
18436     int vlen = Matcher::vector_length_in_bytes(this);
18437     __ load_constant_vector(T_LONG, $dst$$XMMRegister, addr, vlen);
18438   %}
18439   ins_pipe( pipe_slow );
18440 %}
18441 
// Replicate long zero into a T_LONG vector by xor-ing dst with itself.
// The three-operand vpxor with an explicit vector length is used only on EVEX
// targets that lack AVX512VL; everything else uses plain pxor.
18442 instruct ReplL_zero(vec dst, immL0 zero) %{
18443   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
18444   match(Set dst (Replicate zero));
18445   format %{ "replicateL $dst,$zero" %}
18446   ins_encode %{
18447     const int enc = vector_length_encoding(this);
18448     const bool evex_without_vl = VM_Version::supports_evex() && !VM_Version::supports_avx512vl();
18449     if (!evex_without_vl) {
18450       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
18451     } else {
18452       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, enc);
18453     }
18454   %}
18455   ins_pipe( fpu_reg_reg );
18456 %}
18456 
// Replicate the long immediate -1 into a T_LONG vector: every bit of dst is
// set via the vallones helper.
18457 instruct ReplL_M1(vec dst, immL_M1 con) %{
18458   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
18459   match(Set dst (Replicate con));
18460   format %{ "vallones $dst" %}
18461   ins_encode %{
18462     __ vallones($dst$$XMMRegister, vector_length_encoding(this));
18463   %}
18464   ins_pipe( pipe_slow );
18465 %}
18467 
18468 // ====================ReplicateF=======================================
18469 
// Broadcast a float held in an XMM register to every lane of dst (UseAVX > 0).
//   up to 4 floats           : vpermilps within 128 bits
//   more than 4, with AVX2   : vbroadcastss register form
//   8 floats without AVX2    : vpermilps within 128 bits, then copy the low
//                              half into the high half
18470 instruct vReplF_reg(vec dst, vlRegF src) %{
18471   predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
18472   match(Set dst (Replicate src));
18473   format %{ "replicateF $dst,$src" %}
18474   ins_encode %{
18475     const uint num_floats = Matcher::vector_length(this);
18476     const int enc = vector_length_encoding(this);
18477     if (num_floats > 4 && VM_Version::supports_avx2()) {
18478       __ vbroadcastss($dst$$XMMRegister, $src$$XMMRegister, enc); // reg-to-reg variant requires AVX2
18479     } else {
18480       assert(num_floats <= 4 || num_floats == 8, "sanity");
            // Fill the low 128 bits first; wider vectors duplicate them afterwards.
18481       __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit);
18482       if (num_floats > 4) {
18483         __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
18484       }
18485     }
18486   %}
18487   ins_pipe( pipe_slow );
18488 %}
18489 
// Broadcast a float held in an XMM register to every lane of dst when AVX is
// not in use (UseAVX == 0). The shuffle selector 0x00 picks dword 0 of src for
// all four destination dwords.
18490 instruct ReplF_reg(vec dst, vlRegF src) %{
18491   predicate(UseAVX == 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
18492   match(Set dst (Replicate src));
18493   format %{ "replicateF $dst,$src" %}
18494   ins_encode %{
18495     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
18496   %}
18497   ins_pipe( pipe_slow );
18498 %}
18499 
// Broadcast a float loaded from memory (Replicate (LoadF mem)) to every lane
// of dst. Only selected when UseAVX > 0.
18500 instruct ReplF_mem(vec dst, memory mem) %{
18501   predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
18502   match(Set dst (Replicate (LoadF mem)));
18503   format %{ "replicateF $dst,$mem" %}
18504   ins_encode %{
          // The load is folded into the broadcast instruction.
18505     __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vector_length_encoding(this));
18506   %}
18507   ins_pipe( pipe_slow );
18508 %}
18510 
18511 // Replicate float scalar immediate to be vector by loading from const table.
// Replicate a float immediate into a T_FLOAT vector by loading a
// pre-replicated constant through $constantaddress. The third argument of
// vreplicate_imm is 1 with AVX, 2 with SSE3 only, and 4 otherwise.
// NOTE(review): the $constantaddress argument is presumably expanded outside
// this encode body, so it should not reference encode-body locals - confirm.
18512 instruct ReplF_imm(vec dst, immF con) %{
18513   predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
18514   match(Set dst (Replicate con));
18515   format %{ "replicateF $dst,$con" %}
18516   ins_encode %{
18517     InternalAddress addr = $constantaddress(vreplicate_imm(T_FLOAT, $con$$constant,
18518                                                            VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 1 : 2) : 4));
          // Note: vlen is the vector size in bytes, not an element count.
18519     int vlen = Matcher::vector_length_in_bytes(this);
18520     __ load_constant_vector(T_FLOAT, $dst$$XMMRegister, addr, vlen);
18521   %}
18522   ins_pipe( pipe_slow );
18523 %}
18524 
// Replicate float zero into a T_FLOAT vector by xor-ing dst with itself.
// The three-operand vpxor with an explicit vector length is used only on EVEX
// targets that lack AVX512VL+DQ; everything else uses xorps.
18525 instruct ReplF_zero(vec dst, immF0 zero) %{
18526   predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
18527   match(Set dst (Replicate zero));
18528   format %{ "replicateF $dst,$zero" %}
18529   ins_encode %{
18530     const int enc = vector_length_encoding(this);
18531     const bool evex_without_vldq = VM_Version::supports_evex() && !VM_Version::supports_avx512vldq();
18532     if (!evex_without_vldq) {
18533       __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
18534     } else {
18535       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, enc);
18536     }
18537   %}
18538   ins_pipe( fpu_reg_reg );
18539 %}
18539 
18540 // ====================ReplicateD=======================================
18541 
18542 // Replicate double (8 bytes) scalar to be vector
// Broadcast a double held in an XMM register to every lane of dst
// (UseSSE >= 3).
//   up to 2 doubles          : movddup
//   more than 2, with AVX2   : vbroadcastsd register form
//   4 doubles without AVX2   : movddup, then copy the low half into the high half
18543 instruct vReplD_reg(vec dst, vlRegD src) %{
18544   predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
18545   match(Set dst (Replicate src));
18546   format %{ "replicateD $dst,$src" %}
18547   ins_encode %{
18548     const uint num_doubles = Matcher::vector_length(this);
18549     const int enc = vector_length_encoding(this);
18550     if (num_doubles > 2 && VM_Version::supports_avx2()) {
18551       __ vbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, enc); // reg-to-reg variant requires AVX2
18552     } else {
18553       assert(num_doubles <= 2 || num_doubles == 4, "sanity");
            // Fill the low 128 bits first; wider vectors duplicate them afterwards.
18554       __ movddup($dst$$XMMRegister, $src$$XMMRegister);
18555       if (num_doubles > 2) {
18556         __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
18557       }
18558     }
18559   %}
18560   ins_pipe( pipe_slow );
18561 %}
18562 
// Broadcast a double held in an XMM register to both lanes of dst when SSE3
// is not available (UseSSE < 3). The shuffle selector 0x44 picks source dwords
// 0,1,0,1, i.e. the low quadword of src is written to both halves of dst.
18563 instruct ReplD_reg(vec dst, vlRegD src) %{
18564   predicate(UseSSE < 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
18565   match(Set dst (Replicate src));
18566   format %{ "replicateD $dst,$src" %}
18567   ins_encode %{
18568     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
18569   %}
18570   ins_pipe( pipe_slow );
18571 %}
18572 
// Broadcast a double loaded from memory (Replicate (LoadD mem)) to every lane
// of dst (UseSSE >= 3). Vectors of fewer than 4 doubles use movddup; wider
// ones use vbroadcastsd with the vector length encoding.
18573 instruct ReplD_mem(vec dst, memory mem) %{
18574   predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
18575   match(Set dst (Replicate (LoadD mem)));
18576   format %{ "replicateD $dst,$mem" %}
18577   ins_encode %{
18578     if (Matcher::vector_length(this) < 4) {
18579       __ movddup($dst$$XMMRegister, $mem$$Address);
18580     } else {
            // The length encoding is only needed (and only computed) for the wide form.
18581       const int enc = vector_length_encoding(this);
18582       __ vbroadcastsd($dst$$XMMRegister, $mem$$Address, enc);
18583     }
18584   %}
18585   ins_pipe( pipe_slow );
18586 %}
18587 
18588 // Replicate double (8 byte) scalar immediate to be vector by loading from const table.
// Replicate a double immediate into a T_DOUBLE vector by loading a
// pre-replicated constant through $constantaddress. The third argument of
// vreplicate_imm is 1 when SSE3 is supported and 2 otherwise.
// NOTE(review): the $constantaddress argument is presumably expanded outside
// this encode body, so it should not reference encode-body locals - confirm.
18589 instruct ReplD_imm(vec dst, immD con) %{
18590   predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
18591   match(Set dst (Replicate con));
18592   format %{ "replicateD $dst,$con" %}
18593   ins_encode %{
18594     InternalAddress addr = $constantaddress(vreplicate_imm(T_DOUBLE, $con$$constant, VM_Version::supports_sse3() ? 1 : 2));
          // Note: vlen is the vector size in bytes, not an element count.
18595     int vlen = Matcher::vector_length_in_bytes(this);
18596     __ load_constant_vector(T_DOUBLE, $dst$$XMMRegister, addr, vlen);
18597   %}
18598   ins_pipe( pipe_slow );
18599 %}
18600 
// Replicate double zero into a T_DOUBLE vector by xor-ing dst with itself.
// The three-operand vpxor with an explicit vector length is used only on EVEX
// targets that lack AVX512VL+DQ; everything else uses xorps.
18601 instruct ReplD_zero(vec dst, immD0 zero) %{
18602   predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
18603   match(Set dst (Replicate zero));
18604   format %{ "replicateD $dst,$zero" %}
18605   ins_encode %{
18606     const int enc = vector_length_encoding(this);
18607     const bool evex_without_vldq = VM_Version::supports_evex() && !VM_Version::supports_avx512vldq();
18608     if (!evex_without_vldq) {
18609       __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
18610     } else {
18611       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, enc);
18612     }
18613   %}
18614   ins_pipe( fpu_reg_reg );
18615 %}
18615 
18616 // ====================VECTOR INSERT=======================================
18617 
// Insert a general-purpose register value into element $idx of a vector
// shorter than 32 bytes. dst is both the input vector and the result (the
// match rule names dst on both sides), so the insertion happens in place.
// The encoding asserts UseSSE >= 4, a vector of at least 8 bytes, an integral
// element type and an in-range constant index.
18618 instruct insert(vec dst, rRegI val, immU8 idx) %{
18619   predicate(Matcher::vector_length_in_bytes(n) < 32);
18620   match(Set dst (VectorInsert (Binary dst val) idx));
18621   format %{ "vector_insert $dst,$val,$idx" %}
18622   ins_encode %{
18623     assert(UseSSE >= 4, "required");
18624     assert(Matcher::vector_length_in_bytes(this) >= 8, "required");
18625
18626     BasicType elem_bt = Matcher::vector_element_basic_type(this);
18627
18628     assert(is_integral_type(elem_bt), "");
18629     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
18630
18631     __ insert(elem_bt, $dst$$XMMRegister, $val$$Register, $idx$$constant);
18632   %}
18633   ins_pipe( pipe_slow );
18634 %}
18635 
// Insert a general-purpose register value into element $idx of a 32-byte
// integral vector. The 128-bit lane holding the element is extracted into the
// TEMP $vtmp, patched there, and written back into a copy of $src in $dst.
//   x_idx - element position inside the selected 128-bit lane
//   y_idx - which of the two 128-bit lanes is selected (0 or 1)
18636 instruct insert32(vec dst, vec src, rRegI val, immU8 idx, vec vtmp) %{
18637   predicate(Matcher::vector_length_in_bytes(n) == 32);
18638   match(Set dst (VectorInsert (Binary src val) idx));
18639   effect(TEMP vtmp);
18640   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
18641   ins_encode %{
18643     BasicType elem_bt = Matcher::vector_element_basic_type(this);
          // A 128-bit lane holds 16 bytes worth of elements.
18644     int elem_per_lane = 16/type2aelembytes(elem_bt);
18645     int log2epr = log2(elem_per_lane);
18646
18647     assert(is_integral_type(elem_bt), "sanity");
18648     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
18649
          // Split the index into position-in-lane (low bits) and lane number.
18650     uint x_idx = $idx$$constant & right_n_bits(log2epr);
18651     uint y_idx = ($idx$$constant >> log2epr) & 1;
18652     __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
18653     __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
18654     __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
18655   %}
18656   ins_pipe( pipe_slow );
18657 %}
18658 
// Insert a general-purpose register value into element $idx of a 64-byte
// integral vector (asserts UseAVX > 2). The 128-bit lane holding the element
// is extracted into the TEMP $vtmp, patched there, and written back into a
// copy of $src in $dst.
//   x_idx - element position inside the selected 128-bit lane
//   y_idx - which of the four 128-bit lanes is selected (0..3)
18659 instruct insert64(vec dst, vec src, rRegI val, immU8 idx, legVec vtmp) %{
18660   predicate(Matcher::vector_length_in_bytes(n) == 64);
18661   match(Set dst (VectorInsert (Binary src val) idx));
18662   effect(TEMP vtmp);
18663   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
18664   ins_encode %{
18665     assert(UseAVX > 2, "sanity");
18666
18667     BasicType elem_bt = Matcher::vector_element_basic_type(this);
          // A 128-bit lane holds 16 bytes worth of elements.
18668     int elem_per_lane = 16/type2aelembytes(elem_bt);
18669     int log2epr = log2(elem_per_lane);
18670
18671     assert(is_integral_type(elem_bt), "");
18672     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
18673
          // Split the index into position-in-lane (low bits) and lane number.
18674     uint x_idx = $idx$$constant & right_n_bits(log2epr);
18675     uint y_idx = ($idx$$constant >> log2epr) & 3;
18676     __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
18677     __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
18678     __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
18679   %}
18680   ins_pipe( pipe_slow );
18681 %}
18682 
// Insert a long held in a general-purpose register into element $idx of a
// two-element vector. dst is both the input vector and the result, so the
// insertion happens in place with a single pinsrq.
// The encoding asserts UseSSE >= 4, a T_LONG element type and an in-range
// constant index.
18683 instruct insert2L(vec dst, rRegL val, immU8 idx) %{
18684   predicate(Matcher::vector_length(n) == 2);
18685   match(Set dst (VectorInsert (Binary dst val) idx));
18686   format %{ "vector_insert $dst,$val,$idx" %}
18687   ins_encode %{
18688     assert(UseSSE >= 4, "required");
18689     assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
18690     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
18691
18692     __ pinsrq($dst$$XMMRegister, $val$$Register, $idx$$constant);
18693   %}
18694   ins_pipe( pipe_slow );
18695 %}
18696 
// Insert a long held in a general-purpose register into element $idx of a
// four-element T_LONG vector. The 128-bit lane holding the element is
// extracted into the TEMP $vtmp, patched there, and written back into a copy
// of $src in $dst.
//   x_idx - element position inside the selected 128-bit lane (0 or 1)
//   y_idx - which of the two 128-bit lanes is selected (0 or 1)
18697 instruct insert4L(vec dst, vec src, rRegL val, immU8 idx, vec vtmp) %{
18698   predicate(Matcher::vector_length(n) == 4);
18699   match(Set dst (VectorInsert (Binary src val) idx));
18700   effect(TEMP vtmp);
18701   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
18702   ins_encode %{
18703     assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
18704     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
18705
          // Two longs per 128-bit lane: bit 0 is the slot, bit 1 the lane.
18706     uint x_idx = $idx$$constant & right_n_bits(1);
18707     uint y_idx = ($idx$$constant >> 1) & 1;
18709     __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
18710     __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
18711     __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
18712   %}
18713   ins_pipe( pipe_slow );
18714 %}
18715 
// Insert a long held in a general-purpose register into element $idx of an
// eight-element T_LONG vector. The 128-bit lane holding the element is
// extracted into the TEMP $vtmp, patched there, and written back into a copy
// of $src in $dst.
//   x_idx - element position inside the selected 128-bit lane (0 or 1)
//   y_idx - which of the four 128-bit lanes is selected (0..3)
18716 instruct insert8L(vec dst, vec src, rRegL val, immU8 idx, legVec vtmp) %{
18717   predicate(Matcher::vector_length(n) == 8);
18718   match(Set dst (VectorInsert (Binary src val) idx));
18719   effect(TEMP vtmp);
18720   format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
18721   ins_encode %{
18722     assert(Matcher::vector_element_basic_type(this) == T_LONG, "sanity");
18723     assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");
18724
          // Two longs per 128-bit lane: bit 0 is the slot, bits 2:1 the lane.
18725     uint x_idx = $idx$$constant & right_n_bits(1);
18726     uint y_idx = ($idx$$constant >> 1) & 3;
18727     __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
18728     __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
18729     __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
18730   %}
18731   ins_pipe( pipe_slow );
18732 %}
18733 
// Insert the float $val at constant lane $idx of a float vector with fewer
// than 8 elements. $dst is both the source vector and the result.
instruct insertF(vec dst, regF val, immU8 idx) %{
  predicate(Matcher::vector_length(n) < 8);
  match(Set dst (VectorInsert (Binary dst val) idx));
  format %{ "vector_insert $dst,$val,$idx" %}
  ins_encode %{
    assert(UseSSE >= 4, "sanity");

    assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    // Only the low two index bits are used; they are shifted into bits [5:4]
    // of the insertps immediate.
    uint lane_sel = $idx$$constant & right_n_bits(2);
    __ insertps($dst$$XMMRegister, $val$$XMMRegister, lane_sel << 4);
  %}
  ins_pipe( pipe_slow );
%}
18749 
// Insert the float $val at constant lane $idx of a float vector with 8 or
// 16 elements. The 128-bit chunk of $src holding the lane is copied into
// $vtmp, patched with vinsertps, and merged back over $src into $dst.
instruct vinsertF(vec dst, vec src, regF val, immU8 idx, vec vtmp) %{
  predicate(Matcher::vector_length(n) >= 8);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
  ins_encode %{
    assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    int vlen = Matcher::vector_length(this);
    uint x_idx = $idx$$constant & right_n_bits(2); // element position inside the 128-bit chunk
    if (vlen == 8) {
      // 8 floats: one index bit selects the 128-bit half.
      uint y_idx = ($idx$$constant >> 2) & 1;
      __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
      __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
      __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
    } else {
      // 16 floats: two index bits select one of four 128-bit chunks.
      assert(vlen == 16, "sanity");
      uint y_idx = ($idx$$constant >> 2) & 3;
      __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
      __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
      __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
    }
  %}
  ins_pipe( pipe_slow );
%}
18777 
// Insert the double $val at constant lane $idx of a 2-element double vector.
// $dst is both the source vector and the result; the value is first moved
// into the general-purpose TEMP $tmp so that pinsrq can be used.
instruct insert2D(vec dst, regD val, immU8 idx, rRegL tmp) %{
  predicate(Matcher::vector_length(n) == 2);
  match(Set dst (VectorInsert (Binary dst val) idx));
  effect(TEMP tmp);
  format %{ "vector_insert $dst,$val,$idx\t!using $tmp as TEMP" %}
  ins_encode %{
    assert(UseSSE >= 4, "sanity");
    assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    // XMM -> GPR, then GPR -> selected 64-bit lane of $dst.
    __ movq($tmp$$Register, $val$$XMMRegister);
    __ pinsrq($dst$$XMMRegister, $tmp$$Register, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}
18793 
// Insert the double $val at constant lane $idx of a 4-element double vector.
// $val is moved into the general-purpose TEMP $tmp; the 128-bit half of $src
// holding the lane is copied into $vtmp, patched, and merged back into $dst.
instruct insert4D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, vec vtmp) %{
  predicate(Matcher::vector_length(n) == 4);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp, TEMP tmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %}
  ins_encode %{
    assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    uint x_idx = $idx$$constant & right_n_bits(1); // element position inside the 128-bit half
    uint y_idx = ($idx$$constant >> 1) & 1;        // which 128-bit half of the vector
    __ movq($tmp$$Register, $val$$XMMRegister);
    __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
    __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}
18813 
// Insert the double $val at constant lane $idx of an 8-element double vector.
// $val is moved into the general-purpose TEMP $tmp; the 128-bit chunk of $src
// holding the lane is copied into $vtmp, patched, and merged back into $dst.
// NOTE(review): idx is declared immI here while the sibling insert rules use
// immU8 -- confirm whether that difference is intentional.
instruct insert8D(vec dst, vec src, regD val, immI idx, rRegL tmp, legVec vtmp) %{
  predicate(Matcher::vector_length(n) == 8);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP tmp, TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %}
  ins_encode %{
    assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    uint x_idx = $idx$$constant & right_n_bits(1); // element position inside the 128-bit chunk
    uint y_idx = ($idx$$constant >> 1) & 3;        // which of the four 128-bit chunks
    __ movq($tmp$$Register, $val$$XMMRegister);
    __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
    __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}
18832 
18833 // ====================REDUCTION ARITHMETIC=======================================
18834 
18835 // =======================Int Reduction==========================================
18836 
// Reduction over a vector of T_INT elements ($src2) combined with the scalar
// $src1; the result lands in $dst. One rule covers every listed reduction
// node, with the ideal opcode forwarded to reduceI to select the operation.
instruct reductionI(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_INT); // in(2) is src2
  match(Set dst (AddReductionVI src1 src2));
  match(Set dst (MulReductionVI src1 src2));
  match(Set dst (AndReductionV  src1 src2));
  match(Set dst ( OrReductionV  src1 src2));
  match(Set dst (XorReductionV  src1 src2));
  match(Set dst (MinReductionV  src1 src2));
  match(Set dst (MaxReductionV  src1 src2));
  match(Set dst (UMinReductionV  src1 src2));
  match(Set dst (UMaxReductionV  src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_int $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    const int lane_cnt = Matcher::vector_length(this, $src2);
    const int ideal_op = this->ideal_Opcode();
    __ reduceI(ideal_op, lane_cnt,
               $dst$$Register, $src1$$Register, $src2$$XMMRegister,
               $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
18857 
18858 // =======================Long Reduction==========================================
18859 
// Reduction over a vector of T_LONG elements ($src2) combined with the scalar
// $src1, selected when AVX512DQ is not supported. Vector operands are
// restricted to the legVec class. The ideal opcode is forwarded to reduceL.
instruct reductionL(rRegL dst, rRegL src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && !VM_Version::supports_avx512dq());
  match(Set dst (AddReductionVL src1 src2));
  match(Set dst (MulReductionVL src1 src2));
  match(Set dst (AndReductionV  src1 src2));
  match(Set dst ( OrReductionV  src1 src2));
  match(Set dst (XorReductionV  src1 src2));
  match(Set dst (MinReductionV  src1 src2));
  match(Set dst (MaxReductionV  src1 src2));
  match(Set dst (UMinReductionV  src1 src2));
  match(Set dst (UMaxReductionV  src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    const int lane_cnt = Matcher::vector_length(this, $src2);
    const int ideal_op = this->ideal_Opcode();
    __ reduceL(ideal_op, lane_cnt,
               $dst$$Register, $src1$$Register, $src2$$XMMRegister,
               $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
18880 
// Reduction over a vector of T_LONG elements ($src2) combined with the scalar
// $src1, selected when AVX512DQ is supported. Vector operands use the vec
// class here. The ideal opcode is forwarded to reduceL.
instruct reductionL_avx512dq(rRegL dst, rRegL src1, vec src2, vec vtmp1, vec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && VM_Version::supports_avx512dq());
  match(Set dst (AddReductionVL src1 src2));
  match(Set dst (MulReductionVL src1 src2));
  match(Set dst (AndReductionV  src1 src2));
  match(Set dst ( OrReductionV  src1 src2));
  match(Set dst (XorReductionV  src1 src2));
  match(Set dst (MinReductionV  src1 src2));
  match(Set dst (MaxReductionV  src1 src2));
  match(Set dst (UMinReductionV  src1 src2));
  match(Set dst (UMaxReductionV  src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    const int lane_cnt = Matcher::vector_length(this, $src2);
    const int ideal_op = this->ideal_Opcode();
    __ reduceL(ideal_op, lane_cnt,
               $dst$$Register, $src1$$Register, $src2$$XMMRegister,
               $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
18901 
18902 // =======================Float Reduction==========================================
18903 
// Strictly ordered float add/mul reduction for vectors of at most 4 elements.
// $dst is both the scalar input and the result of the reduction.
instruct reductionF128(regF dst, vec src, vec vtmp) %{
  predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) <= 4); // in(2) is src
  match(Set dst (AddReductionVF dst src));
  match(Set dst (MulReductionVF dst src));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_reduction_float  $dst,$src ; using $vtmp as TEMP" %}
  ins_encode %{
    const int lane_cnt = Matcher::vector_length(this, $src);
    const int ideal_op = this->ideal_Opcode();
    __ reduce_fp(ideal_op, lane_cnt,
                 $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
18917 
// Strictly ordered float add/mul reduction for 8-element vectors.
// $dst is both the scalar input and the result; two vector TEMPs are needed.
instruct reduction8F(regF dst, vec src, vec vtmp1, vec vtmp2) %{
  predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // in(2) is src
  match(Set dst (AddReductionVF dst src));
  match(Set dst (MulReductionVF dst src));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    const int lane_cnt = Matcher::vector_length(this, $src);
    const int ideal_op = this->ideal_Opcode();
    __ reduce_fp(ideal_op, lane_cnt,
                 $dst$$XMMRegister, $src$$XMMRegister,
                 $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
18931 
// Strictly ordered float add/mul reduction for 16-element vectors.
// $dst is both the scalar input and the result; vector operands are
// restricted to the legVec class.
instruct reduction16F(regF dst, legVec src, legVec vtmp1, legVec vtmp2) %{
  predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // in(2) is src
  match(Set dst (AddReductionVF dst src));
  match(Set dst (MulReductionVF dst src));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    const int lane_cnt = Matcher::vector_length(this, $src);
    const int ideal_op = this->ideal_Opcode();
    __ reduce_fp(ideal_op, lane_cnt,
                 $dst$$XMMRegister, $src$$XMMRegister,
                 $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
18945 
18946 
instruct unordered_reduction2F(regF dst, regF src1, vec src2) %{
  // Float add/mul reduction that does not require strict ordering, for
  // 2-element vectors. Intended for the VectorAPI, which permits such
  // reordering. src1 holds the reduction identity and is therefore not
  // handed to the emitter below.
  predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // in(2) is src2
  match(Set dst (AddReductionVF src1 src2));
  match(Set dst (MulReductionVF src1 src2));
  effect(TEMP dst);
  format %{ "vector_reduction_float  $dst,$src1,$src2 ;" %}
  ins_encode %{
    const int lane_cnt = Matcher::vector_length(this, $src2);
    const int ideal_op = this->ideal_Opcode();
    __ unordered_reduce_fp(ideal_op, lane_cnt, $dst$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
18963 
instruct unordered_reduction4F(regF dst, regF src1, vec src2, vec vtmp) %{
  // Float add/mul reduction that does not require strict ordering, for
  // 4-element vectors. Intended for the VectorAPI, which permits such
  // reordering. src1 holds the reduction identity and is therefore not
  // handed to the emitter below.
  predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // in(2) is src2
  match(Set dst (AddReductionVF src1 src2));
  match(Set dst (MulReductionVF src1 src2));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_reduction_float  $dst,$src1,$src2 ; using $vtmp as TEMP" %}
  ins_encode %{
    const int lane_cnt = Matcher::vector_length(this, $src2);
    const int ideal_op = this->ideal_Opcode();
    __ unordered_reduce_fp(ideal_op, lane_cnt,
                           $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
18980 
instruct unordered_reduction8F(regF dst, regF src1, vec src2, vec vtmp1, vec vtmp2) %{
  // Float add/mul reduction that does not require strict ordering, for
  // 8-element vectors. Intended for the VectorAPI, which permits such
  // reordering. src1 holds the reduction identity and is therefore not
  // handed to the emitter below.
  predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // in(2) is src2
  match(Set dst (AddReductionVF src1 src2));
  match(Set dst (MulReductionVF src1 src2));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    const int lane_cnt = Matcher::vector_length(this, $src2);
    const int ideal_op = this->ideal_Opcode();
    __ unordered_reduce_fp(ideal_op, lane_cnt,
                           $dst$$XMMRegister, $src2$$XMMRegister,
                           $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
18997 
instruct unordered_reduction16F(regF dst, regF src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  // Float add/mul reduction that does not require strict ordering, for
  // 16-element vectors. Intended for the VectorAPI, which permits such
  // reordering. src1 holds the reduction identity and is therefore not
  // handed to the emitter below.
  predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // in(2) is src2
  match(Set dst (AddReductionVF src1 src2));
  match(Set dst (MulReductionVF src1 src2));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    const int lane_cnt = Matcher::vector_length(this, $src2);
    const int ideal_op = this->ideal_Opcode();
    __ unordered_reduce_fp(ideal_op, lane_cnt,
                           $dst$$XMMRegister, $src2$$XMMRegister,
                           $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19014 
19015 // =======================Double Reduction==========================================
19016 
// Strictly ordered double add/mul reduction for 2-element vectors.
// $dst is both the scalar input and the result of the reduction.
instruct reduction2D(regD dst, vec src, vec vtmp) %{
  predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // in(2) is src
  match(Set dst (AddReductionVD dst src));
  match(Set dst (MulReductionVD dst src));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_reduction_double $dst,$src ; using $vtmp as TEMP" %}
  ins_encode %{
    const int lane_cnt = Matcher::vector_length(this, $src);
    const int ideal_op = this->ideal_Opcode();
    __ reduce_fp(ideal_op, lane_cnt,
                 $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19030 
// Strictly ordered double add/mul reduction for 4-element vectors.
// $dst is both the scalar input and the result; two vector TEMPs are needed.
instruct reduction4D(regD dst, vec src, vec vtmp1, vec vtmp2) %{
  predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // in(2) is src
  match(Set dst (AddReductionVD dst src));
  match(Set dst (MulReductionVD dst src));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    const int lane_cnt = Matcher::vector_length(this, $src);
    const int ideal_op = this->ideal_Opcode();
    __ reduce_fp(ideal_op, lane_cnt,
                 $dst$$XMMRegister, $src$$XMMRegister,
                 $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19044 
// Strictly ordered double add/mul reduction for 8-element vectors.
// $dst is both the scalar input and the result; vector operands are
// restricted to the legVec class.
instruct reduction8D(regD dst, legVec src, legVec vtmp1, legVec vtmp2) %{
  predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // in(2) is src
  match(Set dst (AddReductionVD dst src));
  match(Set dst (MulReductionVD dst src));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    const int lane_cnt = Matcher::vector_length(this, $src);
    const int ideal_op = this->ideal_Opcode();
    __ reduce_fp(ideal_op, lane_cnt,
                 $dst$$XMMRegister, $src$$XMMRegister,
                 $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19058 
instruct unordered_reduction2D(regD dst, regD src1, vec src2) %{
  // Double add/mul reduction that does not require strict ordering, for
  // 2-element vectors. Intended for the VectorAPI, which permits such
  // reordering. src1 holds the reduction identity and is therefore not
  // handed to the emitter below.
  predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // in(2) is src2
  match(Set dst (AddReductionVD src1 src2));
  match(Set dst (MulReductionVD src1 src2));
  effect(TEMP dst);
  format %{ "vector_reduction_double $dst,$src1,$src2 ;" %}
  ins_encode %{
    const int lane_cnt = Matcher::vector_length(this, $src2);
    const int ideal_op = this->ideal_Opcode();
    __ unordered_reduce_fp(ideal_op, lane_cnt, $dst$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19075 
instruct unordered_reduction4D(regD dst, regD src1, vec src2, vec vtmp) %{
  // Double add/mul reduction that does not require strict ordering, for
  // 4-element vectors. Intended for the VectorAPI, which permits such
  // reordering. src1 holds the reduction identity and is therefore not
  // handed to the emitter below.
  predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // in(2) is src2
  match(Set dst (AddReductionVD src1 src2));
  match(Set dst (MulReductionVD src1 src2));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp as TEMP" %}
  ins_encode %{
    const int lane_cnt = Matcher::vector_length(this, $src2);
    const int ideal_op = this->ideal_Opcode();
    __ unordered_reduce_fp(ideal_op, lane_cnt,
                           $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19092 
instruct unordered_reduction8D(regD dst, regD src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  // Double add/mul reduction that does not require strict ordering, for
  // 8-element vectors. Intended for the VectorAPI, which permits such
  // reordering. src1 holds the reduction identity and is therefore not
  // handed to the emitter below.
  predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // in(2) is src2
  match(Set dst (AddReductionVD src1 src2));
  match(Set dst (MulReductionVD src1 src2));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    const int lane_cnt = Matcher::vector_length(this, $src2);
    const int ideal_op = this->ideal_Opcode();
    __ unordered_reduce_fp(ideal_op, lane_cnt,
                           $dst$$XMMRegister, $src2$$XMMRegister,
                           $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19109 
19110 // =======================Byte Reduction==========================================
19111 
// Reduction over a vector of T_BYTE elements ($src2) combined with the scalar
// $src1, selected when AVX512BW is not supported. Vector operands are
// restricted to the legVec class. The ideal opcode is forwarded to reduceB.
// No MulReductionVI rule is listed here.
instruct reductionB(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && !VM_Version::supports_avx512bw());
  match(Set dst (AddReductionVI src1 src2));
  match(Set dst (AndReductionV  src1 src2));
  match(Set dst ( OrReductionV  src1 src2));
  match(Set dst (XorReductionV  src1 src2));
  match(Set dst (MinReductionV  src1 src2));
  match(Set dst (MaxReductionV  src1 src2));
  match(Set dst (UMinReductionV  src1 src2));
  match(Set dst (UMaxReductionV  src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    const int lane_cnt = Matcher::vector_length(this, $src2);
    const int ideal_op = this->ideal_Opcode();
    __ reduceB(ideal_op, lane_cnt,
               $dst$$Register, $src1$$Register, $src2$$XMMRegister,
               $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19131 
// Reduction over a vector of T_BYTE elements ($src2) combined with the scalar
// $src1, selected when AVX512BW is supported. Vector operands use the vec
// class here. The ideal opcode is forwarded to reduceB.
// No MulReductionVI rule is listed here.
instruct reductionB_avx512bw(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && VM_Version::supports_avx512bw());
  match(Set dst (AddReductionVI src1 src2));
  match(Set dst (AndReductionV  src1 src2));
  match(Set dst ( OrReductionV  src1 src2));
  match(Set dst (XorReductionV  src1 src2));
  match(Set dst (MinReductionV  src1 src2));
  match(Set dst (MaxReductionV  src1 src2));
  match(Set dst (UMinReductionV  src1 src2));
  match(Set dst (UMaxReductionV  src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    const int lane_cnt = Matcher::vector_length(this, $src2);
    const int ideal_op = this->ideal_Opcode();
    __ reduceB(ideal_op, lane_cnt,
               $dst$$Register, $src1$$Register, $src2$$XMMRegister,
               $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19151 
19152 // =======================Short Reduction==========================================
19153 
// Reduction over a vector of T_SHORT elements ($src2) combined with the
// scalar $src1; the result lands in $dst. The ideal opcode is forwarded to
// reduceS to select the operation.
instruct reductionS(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_SHORT); // in(2) is src2
  match(Set dst (AddReductionVI src1 src2));
  match(Set dst (MulReductionVI src1 src2));
  match(Set dst (AndReductionV  src1 src2));
  match(Set dst ( OrReductionV  src1 src2));
  match(Set dst (XorReductionV  src1 src2));
  match(Set dst (MinReductionV  src1 src2));
  match(Set dst (MaxReductionV  src1 src2));
  match(Set dst (UMinReductionV  src1 src2));
  match(Set dst (UMaxReductionV  src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_short $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    const int lane_cnt = Matcher::vector_length(this, $src2);
    const int ideal_op = this->ideal_Opcode();
    __ reduceS(ideal_op, lane_cnt,
               $dst$$Register, $src1$$Register, $src2$$XMMRegister,
               $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19174 
19175 // =======================Mul Reduction==========================================
19176 
// Multiply reduction over a vector of T_BYTE elements with at most 32
// elements. Unlike the other byte reduction rules, $dst is also a TEMP here.
instruct mul_reductionB(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
            Matcher::vector_length(n->in(2)) <= 32); // in(2) is src2
  match(Set dst (MulReductionVI src1 src2));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    const int lane_cnt = Matcher::vector_length(this, $src2);
    const int ideal_op = this->ideal_Opcode();
    __ mulreduceB(ideal_op, lane_cnt,
                  $dst$$Register, $src1$$Register, $src2$$XMMRegister,
                  $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19190 
// Multiply reduction over a vector of T_BYTE elements with exactly 64
// elements. Vector operands are restricted to the legVec class, and $dst is
// also a TEMP.
instruct mul_reduction64B(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
            Matcher::vector_length(n->in(2)) == 64); // in(2) is src2
  match(Set dst (MulReductionVI src1 src2));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    const int lane_cnt = Matcher::vector_length(this, $src2);
    const int ideal_op = this->ideal_Opcode();
    __ mulreduceB(ideal_op, lane_cnt,
                  $dst$$Register, $src1$$Register, $src2$$XMMRegister,
                  $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19204 
19205 //--------------------Min/Max Float Reduction --------------------
// Float Min/Max Reduction
// Float min/max reduction of a 2-element vector, used when AVX10.2 is not
// supported and the scalar input is the constant +Inf (for Min) or -Inf (for
// Max). The constant $src1 is not handed to the emitter; the third argument
// is false.
// NOTE(review): that flag presumably says $dst carries no incoming value --
// confirm against reduceFloatMinMax.
instruct minmax_reduction2F(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
                            legVec btmp, legVec xmm_1, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
  format %{ "vector_minmax2F_reduction $dst,$src1,$src2  ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    const int lane_cnt = Matcher::vector_length(this, $src2);
    const int ideal_op = this->ideal_Opcode();
    __ reduceFloatMinMax(ideal_op, lane_cnt, false,
                         $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
                         $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19227 
// Float min/max reduction of a vector with 4 or more elements, used when
// AVX10.2 is not supported and the scalar input is the constant +Inf (for
// Min) or -Inf (for Max). The constant $src1 is not handed to the emitter;
// the third argument is false.
// NOTE(review): that flag presumably says $dst carries no incoming value --
// confirm against reduceFloatMinMax.
instruct minmax_reductionF(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
                           legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
  format %{ "vector_minmaxF_reduction $dst,$src1,$src2  ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    const int lane_cnt = Matcher::vector_length(this, $src2);
    const int ideal_op = this->ideal_Opcode();
    __ reduceFloatMinMax(ideal_op, lane_cnt, false,
                         $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
                         $atmp$$XMMRegister, $btmp$$XMMRegister,
                         $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19248 
// Float min/max reduction of a 2-element vector where $dst is also the
// scalar input of the reduction; used when AVX10.2 is not supported.
// The third argument to the emitter is true here.
// NOTE(review): that flag presumably says $dst carries an incoming value --
// confirm against reduceFloatMinMax.
instruct minmax_reduction2F_av(legRegF dst, legVec src, legVec tmp, legVec atmp,
                               legVec btmp, legVec xmm_1, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
  format %{ "vector_minmax2F_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    const int lane_cnt = Matcher::vector_length(this, $src);
    const int ideal_op = this->ideal_Opcode();
    __ reduceFloatMinMax(ideal_op, lane_cnt, true,
                         $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister,
                         $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19267 
19268 
// Float min/max reduction of a vector with 4 or more elements where $dst is
// also the scalar input of the reduction; used when AVX10.2 is not supported.
// The third argument to the emitter is true here.
// NOTE(review): that flag presumably says $dst carries an incoming value --
// confirm against reduceFloatMinMax.
instruct minmax_reductionF_av(legRegF dst, legVec src, legVec tmp, legVec atmp, legVec btmp,
                              legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
  format %{ "vector_minmaxF_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    const int lane_cnt = Matcher::vector_length(this, $src);
    const int ideal_op = this->ideal_Opcode();
    __ reduceFloatMinMax(ideal_op, lane_cnt, true,
                         $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister,
                         $atmp$$XMMRegister, $btmp$$XMMRegister,
                         $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19287 
// AVX10.2 float min/max reduction of a 2-element vector when the scalar
// input is the constant +Inf (for Min) or -Inf (for Max). Only one vector
// TEMP is used; the three leading temp slots of the emitter receive xnoreg.
instruct minmax_reduction2F_avx10_2(regF dst, immF src1, vec src2, vec xtmp1) %{
  predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP xtmp1);
  format %{ "vector_minmax_reduction $dst, $src1, $src2 \t; using $xtmp1 as TEMP" %}
  ins_encode %{
    const int lane_cnt = Matcher::vector_length(this, $src2);
    const int ideal_op = this->ideal_Opcode();
    __ reduceFloatMinMax(ideal_op, lane_cnt, false,
                         $dst$$XMMRegister, $src2$$XMMRegister,
                         xnoreg, xnoreg, xnoreg,
                         $xtmp1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19305 
// AVX10.2 float min/max reduction of a vector with 4 or more elements when
// the scalar input is the constant +Inf (for Min) or -Inf (for Max). Two
// vector TEMPs are used; the three leading temp slots of the emitter receive
// xnoreg.
instruct minmax_reductionF_avx10_2(regF dst, immF src1, vec src2, vec xtmp1, vec xtmp2) %{
  predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_minmax_reduction $dst, $src1, $src2 \t; using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    const int lane_cnt = Matcher::vector_length(this, $src2);
    const int ideal_op = this->ideal_Opcode();
    __ reduceFloatMinMax(ideal_op, lane_cnt, false,
                         $dst$$XMMRegister, $src2$$XMMRegister,
                         xnoreg, xnoreg, xnoreg,
                         $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19323 
// AVX10.2 float min/max reduction of a 2-element vector where $dst is also
// the scalar input of the reduction. The third argument to the emitter is
// true, and the three leading temp slots receive xnoreg.
instruct minmax_reduction2F_av_avx10_2(regF dst, vec src, vec xtmp1) %{
  predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP xtmp1);
  format %{ "vector_minmax2F_reduction $dst, $src \t; using $xtmp1 as TEMP" %}
  ins_encode %{
    const int lane_cnt = Matcher::vector_length(this, $src);
    const int ideal_op = this->ideal_Opcode();
    __ reduceFloatMinMax(ideal_op, lane_cnt, true,
                         $dst$$XMMRegister, $src$$XMMRegister,
                         xnoreg, xnoreg, xnoreg,
                         $xtmp1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19339 
// AVX10.2 float min/max reduction of a vector with 4 or more lanes where dst
// is also the first input of the reduction node (accumulating form).
// The format label is "vector_minmaxF_reduction": this rule handles >= 4
// lanes, so the former "2F" label (used by the 2-lane rule) was misleading
// in printed assembly.
instruct minmax_reductionF_av_avx10_2(regF dst, vec src, vec xtmp1, vec xtmp2) %{
  predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_minmaxF_reduction $dst, $src \t; using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    // NOTE(review): 'true' presumably tells the stub that dst holds a live
    // input value - confirm against reduceFloatMinMax.
    __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg,
                         $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19355 
//--------------------Min/Max Double Reduction --------------------
// Pre-AVX10.2 double min/max reduction of a 2-lane vector.
// Only matches when src1 is the identity constant (+Inf for Min, -Inf for Max),
// so the immediate is not handed to the stub. Uses four vector temps and kills flags.
instruct minmax_reduction2D(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2,
                            legVec tmp3, legVec tmp4, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "vector_minmax2D_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    const int ideal_op   = this->ideal_Opcode();
    const int lane_count = Matcher::vector_length(this, $src2);
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister input  = $src2$$XMMRegister;
    __ reduceDoubleMinMax(ideal_op, lane_count, false, result, input,
                          $tmp1$$XMMRegister, $tmp2$$XMMRegister,
                          $tmp3$$XMMRegister, $tmp4$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19377 
// Pre-AVX10.2 double min/max reduction of a vector with 4 or more lanes.
// Only matches when src1 is the identity constant (+Inf for Min, -Inf for Max).
// Needs five vector temps and kills flags.
instruct minmax_reductionD(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2,
                           legVec tmp3, legVec tmp4, legVec tmp5, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
  format %{ "vector_minmaxD_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    const int ideal_op   = this->ideal_Opcode();
    const int lane_count = Matcher::vector_length(this, $src2);
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister input  = $src2$$XMMRegister;
    __ reduceDoubleMinMax(ideal_op, lane_count, false, result, input,
                          $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                          $tmp4$$XMMRegister, $tmp5$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19398 
19399 
// Pre-AVX10.2 double min/max reduction of a 2-lane vector where dst is also
// the first input of the reduction node (accumulating form).
instruct minmax_reduction2D_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2,
                               legVec tmp3, legVec tmp4, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "vector_minmax2D_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    const int ideal_op   = this->ideal_Opcode();
    const int lane_count = Matcher::vector_length(this, $src);
    XMMRegister accum = $dst$$XMMRegister;
    XMMRegister input = $src$$XMMRegister;
    __ reduceDoubleMinMax(ideal_op, lane_count, true, accum, input,
                          $tmp1$$XMMRegister, $tmp2$$XMMRegister,
                          $tmp3$$XMMRegister, $tmp4$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19418 
// Pre-AVX10.2 double min/max reduction of a vector with 4 or more lanes where
// dst is also the first input of the reduction node (accumulating form).
instruct minmax_reductionD_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2, legVec tmp3,
                              legVec tmp4, legVec tmp5, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
  format %{ "vector_minmaxD_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    const int ideal_op   = this->ideal_Opcode();
    const int lane_count = Matcher::vector_length(this, $src);
    XMMRegister accum = $dst$$XMMRegister;
    XMMRegister input = $src$$XMMRegister;
    __ reduceDoubleMinMax(ideal_op, lane_count, true, accum, input,
                          $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                          $tmp4$$XMMRegister, $tmp5$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19437 
// AVX10.2 double min/max reduction of a 2-lane vector.
// Only matches when src1 is the identity constant (+Inf for Min, -Inf for Max),
// so the immediate is not handed to the stub.
instruct minmax_reduction2D_avx10_2(regD dst, immD src1, vec src2, vec xtmp1) %{
  predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP xtmp1);
  format %{ "vector_minmax2D_reduction $dst, $src1, $src2 ; using $xtmp1 as TEMP" %}
  ins_encode %{
    const int ideal_op   = this->ideal_Opcode();
    const int lane_count = Matcher::vector_length(this, $src2);
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister input  = $src2$$XMMRegister;
    // Only one scratch vector is supplied; the first three temp slots are xnoreg.
    __ reduceDoubleMinMax(ideal_op, lane_count, false, result, input,
                          xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19455 
// AVX10.2 double min/max reduction of a vector with 4 or more lanes.
// Only matches when src1 is the identity constant (+Inf for Min, -Inf for Max).
instruct minmax_reductionD_avx10_2(regD dst, immD src1, vec src2, vec xtmp1, vec xtmp2) %{
  predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_minmaxD_reduction $dst, $src1, $src2 ; using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    const int ideal_op   = this->ideal_Opcode();
    const int lane_count = Matcher::vector_length(this, $src2);
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister input  = $src2$$XMMRegister;
    // Two scratch vectors are supplied; the first three temp slots are xnoreg.
    __ reduceDoubleMinMax(ideal_op, lane_count, false, result, input,
                          xnoreg, xnoreg, xnoreg,
                          $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19473 
19474 
// AVX10.2 double min/max reduction of a 2-lane vector where dst is also the
// first input of the reduction node (accumulating form).
instruct minmax_reduction2D_av_avx10_2(regD dst, vec src, vec xtmp1) %{
  predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP xtmp1);
  format %{ "vector_minmax2D_reduction $dst, $src ; using $xtmp1 as TEMP" %}
  ins_encode %{
    const int ideal_op   = this->ideal_Opcode();
    const int lane_count = Matcher::vector_length(this, $src);
    XMMRegister accum = $dst$$XMMRegister;
    XMMRegister input = $src$$XMMRegister;
    __ reduceDoubleMinMax(ideal_op, lane_count, true, accum, input,
                          xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19490 
// AVX10.2 double min/max reduction of a vector with 4 or more lanes where dst
// is also the first input of the reduction node (accumulating form).
instruct minmax_reductionD_av_avx10_2(regD dst, vec src, vec xtmp1, vec xtmp2) %{
  predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_minmaxD_reduction $dst, $src ; using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    const int ideal_op   = this->ideal_Opcode();
    const int lane_count = Matcher::vector_length(this, $src);
    XMMRegister accum = $dst$$XMMRegister;
    XMMRegister input = $src$$XMMRegister;
    __ reduceDoubleMinMax(ideal_op, lane_count, true, accum, input,
                          xnoreg, xnoreg, xnoreg,
                          $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19506 
19507 // ====================VECTOR ARITHMETIC=======================================
19508 
19509 // --------------------------------- ADD --------------------------------------
19510 
19511 // Bytes vector add
// Non-AVX form (UseAVX == 0): AddVB with dst as both first input and result,
// emitted as the two-operand paddb.
instruct vaddB(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddVB dst src));
  format %{ "paddb   $dst,$src\t! add packedB" %}
  ins_encode %{
    XMMRegister accum = $dst$$XMMRegister;
    __ paddb(accum, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19521 
// AVX form: AddVB of two register vectors, emitted as three-operand vpaddb
// using the vector-length encoding of this node.
instruct vaddB_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb  $dst,$src1,$src2\t! add packedB" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    XMMRegister result = $dst$$XMMRegister;
    __ vpaddb(result, $src1$$XMMRegister, $src2$$XMMRegister, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
19532 
// AVX form with the second operand folded from a LoadVector.
// Restricted to nodes whose first input is wider than 8 bytes.
instruct vaddB_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb  $dst,$src,$mem\t! add packedB" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    XMMRegister result = $dst$$XMMRegister;
    __ vpaddb(result, $src$$XMMRegister, $mem$$Address, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
19544 
19545 // Shorts/Chars vector add
// Non-AVX form (UseAVX == 0): AddVS with dst as both first input and result,
// emitted as the two-operand paddw.
instruct vaddS(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddVS dst src));
  format %{ "paddw   $dst,$src\t! add packedS" %}
  ins_encode %{
    XMMRegister accum = $dst$$XMMRegister;
    __ paddw(accum, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19555 
// AVX form: AddVS of two register vectors, emitted as three-operand vpaddw
// using the vector-length encoding of this node.
instruct vaddS_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw  $dst,$src1,$src2\t! add packedS" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    XMMRegister result = $dst$$XMMRegister;
    __ vpaddw(result, $src1$$XMMRegister, $src2$$XMMRegister, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
19566 
// AVX form with the second operand folded from a LoadVector.
// Restricted to nodes whose first input is wider than 8 bytes.
instruct vaddS_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw  $dst,$src,$mem\t! add packedS" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    XMMRegister result = $dst$$XMMRegister;
    __ vpaddw(result, $src$$XMMRegister, $mem$$Address, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
19578 
19579 // Integers vector add
// Non-AVX form (UseAVX == 0): AddVI with dst as both first input and result,
// emitted as the two-operand paddd.
instruct vaddI(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddVI dst src));
  format %{ "paddd   $dst,$src\t! add packedI" %}
  ins_encode %{
    XMMRegister accum = $dst$$XMMRegister;
    __ paddd(accum, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19589 
// AVX form: AddVI of two register vectors, emitted as three-operand vpaddd
// using the vector-length encoding of this node.
instruct vaddI_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd  $dst,$src1,$src2\t! add packedI" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    XMMRegister result = $dst$$XMMRegister;
    __ vpaddd(result, $src1$$XMMRegister, $src2$$XMMRegister, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
19600 
19601 
// AVX form with the second operand folded from a LoadVector.
// Restricted to nodes whose first input is wider than 8 bytes.
instruct vaddI_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd  $dst,$src,$mem\t! add packedI" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    XMMRegister result = $dst$$XMMRegister;
    __ vpaddd(result, $src$$XMMRegister, $mem$$Address, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
19613 
19614 // Longs vector add
// Non-AVX form (UseAVX == 0): AddVL with dst as both first input and result,
// emitted as the two-operand paddq.
instruct vaddL(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddVL dst src));
  format %{ "paddq   $dst,$src\t! add packedL" %}
  ins_encode %{
    XMMRegister accum = $dst$$XMMRegister;
    __ paddq(accum, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19624 
// AVX form: AddVL of two register vectors, emitted as three-operand vpaddq
// using the vector-length encoding of this node.
instruct vaddL_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq  $dst,$src1,$src2\t! add packedL" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    XMMRegister result = $dst$$XMMRegister;
    __ vpaddq(result, $src1$$XMMRegister, $src2$$XMMRegister, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
19635 
// AVX form with the second operand folded from a LoadVector.
// Restricted to nodes whose first input is wider than 8 bytes.
instruct vaddL_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq  $dst,$src,$mem\t! add packedL" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    XMMRegister result = $dst$$XMMRegister;
    __ vpaddq(result, $src$$XMMRegister, $mem$$Address, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
19647 
19648 // Floats vector add
// Non-AVX form (UseAVX == 0): AddVF with dst as both first input and result,
// emitted as the two-operand addps.
instruct vaddF(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddVF dst src));
  format %{ "addps   $dst,$src\t! add packedF" %}
  ins_encode %{
    XMMRegister accum = $dst$$XMMRegister;
    __ addps(accum, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19658 
// AVX form: AddVF of two register vectors, emitted as three-operand vaddps
// using the vector-length encoding of this node.
instruct vaddF_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps  $dst,$src1,$src2\t! add packedF" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    XMMRegister result = $dst$$XMMRegister;
    __ vaddps(result, $src1$$XMMRegister, $src2$$XMMRegister, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
19669 
// AVX form with the second operand folded from a LoadVector.
// Restricted to nodes whose first input is wider than 8 bytes.
instruct vaddF_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps  $dst,$src,$mem\t! add packedF" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    XMMRegister result = $dst$$XMMRegister;
    __ vaddps(result, $src$$XMMRegister, $mem$$Address, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
19681 
19682 // Doubles vector add
// Non-AVX form (UseAVX == 0): AddVD with dst as both first input and result,
// emitted as the two-operand addpd.
instruct vaddD(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddVD dst src));
  format %{ "addpd   $dst,$src\t! add packedD" %}
  ins_encode %{
    XMMRegister accum = $dst$$XMMRegister;
    __ addpd(accum, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19692 
// AVX form: AddVD of two register vectors, emitted as three-operand vaddpd
// using the vector-length encoding of this node.
instruct vaddD_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd  $dst,$src1,$src2\t! add packedD" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    XMMRegister result = $dst$$XMMRegister;
    __ vaddpd(result, $src1$$XMMRegister, $src2$$XMMRegister, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
19703 
// AVX form with the second operand folded from a LoadVector.
// Restricted to nodes whose first input is wider than 8 bytes.
instruct vaddD_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd  $dst,$src,$mem\t! add packedD" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    XMMRegister result = $dst$$XMMRegister;
    __ vaddpd(result, $src$$XMMRegister, $mem$$Address, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
19715 
19716 // --------------------------------- SUB --------------------------------------
19717 
19718 // Bytes vector sub
// Non-AVX form (UseAVX == 0): SubVB with dst as both minuend and result,
// emitted as the two-operand psubb.
instruct vsubB(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubVB dst src));
  format %{ "psubb   $dst,$src\t! sub packedB" %}
  ins_encode %{
    XMMRegister minuend = $dst$$XMMRegister;
    __ psubb(minuend, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19728 
// AVX form: SubVB of two register vectors (src1 - src2), emitted as
// three-operand vpsubb using the vector-length encoding of this node.
instruct vsubB_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb  $dst,$src1,$src2\t! sub packedB" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    XMMRegister result = $dst$$XMMRegister;
    __ vpsubb(result, $src1$$XMMRegister, $src2$$XMMRegister, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
19739 
// AVX form with the subtrahend folded from a LoadVector.
// Restricted to nodes whose first input is wider than 8 bytes.
instruct vsubB_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb  $dst,$src,$mem\t! sub packedB" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    XMMRegister result = $dst$$XMMRegister;
    __ vpsubb(result, $src$$XMMRegister, $mem$$Address, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
19751 
19752 // Shorts/Chars vector sub
// Non-AVX form (UseAVX == 0): SubVS with dst as both minuend and result,
// emitted as the two-operand psubw.
instruct vsubS(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubVS dst src));
  format %{ "psubw   $dst,$src\t! sub packedS" %}
  ins_encode %{
    XMMRegister minuend = $dst$$XMMRegister;
    __ psubw(minuend, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19762 
19763 
// AVX form: SubVS of two register vectors (src1 - src2), emitted as
// three-operand vpsubw using the vector-length encoding of this node.
instruct vsubS_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw  $dst,$src1,$src2\t! sub packedS" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    XMMRegister result = $dst$$XMMRegister;
    __ vpsubw(result, $src1$$XMMRegister, $src2$$XMMRegister, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
19774 
// AVX form with the subtrahend folded from a LoadVector.
// Restricted to nodes whose first input is wider than 8 bytes.
instruct vsubS_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw  $dst,$src,$mem\t! sub packedS" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    XMMRegister result = $dst$$XMMRegister;
    __ vpsubw(result, $src$$XMMRegister, $mem$$Address, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
19786 
19787 // Integers vector sub
// Non-AVX form (UseAVX == 0): SubVI with dst as both minuend and result,
// emitted as the two-operand psubd.
instruct vsubI(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubVI dst src));
  format %{ "psubd   $dst,$src\t! sub packedI" %}
  ins_encode %{
    XMMRegister minuend = $dst$$XMMRegister;
    __ psubd(minuend, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19797 
// AVX form: SubVI of two register vectors (src1 - src2), emitted as
// three-operand vpsubd using the vector-length encoding of this node.
instruct vsubI_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd  $dst,$src1,$src2\t! sub packedI" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    XMMRegister result = $dst$$XMMRegister;
    __ vpsubd(result, $src1$$XMMRegister, $src2$$XMMRegister, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
19808 
// AVX form with the subtrahend folded from a LoadVector.
// Restricted to nodes whose first input is wider than 8 bytes.
instruct vsubI_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd  $dst,$src,$mem\t! sub packedI" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    XMMRegister result = $dst$$XMMRegister;
    __ vpsubd(result, $src$$XMMRegister, $mem$$Address, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
19820 
19821 // Longs vector sub
// Non-AVX form (UseAVX == 0): SubVL with dst as both minuend and result,
// emitted as the two-operand psubq.
instruct vsubL(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubVL dst src));
  format %{ "psubq   $dst,$src\t! sub packedL" %}
  ins_encode %{
    XMMRegister minuend = $dst$$XMMRegister;
    __ psubq(minuend, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19831 
// AVX form: SubVL of two register vectors (src1 - src2), emitted as
// three-operand vpsubq using the vector-length encoding of this node.
instruct vsubL_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubVL src1 src2));
  format %{ "vpsubq  $dst,$src1,$src2\t! sub packedL" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    XMMRegister result = $dst$$XMMRegister;
    __ vpsubq(result, $src1$$XMMRegister, $src2$$XMMRegister, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
19842 
19843 
// AVX form with the subtrahend folded from a LoadVector.
// Restricted to nodes whose first input is wider than 8 bytes.
instruct vsubL_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (SubVL src (LoadVector mem)));
  format %{ "vpsubq  $dst,$src,$mem\t! sub packedL" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    XMMRegister result = $dst$$XMMRegister;
    __ vpsubq(result, $src$$XMMRegister, $mem$$Address, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
19855 
19856 // Floats vector sub
// Non-AVX form (UseAVX == 0): SubVF with dst as both minuend and result,
// emitted as the two-operand subps.
instruct vsubF(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubVF dst src));
  format %{ "subps   $dst,$src\t! sub packedF" %}
  ins_encode %{
    XMMRegister minuend = $dst$$XMMRegister;
    __ subps(minuend, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19866 
// AVX form: SubVF of two register vectors (src1 - src2), emitted as
// three-operand vsubps using the vector-length encoding of this node.
instruct vsubF_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps  $dst,$src1,$src2\t! sub packedF" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    XMMRegister result = $dst$$XMMRegister;
    __ vsubps(result, $src1$$XMMRegister, $src2$$XMMRegister, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
19877 
// AVX form with the subtrahend folded from a LoadVector.
// Restricted to nodes whose first input is wider than 8 bytes.
instruct vsubF_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (SubVF src (LoadVector mem)));
  format %{ "vsubps  $dst,$src,$mem\t! sub packedF" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    XMMRegister result = $dst$$XMMRegister;
    __ vsubps(result, $src$$XMMRegister, $mem$$Address, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
19889 
19890 // Doubles vector sub
// Non-AVX form (UseAVX == 0): SubVD with dst as both minuend and result,
// emitted as the two-operand subpd.
instruct vsubD(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubVD dst src));
  format %{ "subpd   $dst,$src\t! sub packedD" %}
  ins_encode %{
    XMMRegister minuend = $dst$$XMMRegister;
    __ subpd(minuend, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19900 
// AVX form: SubVD of two register vectors (src1 - src2), emitted as
// three-operand vsubpd using the vector-length encoding of this node.
instruct vsubD_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubVD src1 src2));
  format %{ "vsubpd  $dst,$src1,$src2\t! sub packedD" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    XMMRegister result = $dst$$XMMRegister;
    __ vsubpd(result, $src1$$XMMRegister, $src2$$XMMRegister, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
19911 
// AVX form with the subtrahend folded from a LoadVector.
// Restricted to nodes whose first input is wider than 8 bytes.
instruct vsubD_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (SubVD src (LoadVector mem)));
  format %{ "vsubpd  $dst,$src,$mem\t! sub packedD" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    XMMRegister result = $dst$$XMMRegister;
    __ vsubpd(result, $src$$XMMRegister, $mem$$Address, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
19923 
19924 // --------------------------------- MUL --------------------------------------
19925 
19926 // Byte vector mul
// Byte multiply for vectors of at most 8 bytes: widen both inputs to words,
// multiply, keep the low byte of each product and pack back to bytes.
instruct vmul8B(vec dst, vec src1, vec src2, vec xtmp) %{
  predicate(Matcher::vector_length_in_bytes(n) <= 8);
  match(Set dst (MulVB src1 src2));
  effect(TEMP dst, TEMP xtmp);
  format %{ "mulVB   $dst, $src1, $src2\t! using $xtmp as TEMP" %}
  ins_encode %{
    assert(UseSSE > 3, "required");
    XMMRegister prod    = $dst$$XMMRegister;
    XMMRegister widened = $xtmp$$XMMRegister;
    // Sign-extend the byte lanes of both operands to 16-bit lanes.
    __ pmovsxbw(prod, $src1$$XMMRegister);
    __ pmovsxbw(widened, $src2$$XMMRegister);
    __ pmullw(prod, widened);
    // Zero the high byte of every word so the unsigned-saturating pack
    // below passes the low byte through unchanged.
    __ psllw(prod, 8);
    __ psrlw(prod, 8);
    __ packuswb(prod, prod);
  %}
  ins_pipe( pipe_slow );
%}
19943 
// Non-AVX byte multiply for vectors wider than 8 bytes. Odd and even byte
// lanes are multiplied separately as 16-bit words and then merged.
instruct vmulB(vec dst, vec src1, vec src2, vec xtmp) %{
  predicate(UseAVX == 0 && Matcher::vector_length_in_bytes(n) > 8);
  match(Set dst (MulVB src1 src2));
  effect(TEMP dst, TEMP xtmp);
  format %{ "mulVB   $dst, $src1, $src2\t! using $xtmp as TEMP" %}
  ins_encode %{
    assert(UseSSE > 3, "required");
    XMMRegister acc     = $dst$$XMMRegister;
    XMMRegister scratch = $xtmp$$XMMRegister;
    XMMRegister lhs     = $src1$$XMMRegister;
    XMMRegister rhs     = $src2$$XMMRegister;
    // Odd bytes: shift them down into the low byte of each word, multiply,
    // then shift the low product byte back into the odd position.
    __ movdqu(acc, lhs);
    __ psrlw(acc, 8);
    __ movdqu(scratch, rhs);
    __ psrlw(scratch, 8);
    __ pmullw(acc, scratch);
    __ psllw(acc, 8);
    // Even bytes: multiply the words as they are and keep only the low byte
    // of each product.
    __ movdqu(scratch, lhs);
    __ pmullw(scratch, rhs);
    __ psllw(scratch, 8);
    __ psrlw(scratch, 8);
    // Merge odd and even results.
    __ por(acc, scratch);
  %}
  ins_pipe( pipe_slow );
%}
19968 
// AVX byte multiply for vectors wider than 8 bytes. Odd and even byte lanes
// are multiplied separately as 16-bit words in the two temps; dst is written
// only by the final merge.
instruct vmulB_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
  predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n) > 8);
  match(Set dst (MulVB src1 src2));
  effect(TEMP xtmp1, TEMP xtmp2);
  format %{ "vmulVB  $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    XMMRegister even_part = $xtmp1$$XMMRegister;
    XMMRegister odd_part  = $xtmp2$$XMMRegister;
    XMMRegister lhs       = $src1$$XMMRegister;
    XMMRegister rhs       = $src2$$XMMRegister;
    // Odd bytes: bring them down to the low byte of each word, multiply,
    // then move the low product byte back to the odd position.
    __ vpsrlw(odd_part, lhs, 8, vec_enc);
    __ vpsrlw(even_part, rhs, 8, vec_enc);
    __ vpmullw(odd_part, even_part, odd_part, vec_enc);
    __ vpsllw(odd_part, odd_part, 8, vec_enc);
    // Even bytes: multiply the words directly and keep the low product byte.
    __ vpmullw(even_part, lhs, rhs, vec_enc);
    __ vpsllw(even_part, even_part, 8, vec_enc);
    __ vpsrlw(even_part, even_part, 8, vec_enc);
    // Merge both halves into the destination.
    __ vpor($dst$$XMMRegister, even_part, odd_part, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
19990 
19991 // Shorts/Chars vector mul
// Non-AVX form (UseAVX == 0): MulVS with dst as both first input and result,
// emitted as the two-operand pmullw.
instruct vmulS(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (MulVS dst src));
  format %{ "pmullw  $dst,$src\t! mul packedS" %}
  ins_encode %{
    XMMRegister accum = $dst$$XMMRegister;
    __ pmullw(accum, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20001 
// AVX form: MulVS of two register vectors, emitted as three-operand vpmullw
// using the vector-length encoding of this node.
instruct vmulS_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packedS" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    XMMRegister result = $dst$$XMMRegister;
    __ vpmullw(result, $src1$$XMMRegister, $src2$$XMMRegister, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
20012 
// AVX form with the second operand folded from a LoadVector.
// Restricted to nodes whose first input is wider than 8 bytes.
instruct vmulS_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packedS" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    XMMRegister result = $dst$$XMMRegister;
    __ vpmullw(result, $src$$XMMRegister, $mem$$Address, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
20024 
20025 // Integers vector mul
// Non-AVX form (UseAVX == 0): MulVI with dst as both first input and result,
// emitted as the two-operand pmulld (asserts UseSSE > 3).
instruct vmulI(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (MulVI dst src));
  format %{ "pmulld  $dst,$src\t! mul packedI" %}
  ins_encode %{
    assert(UseSSE > 3, "required");
    XMMRegister accum = $dst$$XMMRegister;
    __ pmulld(accum, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
20036 
// AVX form: MulVI of two register vectors, emitted as three-operand vpmulld
// using the vector-length encoding of this node.
instruct vmulI_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! mul packedI" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    XMMRegister result = $dst$$XMMRegister;
    __ vpmulld(result, $src1$$XMMRegister, $src2$$XMMRegister, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
20047 
// AVX form with the second operand folded from a LoadVector.
// Restricted to nodes whose first input is wider than 8 bytes.
instruct vmulI_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (MulVI src (LoadVector mem)));
  format %{ "vpmulld $dst,$src,$mem\t! mul packedI" %}
  ins_encode %{
    const int vec_enc = vector_length_encoding(this);
    XMMRegister result = $dst$$XMMRegister;
    __ vpmulld(result, $src$$XMMRegister, $mem$$Address, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
20059 
20060 // Longs vector mul
// Long multiply via a single evpmullq. Selected for 64-byte vectors when
// supports_avx512dq() holds, or for any length when supports_avx512vldq() holds.
instruct evmulL_reg(vec dst, vec src1, vec src2) %{
  predicate((Matcher::vector_length_in_bytes(n) == 64 &&
             VM_Version::supports_avx512dq()) ||
            VM_Version::supports_avx512vldq());
  match(Set dst (MulVL src1 src2));
  ins_cost(500);
  format %{ "evpmullq $dst,$src1,$src2\t! mul packedL" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    const int vec_enc = vector_length_encoding(this);
    XMMRegister result = $dst$$XMMRegister;
    __ evpmullq(result, $src1$$XMMRegister, $src2$$XMMRegister, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
20075 
// Long multiply via evpmullq with the second operand folded from a LoadVector.
// Selected for 64-byte vectors when supports_avx512dq() holds, or for vectors
// wider than 8 bytes when supports_avx512vldq() holds.
instruct evmulL_mem(vec dst, vec src, memory mem) %{
  predicate((Matcher::vector_length_in_bytes(n) == 64 &&
             VM_Version::supports_avx512dq()) ||
            (Matcher::vector_length_in_bytes(n) > 8 &&
             VM_Version::supports_avx512vldq()));
  match(Set dst (MulVL src (LoadVector mem)));
  format %{ "evpmullq $dst,$src,$mem\t! mul packedL" %}
  ins_cost(500);
  ins_encode %{
    assert(UseAVX > 2, "required");
    const int vec_enc = vector_length_encoding(this);
    XMMRegister result = $dst$$XMMRegister;
    __ evpmullq(result, $src$$XMMRegister, $mem$$Address, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
20091 
// SSE-only (UseAVX == 0) packed-long multiply assembled from 32-bit multiplies:
//   result = lo(a)*lo(b) + ((lo(a)*hi(b) + hi(a)*lo(b)) << 32)   per 64-bit lane.
// dst and xtmp are declared TEMP because both are written before src1 and src2
// are read for the last time.
20092 instruct vmulL(vec dst, vec src1, vec src2, vec xtmp) %{
20093   predicate(UseAVX == 0);
20094   match(Set dst (MulVL src1 src2));
20095   ins_cost(500);
20096   effect(TEMP dst, TEMP xtmp);
20097   format %{ "mulVL   $dst, $src1, $src2\t! using $xtmp as TEMP" %}
20098   ins_encode %{
20099     assert(VM_Version::supports_sse4_1(), "required");
20100     // Get the lo-hi products; only their lower 32 bits are of concern.
          // Shuffle src2 (imm 0xB1) so each 32-bit half of src1 is multiplied
          // with the opposite half of src2.
20101     __ pshufd($xtmp$$XMMRegister, $src2$$XMMRegister, 0xB1);
20102     __ pmulld($xtmp$$XMMRegister, $src1$$XMMRegister);
          // Add the two cross products of each lane together ...
20103     __ pshufd($dst$$XMMRegister, $xtmp$$XMMRegister, 0xB1);
20104     __ paddd($dst$$XMMRegister, $xtmp$$XMMRegister);
          // ... and shift the sum up by 32 bits within each 64-bit lane.
20105     __ psllq($dst$$XMMRegister, 32);
20106     // Get the lo-lo products
20107     __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
20108     __ pmuludq($xtmp$$XMMRegister, $src2$$XMMRegister);
          // Final result: shifted cross-product sum plus the lo-lo product.
20109     __ paddq($dst$$XMMRegister, $xtmp$$XMMRegister);
20110   %}
20111   ins_pipe( pipe_slow );
20112 %}
20113 
// AVX packed-long multiply for targets where the single-instruction form is not
// admitted by its predicate: 64-byte vectors without supports_avx512dq(), or
// shorter vectors without supports_avx512vldq(). Built from 32-bit multiplies:
//   result = lo(a)*lo(b) + ((lo(a)*hi(b) + hi(a)*lo(b)) << 32)   per 64-bit lane.
// dst is written only by the final vpaddq, so only the two scratch vectors are TEMP.
20114 instruct vmulL_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
20115   predicate(UseAVX > 0 &&
20116             ((Matcher::vector_length_in_bytes(n) == 64 &&
20117               !VM_Version::supports_avx512dq()) ||
20118              (Matcher::vector_length_in_bytes(n) < 64 &&
20119               !VM_Version::supports_avx512vldq())));
20120   match(Set dst (MulVL src1 src2));
20121   effect(TEMP xtmp1, TEMP xtmp2);
20122   ins_cost(500);
20123   format %{ "vmulVL  $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
20124   ins_encode %{
20125     int vlen_enc = vector_length_encoding(this);
20126     // Get the lo-hi products; only their lower 32 bits are of concern.
          // Shuffle src2 (imm 0xB1) so each 32-bit half of src1 is multiplied
          // with the opposite half of src2.
20127     __ vpshufd($xtmp1$$XMMRegister, $src2$$XMMRegister, 0xB1, vlen_enc);
20128     __ vpmulld($xtmp1$$XMMRegister, $src1$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
          // Add the two cross products of each lane, then shift the sum up by 32 bits.
20129     __ vpshufd($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, 0xB1, vlen_enc);
20130     __ vpaddd($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
20131     __ vpsllq($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 32, vlen_enc);
20132     // Get the lo-lo products
20133     __ vpmuludq($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
          // Final result: lo-lo product plus the shifted cross-product sum.
20134     __ vpaddq($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20135   %}
20136   ins_pipe( pipe_slow );
20137 %}
20138 
// Low-cost (ins_cost 100) MulVL form: when the node reports has_uint_inputs(),
// the whole multiply is emitted as one vpmuludq.
20139 instruct vmuludq_reg(vec dst, vec src1, vec src2) %{
20140   predicate(UseAVX > 0 && n->as_MulVL()->has_uint_inputs());
20141   match(Set dst (MulVL src1 src2));
20142   ins_cost(100);
20143   format %{ "vpmuludq $dst,$src1,$src2\t! muludq packedL" %}
20144   ins_encode %{
20145     int vec_enc = vector_length_encoding(this);
20146     __ vpmuludq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vec_enc);
20147   %}
20148   ins_pipe( pipe_slow );
20149 %}
20150 
// Low-cost (ins_cost 100) MulVL form: when the node reports has_int_inputs(),
// the whole multiply is emitted as one vpmuldq.
20151 instruct vmuldq_reg(vec dst, vec src1, vec src2) %{
20152   predicate(UseAVX > 0 && n->as_MulVL()->has_int_inputs());
20153   match(Set dst (MulVL src1 src2));
20154   ins_cost(100);
20155   format %{ "vpmuldq $dst,$src1,$src2\t! muldq packedL" %}
20156   ins_encode %{
20157     int vec_enc = vector_length_encoding(this);
20158     __ vpmuldq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vec_enc);
20159   %}
20160   ins_pipe( pipe_slow );
20161 %}
20162 
20163 // Floats vector mul
// SSE form (UseAVX == 0) of packed-float multiply. The match rule uses dst as
// the first input, so the emitted mulps is the two-operand dst = dst * src.
20164 instruct vmulF(vec dst, vec src) %{
20165   predicate(UseAVX == 0);
20166   match(Set dst (MulVF dst src));
20167   format %{ "mulps   $dst,$src\t! mul packedF" %}
20168   ins_encode %{
20169     __ mulps($dst$$XMMRegister, $src$$XMMRegister);
20170   %}
20171   ins_pipe( pipe_slow );
20172 %}
20173 
// AVX form (UseAVX > 0) of packed-float multiply with three register operands.
20174 instruct vmulF_reg(vec dst, vec src1, vec src2) %{
20175   predicate(UseAVX > 0);
20176   match(Set dst (MulVF src1 src2));
20177   format %{ "vmulps  $dst,$src1,$src2\t! mul packedF" %}
20178   ins_encode %{
20179     int vec_enc = vector_length_encoding(this);
20180     __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vec_enc);
20181   %}
20182   ins_pipe( pipe_slow );
20183 %}
20184 
// AVX packed-float multiply with the second operand folded from a vector load.
// Requires the first input of the matched node to be wider than 8 bytes.
20185 instruct vmulF_mem(vec dst, vec src, memory mem) %{
20186   predicate((UseAVX > 0) &&
20187             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20188   match(Set dst (MulVF src (LoadVector mem)));
20189   format %{ "vmulps  $dst,$src,$mem\t! mul packedF" %}
20190   ins_encode %{
20191     int vec_enc = vector_length_encoding(this);
20192     __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vec_enc);
20193   %}
20194   ins_pipe( pipe_slow );
20195 %}
20196 
20197 // Doubles vector mul
// SSE form (UseAVX == 0) of packed-double multiply. The match rule uses dst as
// the first input, so the emitted mulpd is the two-operand dst = dst * src.
20198 instruct vmulD(vec dst, vec src) %{
20199   predicate(UseAVX == 0);
20200   match(Set dst (MulVD dst src));
20201   format %{ "mulpd   $dst,$src\t! mul packedD" %}
20202   ins_encode %{
20203     __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
20204   %}
20205   ins_pipe( pipe_slow );
20206 %}
20207 
// AVX form (UseAVX > 0) of packed-double multiply with three register operands.
20208 instruct vmulD_reg(vec dst, vec src1, vec src2) %{
20209   predicate(UseAVX > 0);
20210   match(Set dst (MulVD src1 src2));
20211   format %{ "vmulpd  $dst,$src1,$src2\t! mul packedD" %}
20212   ins_encode %{
20213     int vec_enc = vector_length_encoding(this);
20214     __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vec_enc);
20215   %}
20216   ins_pipe( pipe_slow );
20217 %}
20218 
// AVX packed-double multiply with the second operand folded from a vector load.
// Requires the first input of the matched node to be wider than 8 bytes.
20219 instruct vmulD_mem(vec dst, vec src, memory mem) %{
20220   predicate((UseAVX > 0) &&
20221             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20222   match(Set dst (MulVD src (LoadVector mem)));
20223   format %{ "vmulpd  $dst,$src,$mem\t! mul packedD" %}
20224   ins_encode %{
20225     int vec_enc = vector_length_encoding(this);
20226     __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vec_enc);
20227   %}
20228   ins_pipe( pipe_slow );
20229 %}
20230 
20231 // --------------------------------- DIV --------------------------------------
20232 
20233 // Floats vector div
// SSE form (UseAVX == 0) of packed-float divide. The match rule uses dst as
// the dividend, so the emitted divps is the two-operand dst = dst / src.
20234 instruct vdivF(vec dst, vec src) %{
20235   predicate(UseAVX == 0);
20236   match(Set dst (DivVF dst src));
20237   format %{ "divps   $dst,$src\t! div packedF" %}
20238   ins_encode %{
20239     __ divps($dst$$XMMRegister, $src$$XMMRegister);
20240   %}
20241   ins_pipe( pipe_slow );
20242 %}
20243 
// AVX form (UseAVX > 0) of packed-float divide with three register operands.
20244 instruct vdivF_reg(vec dst, vec src1, vec src2) %{
20245   predicate(UseAVX > 0);
20246   match(Set dst (DivVF src1 src2));
20247   format %{ "vdivps  $dst,$src1,$src2\t! div packedF" %}
20248   ins_encode %{
20249     int vec_enc = vector_length_encoding(this);
20250     __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vec_enc);
20251   %}
20252   ins_pipe( pipe_slow );
20253 %}
20254 
// AVX packed-float divide with the divisor folded from a vector load.
// Requires the first input of the matched node to be wider than 8 bytes.
20255 instruct vdivF_mem(vec dst, vec src, memory mem) %{
20256   predicate((UseAVX > 0) &&
20257             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20258   match(Set dst (DivVF src (LoadVector mem)));
20259   format %{ "vdivps  $dst,$src,$mem\t! div packedF" %}
20260   ins_encode %{
20261     int vec_enc = vector_length_encoding(this);
20262     __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vec_enc);
20263   %}
20264   ins_pipe( pipe_slow );
20265 %}
20266 
20267 // Doubles vector div
// SSE form (UseAVX == 0) of packed-double divide. The match rule uses dst as
// the dividend, so the emitted divpd is the two-operand dst = dst / src.
20268 instruct vdivD(vec dst, vec src) %{
20269   predicate(UseAVX == 0);
20270   match(Set dst (DivVD dst src));
20271   format %{ "divpd   $dst,$src\t! div packedD" %}
20272   ins_encode %{
20273     __ divpd($dst$$XMMRegister, $src$$XMMRegister);
20274   %}
20275   ins_pipe( pipe_slow );
20276 %}
20277 
// AVX form (UseAVX > 0) of packed-double divide with three register operands.
20278 instruct vdivD_reg(vec dst, vec src1, vec src2) %{
20279   predicate(UseAVX > 0);
20280   match(Set dst (DivVD src1 src2));
20281   format %{ "vdivpd  $dst,$src1,$src2\t! div packedD" %}
20282   ins_encode %{
20283     int vec_enc = vector_length_encoding(this);
20284     __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vec_enc);
20285   %}
20286   ins_pipe( pipe_slow );
20287 %}
20288 
// AVX packed-double divide with the divisor folded from a vector load.
// Requires the first input of the matched node to be wider than 8 bytes.
20289 instruct vdivD_mem(vec dst, vec src, memory mem) %{
20290   predicate((UseAVX > 0) &&
20291             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
20292   match(Set dst (DivVD src (LoadVector mem)));
20293   format %{ "vdivpd  $dst,$src,$mem\t! div packedD" %}
20294   ins_encode %{
20295     int vec_enc = vector_length_encoding(this);
20296     __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vec_enc);
20297   %}
20298   ins_pipe( pipe_slow );
20299 %}
20300 
20301 // ------------------------------ MinMax ---------------------------------------
20302 
20303 // Byte, Short, Int vector Min/Max
// SSE (UseAVX == 0) signed Min/Max for byte, short and int element vectors.
// One rule serves both MinV and MaxV; the ideal opcode and element type are
// handed to pminmax, which operates on dst in place.
20304 instruct minmax_reg_sse(vec dst, vec src) %{
20305   predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
20306             UseAVX == 0);
20307   match(Set dst (MinV dst src));
20308   match(Set dst (MaxV dst src));
20309   format %{ "vector_minmax  $dst,$src\t!  " %}
20310   ins_encode %{
20311     assert(UseSSE >= 4, "required");
20312
20313     BasicType bt = Matcher::vector_element_basic_type(this);
20314     int opc = this->ideal_Opcode();
20315     __ pminmax(opc, bt, $dst$$XMMRegister, $src$$XMMRegister);
20316   %}
20317   ins_pipe( pipe_slow );
20318 %}
20319 
// AVX (UseAVX > 0) signed Min/Max for byte, short and int element vectors.
// One rule serves both MinV and MaxV; vpminmax receives the ideal opcode,
// the element type and the vector length encoding.
20320 instruct vminmax_reg(vec dst, vec src1, vec src2) %{
20321   predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
20322             UseAVX > 0);
20323   match(Set dst (MinV src1 src2));
20324   match(Set dst (MaxV src1 src2));
20325   format %{ "vector_minmax  $dst,$src1,$src2\t!  " %}
20326   ins_encode %{
20327     int vec_enc = vector_length_encoding(this);
20328     BasicType bt = Matcher::vector_element_basic_type(this);
20329     int opc = this->ideal_Opcode();
20330
20331     __ vpminmax(opc, bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vec_enc);
20332   %}
20333   ins_pipe( pipe_slow );
20334 %}
20335 
20336 // Long vector Min/Max
// SSE (UseAVX == 0) signed Min/Max for 16-byte vectors of longs.
// The MinV rule matches (dst src) while the MaxV rule matches (src dst).
// tmp uses the rxmm0 operand class, and both dst and tmp are TEMPs.
20337 instruct minmaxL_reg_sse(vec dst, vec src, rxmm0 tmp) %{
20338   predicate(Matcher::vector_length_in_bytes(n) == 16 && Matcher::vector_element_basic_type(n) == T_LONG &&
20339             UseAVX == 0);
20340   match(Set dst (MinV dst src));
20341   match(Set dst (MaxV src dst));
20342   effect(TEMP dst, TEMP tmp);
20343   format %{ "vector_minmaxL  $dst,$src\t!using $tmp as TEMP" %}
20344   ins_encode %{
20345     assert(UseSSE >= 4, "required");
20346
20347     BasicType bt = Matcher::vector_element_basic_type(this);
20348     assert(bt == T_LONG, "sanity");
20349     int opc = this->ideal_Opcode();
20350
20351     __ pminmax(opc, bt, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister);
20352   %}
20353   ins_pipe( pipe_slow );
20354 %}
20355 
// AVX signed Min/Max for long vectors of at most 32 bytes on targets where
// supports_avx512vl() is false. Operands use the legVec class and dst is a TEMP.
20356 instruct vminmaxL_reg_avx(legVec dst, legVec src1, legVec src2) %{
20357   predicate(Matcher::vector_length_in_bytes(n) <= 32 && Matcher::vector_element_basic_type(n) == T_LONG &&
20358             UseAVX > 0 && !VM_Version::supports_avx512vl());
20359   match(Set dst (MinV src1 src2));
20360   match(Set dst (MaxV src1 src2));
20361   effect(TEMP dst);
20362   format %{ "vector_minmaxL  $dst,$src1,$src2\t! " %}
20363   ins_encode %{
20364     int vec_enc = vector_length_encoding(this);
20365     BasicType bt = Matcher::vector_element_basic_type(this);
20366     assert(bt == T_LONG, "sanity");
20367     int opc = this->ideal_Opcode();
20368
20369     __ vpminmax(opc, bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vec_enc);
20370   %}
20371   ins_pipe( pipe_slow );
20372 %}
20373 
// Signed Min/Max for long vectors on EVEX targets: selected for 64-byte vectors
// or whenever supports_avx512vl() holds. One rule serves both MinV and MaxV;
// vpminmax receives the ideal opcode, element type and vector length encoding.
20374 instruct vminmaxL_reg_evex(vec dst, vec src1, vec src2) %{
20375   predicate((Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()) &&
20376             Matcher::vector_element_basic_type(n) == T_LONG);
20377   match(Set dst (MinV src1 src2));
20378   match(Set dst (MaxV src1 src2));
        // Every operand in the format needs its sigil, otherwise the disassembly
        // listing prints the bare operand name instead of the register.
20379   format %{ "vector_minmaxL  $dst,$src1,$src2\t! " %}
20380   ins_encode %{
20381     assert(UseAVX > 2, "required");
20382
20383     int vlen_enc = vector_length_encoding(this);
20384     int opcode = this->ideal_Opcode();
20385     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20386     assert(elem_bt == T_LONG, "sanity");
20387
20388     __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20389   %}
20390   ins_pipe( pipe_slow );
20391 %}
20392 
20393 // Float/Double vector Min/Max
// Float/double vector Min/Max when supports_avx10_2() holds. No temporaries are
// declared; k0 is passed as the mask register argument to vminmax_fp_avx10_2.
20394 instruct minmaxFP_reg_avx10_2(vec dst, vec a, vec b) %{
20395   predicate(VM_Version::supports_avx10_2() &&
20396             is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
20397   match(Set dst (MinV a b));
20398   match(Set dst (MaxV a b));
20399   format %{ "vector_minmaxFP  $dst, $a, $b" %}
20400   ins_encode %{
20401     int vec_enc = vector_length_encoding(this);
20402     BasicType bt = Matcher::vector_element_basic_type(this);
20403     int opc = this->ideal_Opcode();
20404     __ vminmax_fp_avx10_2(opc, bt, $dst$$XMMRegister, k0, $a$$XMMRegister, $b$$XMMRegister, vec_enc);
20405   %}
20406   ins_pipe( pipe_slow );
20407 %}
20408 
20409 // Float/Double vector Min/Max
// Float/double vector Min/Max for vectors of at most 32 bytes with UseAVX > 0
// when supports_avx10_2() is false. All operands use the legVec class, and
// three vector temporaries are handed to vminmax_fp.
20410 instruct minmaxFP_reg(legVec dst, legVec a, legVec b, legVec tmp, legVec atmp, legVec btmp) %{
20411   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) <= 32 &&
20412             is_floating_point_type(Matcher::vector_element_basic_type(n)) && // T_FLOAT, T_DOUBLE
20413             UseAVX > 0);
20414   match(Set dst (MinV a b));
20415   match(Set dst (MaxV a b));
20416   effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
20417   format %{ "vector_minmaxFP  $dst,$a,$b\t!using $tmp, $atmp, $btmp as TEMP" %}
20418   ins_encode %{
20419     assert(UseAVX > 0, "required");
20420
20421     int vec_enc = vector_length_encoding(this);
20422     BasicType bt = Matcher::vector_element_basic_type(this);
20423     int opc = this->ideal_Opcode();
20424
20425     __ vminmax_fp(opc, bt,
20426                   $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
20427                   $tmp$$XMMRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vec_enc);
20428   %}
20429   ins_pipe( pipe_slow );
20430 %}
20431 
// Float/double vector Min/Max for 64-byte vectors when supports_avx10_2() is
// false. dst, two vector scratch registers and one opmask register are TEMPs;
// all of them are passed to evminmax_fp.
20432 instruct evminmaxFP_reg_evex(vec dst, vec a, vec b, vec atmp, vec btmp, kReg ktmp) %{
20433   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) == 64 &&
20434             is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
20435   match(Set dst (MinV a b));
20436   match(Set dst (MaxV a b));
20437   effect(TEMP dst, USE a, USE b, TEMP atmp, TEMP btmp, TEMP ktmp);
        // List every declared scratch operand, including the opmask temporary,
        // so the printed form shows all registers this node clobbers.
20438   format %{ "vector_minmaxFP  $dst,$a,$b\t!using $atmp, $btmp, $ktmp as TEMP" %}
20439   ins_encode %{
20440     assert(UseAVX > 2, "required");
20441
20442     int opcode = this->ideal_Opcode();
20443     int vlen_enc = vector_length_encoding(this);
20444     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20445
20446     __ evminmax_fp(opcode, elem_bt,
20447                    $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
20448                    $ktmp$$KRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc);
20449   %}
20450   ins_pipe( pipe_slow );
20451 %}
20452 
20453 // ------------------------------ Unsigned vector Min/Max ----------------------
20454 
// Unsigned vector Min/Max, register operands. Selected when supports_avx512vl()
// holds or the element type is not T_LONG. The element type is asserted to be
// integral before vpuminmax is emitted.
20455 instruct vector_uminmax_reg(vec dst, vec a, vec b) %{
20456   predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG);
20457   match(Set dst (UMinV a b));
20458   match(Set dst (UMaxV a b));
20459   format %{ "vector_uminmax $dst,$a,$b\t!" %}
20460   ins_encode %{
20461     int vec_enc = vector_length_encoding(this);
20462     BasicType bt = Matcher::vector_element_basic_type(this);
20463     assert(is_integral_type(bt), "");
20464     int opc = this->ideal_Opcode();
20465     __ vpuminmax(opc, bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vec_enc);
20466   %}
20467   ins_pipe( pipe_slow );
20468 %}
20469 
// Unsigned vector Min/Max with the second operand folded from a vector load.
// Selected when supports_avx512vl() holds or the element type is not T_LONG.
20470 instruct vector_uminmax_mem(vec dst, vec a, memory b) %{
20471   predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG);
20472   match(Set dst (UMinV a (LoadVector b)));
20473   match(Set dst (UMaxV a (LoadVector b)));
20474   format %{ "vector_uminmax $dst,$a,$b\t!" %}
20475   ins_encode %{
20476     int vec_enc = vector_length_encoding(this);
20477     BasicType bt = Matcher::vector_element_basic_type(this);
20478     assert(is_integral_type(bt), "");
20479     int opc = this->ideal_Opcode();
20480     __ vpuminmax(opc, bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$Address, vec_enc);
20481   %}
20482   ins_pipe( pipe_slow );
20483 %}
20484 
// Unsigned Min/Max for long vectors on targets where supports_avx512vl() is
// false. Two vector scratch registers are declared TEMP and passed to vpuminmaxq.
20485 instruct vector_uminmaxq_reg(vec dst, vec a, vec b, vec xtmp1, vec xtmp2) %{
20486   predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_LONG);
20487   match(Set dst (UMinV a b));
20488   match(Set dst (UMaxV a b));
20489   effect(TEMP xtmp1, TEMP xtmp2);
        // Both scratch operands carry their sigil so the listing prints the
        // allocated registers rather than the bare operand names.
20490   format %{ "vector_uminmaxq $dst,$a,$b\t! using $xtmp1 and $xtmp2 as TEMP" %}
20491   ins_encode %{
20492     int opcode = this->ideal_Opcode();
20493     int vlen_enc = vector_length_encoding(this);
20494     __ vpuminmaxq(opcode, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
20495   %}
20496   ins_pipe( pipe_slow );
20497 %}
20498 
// Masked unsigned vector Min/Max, register form. dst is both the first input
// and the result; the opmask register selects lanes. The boolean argument to
// evmasked_op is passed as true (presumably merge-masking; confirm against
// the helper).
20499 instruct vector_uminmax_reg_masked(vec dst, vec src2, kReg mask) %{
20500   match(Set dst (UMinV (Binary dst src2) mask));
20501   match(Set dst (UMaxV (Binary dst src2) mask));
20502   format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %}
20503   ins_encode %{
20504     int vec_enc = vector_length_encoding(this);
20505     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20506     int opcode = this->ideal_Opcode();
20507     __ evmasked_op(opcode, elem_bt, $mask$$KRegister, $dst$$XMMRegister,
20508                    $dst$$XMMRegister, $src2$$XMMRegister, true, vec_enc);
20509   %}
20510   ins_pipe( pipe_slow );
20511 %}
20512 
// Masked unsigned vector Min/Max with the second operand folded from a vector
// load. dst is both the first input and the result; the opmask register
// selects lanes. The boolean argument to evmasked_op is passed as true
// (presumably merge-masking; confirm against the helper).
20513 instruct vector_uminmax_mem_masked(vec dst, memory src2, kReg mask) %{
20514   match(Set dst (UMinV (Binary dst (LoadVector src2)) mask));
20515   match(Set dst (UMaxV (Binary dst (LoadVector src2)) mask));
20516   format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %}
20517   ins_encode %{
20518     int vec_enc = vector_length_encoding(this);
20519     BasicType elem_bt = Matcher::vector_element_basic_type(this);
20520     int opcode = this->ideal_Opcode();
20521     __ evmasked_op(opcode, elem_bt, $mask$$KRegister, $dst$$XMMRegister,
20522                    $dst$$XMMRegister, $src2$$Address, true, vec_enc);
20523   %}
20524   ins_pipe( pipe_slow );
20525 %}
20526 
20527 // --------------------------------- Signum/CopySign ---------------------------
20528 
// Scalar float signum computed in place on dst. The zero and one inputs arrive
// as register operands, and the flags register is killed.
20529 instruct signumF_reg(regF dst, regF zero, regF one, rFlagsReg cr) %{
20530   match(Set dst (SignumF dst (Binary zero one)));
20531   effect(KILL cr);
20532   format %{ "signumF $dst, $dst" %}
20533   ins_encode %{
20534     int opc = this->ideal_Opcode();
20535     __ signum_fp(opc, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
20536   %}
20537   ins_pipe( pipe_slow );
20538 %}
20539 
// Scalar double signum computed in place on dst. The zero and one inputs arrive
// as register operands, and the flags register is killed.
20540 instruct signumD_reg(regD dst, regD zero, regD one, rFlagsReg cr) %{
20541   match(Set dst (SignumD dst (Binary zero one)));
20542   effect(KILL cr);
20543   format %{ "signumD $dst, $dst" %}
20544   ins_encode %{
20545     int opc = this->ideal_Opcode();
20546     __ signum_fp(opc, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
20547   %}
20548   ins_pipe( pipe_slow );
20549 %}
20550 
// Vector signum (float or double lanes) for vectors of at most 32 bytes on
// targets where supports_avx512vl() is false. dst and one vector scratch
// register are TEMPs.
20551 instruct signumV_reg_avx(vec dst, vec src, vec zero, vec one, vec xtmp1) %{
20552   predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
20553   match(Set dst (SignumVF src (Binary zero one)));
20554   match(Set dst (SignumVD src (Binary zero one)));
20555   effect(TEMP dst, TEMP xtmp1);
20556   format %{ "vector_signum_avx $dst, $src\t! using $xtmp1 as TEMP" %}
20557   ins_encode %{
20558     int vlen_enc = vector_length_encoding(this);
20559     int opc = this->ideal_Opcode();
20560     __ vector_signum_avx(opc, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
20561                          $xtmp1$$XMMRegister, vlen_enc);
20562   %}
20563   ins_pipe( pipe_slow );
20564 %}
20565 
// Vector signum (float or double lanes) for EVEX targets: selected when
// supports_avx512vl() holds or the vector is 64 bytes. dst and one opmask
// register are TEMPs.
20566 instruct signumV_reg_evex(vec dst, vec src, vec zero, vec one, kReg ktmp1) %{
20567   predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
20568   match(Set dst (SignumVF src (Binary zero one)));
20569   match(Set dst (SignumVD src (Binary zero one)));
20570   effect(TEMP dst, TEMP ktmp1);
20571   format %{ "vector_signum_evex $dst, $src\t! using $ktmp1 as TEMP" %}
20572   ins_encode %{
20573     int vlen_enc = vector_length_encoding(this);
20574     int opc = this->ideal_Opcode();
20575     __ vector_signum_evex(opc, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
20576                           $ktmp1$$KRegister, vlen_enc);
20577   %}
20578   ins_pipe( pipe_slow );
20579 %}
20580 
20581 // ---------------------------------------
20582 // For copySign use 0xE4 as the imm8 truth-table selector for vpternlog
20583 // Desired Truth Table: A -> xmm0 bit, B -> xmm1 bit, C -> xmm2 bit
20584 // C (xmm2) is set to 0x7FFFFFFF
20585 // Wherever xmm2 is 0, we want to pick from B (sign)
20586 // Wherever xmm2 is 1, we want to pick from A (src)
20587 //
20588 // A B C Result
20589 // 0 0 0 0
20590 // 0 0 1 0
20591 // 0 1 0 1
20592 // 0 1 1 0
20593 // 1 0 0 0
20594 // 1 0 1 1
20595 // 1 1 0 1
20596 // 1 1 1 1
20597 //
20598 // Result going from high bit to low bit is 0b11100100 = 0xE4
20599 // ---------------------------------------
20600 
// Scalar float copySign computed in place on dst, taking the sign from src.
// A vector scratch register and an integer scratch register are TEMPs.
20601 instruct copySignF_reg(regF dst, regF src, regF tmp1, rRegI tmp2) %{
20602   match(Set dst (CopySignF dst src));
20603   effect(TEMP tmp1, TEMP tmp2);
20604   format %{ "CopySignF $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
20605   ins_encode %{
          // Build the mask 0x7FFFFFFF (every bit except the top one) in the
          // integer scratch register and move it into the vector scratch register.
20606     __ movl($tmp2$$Register, 0x7FFFFFFF);
20607     __ movdl($tmp1$$XMMRegister, $tmp2$$Register);
          // Bitwise select per the 0xE4 truth table documented above this
          // instruct: mask bit set keeps the dst bit, mask bit clear takes src.
20608     __ vpternlogd($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
20609   %}
20610   ins_pipe( pipe_slow );
20611 %}
20612 
// Scalar double copySign computed in place on dst, taking the sign from src.
// The match rule also carries an immD operand named zero, which the encoding
// does not reference. A vector and a 64-bit integer scratch register are TEMPs.
20613 instruct copySignD_imm(regD dst, regD src, regD tmp1, rRegL tmp2, immD zero) %{
20614   match(Set dst (CopySignD dst (Binary src zero)));
20615   ins_cost(100);
20616   effect(TEMP tmp1, TEMP tmp2);
20617   format %{ "CopySignD  $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
20618   ins_encode %{
          // Build the mask 0x7FFFFFFFFFFFFFFF (every bit except the top one) in
          // the integer scratch register and move it into the vector scratch register.
20619     __ mov64($tmp2$$Register, 0x7FFFFFFFFFFFFFFF);
20620     __ movq($tmp1$$XMMRegister, $tmp2$$Register);
          // Bitwise select per the 0xE4 truth table documented above copySignF_reg:
          // mask bit set keeps the dst bit, mask bit clear takes src.
20621     __ vpternlogq($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
20622   %}
20623   ins_pipe( pipe_slow );
20624 %}
20625 
20626 //----------------------------- CompressBits/ExpandBits ------------------------
20627 
// Int CompressBits (node type is an int) with register operands, emitted as a
// single pextl (parallel bit extract).
20628 instruct compressBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
20629   predicate(n->bottom_type()->isa_int());
20630   match(Set dst (CompressBits src mask));
20631   format %{ "pextl  $dst, $src, $mask\t! parallel bit extract" %}
20632   ins_encode %{
20633     __ pextl($dst$$Register, $src$$Register, $mask$$Register);
20634   %}
20635   ins_pipe( pipe_slow );
20636 %}
20637 
// Int ExpandBits (node type is an int) with register operands, emitted as a
// single pdepl (parallel bit deposit).
20638 instruct expandBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
20639   predicate(n->bottom_type()->isa_int());
20640   match(Set dst (ExpandBits src mask));
20641   format %{ "pdepl  $dst, $src, $mask\t! parallel bit deposit" %}
20642   ins_encode %{
20643     __ pdepl($dst$$Register, $src$$Register, $mask$$Register);
20644   %}
20645   ins_pipe( pipe_slow );
20646 %}
20647 
// Int CompressBits whose mask comes from a LoadI folded into the instruction;
// emitted as a single pextl with a memory operand.
20648 instruct compressBitsI_mem(rRegI dst, rRegI src, memory mask) %{
20649   predicate(n->bottom_type()->isa_int());
20650   match(Set dst (CompressBits src (LoadI mask)));
20651   format %{ "pextl  $dst, $src, $mask\t! parallel bit extract" %}
20652   ins_encode %{
20653     __ pextl($dst$$Register, $src$$Register, $mask$$Address);
20654   %}
20655   ins_pipe( pipe_slow );
20656 %}
20657 
// Int ExpandBits whose mask comes from a LoadI folded into the instruction;
// emitted as a single pdepl with a memory operand.
20658 instruct expandBitsI_mem(rRegI dst, rRegI src, memory mask) %{
20659   predicate(n->bottom_type()->isa_int());
20660   match(Set dst (ExpandBits src (LoadI mask)));
20661   format %{ "pdepl  $dst, $src, $mask\t! parallel bit deposit" %}
20662   ins_encode %{
20663     __ pdepl($dst$$Register, $src$$Register, $mask$$Address);
20664   %}
20665   ins_pipe( pipe_slow );
20666 %}
20667 
20668 // --------------------------------- Sqrt --------------------------------------
20669 
// Packed-float square root, register form. There is no predicate on this rule;
// the AVX requirement is checked by an assert at encoding time.
20670 instruct vsqrtF_reg(vec dst, vec src) %{
20671   match(Set dst (SqrtVF src));
20672   format %{ "vsqrtps  $dst,$src\t! sqrt packedF" %}
20673   ins_encode %{
20674     assert(UseAVX > 0, "required");
20675     int vec_enc = vector_length_encoding(this);
20676     __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vec_enc);
20677   %}
20678   ins_pipe( pipe_slow );
20679 %}
20680 
// Packed-float square root with the operand folded from a vector load.
// Requires the loaded vector (first input of the node) to be wider than 8 bytes.
20681 instruct vsqrtF_mem(vec dst, memory mem) %{
20682   predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
20683   match(Set dst (SqrtVF (LoadVector mem)));
20684   format %{ "vsqrtps  $dst,$mem\t! sqrt packedF" %}
20685   ins_encode %{
20686     assert(UseAVX > 0, "required");
20687     int vec_enc = vector_length_encoding(this);
20688     __ vsqrtps($dst$$XMMRegister, $mem$$Address, vec_enc);
20689   %}
20690   ins_pipe( pipe_slow );
20691 %}
20692 
20693 // Floating point vector sqrt
// Packed-double square root, register form. There is no predicate on this rule;
// the AVX requirement is checked by an assert at encoding time.
20694 instruct vsqrtD_reg(vec dst, vec src) %{
20695   match(Set dst (SqrtVD src));
20696   format %{ "vsqrtpd  $dst,$src\t! sqrt packedD" %}
20697   ins_encode %{
20698     assert(UseAVX > 0, "required");
20699     int vec_enc = vector_length_encoding(this);
20700     __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vec_enc);
20701   %}
20702   ins_pipe( pipe_slow );
20703 %}
20704 
// Packed-double square root with the operand folded from a vector load.
// Requires the loaded vector (first input of the node) to be wider than 8 bytes.
20705 instruct vsqrtD_mem(vec dst, memory mem) %{
20706   predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
20707   match(Set dst (SqrtVD (LoadVector mem)));
20708   format %{ "vsqrtpd  $dst,$mem\t! sqrt packedD" %}
20709   ins_encode %{
20710     assert(UseAVX > 0, "required");
20711     int vec_enc = vector_length_encoding(this);
20712     __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vec_enc);
20713   %}
20714   ins_pipe( pipe_slow );
20715 %}
20716 
20717 // ------------------------------ Shift ---------------------------------------
20718 
20719 // Left and right shift count vectors are the same on x86
20720 // (only lowest bits of xmm reg are used for count).
// Materializes a vector shift count from an int register. The same rule
// serves both LShiftCntV and RShiftCntV: the count is moved into dst by movdl.
20721 instruct vshiftcnt(vec dst, rRegI cnt) %{
20722   match(Set dst (LShiftCntV cnt));
20723   match(Set dst (RShiftCntV cnt));
20724   format %{ "movdl    $dst,$cnt\t! load shift count" %}
20725   ins_encode %{
20726     __ movdl($dst$$XMMRegister, $cnt$$Register);
20727   %}
20728   ins_pipe( pipe_slow );
20729 %}
20730 
20731 // Byte vector shift
// Non-variable shift of a byte vector with at most 8 elements. Bytes are
// widened to words, shifted as words, masked and packed back into bytes.
// One rule serves left, arithmetic-right and logical-right shifts.
20732 instruct vshiftB(vec dst, vec src, vec shift, vec tmp) %{
20733   predicate(Matcher::vector_length(n) <= 8 && !n->as_ShiftV()->is_var_shift());
20734   match(Set dst ( LShiftVB src shift));
20735   match(Set dst ( RShiftVB src shift));
20736   match(Set dst (URShiftVB src shift));
20737   effect(TEMP dst, USE src, USE shift, TEMP tmp);
20738   format %{"vector_byte_shift $dst,$src,$shift" %}
20739   ins_encode %{
20740     assert(UseSSE > 3, "required");
20741     int opcode = this->ideal_Opcode();
          // Every opcode except the logical right shift requests the signed
          // widening variant from vextendbw.
20742     bool sign = (opcode != Op_URShiftVB);
          // Widen the bytes to words in tmp and shift them as words.
20743     __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister);
20744     __ vshiftw(opcode, $tmp$$XMMRegister, $shift$$XMMRegister);
          // Load the short-to-byte mask into dst, AND it with the shifted words,
          // then pack the words back into bytes.
20745     __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
20746     __ pand($dst$$XMMRegister, $tmp$$XMMRegister);
20747     __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
20748   %}
20749   ins_pipe( pipe_slow );
20750 %}
20751 
// Non-variable shift of a 16-element byte vector when UseAVX <= 1. The two
// 8-byte halves are widened to words separately, shifted, masked and packed
// back together into dst.
20752 instruct vshift16B(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
20753   predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() &&
20754             UseAVX <= 1);
20755   match(Set dst ( LShiftVB src shift));
20756   match(Set dst ( RShiftVB src shift));
20757   match(Set dst (URShiftVB src shift));
20758   effect(TEMP dst, USE src, USE shift, TEMP tmp1, TEMP tmp2);
20759   format %{"vector_byte_shift $dst,$src,$shift" %}
20760   ins_encode %{
20761     assert(UseSSE > 3, "required");
20762     int opcode = this->ideal_Opcode();
          // Every opcode except the logical right shift requests the signed
          // widening variant from vextendbw.
20763     bool sign = (opcode != Op_URShiftVB);
          // Low half: widen to words in tmp1 and shift.
20764     __ vextendbw(sign, $tmp1$$XMMRegister, $src$$XMMRegister);
20765     __ vshiftw(opcode, $tmp1$$XMMRegister, $shift$$XMMRegister);
          // High half: shuffle (imm 0xE) the upper 8 bytes of src down into
          // tmp2, then widen and shift the same way.
20766     __ pshufd($tmp2$$XMMRegister, $src$$XMMRegister, 0xE);
20767     __ vextendbw(sign, $tmp2$$XMMRegister, $tmp2$$XMMRegister);
20768     __ vshiftw(opcode, $tmp2$$XMMRegister, $shift$$XMMRegister);
          // Mask both word vectors with the short-to-byte mask and pack them
          // into dst (low half from tmp1, high half from tmp2).
20769     __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
20770     __ pand($tmp2$$XMMRegister, $dst$$XMMRegister);
20771     __ pand($dst$$XMMRegister, $tmp1$$XMMRegister);
20772     __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister);
20773   %}
20774   ins_pipe( pipe_slow );
20775 %}
20776 
// Non-variable shift of a 16-element byte vector when UseAVX > 1. All 16
// bytes are widened to words in one 256-bit register, shifted, masked, and
// the two 128-bit halves are packed back into a 16-byte result.
20777 instruct vshift16B_avx(vec dst, vec src, vec shift, vec tmp) %{
20778   predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() &&
20779             UseAVX > 1);
20780   match(Set dst ( LShiftVB src shift));
20781   match(Set dst ( RShiftVB src shift));
20782   match(Set dst (URShiftVB src shift));
20783   effect(TEMP dst, TEMP tmp);
20784   format %{"vector_byte_shift $dst,$src,$shift" %}
20785   ins_encode %{
20786     int opcode = this->ideal_Opcode();
          // Every opcode except the logical right shift requests the signed
          // widening variant from vextendbw.
20787     bool sign = (opcode != Op_URShiftVB);
          // The widened intermediate is processed at 256-bit length.
20788     int vlen_enc = Assembler::AVX_256bit;
20789     __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister, vlen_enc);
20790     __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
20791     __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
          // Split off the upper 128 bits and pack low and high words into bytes.
          // NOTE(review): the literal 0 is the vector-length argument, presumably
          // equal to Assembler::AVX_128bit; confirm.
20792     __ vextracti128_high($dst$$XMMRegister, $tmp$$XMMRegister);
20793     __ vpackuswb($dst$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, 0);
20794   %}
20795   ins_pipe( pipe_slow );
20796 %}
20797 
// Non-variable shift of a 32-element byte vector (asserts UseAVX > 1). Each
// 16-byte half is widened to a 256-bit word vector, shifted and masked; the
// halves are then packed and the 64-bit quarters reordered.
20798 instruct vshift32B_avx(vec dst, vec src, vec shift, vec tmp) %{
20799   predicate(Matcher::vector_length(n) == 32 && !n->as_ShiftV()->is_var_shift());
20800   match(Set dst ( LShiftVB src shift));
20801   match(Set dst ( RShiftVB src shift));
20802   match(Set dst (URShiftVB src shift));
20803   effect(TEMP dst, TEMP tmp);
20804   format %{"vector_byte_shift $dst,$src,$shift" %}
20805   ins_encode %{
20806     assert(UseAVX > 1, "required");
20807     int opcode = this->ideal_Opcode();
          // Every opcode except the logical right shift requests the signed
          // widening variant from vextendbw.
20808     bool sign = (opcode != Op_URShiftVB);
20809     int vlen_enc = Assembler::AVX_256bit;
          // tmp holds the widened upper 16 bytes, dst the widened lower 16 bytes.
20810     __ vextracti128_high($tmp$$XMMRegister, $src$$XMMRegister);
20811     __ vextendbw(sign, $tmp$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
20812     __ vextendbw(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
20813     __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
20814     __ vshiftw(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $shift$$XMMRegister, vlen_enc);
20815     __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
20816     __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
          // The pack works within each 128-bit lane, so the 64-bit quarters come
          // out interleaved; vpermq with 0xD8 (order 0,2,1,3) restores byte order.
20817     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
20818     __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
20819   %}
20820   ins_pipe( pipe_slow );
20821 %}
20822 
instruct vshift64B_avx(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
  predicate(Matcher::vector_length(n) == 64 && !n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVB src shift));
  match(Set dst  (RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2);
  format %{"vector_byte_shift $dst,$src,$shift" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    // Non-variable shift of a 64-element byte vector: $tmp1 handles the part
    // of $src extracted with index 1, $tmp2 the remainder; $dst temporarily
    // holds the broadcast short-to-byte mask before receiving the result.
    const int  shift_op  = this->ideal_Opcode();
    const bool is_signed = (shift_op != Op_URShiftVB);  // sign flag handed to vextendbw
    const int  enc512    = Assembler::AVX_512bit;
    XMMRegister dst_reg  = $dst$$XMMRegister;
    XMMRegister src_reg  = $src$$XMMRegister;
    XMMRegister cnt_reg  = $shift$$XMMRegister;
    XMMRegister hi_reg   = $tmp1$$XMMRegister;
    XMMRegister lo_reg   = $tmp2$$XMMRegister;

    // Extend both halves to words.
    __ vextracti64x4(hi_reg, src_reg, 1);
    __ vextendbw(is_signed, hi_reg, hi_reg, enc512);
    __ vextendbw(is_signed, lo_reg, src_reg, enc512);
    // Shift both halves with the same count register.
    __ vshiftw(shift_op, hi_reg, hi_reg, cnt_reg, enc512);
    __ vshiftw(shift_op, lo_reg, lo_reg, cnt_reg, enc512);
    // Load the mask, broadcast it across dst_reg and apply it to both halves.
    __ vmovdqu(dst_reg, ExternalAddress(vector_short_to_byte_mask()), noreg);
    __ vpbroadcastd(dst_reg, dst_reg, enc512);
    __ vpand(hi_reg, hi_reg, dst_reg, enc512);
    __ vpand(lo_reg, lo_reg, dst_reg, enc512);
    // Pack to bytes, then permute using the byte permutation mask table
    // (lo_reg is free at this point and is reused for the permute indices).
    __ vpackuswb(dst_reg, hi_reg, lo_reg, enc512);
    __ evmovdquq(lo_reg, ExternalAddress(vector_byte_perm_mask()), enc512, noreg);
    __ vpermq(dst_reg, lo_reg, dst_reg, enc512);
  %}
  ins_pipe( pipe_slow );
%}
20850 
// A logical right shift of a shorts vector produces an incorrect Java result
// for negative data, because Java code converts a short value to int with
// sign extension before shifting. Char vectors are fine since chars are
// unsigned values.
// Shorts/Chars vector shift (left, arithmetic right and logical right)
instruct vshiftS(vec dst, vec src, vec shift) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVS src shift));
  match(Set dst ( RShiftVS src shift));
  match(Set dst (URShiftVS src shift));
  effect(TEMP dst, USE src, USE shift);
  format %{ "vshiftw  $dst,$src,$shift\t! shift packedS" %}
  ins_encode %{
    // Non-variable shift of 16-bit elements; the ideal opcode selects the
    // shift kind inside vshiftw.
    const int shift_op = this->ideal_Opcode();
    XMMRegister dst_reg = $dst$$XMMRegister;
    XMMRegister src_reg = $src$$XMMRegister;
    XMMRegister cnt_reg = $shift$$XMMRegister;
    if (UseAVX > 0) {
      // Three-operand form: no copy of the source is needed.
      __ vshiftw(shift_op, dst_reg, src_reg, cnt_reg, vector_length_encoding(this));
    } else {
      // Two-operand form: copy src into dst with a move chosen by the
      // element count, then shift dst in place.
      const int elem_cnt = Matcher::vector_length(this);
      switch (elem_cnt) {
        case 2:
          __ movflt(dst_reg, src_reg);
          break;
        case 4:
          __ movdbl(dst_reg, src_reg);
          break;
        default:
          assert (elem_cnt == 8, "sanity");
          __ movdqu(dst_reg, src_reg);
          break;
      }
      __ vshiftw(shift_op, dst_reg, cnt_reg);
    }
  %}
  ins_pipe( pipe_slow );
%}
20885 
// Integers vector shift (left, arithmetic right and logical right)
instruct vshiftI(vec dst, vec src, vec shift) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVI src shift));
  match(Set dst ( RShiftVI src shift));
  match(Set dst (URShiftVI src shift));
  effect(TEMP dst, USE src, USE shift);
  format %{ "vshiftd  $dst,$src,$shift\t! shift packedI" %}
  ins_encode %{
    // Non-variable shift of 32-bit elements; the ideal opcode selects the
    // shift kind inside vshiftd.
    const int shift_op = this->ideal_Opcode();
    XMMRegister dst_reg = $dst$$XMMRegister;
    XMMRegister src_reg = $src$$XMMRegister;
    XMMRegister cnt_reg = $shift$$XMMRegister;
    if (UseAVX > 0) {
      // Three-operand form: no copy of the source is needed.
      __ vshiftd(shift_op, dst_reg, src_reg, cnt_reg, vector_length_encoding(this));
    } else {
      // Two-operand form: copy src into dst, then shift dst in place.
      const int elem_cnt = Matcher::vector_length(this);
      if (elem_cnt == 2) {
        __ movdbl(dst_reg, src_reg);
      } else {
        assert(elem_cnt == 4, "sanity");
        __ movdqu(dst_reg, src_reg);
      }
      __ vshiftd(shift_op, dst_reg, cnt_reg);
    }
  %}
  ins_pipe( pipe_slow );
%}
20913 
// Integers vector constant shift (left, arithmetic right and logical right)
instruct vshiftI_imm(vec dst, vec src, immI8 shift) %{
  match(Set dst (LShiftVI src (LShiftCntV shift)));
  match(Set dst (RShiftVI src (RShiftCntV shift)));
  match(Set dst (URShiftVI src (RShiftCntV shift)));
  format %{ "vshiftd_imm  $dst,$src,$shift\t! shift packedI" %}
  ins_encode %{
    // Shift of 32-bit elements by an immediate count; the ideal opcode
    // selects the shift kind inside vshiftd_imm.
    const int shift_op = this->ideal_Opcode();
    XMMRegister dst_reg = $dst$$XMMRegister;
    XMMRegister src_reg = $src$$XMMRegister;
    if (UseAVX > 0) {
      // Three-operand form: no copy of the source is needed.
      __ vshiftd_imm(shift_op, dst_reg, src_reg, $shift$$constant, vector_length_encoding(this));
    } else {
      // Two-operand form: copy src into dst, then shift dst in place.
      const int elem_cnt = Matcher::vector_length(this);
      if (elem_cnt == 2) {
        __ movdbl(dst_reg, src_reg);
      } else {
        assert(elem_cnt == 4, "sanity");
        __ movdqu(dst_reg, src_reg);
      }
      __ vshiftd_imm(shift_op, dst_reg, $shift$$constant);
    }
  %}
  ins_pipe( pipe_slow );
%}
20939 
20940 // Longs vector shift
instruct vshiftL(vec dst, vec src, vec shift) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVL src shift));
  match(Set dst (URShiftVL src shift));
  effect(TEMP dst, USE src, USE shift);
  format %{ "vshiftq  $dst,$src,$shift\t! shift packedL" %}
  ins_encode %{
    // Non-variable left / logical-right shift of 64-bit elements.
    const int shift_op = this->ideal_Opcode();
    XMMRegister dst_reg = $dst$$XMMRegister;
    XMMRegister src_reg = $src$$XMMRegister;
    XMMRegister cnt_reg = $shift$$XMMRegister;
    if (UseAVX > 0) {
      // Three-operand form: no copy of the source is needed.
      __ vshiftq(shift_op, dst_reg, src_reg, cnt_reg, vector_length_encoding(this));
    } else {
      // Two-operand form: only the 2-element vector is expected here.
      assert(Matcher::vector_length(this) == 2, "");
      __ movdqu(dst_reg, src_reg);
      __ vshiftq(shift_op, dst_reg, cnt_reg);
    }
  %}
  ins_pipe( pipe_slow );
%}
20960 
20961 // Longs vector constant shift
instruct vshiftL_imm(vec dst, vec src, immI8 shift) %{
  match(Set dst (LShiftVL src (LShiftCntV shift)));
  match(Set dst (URShiftVL src (RShiftCntV shift)));
  format %{ "vshiftq_imm  $dst,$src,$shift\t! shift packedL" %}
  ins_encode %{
    // Left / logical-right shift of 64-bit elements by an immediate count.
    const int shift_op = this->ideal_Opcode();
    XMMRegister dst_reg = $dst$$XMMRegister;
    XMMRegister src_reg = $src$$XMMRegister;
    if (UseAVX > 0) {
      // Three-operand form: no copy of the source is needed.
      __ vshiftq_imm(shift_op, dst_reg, src_reg, $shift$$constant, vector_length_encoding(this));
    } else {
      // Two-operand form: only the 2-element vector is expected here.
      assert(Matcher::vector_length(this) == 2, "");
      __ movdqu(dst_reg, src_reg);
      __ vshiftq_imm(shift_op, dst_reg, $shift$$constant);
    }
  %}
  ins_pipe( pipe_slow );
%}
20979 
20980 // -------------------ArithmeticRightShift -----------------------------------
20981 // Long vector arithmetic right shift
instruct vshiftL_arith_reg(vec dst, vec src, vec shift, vec tmp) %{
  predicate(!n->as_ShiftV()->is_var_shift() && UseAVX <= 2);
  match(Set dst (RShiftVL src shift));
  effect(TEMP dst, TEMP tmp);
  format %{ "vshiftq $dst,$src,$shift" %}
  ins_encode %{
    // Arithmetic right shift of 64-bit elements built from a logical right
    // shift: both the data and the long sign-mask constant are shifted by the
    // same count, then dst = (dst ^ mask) - mask.
    XMMRegister dst_reg  = $dst$$XMMRegister;
    XMMRegister src_reg  = $src$$XMMRegister;
    XMMRegister cnt_reg  = $shift$$XMMRegister;
    XMMRegister mask_reg = $tmp$$XMMRegister;
    const uint elem_cnt = Matcher::vector_length(this);
    if (elem_cnt != 2) {
      // 4 x long: 256-bit three-operand instructions.
      assert(elem_cnt == 4, "sanity");
      assert(UseAVX > 1, "required");
      const int enc256 = Assembler::AVX_256bit;
      __ vpsrlq(dst_reg, src_reg, cnt_reg, enc256);
      __ vmovdqu(mask_reg, ExternalAddress(vector_long_sign_mask()), noreg);
      __ vpsrlq(mask_reg, mask_reg, cnt_reg, enc256);
      __ vpxor(dst_reg, dst_reg, mask_reg, enc256);
      __ vpsubq(dst_reg, dst_reg, mask_reg, enc256);
    } else {
      // 2 x long: two-operand instructions operating in place on a copy of src.
      __ movdqu(dst_reg, src_reg);
      __ psrlq(dst_reg, cnt_reg);
      __ movdqu(mask_reg, ExternalAddress(vector_long_sign_mask()), noreg);
      __ psrlq(mask_reg, cnt_reg);
      __ pxor(dst_reg, mask_reg);
      __ psubq(dst_reg, mask_reg);
    }
  %}
  ins_pipe( pipe_slow );
%}
21009 
instruct vshiftL_arith_reg_evex(vec dst, vec src, vec shift) %{
  predicate(!n->as_ShiftV()->is_var_shift() && UseAVX > 2);
  match(Set dst (RShiftVL src shift));
  format %{ "vshiftq $dst,$src,$shift" %}
  ins_encode %{
    // With UseAVX > 2 the arithmetic right shift of longs is emitted as a
    // single evpsraq at the node's vector length.
    __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister,
               vector_length_encoding(this));
  %}
  ins_pipe( pipe_slow );
%}
21020 
21021 // ------------------- Variable Shift -----------------------------
21022 // Byte variable shift
instruct vshift8B_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
  predicate(Matcher::vector_length(n) <= 8 &&
            n->as_ShiftV()->is_var_shift() &&
            !VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    // Per-element (variable) shift of at most 8 bytes without AVX512BW:
    // varshiftbw leaves its result in dst, which is then packed with itself
    // back to bytes.
    const int shift_op = this->ideal_Opcode();
    const int enc128   = Assembler::AVX_128bit;
    XMMRegister dst_reg = $dst$$XMMRegister;
    __ varshiftbw(shift_op, dst_reg, $src$$XMMRegister, $shift$$XMMRegister, enc128, $vtmp$$XMMRegister);
    __ vpackuswb(dst_reg, dst_reg, dst_reg, enc128);
  %}
  ins_pipe( pipe_slow );
%}
21042 
instruct vshift16B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
  predicate(Matcher::vector_length(n) == 16 &&
            n->as_ShiftV()->is_var_shift() &&
            !VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    const int shift_op = this->ideal_Opcode();
    const int enc128   = Assembler::AVX_128bit;
    XMMRegister dst_reg = $dst$$XMMRegister;
    XMMRegister src_reg = $src$$XMMRegister;
    XMMRegister cnt_reg = $shift$$XMMRegister;
    XMMRegister t1      = $vtmp1$$XMMRegister;
    XMMRegister t2      = $vtmp2$$XMMRegister;

    // Lower 8 bytes: word result goes to dst_reg (t1 is scratch).
    __ varshiftbw(shift_op, dst_reg, src_reg, cnt_reg, enc128, t1);

    // Upper 8 bytes: bring the upper data and counts down with vpshufd 0xE,
    // then shift them; word result goes to t1 (t2 doubles as scratch).
    __ vpshufd(t1, src_reg, 0xE, enc128);
    __ vpshufd(t2, cnt_reg, 0xE, enc128);
    __ varshiftbw(shift_op, t1, t1, t2, enc128, t2);

    // Pack the two word results into 16 bytes in dst_reg.
    __ vpackuswb(dst_reg, dst_reg, t1, enc128);
  %}
  ins_pipe( pipe_slow );
%}
21070 
instruct vshift32B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, vec vtmp3, vec vtmp4) %{
  predicate(Matcher::vector_length(n) == 32 &&
            n->as_ShiftV()->is_var_shift() &&
            !VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4);
  format %{ "vector_varshift_byte $dst, $src, $shift\n\t using $vtmp1, $vtmp2, $vtmp3, $vtmp4 as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    const int shift_op = this->ideal_Opcode();
    const int enc128   = Assembler::AVX_128bit;
    XMMRegister dst_reg = $dst$$XMMRegister;
    XMMRegister src_reg = $src$$XMMRegister;
    XMMRegister cnt_reg = $shift$$XMMRegister;
    XMMRegister t1      = $vtmp1$$XMMRegister;
    XMMRegister t2      = $vtmp2$$XMMRegister;
    XMMRegister t3      = $vtmp3$$XMMRegister;
    XMMRegister t4      = $vtmp4$$XMMRegister;

    // --- Lower 128 bits: two 8-byte groups, packed result ends up in dst_reg.
    __ varshiftbw(shift_op, dst_reg, src_reg, cnt_reg, enc128, t1);
    __ vpshufd(t1, src_reg, 0xE, enc128);
    __ vpshufd(t2, cnt_reg, 0xE, enc128);
    __ varshiftbw(shift_op, t1, t1, t2, enc128, t2);
    __ vpackuswb(dst_reg, dst_reg, t1, enc128);

    // --- Upper 128 bits: extracted into t1 (data) and t2 (counts), processed
    //     the same way with t3/t4 as additional registers; packed result in t1.
    __ vextracti128_high(t1, src_reg);
    __ vextracti128_high(t2, cnt_reg);
    __ varshiftbw(shift_op, t3, t1, t2, enc128, t4);
    __ vpshufd(t1, t1, 0xE, enc128);
    __ vpshufd(t2, t2, 0xE, enc128);
    __ varshiftbw(shift_op, t1, t1, t2, enc128, t2);
    __ vpackuswb(t1, t3, t1, enc128);

    // --- Combine: insert t1 at index 1 of dst_reg.
    __ vinserti128(dst_reg, dst_reg, t1, 0x1);
  %}
  ins_pipe( pipe_slow );
%}
21106 
instruct vshiftB_var_evex_bw(vec dst, vec src, vec shift, vec vtmp) %{
  predicate(Matcher::vector_length(n) <= 32 &&
            n->as_ShiftV()->is_var_shift() &&
            VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    // Variable byte shift for vectors of up to 32 elements when AVX512BW is
    // available: delegated entirely to evarshiftb at the node's vector length.
    const int shift_op = this->ideal_Opcode();
    const int enc      = vector_length_encoding(this);
    __ evarshiftb(shift_op, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, enc, $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
21125 
instruct vshift64B_var_evex_bw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
  predicate(Matcher::vector_length(n) == 64 &&
            n->as_ShiftV()->is_var_shift() &&
            VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    // Variable byte shift of 64 elements, done as two 256-bit evarshiftb
    // operations whose results are joined with vinserti64x4.
    const int shift_op = this->ideal_Opcode();
    const int enc256   = Assembler::AVX_256bit;
    XMMRegister dst_reg = $dst$$XMMRegister;
    XMMRegister src_reg = $src$$XMMRegister;
    XMMRegister cnt_reg = $shift$$XMMRegister;
    XMMRegister t1      = $vtmp1$$XMMRegister;
    XMMRegister t2      = $vtmp2$$XMMRegister;

    // First 256 bits: result in dst_reg, t1 is scratch.
    __ evarshiftb(shift_op, dst_reg, src_reg, cnt_reg, enc256, t1);
    // High 256 bits of data and counts: result in t1, t2 doubles as scratch.
    __ vextracti64x4_high(t1, src_reg);
    __ vextracti64x4_high(t2, cnt_reg);
    __ evarshiftb(shift_op, t1, t1, t2, enc256, t2);
    // Insert t1 at index 1 of dst_reg.
    __ vinserti64x4(dst_reg, dst_reg, t1, 0x1);
  %}
  ins_pipe( pipe_slow );
%}
21148 
21149 // Short variable shift
instruct vshift8S_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
  predicate(Matcher::vector_length(n) <= 8 &&
            n->as_ShiftV()->is_var_shift() &&
            !VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVS src shift));
  match(Set dst ( RShiftVS src shift));
  match(Set dst (URShiftVS src shift));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_var_shift_left_short $dst, $src, $shift\n\t" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    // Variable shift of at most 8 shorts without AVX512BW: data and counts
    // are extended to dwords, shifted per element with varshiftd, masked with
    // the int-to-short mask and packed back to words.
    const int  shift_op  = this->ideal_Opcode();
    const bool is_signed = (shift_op != Op_URShiftVS);  // sign flag handed to vextendwd
    const int  enc256    = Assembler::AVX_256bit;
    XMMRegister dst_reg  = $dst$$XMMRegister;
    XMMRegister tmp_reg  = $vtmp$$XMMRegister;

    __ vextendwd(is_signed, dst_reg, $src$$XMMRegister, enc256);
    __ vpmovzxwd(tmp_reg, $shift$$XMMRegister, enc256);
    __ varshiftd(shift_op, dst_reg, dst_reg, tmp_reg, enc256);
    __ vpand(dst_reg, dst_reg, ExternalAddress(vector_int_to_short_mask()), enc256, noreg);
    // Pack the two 128-bit halves of the dword result into 8 words.
    __ vextracti128_high(tmp_reg, dst_reg);
    __ vpackusdw(dst_reg, dst_reg, tmp_reg, Assembler::AVX_128bit);
  %}
  ins_pipe( pipe_slow );
%}
21174 
instruct vshift16S_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
  predicate(Matcher::vector_length(n) == 16 &&
            n->as_ShiftV()->is_var_shift() &&
            !VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVS src shift));
  match(Set dst ( RShiftVS src shift));
  match(Set dst (URShiftVS src shift));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_var_shift_left_short $dst, $src, $shift\n\t" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    const int  shift_op  = this->ideal_Opcode();
    const bool is_signed = (shift_op != Op_URShiftVS);  // sign flag handed to vextendwd
    const int  enc256    = Assembler::AVX_256bit;
    XMMRegister dst_reg  = $dst$$XMMRegister;
    XMMRegister src_reg  = $src$$XMMRegister;
    XMMRegister cnt_reg  = $shift$$XMMRegister;
    XMMRegister t1       = $vtmp1$$XMMRegister;
    XMMRegister t2       = $vtmp2$$XMMRegister;

    // Lower half: extended data in t2, extended counts in t1; masked dword
    // result stays in t2.
    __ vextendwd(is_signed, t2, src_reg, enc256);
    __ vpmovzxwd(t1, cnt_reg, enc256);
    __ varshiftd(shift_op, t2, t2, t1, enc256);
    __ vpand(t2, t2, ExternalAddress(vector_int_to_short_mask()), enc256, noreg);

    // Upper half: extracted data in dst_reg, extracted counts in t1; masked
    // dword result stays in dst_reg.
    __ vextracti128_high(dst_reg, src_reg);
    __ vextracti128_high(t1, cnt_reg);
    __ vextendwd(is_signed, dst_reg, dst_reg, enc256);
    __ vpmovzxwd(t1, t1, enc256);
    __ varshiftd(shift_op, dst_reg, dst_reg, t1, enc256);
    __ vpand(dst_reg, dst_reg, ExternalAddress(vector_int_to_short_mask()), enc256, noreg);

    // Pack both dword results into dst_reg and reorder the 64-bit lanes
    // (selector 0xD8).
    __ vpackusdw(dst_reg, t2, dst_reg, enc256);
    __ vpermq(dst_reg, dst_reg, 0xD8, enc256);
  %}
  ins_pipe( pipe_slow );
%}
21210 
instruct vshift16S_var_evex_bw(vec dst, vec src, vec shift) %{
  predicate(n->as_ShiftV()->is_var_shift() &&
            VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVS src shift));
  match(Set dst ( RShiftVS src shift));
  match(Set dst (URShiftVS src shift));
  format %{ "vector_varshift_short $dst,$src,$shift\t!" %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    // Variable short shift when AVX512BW is available. Without AVX512VL the
    // 512-bit encoding is used regardless of the node's vector length.
    const int shift_op    = this->ideal_Opcode();
    const int natural_enc = vector_length_encoding(this);
    const int enc = VM_Version::supports_avx512vl() ? natural_enc : Assembler::AVX_512bit;
    __ varshiftw(shift_op, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, enc);
  %}
  ins_pipe( pipe_slow );
%}
21230 
21231 //Integer variable shift
instruct vshiftI_var(vec dst, vec src, vec shift) %{
  predicate(n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVI src shift));
  match(Set dst ( RShiftVI src shift));
  match(Set dst (URShiftVI src shift));
  format %{ "vector_varshift_int $dst,$src,$shift\t!" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    // Per-element (variable) shift of 32-bit elements; the ideal opcode
    // selects the shift kind inside varshiftd.
    const int shift_op = this->ideal_Opcode();
    __ varshiftd(shift_op, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister,
                 vector_length_encoding(this));
  %}
  ins_pipe( pipe_slow );
%}
21247 
21248 //Long variable shift
instruct vshiftL_var(vec dst, vec src, vec shift) %{
  predicate(n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVL src shift));
  match(Set dst (URShiftVL src shift));
  format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    // Per-element (variable) left / logical-right shift of 64-bit elements.
    const int shift_op = this->ideal_Opcode();
    __ varshiftq(shift_op, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister,
                 vector_length_encoding(this));
  %}
  ins_pipe( pipe_slow );
%}
21263 
// Long variable arithmetic right shift
instruct vshiftL_arith_var(vec dst, vec src, vec shift, vec vtmp) %{
  predicate(Matcher::vector_length(n) <= 4 &&
            n->as_ShiftV()->is_var_shift() &&
            UseAVX == 2);
  match(Set dst (RShiftVL src shift));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_varshift_long  $dst,$src,$shift\n\t! using $vtmp as TEMP" %}
  ins_encode %{
    // Variable arithmetic right shift of up to 4 longs when UseAVX == 2;
    // this varshiftq overload takes $vtmp as an extra register.
    const int shift_op = this->ideal_Opcode();
    const int enc      = vector_length_encoding(this);
    __ varshiftq(shift_op, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, enc, $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
21280 
// Variable arithmetic right shift of longs when UseAVX > 2; no temporary
// register is passed to varshiftq on this path.
instruct vshiftL_arith_var_evex(vec dst, vec src, vec shift) %{
  predicate(n->as_ShiftV()->is_var_shift() &&
            UseAVX > 2);
  match(Set dst (RShiftVL src shift));
  // Format mnemonic spelled "varshift" (was "varfshift") so the printed name
  // lines up with the other variable-shift rules.
  format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
21293 
21294 // --------------------------------- AND --------------------------------------
21295 
// Two-operand bitwise AND used when AVX is disabled: dst = dst & src.
instruct vand(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AndV dst src));
  format %{ "pand    $dst,$src\t! and vectors" %}
  ins_encode %{
    XMMRegister acc_reg = $dst$$XMMRegister;  // both input and output
    XMMRegister rhs_reg = $src$$XMMRegister;
    __ pand(acc_reg, rhs_reg);
  %}
  ins_pipe( pipe_slow );
%}
21305 
// Three-operand bitwise AND of two vector registers (AVX enabled).
instruct vand_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AndV src1 src2));
  format %{ "vpand   $dst,$src1,$src2\t! and vectors" %}
  ins_encode %{
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
             vector_length_encoding(this));
  %}
  ins_pipe( pipe_slow );
%}
21316 
// Bitwise AND with the second operand loaded from memory (AVX enabled);
// only matched when the first input is wider than 8 bytes.
instruct vand_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand   $dst,$src,$mem\t! and vectors" %}
  ins_encode %{
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address,
             vector_length_encoding(this));
  %}
  ins_pipe( pipe_slow );
%}
21328 
21329 // --------------------------------- OR ---------------------------------------
21330 
// Two-operand bitwise OR used when AVX is disabled: dst = dst | src.
instruct vor(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (OrV dst src));
  format %{ "por     $dst,$src\t! or vectors" %}
  ins_encode %{
    XMMRegister acc_reg = $dst$$XMMRegister;  // both input and output
    XMMRegister rhs_reg = $src$$XMMRegister;
    __ por(acc_reg, rhs_reg);
  %}
  ins_pipe( pipe_slow );
%}
21340 
// Three-operand bitwise OR of two vector registers (AVX enabled).
instruct vor_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (OrV src1 src2));
  format %{ "vpor    $dst,$src1,$src2\t! or vectors" %}
  ins_encode %{
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
            vector_length_encoding(this));
  %}
  ins_pipe( pipe_slow );
%}
21351 
// Bitwise OR with the second operand loaded from memory (AVX enabled);
// only matched when the first input is wider than 8 bytes.
instruct vor_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor    $dst,$src,$mem\t! or vectors" %}
  ins_encode %{
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address,
            vector_length_encoding(this));
  %}
  ins_pipe( pipe_slow );
%}
21363 
21364 // --------------------------------- XOR --------------------------------------
21365 
// Two-operand bitwise XOR used when AVX is disabled: dst = dst ^ src.
instruct vxor(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (XorV dst src));
  format %{ "pxor    $dst,$src\t! xor vectors" %}
  ins_encode %{
    XMMRegister acc_reg = $dst$$XMMRegister;  // both input and output
    XMMRegister rhs_reg = $src$$XMMRegister;
    __ pxor(acc_reg, rhs_reg);
  %}
  ins_pipe( pipe_slow );
%}
21375 
// Three-operand bitwise XOR of two vector registers (AVX enabled).
instruct vxor_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor   $dst,$src1,$src2\t! xor vectors" %}
  ins_encode %{
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
             vector_length_encoding(this));
  %}
  ins_pipe( pipe_slow );
%}
21386 
// Bitwise XOR with the second operand loaded from memory (AVX enabled);
// only matched when the first input is wider than 8 bytes.
instruct vxor_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor   $dst,$src,$mem\t! xor vectors" %}
  ins_encode %{
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address,
             vector_length_encoding(this));
  %}
  ins_pipe( pipe_slow );
%}
21398 
21399 // --------------------------------- VectorCast --------------------------------------
21400 
// Byte vector cast to any element type, except byte->double when AVX512VL
// is not available.
instruct vcastBtoX(vec dst, vec src) %{
  predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_DOUBLE);
  match(Set dst (VectorCastB2X src));
  format %{ "vector_cast_b2x $dst,$src\t!" %}
  ins_encode %{
    // Destination element type and length encoding both come from this node.
    const BasicType dst_bt = Matcher::vector_element_basic_type(this);
    const int enc = vector_length_encoding(this);
    __ vconvert_b2x(dst_bt, $dst$$XMMRegister, $src$$XMMRegister, enc);
  %}
  ins_pipe( pipe_slow );
%}
21412 
// Byte -> double vector cast when AVX512VL is not available; operands use
// the legVec register class.
instruct vcastBtoD(legVec dst, legVec src) %{
  predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_DOUBLE);
  match(Set dst (VectorCastB2X src));
  format %{ "vector_cast_b2x $dst,$src\t!" %}
  ins_encode %{
    __ vconvert_b2x(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister,
                    vector_length_encoding(this));
  %}
  ins_pipe( pipe_slow );
%}
21423 
// Short -> byte vector cast for sources of at most 8 elements when the
// AVX512VL+BW path is not usable.
instruct castStoX(vec dst, vec src) %{
  predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
            Matcher::vector_length(n->in(1)) <= 8 && // src
            Matcher::vector_element_basic_type(n) == T_BYTE);
  match(Set dst (VectorCastS2X src));
  format %{ "vector_cast_s2x $dst,$src" %}
  ins_encode %{
    assert(UseAVX > 0, "required");

    // Apply the short-to-byte mask, then pack dst with itself to bytes.
    const int enc128 = Assembler::AVX_128bit;
    XMMRegister dst_reg = $dst$$XMMRegister;
    __ vpand(dst_reg, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), enc128, noreg);
    __ vpackuswb(dst_reg, dst_reg, dst_reg, enc128);
  %}
  ins_pipe( pipe_slow );
%}
21438 
// Short -> byte vector cast for 16-element sources when the AVX512VL+BW
// path is not usable.
instruct vcastStoX(vec dst, vec src, vec vtmp) %{
  predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
            Matcher::vector_length(n->in(1)) == 16 && // src
            Matcher::vector_element_basic_type(n) == T_BYTE);
  effect(TEMP dst, TEMP vtmp);
  match(Set dst (VectorCastS2X src));
  format %{ "vector_cast_s2x $dst,$src\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "required");

    // The mask is applied at the source vector's length; the result is then
    // split with vextracti128 (index 1) and packed to bytes at 128 bits.
    const int src_enc = vector_length_encoding(Matcher::vector_length_in_bytes(this, $src));
    XMMRegister dst_reg = $dst$$XMMRegister;
    XMMRegister hi_reg  = $vtmp$$XMMRegister;
    __ vpand(dst_reg, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), src_enc, noreg);
    __ vextracti128(hi_reg, dst_reg, 0x1);
    __ vpackuswb(dst_reg, dst_reg, hi_reg, Assembler::AVX_128bit);
  %}
  ins_pipe( pipe_slow );
%}
21456 
// Short vector cast. Matches when AVX512VL+BW is usable, or when the
// destination vector is at least as large (in bytes) as the source.
instruct vcastStoX_evex(vec dst, vec src) %{
  predicate((UseAVX > 2 && VM_Version::supports_avx512vlbw()) ||
            (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
  match(Set dst (VectorCastS2X src));
  format %{ "vector_cast_s2x $dst,$src\t!" %}
  ins_encode %{
    BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    int src_vlen_enc = vector_length_encoding(this, $src);  // encoding of the source vector
    int vlen_enc = vector_length_encoding(this);            // encoding of the result vector
    switch (to_elem_bt) {
      case T_BYTE:
        // The narrowing move is encoded with the SOURCE vector length.
        // NOTE(review): this case used to widen vlen_enc to AVX_512bit when
        // AVX512VL was missing, but vlen_enc is never read on this path, so
        // that store had no effect and has been dropped. Confirm the widening
        // was not meant to apply to src_vlen_enc instead.
        __ evpmovwb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
        break;
      case T_INT:
        __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
        break;
      case T_FLOAT:
        // Extend to int first, then convert int -> float in place.
        __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
        __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
        break;
      case T_LONG:
        __ vpmovsxwq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
        break;
      case T_DOUBLE: {
        // The intermediate int vector uses the next smaller encoding than the
        // double result (256-bit for a 512-bit result, otherwise 128-bit).
        int mid_vlen_enc = (vlen_enc == Assembler::AVX_512bit) ? Assembler::AVX_256bit : Assembler::AVX_128bit;
        __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, mid_vlen_enc);
        __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
        break;
      }
      default:
        ShouldNotReachHere();
    }
  %}
  ins_pipe( pipe_slow );
%}
21495 
// Narrowing int vector cast (to byte or short) for sources of at most
// 16 bytes when UseAVX <= 2.
instruct castItoX(vec dst, vec src) %{
  predicate(UseAVX <= 2 &&
            (Matcher::vector_length_in_bytes(n->in(1)) <= 16) &&
            (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
  match(Set dst (VectorCastI2X src));
  format %{ "vector_cast_i2x $dst,$src" %}
  ins_encode %{
    assert(UseAVX > 0, "required");

    const BasicType dst_bt  = Matcher::vector_element_basic_type(this);
    const int       src_enc = vector_length_encoding(this, $src);
    const bool      to_byte = (dst_bt == T_BYTE);
    XMMRegister dst_reg = $dst$$XMMRegister;
    XMMRegister src_reg = $src$$XMMRegister;

    // Step 1: mask each int with the mask table for the target element type.
    if (to_byte) {
      __ vpand(dst_reg, src_reg, ExternalAddress(vector_int_to_byte_mask()), src_enc, noreg);
    } else {
      assert(dst_bt == T_SHORT, "%s", type2name(dst_bt));
      __ vpand(dst_reg, src_reg, ExternalAddress(vector_int_to_short_mask()), src_enc, noreg);
    }
    // Step 2: pack dwords to words (needed for both targets).
    __ vpackusdw(dst_reg, dst_reg, dst_reg, src_enc);
    // Step 3: byte targets need one more pack, words to bytes.
    if (to_byte) {
      __ vpackuswb(dst_reg, dst_reg, dst_reg, src_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}
21520 
21521 instruct vcastItoX(vec dst, vec src, vec vtmp) %{
        // Narrowing VectorCastI2X rule for UseAVX <= 2 with a 32-byte source vector
        // and a result vector shorter than the source (see predicate).
        // dst and vtmp are both declared TEMP: the masked source lives in vtmp while
        // dst is written before the last read of vtmp.
21522   predicate(UseAVX <= 2 &&
21523             (Matcher::vector_length_in_bytes(n->in(1)) == 32) &&
21524             (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
21525   match(Set dst (VectorCastI2X src));
21526   format %{ "vector_cast_i2x $dst,$src\t! using $vtmp as TEMP" %}
21527   effect(TEMP dst, TEMP vtmp);
21528   ins_encode %{
21529     assert(UseAVX > 0, "required");
21530 
21531     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
          // Encoding derived from the source vector length.
21532     int vlen_enc = vector_length_encoding(this, $src);
21533 
21534     if (to_elem_bt == T_BYTE) {
            // 1. vtmp = src masked with vector_int_to_byte_mask()
            // 2. dst  = one 128-bit half of vtmp (immediate 0x1; presumably the upper half)
            // 3. vpackusdw combines vtmp and dst into dst
            // 4. a 128-bit vpackuswb narrows the words in dst to bytes
21535       __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg);
21536       __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1);
21537       __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21538       __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
21539     } else {
21540       assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
            // Same first three steps with vector_int_to_short_mask(); no byte pack needed.
21541       __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21542       __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1);
21543       __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21544     }
21545   %}
21546   ins_pipe( pipe_slow );
21547 %}
21548 
21549 instruct vcastItoX_evex(vec dst, vec src) %{
        // VectorCastI2X rule used when UseAVX > 2, or at any AVX level when the result
        // vector is at least as large as the source (see predicate).
        // Handles T_BYTE, T_SHORT, T_FLOAT, T_LONG and T_DOUBLE results; any other
        // result type hits ShouldNotReachHere().
21550   predicate(UseAVX > 2 ||
21551             (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
21552   match(Set dst (VectorCastI2X src));
21553   format %{ "vector_cast_i2x $dst,$src\t!" %}
21554   ins_encode %{
21555     assert(UseAVX > 0, "required");
21556 
21557     BasicType dst_elem_bt = Matcher::vector_element_basic_type(this);
          // The narrowing moves are encoded with the source length, the conversions
          // and the sign-extending move with the destination length.
21558     int src_vlen_enc = vector_length_encoding(this, $src);
21559     int dst_vlen_enc = vector_length_encoding(this);
21560     switch (dst_elem_bt) {
21561       case T_BYTE:
              // Without AVX512VL the narrowing move is emitted with the 512-bit encoding.
21562         if (!VM_Version::supports_avx512vl()) {
21563           src_vlen_enc = Assembler::AVX_512bit;
21564         }
21565         __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
21566         break;
21567       case T_SHORT:
              // Same AVX512VL fallback as for T_BYTE.
21568         if (!VM_Version::supports_avx512vl()) {
21569           src_vlen_enc = Assembler::AVX_512bit;
21570         }
21571         __ evpmovdw($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
21572         break;
21573       case T_FLOAT:
21574         __ vcvtdq2ps($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
21575         break;
21576       case T_LONG:
21577         __ vpmovsxdq($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
21578         break;
21579       case T_DOUBLE:
21580         __ vcvtdq2pd($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
21581         break;
21582       default:
21583         ShouldNotReachHere();
21584     }
21585   %}
21586   ins_pipe( pipe_slow );
21587 %}
21588 
21589 instruct vcastLtoBS(vec dst, vec src) %{
        // VectorCastL2X rule for T_BYTE and T_SHORT results when UseAVX <= 2.
        // Sequence: shuffle, mask with an int-to-byte/short mask, pack to words,
        // and for T_BYTE pack once more to bytes.
21590   predicate((Matcher::vector_element_basic_type(n) == T_BYTE || Matcher::vector_element_basic_type(n) == T_SHORT) &&
21591             UseAVX <= 2);
21592   match(Set dst (VectorCastL2X src));
21593   format %{ "vector_cast_l2x  $dst,$src" %}
21594   ins_encode %{
21595     assert(UseAVX > 0, "required");
21596 
          // vlen is the size of the source vector in bytes, not a lane count.
21597     int vlen = Matcher::vector_length_in_bytes(this, $src);
21598     BasicType to_elem_bt  = Matcher::vector_element_basic_type(this);
21599     AddressLiteral mask_addr = (to_elem_bt == T_BYTE) ? ExternalAddress(vector_int_to_byte_mask())
21600                                                       : ExternalAddress(vector_int_to_short_mask());
21601     if (vlen <= 16) {
            // Shuffle with immediate 8 (presumably gathers the low dword of each long
            // into the low lanes - confirm in the Intel SDM), then mask and pack at 128 bits.
21602       __ vpshufd($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_128bit);
21603       __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg);
21604       __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
21605     } else {
21606       assert(vlen <= 32, "required");
            // 256-bit source: two 256-bit permutes (both with immediate 8) come first;
            // masking and packing are then done at 128 bits.
21607       __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_256bit);
21608       __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, Assembler::AVX_256bit);
21609       __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg);
21610       __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
21611     }
          // Byte results need one more pack step on top of the word result.
21612     if (to_elem_bt == T_BYTE) {
21613       __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
21614     }
21615   %}
21616   ins_pipe( pipe_slow );
21617 %}
21618 
21619 instruct vcastLtoX_evex(vec dst, vec src) %{
        // VectorCastL2X rule used when UseAVX > 2 for any result type, and at any AVX
        // level when the result element type is T_INT, T_FLOAT or T_DOUBLE.
        // Result types not listed in the switch only trip a debug assert.
21620   predicate(UseAVX > 2 ||
21621             (Matcher::vector_element_basic_type(n) == T_INT ||
21622              Matcher::vector_element_basic_type(n) == T_FLOAT ||
21623              Matcher::vector_element_basic_type(n) == T_DOUBLE));
21624   match(Set dst (VectorCastL2X src));
21625   format %{ "vector_cast_l2x  $dst,$src\t!" %}
21626   ins_encode %{
21627     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
          // vlen is the source vector size in bytes; vlen_enc is the matching encoding.
21628     int vlen = Matcher::vector_length_in_bytes(this, $src);
21629     int vlen_enc = vector_length_encoding(this, $src);
21630     switch (to_elem_bt) {
21631       case T_BYTE:
              // On UseAVX > 2 without AVX512VL, emit the narrowing move with the 512-bit encoding.
21632         if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
21633           vlen_enc = Assembler::AVX_512bit;
21634         }
21635         __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21636         break;
21637       case T_SHORT:
              // Same AVX512VL fallback as for T_BYTE.
21638         if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
21639           vlen_enc = Assembler::AVX_512bit;
21640         }
21641         __ evpmovqw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21642         break;
21643       case T_INT:
              // Strategy depends on the source size in bytes.
21644         if (vlen == 8) {
                // 8-byte source: a plain register move, skipped when dst and src coincide.
21645           if ($dst$$XMMRegister != $src$$XMMRegister) {
21646             __ movflt($dst$$XMMRegister, $src$$XMMRegister);
21647           }
21648         } else if (vlen == 16) {
21649           __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 8);
21650         } else if (vlen == 32) {
21651           if (UseAVX > 2) {
21652             if (!VM_Version::supports_avx512vl()) {
21653               vlen_enc = Assembler::AVX_512bit;
21654             }
21655             __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21656           } else {
                  // No EVEX available: two permutes with immediate 8 instead of evpmovqd.
21657             __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, vlen_enc);
21658             __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc);
21659           }
21660         } else { // vlen == 64
21661           __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21662         }
21663         break;
21664       case T_FLOAT:
              // NOTE(review): the predicate admits T_FLOAT/T_DOUBLE at any AVX level, while
              // these asserts require UseAVX > 2 and AVX512DQ; presumably an earlier
              // support check filters such nodes - confirm.
21665         assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
21666         __ evcvtqq2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21667         break;
21668       case T_DOUBLE:
21669         assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
21670         __ evcvtqq2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21671         break;
21672 
21673       default: assert(false, "%s", type2name(to_elem_bt));
21674     }
21675   %}
21676   ins_pipe( pipe_slow );
21677 %}
21678 
21679 instruct vcastFtoD_reg(vec dst, vec src) %{
        // VectorCastF2X rule for T_DOUBLE results: a single vcvtps2pd.
21680   predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
21681   match(Set dst (VectorCastF2X src));
21682   format %{ "vector_cast_f2d  $dst,$src\t!" %}
21683   ins_encode %{
          // The conversion is encoded with the length of the result vector.
21684     int vlen_enc = vector_length_encoding(this);
21685     __ vcvtps2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21686   %}
21687   ins_pipe( pipe_slow );
21688 %}
21689 
21690 
21691 instruct castFtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
        // VectorCastF2X rule for integral results of at most 4 bytes per element on
        // CPUs without AVX10.2 and without AVX512VL, for source vectors below 64 bytes.
        // Delegates to vector_castF2X_avx; dst and four vector temps are TEMP and the
        // flags register is killed.
21692   predicate(!VM_Version::supports_avx10_2() &&
21693             !VM_Version::supports_avx512vl() &&
21694             Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
21695             type2aelembytes(Matcher::vector_element_basic_type(n)) <= 4 &&
21696             is_integral_type(Matcher::vector_element_basic_type(n)));
21697   match(Set dst (VectorCastF2X src));
21698   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
21699   format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
21700   ins_encode %{
          // Encoding derived from the source vector length.
21701     int vlen_enc = vector_length_encoding(this, $src);
21702     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21703     // JDK-8292878 removed the need for an explicit scratch register to load addresses
21704     // wider than 32 bits for register-indirect addressing, since stub constants live in
21705     // the code cache and ReservedCodeCacheSize is currently capped at 2G.
21706     // Targets are free to raise that cap, but a code cache larger than 2G looks
21707     // unreasonable in practice; on the upside, with the cap in place we save a
21708     // temporary register allocation, which in the limiting case can prevent
21709     // spilling in high register pressure blocks.
          // Hence noreg is passed where the helper takes a scratch register.
21710     __ vector_castF2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
21711                           $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
21712                           ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
21713   %}
21714   ins_pipe( pipe_slow );
21715 %}
21716 
21717 instruct castFtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
        // VectorCastF2X rule for integral results on CPUs without AVX10.2 that either
        // support AVX512VL or are given a 64-byte source vector.
        // Uses two vector temps and two mask-register temps; the flags register is killed.
21718   predicate(!VM_Version::supports_avx10_2() &&
21719             (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
21720             is_integral_type(Matcher::vector_element_basic_type(n)));
21721   match(Set dst (VectorCastF2X src));
21722   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
21723   format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
21724   ins_encode %{
21725     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21726     if (to_elem_bt == T_LONG) {
            // Long results: encode with the result vector length and pass the
            // double sign-flip constant to the dedicated F2L helper.
21727       int vlen_enc = vector_length_encoding(this);
21728       __ vector_castF2L_evex($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
21729                              $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
21730                              ExternalAddress(vector_double_signflip()), noreg, vlen_enc);
21731     } else {
            // Other integral results: encode with the source vector length and pass
            // the float sign-flip constant.
21732       int vlen_enc = vector_length_encoding(this, $src);
21733       __ vector_castF2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
21734                              $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
21735                              ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
21736     }
21737   %}
21738   ins_pipe( pipe_slow );
21739 %}
21740 
21741 instruct castFtoX_reg_avx10_2(vec dst, vec src) %{
        // VectorCastF2X rule for integral results on CPUs with AVX10.2, register source.
        // Needs no temporaries; delegates to vector_castF2X_avx10_2.
21742   predicate(VM_Version::supports_avx10_2() &&
21743             is_integral_type(Matcher::vector_element_basic_type(n)));
21744   match(Set dst (VectorCastF2X src));
21745   format %{ "vector_cast_f2x_avx10_2 $dst, $src\t!" %}
21746   ins_encode %{
21747     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
          // Long results are encoded with the result vector length, all other
          // results with the source vector length.
21748     int vlen_enc = (to_elem_bt == T_LONG) ? vector_length_encoding(this) : vector_length_encoding(this, $src);
21749     __ vector_castF2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21750   %}
21751   ins_pipe( pipe_slow );
21752 %}
21753 
21754 instruct castFtoX_mem_avx10_2(vec dst, memory src) %{
        // Memory-operand form of the AVX10.2 VectorCastF2X rule: matches a cast whose
        // input is a LoadVector, so the load is folded into the conversion.
21755   predicate(VM_Version::supports_avx10_2() &&
21756             is_integral_type(Matcher::vector_element_basic_type(n)));
21757   match(Set dst (VectorCastF2X (LoadVector src)));
21758   format %{ "vector_cast_f2x_avx10_2 $dst, $src\t!" %}
21759   ins_encode %{
          // vlen is the lane count of the result vector.
21760     int vlen = Matcher::vector_length(this);
21761     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
          // There is no source register to query here, so for non-long results the
          // encoding is computed from lane count times sizeof(jfloat); presumably this
          // equals the byte size of the loaded float vector - confirm.
21762     int vlen_enc = (to_elem_bt == T_LONG) ? vector_length_encoding(this) : vector_length_encoding(vlen * sizeof(jfloat));
21763     __ vector_castF2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$Address, vlen_enc);
21764   %}
21765   ins_pipe( pipe_slow );
21766 %}
21767 
21768 instruct vcastDtoF_reg(vec dst, vec src) %{
        // VectorCastD2X rule for T_FLOAT results: a single vcvtpd2ps.
21769   predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
21770   match(Set dst (VectorCastD2X src));
21771   format %{ "vector_cast_d2x  $dst,$src\t!" %}
21772   ins_encode %{
          // The conversion is encoded with the length of the source vector.
21773     int vlen_enc = vector_length_encoding(this, $src);
21774     __ vcvtpd2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21775   %}
21776   ins_pipe( pipe_slow );
21777 %}
21778 
21779 instruct castDtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, vec xtmp5, rFlagsReg cr) %{
        // VectorCastD2X rule for integral results on CPUs without AVX10.2 and without
        // AVX512VL, for source vectors below 64 bytes.
        // Delegates to vector_castD2X_avx; dst and five vector temps are TEMP and the
        // flags register is killed.
21780   predicate(!VM_Version::supports_avx10_2() &&
21781             !VM_Version::supports_avx512vl() &&
21782             Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
21783             is_integral_type(Matcher::vector_element_basic_type(n)));
21784   match(Set dst (VectorCastD2X src));
21785   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP xtmp5, KILL cr);
21786   format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4 and $xtmp5 as TEMP" %}
21787   ins_encode %{
          // Encoding derived from the source vector length.
21788     int vlen_enc = vector_length_encoding(this, $src);
21789     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
          // The float sign-flip constant is passed even though the source is double;
          // noreg is passed where the helper takes a scratch register.
21790     __ vector_castD2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
21791                           $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, $xtmp5$$XMMRegister,
21792                           ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
21793   %}
21794   ins_pipe( pipe_slow );
21795 %}
21796 
21797 instruct castDtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
        // VectorCastD2X rule for integral results on CPUs without AVX10.2 that either
        // support AVX512VL or are given a 64-byte source vector.
        // Uses two vector temps and two mask-register temps; the flags register is killed.
21798   predicate(!VM_Version::supports_avx10_2() &&
21799             (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
21800             is_integral_type(Matcher::vector_element_basic_type(n)));
21801   match(Set dst (VectorCastD2X src));
21802   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
21803   format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
21804   ins_encode %{
          // Encoding derived from the source vector length.
21805     int vlen_enc = vector_length_encoding(this, $src);
21806     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
          // The sign-flip constant depends on AVX512DQ support: the double constant
          // with DQ, the float constant without it.
21807     AddressLiteral signflip = VM_Version::supports_avx512dq() ? ExternalAddress(vector_double_signflip()) :
21808                               ExternalAddress(vector_float_signflip());
21809     __ vector_castD2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
21810                            $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister, signflip, noreg, vlen_enc);
21811   %}
21812   ins_pipe( pipe_slow );
21813 %}
21814 
21815 instruct castDtoX_reg_avx10_2(vec dst, vec src) %{
        // VectorCastD2X rule for integral results on CPUs with AVX10.2, register source.
        // Needs no temporaries; delegates to vector_castD2X_avx10_2.
21816   predicate(VM_Version::supports_avx10_2() &&
21817             is_integral_type(Matcher::vector_element_basic_type(n)));
21818   match(Set dst (VectorCastD2X src));
21819   format %{ "vector_cast_d2x_avx10_2 $dst, $src\t!" %}
21820   ins_encode %{
          // Encoding derived from the source vector length for every result type.
21821     int vlen_enc = vector_length_encoding(this, $src);
21822     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21823     __ vector_castD2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21824   %}
21825   ins_pipe( pipe_slow );
21826 %}
21827 
21828 instruct castDtoX_mem_avx10_2(vec dst, memory src) %{
        // Memory-operand form of the AVX10.2 VectorCastD2X rule: matches a cast whose
        // input is a LoadVector, so the load is folded into the conversion.
21829   predicate(VM_Version::supports_avx10_2() &&
21830             is_integral_type(Matcher::vector_element_basic_type(n)));
21831   match(Set dst (VectorCastD2X (LoadVector src)));
21832   format %{ "vector_cast_d2x_avx10_2 $dst, $src\t!" %}
21833   ins_encode %{
          // vlen is the lane count of the result vector. With no source register to
          // query, the encoding is computed from lane count times sizeof(jdouble);
          // presumably this equals the byte size of the loaded double vector - confirm.
21834     int vlen = Matcher::vector_length(this);
21835     int vlen_enc = vector_length_encoding(vlen * sizeof(jdouble));
21836     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21837     __ vector_castD2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$Address, vlen_enc);
21838   %}
21839   ins_pipe( pipe_slow );
21840 %}
21841 
21842 instruct vucast(vec dst, vec src) %{
        // One rule for all three unsigned vector casts (from byte, short and int).
        // There is no predicate; the source and result element types are read from
        // the node at emission time and handed to vector_unsigned_cast.
21843   match(Set dst (VectorUCastB2X src));
21844   match(Set dst (VectorUCastS2X src));
21845   match(Set dst (VectorUCastI2X src));
21846   format %{ "vector_ucast $dst,$src\t!" %}
21847   ins_encode %{
21848     assert(UseAVX > 0, "required");
21849 
21850     BasicType from_elem_bt = Matcher::vector_element_basic_type(this, $src);
21851     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
          // Encoding derived from the result vector length.
21852     int vlen_enc = vector_length_encoding(this);
21853     __ vector_unsigned_cast($dst$$XMMRegister, $src$$XMMRegister, vlen_enc, from_elem_bt, to_elem_bt);
21854   %}
21855   ins_pipe( pipe_slow );
21856 %}
21857 
21858 instruct vround_float_avx(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
        // RoundVF rule (T_INT result) for CPUs without AVX512VL and result vectors
        // below 64 bytes. Delegates to vector_round_float_avx; uses one pointer
        // register temp and four vector temps, and kills the flags register.
21859   predicate(!VM_Version::supports_avx512vl() &&
21860             Matcher::vector_length_in_bytes(n) < 64 &&
21861             Matcher::vector_element_basic_type(n) == T_INT);
21862   match(Set dst (RoundVF src));
21863   effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
21864   format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $xtmp3, $xtmp4 as TEMP" %}
21865   ins_encode %{
21866     int vlen_enc = vector_length_encoding(this);
          // MXCSR image handed to the helper as a constant-table entry: 0x3FBF when
          // EnableX86ECoreOpts is set, 0x3F80 otherwise. Presumably both select
          // round-down with exceptions masked - confirm against the Intel SDM.
21867     InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
21868     __ vector_round_float_avx($dst$$XMMRegister, $src$$XMMRegister,
21869                               ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
21870                               $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister);
21871   %}
21872   ins_pipe( pipe_slow );
21873 %}
21874 
21875 instruct vround_float_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
        // RoundVF rule (T_INT result) for CPUs with AVX512VL, or for 64-byte result
        // vectors. Delegates to vector_round_float_evex; uses one pointer register
        // temp, two vector temps and two mask-register temps, and kills the flags register.
21876   predicate((VM_Version::supports_avx512vl() ||
21877              Matcher::vector_length_in_bytes(n) == 64) &&
21878              Matcher::vector_element_basic_type(n) == T_INT);
21879   match(Set dst (RoundVF src));
21880   effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
21881   format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
21882   ins_encode %{
21883     int vlen_enc = vector_length_encoding(this);
          // MXCSR image handed to the helper as a constant-table entry: 0x3FBF when
          // EnableX86ECoreOpts is set, 0x3F80 otherwise. Presumably both select
          // round-down with exceptions masked - confirm against the Intel SDM.
21884     InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
21885     __ vector_round_float_evex($dst$$XMMRegister, $src$$XMMRegister,
21886                                ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
21887                                $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
21888   %}
21889   ins_pipe( pipe_slow );
21890 %}
21891 
21892 instruct vround_reg_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
        // RoundVD rule (T_LONG result). Delegates to vector_round_double_evex with the
        // double sign-flip stub constant; uses one pointer register temp, two vector
        // temps and two mask-register temps, and kills the flags register.
        // NOTE(review): the predicate checks only the element type, not CPU features;
        // presumably support for mask registers is verified elsewhere - confirm.
21893   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
21894   match(Set dst (RoundVD src));
21895   effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2,  KILL cr);
21896   format %{ "vector_round_long $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
21897   ins_encode %{
21898     int vlen_enc = vector_length_encoding(this);
          // Same MXCSR image selection as in the float rounding rules:
          // 0x3FBF when EnableX86ECoreOpts is set, 0x3F80 otherwise.
21899     InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
21900     __ vector_round_double_evex($dst$$XMMRegister, $src$$XMMRegister,
21901                                 ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), new_mxcsr, vlen_enc,
21902                                 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
21903   %}
21904   ins_pipe( pipe_slow );
21905 %}
21906 
21907 // --------------------------------- VectorMaskCmp --------------------------------------
21908 
21909 instruct vcmpFD(legVec dst, legVec src1, legVec src2, immI8 cond) %{
        // Floating-point VectorMaskCmp producing a vector result (the node type is
        // not a pvectmask) for first-operand vectors of 8 to 32 bytes.
        // Emits vcmpps for T_FLOAT operands and vcmppd otherwise.
21910   predicate(n->bottom_type()->isa_pvectmask() == nullptr &&
21911             Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  8 && // src1
21912             Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
21913             is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
21914   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
21915   format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
21916   ins_encode %{
          // Encoding derived from the first source operand.
21917     int vlen_enc = vector_length_encoding(this, $src1);
          // Translate the BoolTest condition constant into an FP comparison predicate.
21918     Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
21919     if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
21920       __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
21921     } else {
21922       __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
21923     }
21924   %}
21925   ins_pipe( pipe_slow );
21926 %}
21927 
21928 instruct evcmpFD64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
        // Floating-point VectorMaskCmp producing a vector result for 64-byte operands.
        // The comparison writes a mask register temp, which is then turned into a
        // vector by a masked load of the all-bits-set constant.
21929   predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64 && // src1
21930             n->bottom_type()->isa_pvectmask() == nullptr &&
21931             is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
21932   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
21933   effect(TEMP ktmp);
21934   format %{ "vector_compare $dst,$src1,$src2,$cond" %}
21935   ins_encode %{
          // The predicate guarantees 64-byte operands, so the encoding is fixed.
21936     int vlen_enc = Assembler::AVX_512bit;
21937     Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
21938     KRegister mask = k0; // The comparison itself is not being masked.
21939     if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
            // Compare into ktmp, then load vector_all_bits_set() under ktmp with the
            // false argument (presumably non-merging, so unselected lanes become zero).
21940       __ evcmpps($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
21941       __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
21942     } else {
            // Double operands: same scheme using the quadword variants.
21943       __ evcmppd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
21944       __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
21945     }
21946   %}
21947   ins_pipe( pipe_slow );
21948 %}
21949 
21950 instruct evcmpFD(kReg dst, vec src1, vec src2, immI8 cond) %{
        // Floating-point VectorMaskCmp whose result type is a pvectmask: the
        // comparison writes its result directly into the mask register dst.
21951   predicate(n->bottom_type()->isa_pvectmask() &&
21952             is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
21953   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
21954   format %{ "vector_compare_evex $dst,$src1,$src2,$cond\t!" %}
21955   ins_encode %{
21956     assert(bottom_type()->isa_pvectmask(), "TypePVectMask expected");
          // Encoding derived from the first source operand.
21957     int vlen_enc = vector_length_encoding(this, $src1);
21958     Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
21959     KRegister mask = k0; // The comparison itself is not being masked.
21960     if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
21961       __ evcmpps($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
21962     } else {
21963       __ evcmppd($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
21964     }
21965   %}
21966   ins_pipe( pipe_slow );
21967 %}
21968 
21969 instruct vcmp_direct(legVec dst, legVec src1, legVec src2, immI8 cond) %{
        // Signed integral VectorMaskCmp producing a vector result for operands of
        // 4 to 32 bytes, restricted to the conditions eq, lt and gt.
        // No temp register is needed: xnoreg is passed to vpcmpCCW.
21970   predicate(n->bottom_type()->isa_pvectmask() == nullptr &&
21971             !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
21972             Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  4 && // src1
21973             Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
21974             is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
21975             (n->in(2)->get_int() == BoolTest::eq ||
21976              n->in(2)->get_int() == BoolTest::lt ||
21977              n->in(2)->get_int() == BoolTest::gt)); // cond
21978   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
21979   format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
21980   ins_encode %{
          // Encoding and element width are both taken from the first source operand.
21981     int vlen_enc = vector_length_encoding(this, $src1);
21982     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
21983     Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
21984     __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, cmp, ww, vlen_enc);
21985   %}
21986   ins_pipe( pipe_slow );
21987 %}
21988 
21989 instruct vcmp_negate(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
        // Signed integral VectorMaskCmp producing a vector result for operands of
        // 4 to 32 bytes, restricted to the conditions ne, le and ge.
        // Unlike the eq/lt/gt rule this one supplies a vector temp to vpcmpCCW;
        // presumably the helper negates a direct comparison there - confirm.
21990   predicate(n->bottom_type()->isa_pvectmask() == nullptr &&
21991             !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
21992             Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  4 && // src1
21993             Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
21994             is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
21995             (n->in(2)->get_int() == BoolTest::ne ||
21996              n->in(2)->get_int() == BoolTest::le ||
21997              n->in(2)->get_int() == BoolTest::ge)); // cond
21998   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
21999   effect(TEMP dst, TEMP xtmp);
22000   format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
22001   ins_encode %{
          // Encoding and element width are both taken from the first source operand.
22002     int vlen_enc = vector_length_encoding(this, $src1);
22003     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22004     Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22005     __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
22006   %}
22007   ins_pipe( pipe_slow );
22008 %}
22009 
22010 instruct vcmpu(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
        // Unsigned integral VectorMaskCmp producing a vector result for operands of
        // 4 to 32 bytes. Both operands are XOR-ed with a broadcast high-bit constant
        // and then compared with vpcmpCCW; flipping the top bit of each element
        // presumably maps unsigned order onto signed order.
22011   predicate(n->bottom_type()->isa_pvectmask() == nullptr &&
22012             Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22013             Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  4 && // src1
22014             Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22015             is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22016   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22017   effect(TEMP dst, TEMP xtmp);
22018   format %{ "vector_compareu $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
22019   ins_encode %{
          // Constant-table entry produced by high_bit_set() for the operand element type.
22020     InternalAddress flip_bit = $constantaddress(high_bit_set(Matcher::vector_element_basic_type(this, $src1)));
22021     int vlen_enc = vector_length_encoding(this, $src1);
22022     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22023     Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22024 
          // Replicate the constant across xtmp; the instruction used depends on
          // whether the encoding is 128-bit.
22025     if (vlen_enc == Assembler::AVX_128bit) {
22026       __ vmovddup($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
22027     } else {
22028       __ vbroadcastsd($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
22029     }
          // dst = src1 ^ flip, xtmp = src2 ^ flip. xtmp is overwritten last because it
          // holds the constant until then; it is also passed as the temp of vpcmpCCW.
22030     __ vpxor($dst$$XMMRegister, $xtmp$$XMMRegister, $src1$$XMMRegister, vlen_enc);
22031     __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $src2$$XMMRegister, vlen_enc);
22032     __ vpcmpCCW($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
22033   %}
22034   ins_pipe( pipe_slow );
22035 %}
22036 
22037 instruct vcmp64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
        // Integral VectorMaskCmp producing a vector result for 64-byte operands.
        // Handles signed and unsigned conditions; only T_INT and T_LONG elements are
        // implemented, any other element type trips a debug assert.
22038   predicate((n->bottom_type()->isa_pvectmask() == nullptr &&
22039              Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64) && // src1
22040              is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22041   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22042   effect(TEMP ktmp);
22043   format %{ "vector_compare $dst,$src1,$src2,$cond" %}
22044   ins_encode %{
22045     assert(UseAVX > 2, "required");
22046 
22047     int vlen_enc = vector_length_encoding(this, $src1);
22048     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
          // The compare helpers receive the negation of this flag (presumably a
          // signed-compare flag).
22049     bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
22050     KRegister mask = k0; // The comparison itself is not being masked.
          // Passed to the masked load below; false presumably zeroes unselected lanes.
22051     bool merge = false;
22052     BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
22053 
          // Compare into ktmp, then materialize the vector result by loading
          // vector_all_bits_set() under ktmp.
22054     switch (src1_elem_bt) {
22055       case T_INT: {
22056         __ evpcmpd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22057         __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
22058         break;
22059       }
22060       case T_LONG: {
22061         __ evpcmpq($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22062         __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
22063         break;
22064       }
22065       default: assert(false, "%s", type2name(src1_elem_bt));
22066     }
22067   %}
22068   ins_pipe( pipe_slow );
22069 %}
22070 
22071 
22072 instruct evcmp(kReg dst, vec src1, vec src2, immI8 cond) %{
        // Integral VectorMaskCmp whose result type is a pvectmask: the comparison
        // writes directly into the mask register dst. Covers T_BYTE, T_SHORT, T_INT
        // and T_LONG elements with signed and unsigned conditions.
22073   predicate(n->bottom_type()->isa_pvectmask() &&
22074             is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22075   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22076   format %{ "vector_compared_evex $dst,$src1,$src2,$cond\t!" %}
22077   ins_encode %{
22078     assert(UseAVX > 2, "required");
22079     assert(bottom_type()->isa_pvectmask(), "TypePVectMask expected");
22080 
22081     int vlen_enc = vector_length_encoding(this, $src1);
22082     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
          // The compare helpers receive the negation of this flag (presumably a
          // signed-compare flag).
22083     bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
22084     BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
22085 
22086     // Pick the compare instruction by the element type of the first source; k0 means unmasked.
22087     switch (src1_elem_bt) {
22088       case T_BYTE: {
22089         __ evpcmpb($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22090         break;
22091       }
22092       case T_SHORT: {
22093         __ evpcmpw($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22094         break;
22095       }
22096       case T_INT: {
22097         __ evpcmpd($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22098         break;
22099       }
22100       case T_LONG: {
22101         __ evpcmpq($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22102         break;
22103       }
22104       default: assert(false, "%s", type2name(src1_elem_bt));
22105     }
22106   %}
22107   ins_pipe( pipe_slow );
22108 %}
22109 
22110 // Extract
22111 
22112 instruct extractI(rRegI dst, legVec src, immU8 idx) %{
        // Extracts the int, short or byte element at constant index idx from a
        // source vector of at most 16 bytes into the general-purpose register dst.
22113   predicate(Matcher::vector_length_in_bytes(n->in(1)) <= 16); // src
22114   match(Set dst (ExtractI src idx));
22115   match(Set dst (ExtractS src idx));
22116   match(Set dst (ExtractB src idx));
22117   format %{ "extractI $dst,$src,$idx\t!" %}
22118   ins_encode %{
          // idx must address an element inside the source vector.
22119     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22120 
          // The element type of src is handed to get_elem together with the index.
22121     BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
22122     __ get_elem(elem_bt, $dst$$Register, $src$$XMMRegister, $idx$$constant);
22123   %}
22124   ins_pipe( pipe_slow );
22125 %}
22126 
22127 instruct vextractI(rRegI dst, legVec src, immI idx, legVec vtmp) %{
        // Int/short/byte element extraction for 32- and 64-byte source vectors.
        // get_lane is called first with vtmp as scratch and returns the register
        // that get_elem then reads from (presumably the 128-bit lane holding
        // idx -- confirm against the macro assembler).
22128   predicate(Matcher::vector_length_in_bytes(n->in(1)) == 32 || // src
22129             Matcher::vector_length_in_bytes(n->in(1)) == 64);  // src
22130   match(Set dst (ExtractI src idx));
22131   match(Set dst (ExtractS src idx));
22132   match(Set dst (ExtractB src idx));
22133   format %{ "vextractI $dst,$src,$idx\t! using $vtmp as TEMP" %}
22134   effect(TEMP vtmp);
22135   ins_encode %{
22136     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22137 
22138     const BasicType bt = Matcher::vector_element_basic_type(this, $src);
22139     XMMRegister lane = __ get_lane(bt, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22140     __ get_elem(bt, $dst$$Register, lane, $idx$$constant);
22141   %}
22142   ins_pipe( pipe_slow );
22143 %}
22144 
22145 instruct extractL(rRegL dst, legVec src, immU8 idx) %{
        // Extracts the long element at constant index idx from a source vector
        // of at most two elements into the general-purpose register dst.
22146   predicate(Matcher::vector_length(n->in(1)) <= 2); // src
22147   match(Set dst (ExtractL src idx));
22148   format %{ "extractL $dst,$src,$idx\t!" %}
22149   ins_encode %{
22150     assert(UseSSE >= 4, "required");
22151     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22152 
          // The source register is read directly; no lane selection step is emitted.
22153     __ get_elem(T_LONG, $dst$$Register, $src$$XMMRegister, $idx$$constant);
22154   %}
22155   ins_pipe( pipe_slow );
22156 %}
22157 
22158 instruct vextractL(rRegL dst, legVec src, immU8 idx, legVec vtmp) %{
        // Long element extraction for source vectors of 4 or 8 elements.
        // get_lane (scratch: vtmp) yields the register that get_elem reads from.
22159   predicate(Matcher::vector_length(n->in(1)) == 4 || // src
22160             Matcher::vector_length(n->in(1)) == 8);  // src
22161   match(Set dst (ExtractL src idx));
22162   format %{ "vextractL $dst,$src,$idx\t! using $vtmp as TEMP" %}
22163   effect(TEMP vtmp);
22164   ins_encode %{
22165     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22166 
22167     XMMRegister lane = __ get_lane(T_LONG, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22168     __ get_elem(T_LONG, $dst$$Register, lane, $idx$$constant);
22169   %}
22170   ins_pipe( pipe_slow );
22171 %}
22172 
22173 instruct extractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
        // Extracts the float element at constant index idx from a source vector
        // of at most four elements into the float register dst. Both dst and
        // vtmp are declared TEMP, and vtmp is handed to get_elem as scratch.
22174   predicate(Matcher::vector_length(n->in(1)) <= 4);
22175   match(Set dst (ExtractF src idx));
22176   effect(TEMP dst, TEMP vtmp);
22177   format %{ "extractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
22178   ins_encode %{
22179     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22180 
22181     __ get_elem(T_FLOAT, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant, $vtmp$$XMMRegister);
22182   %}
22183   ins_pipe( pipe_slow );
22184 %}
22185 
22186 instruct vextractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
        // Float element extraction for source vectors of 8 or 16 elements.
        // get_lane (scratch: vtmp) yields the register that get_elem reads from;
        // this variant calls get_elem without a scratch argument.
22187   predicate(Matcher::vector_length(n->in(1)/*src*/) == 8 ||
22188             Matcher::vector_length(n->in(1)/*src*/) == 16);
22189   match(Set dst (ExtractF src idx));
22190   format %{ "vextractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
22191   effect(TEMP vtmp);
22192   ins_encode %{
22193     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22194 
22195     XMMRegister lane = __ get_lane(T_FLOAT, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22196     __ get_elem(T_FLOAT, $dst$$XMMRegister, lane, $idx$$constant);
22197   %}
22198   ins_pipe( pipe_slow );
22199 %}
22200 
22201 instruct extractD(legRegD dst, legVec src, immU8 idx) %{
        // Extracts the double element at constant index idx from a two-element
        // source vector into the double register dst.
22202   predicate(Matcher::vector_length(n->in(1)) == 2); // src
22203   match(Set dst (ExtractD src idx));
22204   format %{ "extractD $dst,$src,$idx\t!" %}
22205   ins_encode %{
22206     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22207 
          // The source register is read directly; no lane selection step is emitted.
22208     __ get_elem(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22209   %}
22210   ins_pipe( pipe_slow );
22211 %}
22212 
22213 instruct vextractD(legRegD dst, legVec src, immU8 idx, legVec vtmp) %{
        // Double element extraction for source vectors of 4 or 8 elements.
        // get_lane (scratch: vtmp) yields the register that get_elem reads from.
22214   predicate(Matcher::vector_length(n->in(1)) == 4 || // src
22215             Matcher::vector_length(n->in(1)) == 8);  // src
22216   match(Set dst (ExtractD src idx));
22217   format %{ "vextractD $dst,$src,$idx\t! using $vtmp as TEMP" %}
22218   effect(TEMP vtmp);
22219   ins_encode %{
22220     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22221 
22222     XMMRegister lane = __ get_lane(T_DOUBLE, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22223     __ get_elem(T_DOUBLE, $dst$$XMMRegister, lane, $idx$$constant);
22224   %}
22225   ins_pipe( pipe_slow );
22226 %}
22227 
22228 // --------------------------------- Vector Blend --------------------------------------
22229 
22230 instruct blendvp(vec dst, vec src, vec mask, rxmm0 tmp) %{
        // Vector blend for the non-AVX case (UseAVX == 0). dst is both the first
        // blend input and the result. tmp uses the rxmm0 operand class because
        // the blend instruction below takes its mask implicitly from xmm0.
22231   predicate(UseAVX == 0);
22232   match(Set dst (VectorBlend (Binary dst src) mask));
22233   format %{ "vector_blend  $dst,$src,$mask\t! using $tmp as TEMP" %}
22234   effect(TEMP tmp);
22235   ins_encode %{
22236     assert(UseSSE >= 4, "required");
22237 
          // Copy the mask into tmp unless it already lives in that register.
22238     if ($mask$$XMMRegister != $tmp$$XMMRegister) {
22239       __ movdqu($tmp$$XMMRegister, $mask$$XMMRegister);
22240     }
22241     __ pblendvb($dst$$XMMRegister, $src$$XMMRegister); // uses xmm0 as mask
22242   %}
22243   ins_pipe( pipe_slow );
22244 %}
22245 
22246 instruct vblendvpI(legVec dst, legVec src1, legVec src2, legVec mask) %{
        // AVX blend for integral element types on vectors of at most 32 bytes.
        // Applies only when the mask input is a vector (not a predicate-mask
        // type) and EnableX86ECoreOpts is off. Emits a single vpblendvb.
22247   predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
22248             n->in(2)->bottom_type()->isa_pvectmask() == nullptr &&
22249             Matcher::vector_length_in_bytes(n) <= 32 &&
22250             is_integral_type(Matcher::vector_element_basic_type(n)));
22251   match(Set dst (VectorBlend (Binary src1 src2) mask));
22252   format %{ "vector_blend  $dst,$src1,$src2,$mask\t!" %}
22253   ins_encode %{
22254     int vlen_enc = vector_length_encoding(this);
22255     __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
22256   %}
22257   ins_pipe( pipe_slow );
22258 %}
22259 
22260 instruct vblendvpFD(legVec dst, legVec src1, legVec src2, legVec mask) %{
        // Counterpart of the integral AVX blend for non-integral element types:
        // same size, mask-kind and EnableX86ECoreOpts conditions, but the blend
        // is emitted with vblendvps.
22261   predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
22262             n->in(2)->bottom_type()->isa_pvectmask() == nullptr &&
22263             Matcher::vector_length_in_bytes(n) <= 32 &&
22264             !is_integral_type(Matcher::vector_element_basic_type(n)));
22265   match(Set dst (VectorBlend (Binary src1 src2) mask));
22266   format %{ "vector_blend  $dst,$src1,$src2,$mask\t!" %}
22267   ins_encode %{
22268     int vlen_enc = vector_length_encoding(this);
22269     __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
22270   %}
22271   ins_pipe( pipe_slow );
22272 %}
22273 
22274 instruct vblendvp(legVec dst, legVec src1, legVec src2, legVec mask, legVec vtmp) %{
        // Blend variant chosen when EnableX86ECoreOpts is on (any element type,
        // vector mask, at most 32 bytes). The blend is built from three logical
        // instructions instead of a variable-blend instruction.
22275   predicate(UseAVX > 0 && EnableX86ECoreOpts &&
22276             n->in(2)->bottom_type()->isa_pvectmask() == nullptr &&
22277             Matcher::vector_length_in_bytes(n) <= 32);
22278   match(Set dst (VectorBlend (Binary src1 src2) mask));
22279   format %{ "vector_blend  $dst,$src1,$src2,$mask\t! using $vtmp as TEMP" %}
22280   effect(TEMP vtmp, TEMP dst);
22281   ins_encode %{
22282     int vlen_enc = vector_length_encoding(this);
          // Presumably vtmp = (not mask) and src1, i.e. the src1 lanes the mask
          // leaves unselected -- confirm operand order against the assembler.
22283     __ vpandn($vtmp$$XMMRegister, $mask$$XMMRegister, $src1$$XMMRegister, vlen_enc);
          // dst = mask and src2.
22284     __ vpand ($dst$$XMMRegister,  $mask$$XMMRegister, $src2$$XMMRegister, vlen_enc);
          // Merge the two partial results.
22285     __ vpor  ($dst$$XMMRegister,  $dst$$XMMRegister,  $vtmp$$XMMRegister, vlen_enc);
22286   %}
22287   ins_pipe( pipe_slow );
22288 %}
22289 
22290 instruct evblendvp64(vec dst, vec src1, vec src2, vec mask, kReg ktmp) %{
        // 64-byte vector blend whose mask input is a vector rather than a
        // predicate-mask type. The vector mask is first turned into an opmask
        // in ktmp, and the blend is then done under that opmask.
22291   predicate(Matcher::vector_length_in_bytes(n) == 64 &&
22292             n->in(2)->bottom_type()->isa_pvectmask() == nullptr);
22293   match(Set dst (VectorBlend (Binary src1 src2) mask));
22294   format %{ "vector_blend  $dst,$src1,$src2,$mask\t! using $ktmp as TEMP" %}
22295   effect(TEMP ktmp);
22296   ins_encode %{
22297     int vlen_enc = Assembler::AVX_512bit;
22298     BasicType elem_bt = Matcher::vector_element_basic_type(this);
          // ktmp gets the lanes where mask equals the all-bits-set constant.
22299     __ evpcmp(elem_bt, $ktmp$$KRegister, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vlen_enc, noreg);
          // Blend src1 and src2 under ktmp; the literal true is presumably the
          // merge flag -- confirm against the macro assembler.
22300     __ evpblend(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
22301   %}
22302   ins_pipe( pipe_slow );
22303 %}
22304 
22305 
22306 instruct evblendvp64_masked(vec dst, vec src1, vec src2, kReg mask) %{
        // Vector blend whose mask input is already a predicate-mask (opmask)
        // register. Subword element types are accepted only when AVX512BW is
        // available. No temporary is needed: the opmask feeds evpblend directly.
22307   predicate(n->in(2)->bottom_type()->isa_pvectmask() &&
22308             (!is_subword_type(Matcher::vector_element_basic_type(n)) ||
22309              VM_Version::supports_avx512bw()));
22310   match(Set dst (VectorBlend (Binary src1 src2) mask));
22311   format %{ "vector_blend  $dst,$src1,$src2,$mask\t!" %}
22312   ins_encode %{
22313     int vlen_enc = vector_length_encoding(this);
22314     BasicType elem_bt = Matcher::vector_element_basic_type(this);
          // The literal true is presumably the merge flag -- confirm against
          // the macro assembler.
22315     __ evpblend(elem_bt, $dst$$XMMRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
22316   %}
22317   ins_pipe( pipe_slow );
22318 %}
22319 
22320 // --------------------------------- ABS --------------------------------------
22321 // a = |a|
22322 instruct vabsB_reg(vec dst, vec src) %{
        // Lane-wise absolute value of a byte vector.
22323   match(Set dst (AbsVB  src));
22324   format %{ "vabsb $dst,$src\t# $dst = |$src| abs packedB" %}
22325   ins_encode %{
22326     const uint lanes = Matcher::vector_length(this);
22327     if (lanes > 16) {
            // More than 16 byte lanes: use the form taking a vector length encoding.
22328       const int enc = vector_length_encoding(this);
22329       __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, enc);
22330     } else {
22331       __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
22332     }
22333   %}
22334   ins_pipe( pipe_slow );
22335 %}
22336 
22337 instruct vabsS_reg(vec dst, vec src) %{
        // Lane-wise absolute value of a short vector.
22338   match(Set dst (AbsVS  src));
22339   format %{ "vabsw $dst,$src\t# $dst = |$src| abs packedS" %}
22340   ins_encode %{
22341     const uint lanes = Matcher::vector_length(this);
22342     if (lanes > 8) {
            // More than 8 short lanes: use the form taking a vector length encoding.
22343       const int enc = vector_length_encoding(this);
22344       __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, enc);
22345     } else {
22346       __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
22347     }
22348   %}
22349   ins_pipe( pipe_slow );
22350 %}
22351 
22352 instruct vabsI_reg(vec dst, vec src) %{
        // Lane-wise absolute value of an int vector.
22353   match(Set dst (AbsVI  src));
22354   format %{ "pabsd $dst,$src\t# $dst = |$src| abs packedI" %}
22355   ins_encode %{
22356     const uint lanes = Matcher::vector_length(this);
22357     if (lanes > 4) {
            // More than 4 int lanes: use the form taking a vector length encoding.
22358       const int enc = vector_length_encoding(this);
22359       __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, enc);
22360     } else {
22361       __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
22362     }
22363   %}
22364   ins_pipe( pipe_slow );
22365 %}
22366 
22367 instruct vabsL_reg(vec dst, vec src) %{
        // Lane-wise absolute value of a long vector, emitted as evpabsq.
22368   match(Set dst (AbsVL  src));
22369   format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packedL" %}
22370   ins_encode %{
22371     assert(UseAVX > 2, "required");
22372     int vlen_enc = vector_length_encoding(this);
          // Without AVX512VL the 512-bit encoding is used regardless of the
          // actual vector size.
22373     if (!VM_Version::supports_avx512vl()) {
22374       vlen_enc = Assembler::AVX_512bit;
22375     }
22376     __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22377   %}
22378   ins_pipe( pipe_slow );
22379 %}
22380 
22381 // --------------------------------- ABSNEG --------------------------------------
22382 
22383 instruct vabsnegF(vec dst, vec src) %{
        // Absolute value or negation of a float vector; the ideal opcode of the
        // matched node tells the assembler helper which of the two to emit.
        // Four-element vectors are excluded here and left to vabsneg4F.
22384   predicate(Matcher::vector_length(n) != 4); // handled by 1-operand instruction vabsneg4F
22385   match(Set dst (AbsVF src));
22386   match(Set dst (NegVF src));
22387   ins_cost(150);
22388   format %{ "vabsnegf $dst,$src,[mask]\t# absneg packedF" %}
22389   ins_encode %{
22390     const int ideal_op = this->ideal_Opcode();
22391     int vlen = Matcher::vector_length(this);
22392     if (vlen != 2) {
            // Only 8 and 16 lanes remain once 2 and 4 have been ruled out.
22393       assert(vlen == 8 || vlen == 16, "required");
22394       const int enc = vector_length_encoding(this);
22395       __ vabsnegf(ideal_op, $dst$$XMMRegister, $src$$XMMRegister, enc);
22396     } else {
22397       __ vabsnegf(ideal_op, $dst$$XMMRegister, $src$$XMMRegister);
22398     }
22399   %}
22400   ins_pipe( pipe_slow );
22401 %}
22402 
22403 instruct vabsneg4F(vec dst) %{
        // One-operand form of float abs/neg for four-element vectors: dst is
        // both source and destination. The ideal opcode selects abs versus neg.
22404   predicate(Matcher::vector_length(n) == 4);
22405   match(Set dst (AbsVF dst));
22406   match(Set dst (NegVF dst));
22407   format %{ "vabsnegf $dst,[mask]\t# absneg packed4F" %}
22408   ins_cost(150);
22409   ins_encode %{
22410     int opcode = this->ideal_Opcode();
22411     __ vabsnegf(opcode, $dst$$XMMRegister, $dst$$XMMRegister);
22412   %}
22413   ins_pipe( pipe_slow );
22414 %}
22415 
22416 instruct vabsnegD(vec dst, vec src) %{
        // Absolute value or negation of a double vector; the ideal opcode of the
        // matched node tells the assembler helper which of the two to emit.
22417   match(Set dst (AbsVD  src));
22418   match(Set dst (NegVD  src));
22419   format %{ "vabsnegd $dst,$src,[mask]\t# absneg packedD" %}
22420   ins_encode %{
22421     const int ideal_op = this->ideal_Opcode();
22422     const uint lanes = Matcher::vector_length(this);
22423     if (lanes != 2) {
            // Anything but two lanes goes through the form with a length encoding.
22424       const int enc = vector_length_encoding(this);
22425       __ vabsnegd(ideal_op, $dst$$XMMRegister, $src$$XMMRegister, enc);
22426     } else {
22427       __ vabsnegd(ideal_op, $dst$$XMMRegister, $src$$XMMRegister);
22428     }
22429   %}
22430   ins_pipe( pipe_slow );
22431 %}
22432 
22433 //------------------------------------- VectorTest --------------------------------------------
22434 
22435 instruct vptest_lt16(rFlagsRegU cr, legVec src1, legVec src2, legVec vtmp) %{
        // VectorTest on vector (non-opmask) inputs shorter than 16 bytes. The
        // result is left in the flags register; vtmp is passed to the helper
        // as scratch.
22436   predicate(Matcher::vector_length_in_bytes(n->in(1)) < 16);
22437   match(Set cr (VectorTest src1 src2));
22438   format %{ "vptest_lt16  $src1, $src2\t! using $vtmp as TEMP" %}
22439   effect(TEMP vtmp);
22440   ins_encode %{
22441     const int vlen_in_bytes = Matcher::vector_length_in_bytes(this, $src1);
22442     const BasicType elem_bt = Matcher::vector_element_basic_type(this, $src1);
22443     __ vectortest(elem_bt, $src1$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister, vlen_in_bytes);
22444   %}
22445   ins_pipe( pipe_slow );
22446 %}
22447 
22448 instruct vptest_ge16(rFlagsRegU cr, legVec src1, legVec src2) %{
        // VectorTest on vector inputs of 16 bytes or more. No scratch register
        // is needed, so xnoreg is passed in the temp position.
22449   predicate(Matcher::vector_length_in_bytes(n->in(1)) >= 16);
22450   match(Set cr (VectorTest src1 src2));
22451   format %{ "vptest_ge16  $src1, $src2\n\t" %}
22452   ins_encode %{
22453     const int vlen_in_bytes = Matcher::vector_length_in_bytes(this, $src1);
22454     const BasicType elem_bt = Matcher::vector_element_basic_type(this, $src1);
22455     __ vectortest(elem_bt, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, vlen_in_bytes);
22456   %}
22457   ins_pipe( pipe_slow );
22458 %}
22459 
22460 instruct ktest_alltrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
        // All-true test (predicate BoolTest::overflow) on an opmask with fewer
        // than 8 lanes, or exactly 8 lanes when AVX512DQ is unavailable. The
        // opmask is moved to a general register, reduced to its low lane bits
        // and compared against the all-ones pattern of that width. Only src1
        // is read.
22461   predicate((Matcher::vector_length(n->in(1)) < 8 ||
22462              (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
22463             static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow);
22464   match(Set cr (VectorTest src1 src2));
22465   format %{ "ktest_alltrue_le8  $src1, $src2\t! using $tmp as TEMP" %}
22466   effect(TEMP tmp);
22467   ins_encode %{
          // One bit per mask lane, all set.
22468     const int lane_bits = (1 << Matcher::vector_length(this, $src1)) - 1;
22469     __ kmovwl($tmp$$Register, $src1$$KRegister);
22470     __ andl($tmp$$Register, lane_bits);
22471     __ cmpl($tmp$$Register, lane_bits);
22472   %}
22473   ins_pipe( pipe_slow );
22474 %}
22475 
22476 instruct ktest_anytrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
        // Any-true test (predicate BoolTest::ne) on an opmask with fewer than
        // 8 lanes, or exactly 8 lanes when AVX512DQ is unavailable. The opmask
        // is moved to a general register and reduced to its low lane bits; the
        // and instruction is the last one emitted, so its flags are the result.
        // Only src1 is read.
22477   predicate((Matcher::vector_length(n->in(1)) < 8 ||
22478              (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
22479             static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne);
22480   match(Set cr (VectorTest src1 src2));
22481   format %{ "ktest_anytrue_le8  $src1, $src2\t! using $tmp as TEMP" %}
22482   effect(TEMP tmp);
22483   ins_encode %{
          // One bit per mask lane, all set.
22484     const int lane_bits = (1 << Matcher::vector_length(this, $src1)) - 1;
22485     __ kmovwl($tmp$$Register, $src1$$KRegister);
22486     __ andl($tmp$$Register, lane_bits);
22487   %}
22488   ins_pipe( pipe_slow );
22489 %}
22490 
22491 instruct ktest_ge8(rFlagsRegU cr, kReg src1, kReg src2) %{
        // VectorTest on an opmask with at least 16 lanes, or exactly 8 lanes
        // when AVX512DQ is available. Unlike the le8 rules there is no check on
        // the test predicate, so this rule serves every predicate kind.
22492   predicate(Matcher::vector_length(n->in(1)) >= 16 ||
22493             (Matcher::vector_length(n->in(1)) == 8 && VM_Version::supports_avx512dq()));
22494   match(Set cr (VectorTest src1 src2));
22495   format %{ "ktest_ge8  $src1, $src2\n\t" %}
22496   ins_encode %{
22497     uint masklen = Matcher::vector_length(this, $src1);
          // src1 is tested against itself; src2 is not read.
22498     __ kortest(masklen, $src1$$KRegister, $src1$$KRegister);
22499   %}
22500   ins_pipe( pipe_slow );
22501 %}
22502 
22503 //------------------------------------- LoadMask --------------------------------------------
22504 
22505 instruct loadMask(legVec dst, legVec src) %{
        // VectorLoadMask producing a vector mask (result is not a predicate-mask
        // type) on CPUs without AVX512VL+BW. The work is delegated to the
        // load_vector_mask helper, sized by the result vector.
22506   predicate(n->bottom_type()->isa_pvectmask() == nullptr && !VM_Version::supports_avx512vlbw());
22507   match(Set dst (VectorLoadMask src));
22508   effect(TEMP dst);
22509   format %{ "vector_loadmask_byte $dst, $src\n\t" %}
22510   ins_encode %{
22511     int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
22512     BasicType elem_bt = Matcher::vector_element_basic_type(this);
          // NOTE(review): the meaning of the trailing true argument is not
          // visible here -- confirm against the helper.
22513     __ load_vector_mask($dst$$XMMRegister, $src$$XMMRegister, vlen_in_bytes, elem_bt, true);
22514   %}
22515   ins_pipe( pipe_slow );
22516 %}
22517 
22518 instruct loadMask64(kReg dst, vec src, vec xtmp) %{
        // VectorLoadMask producing an opmask register on CPUs without
        // AVX512VL+BW. The helper is always invoked with the 512-bit encoding
        // and uses xtmp as scratch.
22519   predicate(n->bottom_type()->isa_pvectmask() && !VM_Version::supports_avx512vlbw());
22520   match(Set dst (VectorLoadMask src));
22521   effect(TEMP xtmp);
22522   format %{ "vector_loadmask_64byte $dst, $src\t! using $xtmp as TEMP" %}
22523   ins_encode %{
          // NOTE(review): the boolean argument differs from the AVX512VL+BW
          // rule (true here, false there) -- confirm its meaning in the helper.
22524     __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
22525                         true, Assembler::AVX_512bit);
22526   %}
22527   ins_pipe( pipe_slow );
22528 %}
22529 
22530 instruct loadMask_evex(kReg dst, vec src,  vec xtmp) %{
        // VectorLoadMask producing an opmask register on CPUs with AVX512VL+BW.
        // xtmp is scratch for the helper.
22531   predicate(n->bottom_type()->isa_pvectmask() && VM_Version::supports_avx512vlbw());
22532   match(Set dst (VectorLoadMask src));
22533   effect(TEMP xtmp);
22534   format %{ "vector_loadmask_byte $dst, $src\t! using $xtmp as TEMP" %}
22535   ins_encode %{
          // The length encoding is taken from the input node in(1), not from
          // the opmask result.
22536     int vlen_enc = vector_length_encoding(in(1));
22537     __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
22538                         false, vlen_enc);
22539   %}
22540   ins_pipe( pipe_slow );
22541 %}
22542 
22543 //------------------------------------- StoreMask --------------------------------------------
22544 
22545 instruct vstoreMask1B(vec dst, vec src, immI_1 size) %{
        // VectorStoreMask for one-byte mask elements held in a vector (source is
        // not a predicate-mask type), for fewer than 64 lanes. A byte-wise
        // absolute value is applied to the source (assumes mask lanes are 0 or
        // -1 so the result is 0 or 1 -- TODO confirm).
22546   predicate(Matcher::vector_length(n) < 64 && n->in(1)->bottom_type()->isa_pvectmask() == nullptr);
22547   match(Set dst (VectorStoreMask src size));
22548   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
22549   ins_encode %{
22550     const int lanes = Matcher::vector_length(this);
22551     if (lanes > 16 || UseAVX > 2) {
            // Wide vectors, or AVX level above 2: use the form taking a length encoding.
22552       assert(UseAVX > 0, "required");
22553       const int src_enc = vector_length_encoding(this, $src);
22554       __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, src_enc);
22555     } else {
22556       assert(UseSSE >= 3, "required");
22557       __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
22558     }
22559   %}
22560   ins_pipe( pipe_slow );
22561 %}
22562 
22563 instruct vstoreMask2B(vec dst, vec src, vec xtmp, immI_2 size) %{
        // VectorStoreMask for two-byte mask elements held in a vector, up to 16
        // lanes. Each path applies an absolute value and a pack step to reach
        // one byte per lane (assumes mask lanes are 0 or -1 -- TODO confirm).
22564   predicate(Matcher::vector_length(n) <= 16 && n->in(1)->bottom_type()->isa_pvectmask() == nullptr);
22565   match(Set dst (VectorStoreMask src size));
22566   effect(TEMP_DEF dst, TEMP xtmp);
22567   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
22568   ins_encode %{
22569     int vlen_enc = Assembler::AVX_128bit;
22570     int vlen = Matcher::vector_length(this);
22571     if (vlen <= 8) {
22572       assert(UseSSE >= 3, "required");
            // xtmp is zeroed and used as the second pack operand.
22573       __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
22574       __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
22575       __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
22576     } else {
22577       assert(UseAVX > 0, "required");
            // Extract the upper 128 bits of src into dst, pack src with it,
            // then take the byte-wise absolute value; xtmp is unused here.
22578       __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
22579       __ vpacksswb($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22580       __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22581     }
22582   %}
22583   ins_pipe( pipe_slow );
22584 %}
22585 
22586 instruct vstoreMask4B(vec dst, vec src, vec xtmp, immI_4 size) %{
        // VectorStoreMask for four-byte mask elements held in a vector, up to
        // 8 lanes, when UseAVX is at most 2. Two pack steps plus an absolute
        // value reach one byte per lane (assumes mask lanes are 0 or -1 --
        // TODO confirm).
22587   predicate(UseAVX <= 2 && Matcher::vector_length(n) <= 8 && n->in(1)->bottom_type()->isa_pvectmask() == nullptr);
22588   match(Set dst (VectorStoreMask src size));
22589   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
22590   effect(TEMP_DEF dst, TEMP xtmp);
22591   ins_encode %{
22592     int vlen_enc = Assembler::AVX_128bit;
22593     int vlen = Matcher::vector_length(this);
22594     if (vlen <= 4) {
22595       assert(UseSSE >= 3, "required");
            // Absolute value first, then pack twice against the zeroed xtmp.
22596       __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
22597       __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
22598       __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister);
22599       __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
22600     } else {
22601       assert(UseAVX > 0, "required");
            // Extract the upper 128 bits of src into dst and pack src with it,
            // pack again against the zeroed xtmp, then take the absolute value.
22602       __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
22603       __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
22604       __ vpackssdw($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22605       __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
22606       __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22607     }
22608   %}
22609   ins_pipe( pipe_slow );
22610 %}
22611 
22612 instruct storeMask8B(vec dst, vec src, vec xtmp, immI_8 size) %{
        // VectorStoreMask for eight-byte mask elements, exactly two lanes, when
        // UseAVX is at most 2. Unlike the smaller-element rules, the predicate
        // does not test the mask kind of the input.
22613   predicate(UseAVX <= 2 && Matcher::vector_length(n) == 2);
22614   match(Set dst (VectorStoreMask src size));
22615   effect(TEMP_DEF dst, TEMP xtmp);
22616   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
22617   ins_encode %{
22618     assert(UseSSE >= 3, "required");
22619     __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
          // Shuffle control 0x8 presumably gathers one dword from each
          // quadword lane into the low half -- confirm against the ISA manual.
22620     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x8);
22621     __ pabsd($dst$$XMMRegister, $dst$$XMMRegister);
          // Pack twice against the zeroed xtmp.
22622     __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister);
22623     __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
22624   %}
22625   ins_pipe( pipe_slow );
22626 %}
22627 
22628 instruct storeMask8B_avx(vec dst, vec src, immI_8 size, vec vtmp) %{
        // VectorStoreMask for eight-byte mask elements, exactly four lanes,
        // when UseAVX is at most 2. The first shuffle is emitted with the
        // 256-bit encoding; every later step uses the 128-bit encoding.
22629   predicate(UseAVX <= 2 && Matcher::vector_length(n) == 4);
22630   match(Set dst (VectorStoreMask src size));
22631   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s], using $vtmp as TEMP" %}
22632   effect(TEMP_DEF dst, TEMP vtmp);
22633   ins_encode %{
22634     int vlen_enc = Assembler::AVX_128bit;
          // Shuffle within each 128-bit half, then pull the upper half into
          // vtmp and blend it into dst (presumably leaving one dword per
          // original lane in the low 128 bits -- confirm).
22635     __ vshufps($dst$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 0x88, Assembler::AVX_256bit);
22636     __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
22637     __ vblendps($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0xC, vlen_enc);
          // vtmp is reused as a zero operand for the two pack steps.
22638     __ vpxor($vtmp$$XMMRegister, $vtmp$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
22639     __ vpackssdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
22640     __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
22641     __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22642   %}
22643   ins_pipe( pipe_slow );
22644 %}
22645 
22646 instruct vstoreMask4B_evex_novectmask(vec dst, vec src, immI_4 size) %{
        // VectorStoreMask for four-byte mask elements held in a vector when
        // UseAVX is above 2: evpmovdb followed by a byte-wise absolute value.
22647   predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_pvectmask() == nullptr);
22648   match(Set dst (VectorStoreMask src size));
22649   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
22650   ins_encode %{
22651     const int dst_enc = vector_length_encoding(this);
22652     int src_enc = vector_length_encoding(this, $src);
22653     if (!VM_Version::supports_avx512vl()) {
            // Without AVX512VL the source side always uses the 512-bit encoding.
22654       src_enc = Assembler::AVX_512bit;
22655     }
22656     __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_enc);
22657     __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_enc);
22658   %}
22659   ins_pipe( pipe_slow );
22660 %}
22661 
22662 instruct vstoreMask8B_evex_novectmask(vec dst, vec src, immI_8 size) %{
        // VectorStoreMask for eight-byte mask elements held in a vector when
        // UseAVX is above 2: evpmovqb followed by a byte-wise absolute value.
22663   predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_pvectmask() == nullptr);
22664   match(Set dst (VectorStoreMask src size));
22665   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
22666   ins_encode %{
22667     const int dst_enc = vector_length_encoding(this);
22668     int src_enc = vector_length_encoding(this, $src);
22669     if (!VM_Version::supports_avx512vl()) {
            // Without AVX512VL the source side always uses the 512-bit encoding.
22670       src_enc = Assembler::AVX_512bit;
22671     }
22672     __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, src_enc);
22673     __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_enc);
22674   %}
22675   ins_pipe( pipe_slow );
22676 %}
22677 
22678 instruct vstoreMask_evex_vectmask(vec dst, kReg mask, immI size) %{
        // VectorStoreMask from an opmask register on CPUs without AVX512VL+BW.
        // The size operand accepts any int constant and is not used by the
        // encoding; the mask input is asserted to be 64 bytes long.
22679   predicate(n->in(1)->bottom_type()->isa_pvectmask() && !VM_Version::supports_avx512vlbw());
22680   match(Set dst (VectorStoreMask mask size));
22681   effect(TEMP_DEF dst);
22682   format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
22683   ins_encode %{
22684     assert(Matcher::vector_length_in_bytes(this, $mask) == 64, "");
          // Masked load of the vector_int_mask_cmp_bits constant under the
          // opmask (merge argument false), followed by evpmovdb on dst
          // (presumably narrowing dwords to bytes -- confirm).
22685     __ evmovdqul($dst$$XMMRegister, $mask$$KRegister, ExternalAddress(vector_int_mask_cmp_bits()),
22686                  false, Assembler::AVX_512bit, noreg);
22687     __ evpmovdb($dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_512bit);
22688   %}
22689   ins_pipe( pipe_slow );
22690 %}
22691 
22692 instruct vstoreMask_evex(vec dst, kReg mask, immI size) %{
        // VectorStoreMask from an opmask register on CPUs with AVX512VL+BW.
        // The size operand accepts any int constant and is not used by the
        // encoding.
22693   predicate(n->in(1)->bottom_type()->isa_pvectmask() && VM_Version::supports_avx512vlbw());
22694   match(Set dst (VectorStoreMask mask size));
22695   effect(TEMP_DEF dst);
22696   format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
22697   ins_encode %{
22698     int dst_vlen_enc = vector_length_encoding(this);
          // Expand the opmask into a byte vector, then take the byte-wise
          // absolute value (presumably turning -1 lanes into 1 -- confirm).
22699     __ evpmovm2b($dst$$XMMRegister, $mask$$KRegister, dst_vlen_enc);
22700     __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
22701   %}
22702   ins_pipe( pipe_slow );
22703 %}
22704 
22705 instruct vmaskcast_evex(kReg dst) %{
        // VectorMaskCast on an opmask register. Input and output share the
        // same register, the cost is zero and no code is emitted.
22706   match(Set dst (VectorMaskCast dst));
22707   ins_cost(0);
22708   format %{ "vector_mask_cast $dst" %}
22709   ins_encode %{
22710     // empty
22711   %}
22712   ins_pipe(empty);
22713 %}
22714 
22715 instruct vmaskcast(vec dst) %{
        // VectorMaskCast on a vector mask when the input and the result have
        // the same size in bytes. Input and output share the same register,
        // the cost is zero and no code is emitted.
22716   predicate(Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1)));
22717   match(Set dst (VectorMaskCast dst));
22718   ins_cost(0);
22719   format %{ "vector_mask_cast $dst" %}
22720   ins_encode %{
22721     // empty
22722   %}
22723   ins_pipe(empty);
22724 %}
22725 
22726 instruct vmaskcast_avx(vec dst, vec src) %{
        // VectorMaskCast on a vector mask when the input and the result differ
        // in size in bytes. The conversion is delegated to the vector_mask_cast
        // helper with the destination and source element types and the lane
        // count of the result.
22727   predicate(Matcher::vector_length_in_bytes(n) != Matcher::vector_length_in_bytes(n->in(1)));
22728   match(Set dst (VectorMaskCast src));
22729   format %{ "vector_mask_cast $dst, $src" %}
22730   ins_encode %{
22731     const BasicType to_bt = Matcher::vector_element_basic_type(this);
22732     const BasicType from_bt = Matcher::vector_element_basic_type(this, $src);
22733     const int lanes = Matcher::vector_length(this);
22734     __ vector_mask_cast($dst$$XMMRegister, $src$$XMMRegister, to_bt, from_bt, lanes);
22735   %}
22736   ins_pipe(pipe_slow);
22737 %}
22738 
22739 //-------------------------------- Load Iota Indices ----------------------------------
22740 
22741 instruct loadIotaIndices(vec dst, immI_0 src) %{
        // VectorLoadConst with a constant-zero operand: fills dst through the
        // load_iota_indices helper, sized and typed by the result vector.
22742   match(Set dst (VectorLoadConst src));
22743   format %{ "vector_load_iota $dst CONSTANT_MEMORY\t! load iota indices" %}
22744   ins_encode %{
22745     const int num_bytes = Matcher::vector_length_in_bytes(this);
22746     const BasicType elem_bt = Matcher::vector_element_basic_type(this);
22747     __ load_iota_indices($dst$$XMMRegister, num_bytes, elem_bt);
22748   %}
22749   ins_pipe( pipe_slow );
22750 %}
22751 
22752 instruct VectorPopulateIndex(vec dst, rRegI src1, immI_1 src2, vec vtmp) %{
        // PopulateIndex with an int start value src1 and a constant step of 1:
        // src1 is broadcast into vtmp, the iota indices are loaded into dst,
        // and the two are added lane-wise.
22753   match(Set dst (PopulateIndex src1 src2));
22754   format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
22755   effect(TEMP dst, TEMP vtmp);
22756   ins_encode %{
22757     assert($src2$$constant == 1, "required");
22758     const int num_bytes = Matcher::vector_length_in_bytes(this);
22759     const int enc = vector_length_encoding(this);
22760     const BasicType bt = Matcher::vector_element_basic_type(this);
22761     __ vpbroadcast(bt, $vtmp$$XMMRegister, $src1$$Register, enc);
22762     __ load_iota_indices($dst$$XMMRegister, num_bytes, bt);
22763     __ vpadd(bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, enc);
22764   %}
22765   ins_pipe( pipe_slow );
22766 %}
22767 
22768 instruct VectorPopulateLIndex(vec dst, rRegL src1, immI_1 src2, vec vtmp) %{
        // PopulateIndex with a long start value src1 and a constant step of 1:
        // src1 is broadcast into vtmp, the iota indices are loaded into dst,
        // and the two are added lane-wise.
22769   match(Set dst (PopulateIndex src1 src2));
22770   format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
22771   effect(TEMP dst, TEMP vtmp);
22772   ins_encode %{
22773     assert($src2$$constant == 1, "required");
22774     const int num_bytes = Matcher::vector_length_in_bytes(this);
22775     const int enc = vector_length_encoding(this);
22776     const BasicType bt = Matcher::vector_element_basic_type(this);
22777     __ vpbroadcast(bt, $vtmp$$XMMRegister, $src1$$Register, enc);
22778     __ load_iota_indices($dst$$XMMRegister, num_bytes, bt);
22779     __ vpadd(bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, enc);
22780   %}
22781   ins_pipe( pipe_slow );
22782 %}
22783 
22784 //-------------------------------- Rearrange ----------------------------------
22785 
22786 // LoadShuffle/Rearrange for Byte
22787 instruct rearrangeB(vec dst, vec shuffle) %{
        // In-place VectorRearrange for byte vectors with fewer than 32 lanes:
        // dst is both the data source and the result, permuted by a single
        // pshufb using shuffle as the control vector.
22788   predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
22789             Matcher::vector_length(n) < 32);
22790   match(Set dst (VectorRearrange dst shuffle));
22791   format %{ "vector_rearrange $dst, $shuffle, $dst" %}
22792   ins_encode %{
22793     assert(UseSSE >= 4, "required");
22794     __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
22795   %}
22796   ins_pipe( pipe_slow );
22797 %}
22798 
22799 instruct rearrangeB_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
        // VectorRearrange for 32-lane byte vectors when AVX512_VBMI is not
        // available. Both the original and a half-swapped copy of src are
        // shuffled, and a blend then picks, per byte, the copy that holds the
        // requested element.
22800   predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
22801             Matcher::vector_length(n) == 32 && !VM_Version::supports_avx512_vbmi());
22802   match(Set dst (VectorRearrange src shuffle));
22803   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
22804   format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
22805   ins_encode %{
22806     assert(UseAVX >= 2, "required");
22807     // Swap src into vtmp1
22808     __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
22809     // Shuffle swapped src to get entries from other 128 bit lane
22810     __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
22811     // Shuffle original src to get entries from self 128 bit lane
22812     __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
22813     // Create a blend mask by setting high bits for entries coming from other lane in shuffle
          // (the addend is the constant at vector_byte_shufflemask)
22814     __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg);
22815     // Perform the blend
22816     __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
22817   %}
22818   ins_pipe( pipe_slow );
22819 %}
22820 
22821 
// Byte VectorRearrange for more than 32 elements when AVX512_VBMI is not
// available. The whole permutation is delegated to the rearrange_bytes
// macro-assembler helper, which receives three vector temporaries, one GPR
// temporary and one opmask temporary (all declared TEMP, as is dst).
22822 instruct rearrangeB_evex(vec dst, vec src, vec shuffle, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegI rtmp) %{
22823   predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
22824             Matcher::vector_length(n) > 32 && !VM_Version::supports_avx512_vbmi());
22825   match(Set dst (VectorRearrange src shuffle));
22826   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
        // Format fixed to the "\t! using ..." form used by the sibling rearrange rules.
22827   format %{ "vector_rearrange $dst, $shuffle, $src\t! using $xtmp1, $xtmp2, $xtmp3, $rtmp and $ktmp as TEMP" %}
22828   ins_encode %{
22829     int vlen_enc = vector_length_encoding(this);
22830     __ rearrange_bytes($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister,
22831                        $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister,
22832                        $rtmp$$Register, $ktmp$$KRegister, vlen_enc);
22833   %}
22834   ins_pipe( pipe_slow );
22835 %}
22836 
// Byte VectorRearrange for 32 or more elements on CPUs that report AVX512_VBMI:
// a single vpermb at the node's vector length encoding, no temporaries.
22837 instruct rearrangeB_evex_vbmi(vec dst, vec src, vec shuffle) %{
22838   predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
22839             Matcher::vector_length(n) >= 32 && VM_Version::supports_avx512_vbmi());
22840   match(Set dst (VectorRearrange src shuffle));
22841   format %{ "vector_rearrange $dst, $shuffle, $src" %}
22842   ins_encode %{
22843     int perm_enc = vector_length_encoding(this);
22844     __ vpermb($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, perm_enc);
22845   %}
22846   ins_pipe( pipe_slow );
22847 %}
22848 
22849 // LoadShuffle/Rearrange for Short
22850 
// VectorLoadShuffle for short elements when AVX512BW is not available.
// Expands every short-lane index into a pair of byte indices so that the result
// can drive a byte shuffle. Has an SSE path (UseAVX == 0) and an AVX path.
22851 instruct loadShuffleS(vec dst, vec src, vec vtmp) %{
22852   predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
22853             !VM_Version::supports_avx512bw());
22854   match(Set dst (VectorLoadShuffle src));
22855   effect(TEMP dst, TEMP vtmp);
22856   format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
22857   ins_encode %{
22858     // Only a byte-granular shuffle instruction exists on these platforms,
22859     // so the short shuffle is turned into an equivalent byte shuffle.
22860     int vec_bytes = Matcher::vector_length_in_bytes(this);
22861     if (UseAVX == 0) {
22862       assert(vec_bytes <= 16, "required");
22863       // vtmp = index * 2, i.e. the byte offset of the selected short
22864       __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister);
22865       __ psllw($vtmp$$XMMRegister, 1);
22866
22867       // dst = the byte offset replicated into both bytes of each short
22868       __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
22869       __ psllw($dst$$XMMRegister, 8);
22870       __ por($dst$$XMMRegister, $vtmp$$XMMRegister);
22871
22872       // Bump one byte of every pair so it addresses the other half of the short
22873       __ movdqu($vtmp$$XMMRegister, ExternalAddress(vector_short_shufflemask()), noreg);
22874       __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
22875     } else {
22876       assert(UseAVX > 1 || vec_bytes <= 16, "required");
22877       int vec_enc = vector_length_encoding(this);
22878       // vtmp = index * 2, i.e. the byte offset of the selected short
22879       __ vpsllw($vtmp$$XMMRegister, $src$$XMMRegister, 1, vec_enc);
22880
22881       // dst = the byte offset replicated into both bytes of each short
22882       __ vpsllw($dst$$XMMRegister, $vtmp$$XMMRegister,  8, vec_enc);
22883       __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vec_enc);
22884
22885       // Bump one byte of every pair so it addresses the other half of the short
22886       __ vpaddb($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_shufflemask()), vec_enc, noreg);
22887     }
22888   %}
22889   ins_pipe( pipe_slow );
22890 %}
22891 
// Short VectorRearrange for at most 8 elements when AVX512BW is not available.
// Two-operand form (dst is input and result) implemented with one pshufb.
// NOTE(review): the shuffle operand is presumably the byte-granular shuffle
// produced by loadShuffleS — confirm against the matcher.
22892 instruct rearrangeS(vec dst, vec shuffle) %{
22893   predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
22894             Matcher::vector_length(n) <= 8 && !VM_Version::supports_avx512bw());
22895   match(Set dst (VectorRearrange dst shuffle));
22896   format %{ "vector_rearrange $dst, $shuffle, $dst" %}
22897   ins_encode %{
22898     assert(UseSSE >= 4, "required");
22899     __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
22900   %}
22901   ins_pipe( pipe_slow );
22902 %}
22903 
// Short VectorRearrange for exactly 16 elements when AVX512BW is not available.
// Same byte-level scheme as the 32-byte rule: shuffle src and a half-swapped
// copy of src with vpshufb, then blend the two results per byte.
22904 instruct rearrangeS_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
22905   predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
22906             Matcher::vector_length(n) == 16 && !VM_Version::supports_avx512bw());
22907   match(Set dst (VectorRearrange src shuffle));
22908   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
22909   format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
22910   ins_encode %{
22911     assert(UseAVX >= 2, "required");
22912     // Swap src into vtmp1
22913     __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
22914     // Shuffle swapped src to get entries from other 128 bit lane
22915     __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
22916     // Shuffle original src to get entries from self 128 bit lane
22917     __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
22918     // Create a blend mask by setting high bits for entries coming from other lane in shuffle
22919     __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg);
22920     // Perform the blend
          // (vtmp2 is the selector: it chooses between dst and vtmp1 for each byte)
22921     __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
22922   %}
22923   ins_pipe( pipe_slow );
22924 %}
22925 
// Short VectorRearrange when AVX512BW is available: a single vpermw.
22926 instruct rearrangeS_evex(vec dst, vec src, vec shuffle) %{
22927   predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
22928             VM_Version::supports_avx512bw());
22929   match(Set dst (VectorRearrange src shuffle));
22930   format %{ "vector_rearrange $dst, $shuffle, $src" %}
22931   ins_encode %{
22932     int perm_enc = vector_length_encoding(this);
          // Without AVX512VL the permute is always issued at 512 bits
          // (presumably because the narrower EVEX forms require VL).
22933     if (!VM_Version::supports_avx512vl()) {
22934       perm_enc = Assembler::AVX_512bit;
22935     }
22936     __ vpermw($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, perm_enc);
22937   %}
22938   ins_pipe( pipe_slow );
22939 %}
22940 
22941 // LoadShuffle/Rearrange for Integer and Float
22942 
// VectorLoadShuffle for 4-element int/float vectors on the SSE-only path
// (UseAVX == 0). Expands every int-lane index into four byte indices so the
// result can drive a byte shuffle. dst and vtmp are both TEMP.
22943 instruct loadShuffleI(vec dst, vec src, vec vtmp) %{
22944   predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
22945             Matcher::vector_length(n) == 4 && UseAVX == 0);
22946   match(Set dst (VectorLoadShuffle src));
22947   effect(TEMP dst, TEMP vtmp);
22948   format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
22949   ins_encode %{
22950     assert(UseSSE >= 4, "required");
22951
22952     // Create a byte shuffle mask from int shuffle mask
22953     // only byte shuffle instruction available on these platforms
22954
22955     // Duplicate and multiply each shuffle by 4
22956     __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister);
22957     __ pshuflw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
22958     __ pshufhw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
22959     __ psllw($vtmp$$XMMRegister, 2);
22960
22961     // Duplicate again to create 4 copies of byte index
22962     __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
22963     __ psllw($dst$$XMMRegister, 8);
22964     __ por($vtmp$$XMMRegister, $dst$$XMMRegister);
22965
22966     // Add 3,2,1,0 to get alternate byte index
          // (the constant is loaded into dst, so the final sum ends up in dst)
22967     __ movdqu($dst$$XMMRegister, ExternalAddress(vector_int_shufflemask()), noreg);
22968     __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
22969   %}
22970   ins_pipe( pipe_slow );
22971 %}
22972 
// Int/float VectorRearrange on the SSE-only path (UseAVX == 0).
// Two-operand form (dst is input and result) implemented with one pshufb.
// NOTE(review): the shuffle operand is presumably the byte-granular shuffle
// produced by loadShuffleI — confirm against the matcher.
22973 instruct rearrangeI(vec dst, vec shuffle) %{
22974   predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
22975             UseAVX == 0);
22976   match(Set dst (VectorRearrange dst shuffle));
22977   format %{ "vector_rearrange $dst, $shuffle, $dst" %}
22978   ins_encode %{
22979     assert(UseSSE >= 4, "required");
22980     __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
22981   %}
22982   ins_pipe( pipe_slow );
22983 %}
22984 
// Int/float VectorRearrange whenever AVX is enabled (UseAVX > 0). The element
// type is forwarded to the vector_rearrange_int_float helper, which emits the
// actual permute.
22985 instruct rearrangeI_avx(vec dst, vec src, vec shuffle) %{
22986   predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
22987             UseAVX > 0);
22988   match(Set dst (VectorRearrange src shuffle));
22989   format %{ "vector_rearrange $dst, $shuffle, $src" %}
22990   ins_encode %{
22991     BasicType elem_bt = Matcher::vector_element_basic_type(this);
22992     int vec_enc = vector_length_encoding(this);
22993     __ vector_rearrange_int_float(elem_bt, $dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vec_enc);
22994   %}
22995   ins_pipe( pipe_slow );
22996 %}
22997 
22998 // LoadShuffle/Rearrange for Long and Double
22999 
// VectorLoadShuffle for long/double vectors of fewer than 8 elements when
// AVX512VL is not available. Expands every 64-bit lane index into a pair of
// 32-bit lane indices so the result can drive a double-word permute.
23000 instruct loadShuffleL(vec dst, vec src, vec vtmp) %{
23001   predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
23002             Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
23003   match(Set dst (VectorLoadShuffle src));
23004   effect(TEMP dst, TEMP vtmp);
23005   format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
23006   ins_encode %{
23007     assert(UseAVX >= 2, "required");
23008
23009     int vec_enc = vector_length_encoding(this);
23010     // Only a double-word shuffle instruction exists on these platforms,
23011     // so the long shuffle is turned into an equivalent double-word shuffle.
23012
23013     // vtmp = index * 2, i.e. the double-word position of the selected long
23014     __ vpsllq($vtmp$$XMMRegister, $src$$XMMRegister, 1, vec_enc);
23015
23016     // dst = that position replicated into both double words of each long
23017     __ vpsllq($dst$$XMMRegister, $vtmp$$XMMRegister, 32, vec_enc);
23018     __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vec_enc);
23019
23020     // Bump one double word of every pair so it addresses the other half
23021     __ vpaddd($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_long_shufflemask()), vec_enc, noreg);
23022   %}
23023   ins_pipe( pipe_slow );
23024 %}
23025 
// Long/double VectorRearrange for fewer than 8 elements when AVX512VL is not
// available; implemented with a double-word permute (vpermd).
// NOTE(review): the shuffle operand is presumably the double-word shuffle
// produced by loadShuffleL — confirm against the matcher.
23026 instruct rearrangeL(vec dst, vec src, vec shuffle) %{
23027   predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
23028             Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
23029   match(Set dst (VectorRearrange src shuffle));
23030   format %{ "vector_rearrange $dst, $shuffle, $src" %}
23031   ins_encode %{
23032     assert(UseAVX >= 2, "required");
23033
23034     int perm_enc = vector_length_encoding(this);
23035     __ vpermd($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, perm_enc);
23036   %}
23037   ins_pipe( pipe_slow );
23038 %}
23039 
// Long/double VectorRearrange for 8-element vectors, or any length when
// AVX512VL is available; implemented with a quad-word permute (vpermq).
23040 instruct rearrangeL_evex(vec dst, vec src, vec shuffle) %{
23041   predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
23042             (Matcher::vector_length(n) == 8 || VM_Version::supports_avx512vl()));
23043   match(Set dst (VectorRearrange src shuffle));
23044   format %{ "vector_rearrange $dst, $shuffle, $src" %}
23045   ins_encode %{
23046     assert(UseAVX > 2, "required");
23047
23048     int perm_enc = vector_length_encoding(this);
          // A 128-bit request is widened to 256 bits (presumably because vpermq
          // has no 128-bit form — confirm against the ISA reference).
23049     if (perm_enc == Assembler::AVX_128bit) {
23050       perm_enc = Assembler::AVX_256bit;
23051     }
23052     __ vpermq($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, perm_enc);
23053   %}
23054   ins_pipe( pipe_slow );
23055 %}
23056 
23057 // --------------------------------- FMA --------------------------------------
23058 // a * b + c
23059 
// Packed-float fused multiply-add, all operands in registers: c = a * b + c.
// c is both the addend input and the result register.
23060 instruct vfmaF_reg(vec a, vec b, vec c) %{
23061   match(Set c (FmaVF  c (Binary a b)));
23062   format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
23063   ins_cost(150);
23064   ins_encode %{
23065     assert(UseFMA, "not enabled");
23066     int fma_enc = vector_length_encoding(this);
23067     __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, fma_enc);
23068   %}
23069   ins_pipe( pipe_slow );
23070 %}
23071 
// Packed-float fused multiply-add with the second multiplicand folded in from
// memory (LoadVector b): c = a * [b] + c. Only selected when the node's first
// input vector is wider than 8 bytes.
23072 instruct vfmaF_mem(vec a, memory b, vec c) %{
23073   predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
23074   match(Set c (FmaVF  c (Binary a (LoadVector b))));
23075   format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
23076   ins_cost(150);
23077   ins_encode %{
23078     assert(UseFMA, "not enabled");
23079     int fma_enc = vector_length_encoding(this);
23080     __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, fma_enc);
23081   %}
23082   ins_pipe( pipe_slow );
23083 %}
23084 
// Packed-double fused multiply-add, all operands in registers: c = a * b + c.
// c is both the addend input and the result register.
23085 instruct vfmaD_reg(vec a, vec b, vec c) %{
23086   match(Set c (FmaVD  c (Binary a b)));
23087   format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
23088   ins_cost(150);
23089   ins_encode %{
23090     assert(UseFMA, "not enabled");
23091     int fma_enc = vector_length_encoding(this);
23092     __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, fma_enc);
23093   %}
23094   ins_pipe( pipe_slow );
23095 %}
23096 
// Packed-double fused multiply-add with the second multiplicand folded in from
// memory (LoadVector b): c = a * [b] + c. Only selected when the node's first
// input vector is wider than 8 bytes.
23097 instruct vfmaD_mem(vec a, memory b, vec c) %{
23098   predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
23099   match(Set c (FmaVD  c (Binary a (LoadVector b))));
23100   format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
23101   ins_cost(150);
23102   ins_encode %{
23103     assert(UseFMA, "not enabled");
23104     int fma_enc = vector_length_encoding(this);
23105     __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, fma_enc);
23106   %}
23107   ins_pipe( pipe_slow );
23108 %}
23109 
23110 // --------------------------------- Vector Multiply Add --------------------------------------
23111 
// MulAddVS2VI (packed short -> int multiply-add) on the SSE-only path
// (UseAVX == 0). Two-operand form: dst is the first input and the result,
// encoded as a single pmaddwd.
23112 instruct vmuladdS2I_reg_sse(vec dst, vec src1) %{
23113   predicate(UseAVX == 0);
23114   match(Set dst (MulAddVS2VI dst src1));
23115   format %{ "pmaddwd $dst,$src1\t! muladd packedStoI" %}
23116   ins_encode %{
23117     __ pmaddwd($dst$$XMMRegister, $src1$$XMMRegister);
23118   %}
23119   ins_pipe( pipe_slow );
23120 %}
23121 
// MulAddVS2VI (packed short -> int multiply-add) when AVX is enabled:
// three-operand vpmaddwd at the node's vector length encoding.
23122 instruct vmuladdS2I_reg_avx(vec dst, vec src1, vec src2) %{
23123   predicate(UseAVX > 0);
23124   match(Set dst (MulAddVS2VI src1 src2));
23125   format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packedStoI" %}
23126   ins_encode %{
23127     int vec_enc = vector_length_encoding(this);
23128     __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vec_enc);
23129   %}
23130   ins_pipe( pipe_slow );
23131 %}
23132 
23133 // --------------------------------- Vector Multiply Add Add ----------------------------------
23134 
// Fuses AddVI(MulAddVS2VI(src1, src2), dst) into one evpdpwssd when the CPU
// reports AVX512_VNNI. dst is the accumulator input and the result; the low
// ins_cost(10) is what this rule declares for the fused form.
23135 instruct vmuladdaddS2I_reg(vec dst, vec src1, vec src2) %{
23136   predicate(VM_Version::supports_avx512_vnni());
23137   match(Set dst (AddVI (MulAddVS2VI src1 src2) dst));
23138   format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packedStoI" %}
23139   ins_encode %{
23140     assert(UseAVX > 2, "required");
23141     int vec_enc = vector_length_encoding(this);
23142     __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vec_enc);
23143   %}
23144   ins_cost(10);
23145   ins_pipe( pipe_slow );
23146 %}
23147 
23148 // --------------------------------- PopCount --------------------------------------
23149 
// Unmasked PopCountVI / PopCountVL for element types accepted by
// is_vector_popcount_predicate. Vector length and element type are taken from
// the src operand; k0 is passed as the opmask argument of the helper.
// (The previously computed but never used `opcode` local has been dropped.)
23150 instruct vpopcount_integral_reg_evex(vec dst, vec src) %{
23151   predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
23152   match(Set dst (PopCountVI src));
23153   match(Set dst (PopCountVL src));
23154   format %{ "vector_popcount_integral $dst, $src" %}
23155   ins_encode %{
23157     int vlen_enc = vector_length_encoding(this, $src);
23158     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23159     __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, k0, true, vlen_enc);
23160   %}
23161   ins_pipe( pipe_slow );
23162 %}
23163 
// Masked PopCountVI / PopCountVL (mask in an opmask register) for element
// types accepted by is_vector_popcount_predicate.
23164 instruct vpopcount_integral_reg_evex_masked(vec dst, vec src, kReg mask) %{
23165   predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
23166   match(Set dst (PopCountVI src mask));
23167   match(Set dst (PopCountVL src mask));
23168   format %{ "vector_popcount_integral_masked $dst, $src, $mask" %}
23169   ins_encode %{
23170     BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
23171     int vec_enc = vector_length_encoding(this, $src);
          // dst is pre-loaded with src before the masked popcount
          // (presumably so that lanes not selected by the mask keep src's value).
23172     __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vec_enc);
23173     __ vector_popcount_integral_evex(elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, true, vec_enc);
23174   %}
23175   ins_pipe( pipe_slow );
23176 %}
23177 
// Fallback PopCountVI / PopCountVL for element types rejected by
// is_vector_popcount_predicate. Delegates to vector_popcount_integral, which is
// given two vector temporaries and a pointer-register temporary.
// (The previously computed but never used `opcode` local has been dropped.)
23178 instruct vpopcount_avx_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegP rtmp) %{
23179   predicate(!is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
23180   match(Set dst (PopCountVI src));
23181   match(Set dst (PopCountVL src));
23182   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
23183   format %{ "vector_popcount_integral $dst, $src\t! using $xtmp1, $xtmp2, and $rtmp as TEMP" %}
23184   ins_encode %{
23186     int vlen_enc = vector_length_encoding(this, $src);
23187     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23188     __ vector_popcount_integral(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23189                                 $xtmp2$$XMMRegister, $rtmp$$Register, vlen_enc);
23190   %}
23191   ins_pipe( pipe_slow );
23192 %}
23193 
23194 // --------------------------------- Vector Trailing Zeros Count --------------------------------------
23195 
// CountTrailingZerosV for inputs accepted by is_clz_non_subword_predicate_evex
// (checked on the input's element type and byte length). Only one vector
// temporary is needed; the helper's other vector-temporary slots get xnoreg
// and the opmask argument is k0.
23196 instruct vcount_trailing_zeros_reg_evex(vec dst, vec src, vec xtmp, rRegP rtmp) %{
23197   predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
23198                                               Matcher::vector_length_in_bytes(n->in(1))));
23199   match(Set dst (CountTrailingZerosV src));
23200   effect(TEMP dst, TEMP xtmp, TEMP rtmp);
23201   ins_cost(400);
        // Format fixed to the "\t! using ..." form used by the AVX variant of this rule.
23202   format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp and $rtmp as TEMP" %}
23203   ins_encode %{
23204     int vlen_enc = vector_length_encoding(this, $src);
23205     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23206     __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
23207                                         xnoreg, xnoreg, $xtmp$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
23208   %}
23209   ins_pipe( pipe_slow );
23210 %}
23211 
// CountTrailingZerosV for short elements when AVX512CD is available and either
// AVX512VL is available or the vector is 64 bytes long. Three vector
// temporaries are handed to the helper; its remaining vector slot gets xnoreg
// and the opmask argument is k0.
23212 instruct vcount_trailing_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
23213   predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
23214             VM_Version::supports_avx512cd() &&
23215             (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
23216   match(Set dst (CountTrailingZerosV src));
23217   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
23218   ins_cost(400);
        // Format fixed to the "\t! using ..." form used by the AVX variant of this rule.
23219   format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3 and $rtmp as TEMP" %}
23220   ins_encode %{
23221     int vlen_enc = vector_length_encoding(this, $src);
23222     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23223     __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23224                                         $xtmp2$$XMMRegister, xnoreg, $xtmp3$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
23225   %}
23226   ins_pipe( pipe_slow );
23227 %}
23228 
// CountTrailingZerosV for byte elements when AVX512VL+BW is available. This
// variant supplies the helper with all four vector temporaries plus a real
// opmask temporary (ktmp) and a pointer-register temporary.
23229 instruct vcount_trailing_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, kReg ktmp, rRegP rtmp) %{
23230   predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
23231   match(Set dst (CountTrailingZerosV src));
23232   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP ktmp, TEMP rtmp);
23233   ins_cost(400);
        // Format fixed to the "\t! using ..." form used by the AVX variant of this rule.
23234   format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4, $ktmp and $rtmp as TEMP" %}
23235   ins_encode %{
23236     int vlen_enc = vector_length_encoding(this, $src);
23237     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23238     __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23239                                         $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
23240                                         $ktmp$$KRegister, $rtmp$$Register, vlen_enc);
23241   %}
23242   ins_pipe( pipe_slow );
23243 %}
23244 
// CountTrailingZerosV when AVX512VL is not available and the input vector is
// shorter than 64 bytes. Delegates to vector_count_trailing_zeros_avx with
// three vector temporaries and a pointer-register temporary.
23245 instruct vcount_trailing_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
23246   predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
23247   match(Set dst (CountTrailingZerosV src));
23248   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
23249   format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
23250   ins_encode %{
23251     BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
23252     int vec_enc = vector_length_encoding(this, $src);
23253     __ vector_count_trailing_zeros_avx(elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23254                                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vec_enc);
23255   %}
23256   ins_pipe( pipe_slow );
23257 %}
23258 
23259 
23260 // --------------------------------- Bitwise Ternary Logic ----------------------------------
23261 
// MacroLogicV with all three inputs in registers. dst is the first logic input
// and the result; the immU8 func operand is handed to vpternlogd as its
// immediate.
23262 instruct vpternlog(vec dst, vec src2, vec src3, immU8 func) %{
23263   match(Set dst (MacroLogicV (Binary dst src2) (Binary src3 func)));
23264   effect(TEMP dst);
23265   format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
23266   ins_encode %{
23267     int vec_enc = vector_length_encoding(this);
23268     __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$XMMRegister, vec_enc);
23269   %}
23270   ins_pipe( pipe_slow );
23271 %}
23272 
// MacroLogicV with the third input folded in from memory (LoadVector src3).
// Only selected when the vector feeding the first Binary is wider than 8 bytes.
23273 instruct vpternlog_mem(vec dst, vec src2, memory src3, immU8 func) %{
23274   predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) > 8);
23275   match(Set dst (MacroLogicV (Binary dst src2) (Binary (LoadVector src3) func)));
23276   effect(TEMP dst);
23277   format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
23278   ins_encode %{
23279     int vec_enc = vector_length_encoding(this);
23280     __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$Address, vec_enc);
23281   %}
23282   ins_pipe( pipe_slow );
23283 %}
23284 
23285 // --------------------------------- Rotation Operations ----------------------------------
// Vector rotate by an 8-bit immediate. One rule serves both RotateLeftV and
// RotateRightV: the ideal opcode is forwarded to vprotate_imm so the helper can
// tell the two apart.
23286 instruct vprotate_immI8(vec dst, vec src, immI8 shift) %{
23287   match(Set dst (RotateLeftV src shift));
23288   match(Set dst (RotateRightV src shift));
23289   format %{ "vprotate_imm8 $dst,$src,$shift\t! vector rotate" %}
23290   ins_encode %{
23291     BasicType elem_bt = this->bottom_type()->is_vect()->element_basic_type();
23292     int rot_opcode    = this->ideal_Opcode();
23293     int vec_enc       = vector_length_encoding(this);
23294     __ vprotate_imm(rot_opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vec_enc);
23295   %}
23296   ins_pipe( pipe_slow );
23297 %}
23298 
// Vector rotate by per-lane amounts held in a vector register. One rule serves
// both RotateLeftV and RotateRightV: the ideal opcode is forwarded to
// vprotate_var so the helper can tell the two apart.
23299 instruct vprorate(vec dst, vec src, vec shift) %{
23300   match(Set dst (RotateLeftV src shift));
23301   match(Set dst (RotateRightV src shift));
23302   format %{ "vprotate $dst,$src,$shift\t! vector rotate" %}
23303   ins_encode %{
23304     BasicType elem_bt = this->bottom_type()->is_vect()->element_basic_type();
23305     int rot_opcode    = this->ideal_Opcode();
23306     int vec_enc       = vector_length_encoding(this);
23307     __ vprotate_var(rot_opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vec_enc);
23308   %}
23309   ins_pipe( pipe_slow );
23310 %}
23311 
23312 // ---------------------------------- Masked Operations ------------------------------------
// LoadVectorMasked with the mask held in a vector register: selected when the
// mask input (in(3)) is not of the pvectmask type. Encoded through vmovmask.
23313 instruct vmasked_load_avx_non_subword(vec dst, memory mem, vec mask) %{
23314   predicate(!n->in(3)->bottom_type()->isa_pvectmask());
23315   match(Set dst (LoadVectorMasked mem mask));
23316   format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %}
23317   ins_encode %{
23318     int vec_enc = vector_length_encoding(this);
23319     BasicType elem_bt = this->bottom_type()->is_vect()->element_basic_type();
23320     __ vmovmask(elem_bt, $dst$$XMMRegister, $mem$$Address, $mask$$XMMRegister, vec_enc);
23321   %}
23322   ins_pipe( pipe_slow );
23323 %}
23324 
23325 
// LoadVectorMasked with the mask held in an opmask register: selected when the
// mask input (in(3)) is of the pvectmask type. Encoded through evmovdqu, with
// `false` passed for the helper's boolean argument (presumably the merge flag).
23326 instruct vmasked_load_evex(vec dst, memory mem, kReg mask) %{
23327   predicate(n->in(3)->bottom_type()->isa_pvectmask());
23328   match(Set dst (LoadVectorMasked mem mask));
23329   format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %}
23330   ins_encode %{
23331     int vec_enc = vector_length_encoding(this);
23332     BasicType elem_bt = this->bottom_type()->is_vect()->element_basic_type();
23333     __ evmovdqu(elem_bt, $mask$$KRegister, $dst$$XMMRegister, $mem$$Address, false, vec_enc);
23334   %}
23335   ins_pipe( pipe_slow );
23336 %}
23337 
// StoreVectorMasked with the mask held in a vector register: selected when the
// mask (second input of the Binary at in(3)) is not of the pvectmask type.
// Length encoding and element type come from the node that defines src.
23338 instruct vmasked_store_avx_non_subword(memory mem, vec src, vec mask) %{
23339   predicate(!n->in(3)->in(2)->bottom_type()->isa_pvectmask());
23340   match(Set mem (StoreVectorMasked mem (Binary src mask)));
23341   format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
23342   ins_encode %{
23343     const MachNode* value_def = static_cast<const MachNode*>(this->in(this->operand_index($src)));
23344     BasicType elem_bt =  value_def->bottom_type()->is_vect()->element_basic_type();
23345     int vec_enc = vector_length_encoding(value_def);
23346     __ vmovmask(elem_bt, $mem$$Address, $src$$XMMRegister, $mask$$XMMRegister, vec_enc);
23347   %}
23348   ins_pipe( pipe_slow );
23349 %}
23350 
// StoreVectorMasked with the mask held in an opmask register: selected when the
// mask (second input of the Binary at in(3)) is of the pvectmask type.
// Length encoding and element type come from the node that defines src;
// `true` is passed for the helper's boolean argument (presumably the merge flag).
23351 instruct vmasked_store_evex(memory mem, vec src, kReg mask) %{
23352   predicate(n->in(3)->in(2)->bottom_type()->isa_pvectmask());
23353   match(Set mem (StoreVectorMasked mem (Binary src mask)));
23354   format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
23355   ins_encode %{
23356     const MachNode* value_def = static_cast<const MachNode*>(this->in(this->operand_index($src)));
23357     int vec_enc = vector_length_encoding(value_def);
23358     BasicType elem_bt =  value_def->bottom_type()->is_vect()->element_basic_type();
23359     __ evmovdqu(elem_bt, $mask$$KRegister, $mem$$Address, $src$$XMMRegister, true, vec_enc);
23360   %}
23361   ins_pipe( pipe_slow );
23362 %}
23363 
// Alignment check for a vector memory address: stops the VM with a diagnostic
// message unless (addr & mask) == 0. addr passes through unchanged as the
// result; the flags register is clobbered by the test.
23364 instruct verify_vector_alignment(rRegP addr, immL32 mask, rFlagsReg cr) %{
23365   match(Set addr (VerifyVectorAlignment addr mask));
23366   effect(KILL cr);
23367   format %{ "verify_vector_alignment $addr $mask \t! verify alignment" %}
23368   ins_encode %{
23369     Label L_aligned;
23370     // addr & mask must be zero for a correctly aligned access
23371     __ testq($addr$$Register, $mask$$constant);
23372     __ jccb(Assembler::equal, L_aligned);
23373     __ stop("verify_vector_alignment found a misaligned vector memory access");
23374     __ bind(L_aligned);
23375   %}
23376   ins_pipe(pipe_slow);
23377 %}
23378 
// VectorCmpMasked: compares src1 and src2 for equality under an opmask and
// produces an int result. dst is preset to -1 and kept when the kortest of
// (~mask | equal-lanes) sets the carry flag; otherwise dst becomes the
// trailing-zero count of the inverted equal-lanes mask.
// NOTE(review): this reads as "-1 if all selected lanes match, else the index
// of the first mismatch" — confirm against the VectorCmpMasked node contract.
23379 instruct vmask_cmp_node(rRegI dst, vec src1, vec src2, kReg mask, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
23380   match(Set dst (VectorCmpMasked src1 (Binary src2 mask)));
23381   effect(TEMP_DEF dst, TEMP ktmp1, TEMP ktmp2, KILL cr);
23382   format %{ "vector_mask_cmp $src1, $src2, $mask \t! vector mask comparison" %}
23383   ins_encode %{
23384     assert(vector_length_encoding(this, $src1) == vector_length_encoding(this, $src2), "mismatch");
23385     assert(Matcher::vector_element_basic_type(this, $src1) == Matcher::vector_element_basic_type(this, $src2), "mismatch");
23386
23387     Label L_done;
23388     int vec_enc = vector_length_encoding(this, $src1);
23389     BasicType cmp_bt = Matcher::vector_element_basic_type(this, $src1);
23390
          // ktmp2 = ~mask; dst = -1 is the value kept when the branch below is taken
23391     __ knotql($ktmp2$$KRegister, $mask$$KRegister);
23392     __ mov64($dst$$Register, -1L);
          // ktmp1 = lanes (under mask) where src1 == src2
23393     __ evpcmp(cmp_bt, $ktmp1$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, Assembler::eq, vec_enc);
23394     __ kortestql($ktmp2$$KRegister, $ktmp1$$KRegister);
23395     __ jccb(Assembler::carrySet, L_done);
          // Otherwise: dst = tzcnt(~ktmp1)
23396     __ kmovql($dst$$Register, $ktmp1$$KRegister);
23397     __ notq($dst$$Register);
23398     __ tzcntq($dst$$Register, $dst$$Register);
23399     __ bind(L_done);
23400   %}
23401   ins_pipe( pipe_slow );
23402 %}
23403 
23404 
// VectorMaskGen with a run-time length held in a long register. The opmask is
// produced by the genmask macro-assembler helper, which is given one long
// temporary; the flags register is declared killed.
23405 instruct vmask_gen(kReg dst, rRegL len, rRegL temp, rFlagsReg cr) %{
23406   match(Set dst (VectorMaskGen len));
23407   effect(TEMP temp, KILL cr);
23408   format %{ "vector_mask_gen32 $dst, $len \t! vector mask generator" %}
23409   ins_encode %{
23410     __ genmask($dst$$KRegister, $len$$Register, $temp$$Register);
23411   %}
23412   ins_pipe( pipe_slow );
23413 %}
23414 
// VectorMaskGen with a compile-time constant length. A non-positive length
// yields an all-zero opmask (kxor of dst with itself); otherwise the value
// right_n_bits(len) is materialized in temp and moved into dst.
23415 instruct vmask_gen_imm(kReg dst, immL len, rRegL temp) %{
23416   match(Set dst (VectorMaskGen len));
23417   format %{ "vector_mask_gen $len \t! vector mask generator" %}
23418   effect(TEMP temp);
23419   ins_encode %{
23420     if ($len$$constant <= 0) {
23421       __ kxorql($dst$$KRegister, $dst$$KRegister, $dst$$KRegister);
23422     } else {
23423       __ mov64($temp$$Register, right_n_bits($len$$constant));
23424       __ kmovql($dst$$KRegister, $temp$$Register);
23425     }
23426   %}
23427   ins_pipe( pipe_slow );
23428 %}
23429 
// VectorMaskToLong for a mask held in an opmask register (input is of the
// pvectmask type). Delegates to vector_mask_operation, passing dst both as the
// result and as the helper's scratch GPR, plus the lane count and the mask
// size in bytes (lane count * element size).
23430 instruct vmask_tolong_evex(rRegL dst, kReg mask, rFlagsReg cr) %{
23431   predicate(n->in(1)->bottom_type()->isa_pvectmask());
23432   match(Set dst (VectorMaskToLong mask));
23433   effect(TEMP dst, KILL cr);
23434   format %{ "vector_tolong_evex $dst, $mask \t! vector mask tolong" %}
23435   ins_encode %{
23436     int vec_enc = vector_length_encoding(this, $mask);
23437     int ideal_op = this->ideal_Opcode();
23438     int lane_cnt = Matcher::vector_length(this, $mask);
23439     BasicType mask_bt = Matcher::vector_element_basic_type(this, $mask);
23440     int mask_bytes = lane_cnt * type2aelembytes(mask_bt);
23441     __ vector_mask_operation(ideal_op, $dst$$Register, $mask$$KRegister,
23442                              $dst$$Register, lane_cnt, mask_bytes, vec_enc);
23443   %}
23444   ins_pipe( pipe_slow );
23445 %}
23446 
// VectorMaskToLong for a mask held in a vector register (input is not of the
// pvectmask type). Delegates to the vector-register overload of
// vector_mask_operation with one vector temporary; dst doubles as the helper's
// scratch GPR.
23447 instruct vmask_tolong_bool(rRegL dst, vec mask, vec xtmp, rFlagsReg cr) %{
23448   predicate(n->in(1)->bottom_type()->isa_pvectmask() == nullptr);
23449   match(Set dst (VectorMaskToLong mask));
23450   format %{ "vector_tolong_bool $dst, $mask \t! using $xtmp as TEMP" %}
23451   effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
23452   ins_encode %{
23453     int vec_enc = vector_length_encoding(this, $mask);
23454     int ideal_op = this->ideal_Opcode();
23455     int lane_cnt = Matcher::vector_length(this, $mask);
23456     BasicType mask_bt = Matcher::vector_element_basic_type(this, $mask);
23457     __ vector_mask_operation(ideal_op, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
23458                              $dst$$Register, lane_cnt, mask_bt, vec_enc);
23459   %}
23460   ins_pipe( pipe_slow );
23461 %}
23462 
// VectorMaskToLong applied directly to (VectorStoreMask mask size), where the
// VectorStoreMask input is not a predicate-vector mask. The $size immediate
// takes part in the match only; the encoding does not read it.
instruct vmask_tolong_avx(rRegL dst, vec mask, immI size, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->in(1)->bottom_type()->isa_pvectmask() == nullptr);
  match(Set dst (VectorMaskToLong (VectorStoreMask mask size)));
  effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
  format %{ "vector_tolong_avx $dst, $mask \t! using $xtmp as TEMP" %}
  ins_encode %{
    BasicType lane_bt = Matcher::vector_element_basic_type(this, $mask);
    int lane_cnt = Matcher::vector_length(this, $mask);
    int vec_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(this->ideal_Opcode(), $dst$$Register, $mask$$XMMRegister,
                             $xtmp$$XMMRegister, $dst$$Register, lane_cnt, lane_bt, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
23478 
// VectorMaskTrueCount for a mask held in an opmask register. Uses one GPR
// TEMP and declares the flags register killed.
instruct vmask_truecount_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_pvectmask());
  match(Set dst (VectorMaskTrueCount mask));
  effect(TEMP_DEF dst, TEMP tmp, KILL cr);
  format %{ "vector_truecount_evex $dst, $mask \t! using $tmp as TEMP" %}
  ins_encode %{
    int vec_enc = vector_length_encoding(this, $mask);
    int lane_cnt = Matcher::vector_length(this, $mask);
    BasicType lane_bt = Matcher::vector_element_basic_type(this, $mask);
    // Lane count multiplied by the per-element byte size.
    int mask_bytes = lane_cnt * type2aelembytes(lane_bt);
    __ vector_mask_operation(this->ideal_Opcode(), $dst$$Register, $mask$$KRegister,
                             $tmp$$Register, lane_cnt, mask_bytes, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
23495 
// VectorMaskTrueCount for a mask that is not a predicate-vector mask (vector
// register input). Uses one GPR TEMP and one vector TEMP; flags are killed.
instruct vmask_truecount_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_pvectmask() == nullptr);
  match(Set dst (VectorMaskTrueCount mask));
  effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
  format %{ "vector_truecount_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
  ins_encode %{
    int vec_enc = vector_length_encoding(this, $mask);
    int lane_cnt = Matcher::vector_length(this, $mask);
    BasicType lane_bt = Matcher::vector_element_basic_type(this, $mask);
    int ideal_opc = this->ideal_Opcode();
    __ vector_mask_operation(ideal_opc, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $tmp$$Register, lane_cnt, lane_bt, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
23511 
// VectorMaskTrueCount applied directly to (VectorStoreMask mask size), where
// the VectorStoreMask input is not a predicate-vector mask. The $size
// immediate is matched but not read by the encoding.
instruct vmask_truecount_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->in(1)->bottom_type()->isa_pvectmask() == nullptr);
  match(Set dst (VectorMaskTrueCount (VectorStoreMask mask size)));
  effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
  format %{ "vector_truecount_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
  ins_encode %{
    BasicType lane_bt = Matcher::vector_element_basic_type(this, $mask);
    int lane_cnt = Matcher::vector_length(this, $mask);
    int vec_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(this->ideal_Opcode(), $dst$$Register, $mask$$XMMRegister,
                             $xtmp$$XMMRegister, $tmp$$Register, lane_cnt, lane_bt, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
23527 
// VectorMaskFirstTrue / VectorMaskLastTrue for a mask held in an opmask
// register. Both ideal nodes share this rule; the ideal opcode passed to
// vector_mask_operation tells them apart.
instruct vmask_first_or_last_true_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_pvectmask());
  match(Set dst (VectorMaskFirstTrue mask));
  match(Set dst (VectorMaskLastTrue mask));
  effect(TEMP_DEF dst, TEMP tmp, KILL cr);
  format %{ "vector_mask_first_or_last_true_evex $dst, $mask \t! using $tmp as TEMP" %}
  ins_encode %{
    int vec_enc = vector_length_encoding(this, $mask);
    int lane_cnt = Matcher::vector_length(this, $mask);
    BasicType lane_bt = Matcher::vector_element_basic_type(this, $mask);
    // Lane count multiplied by the per-element byte size.
    int mask_bytes = lane_cnt * type2aelembytes(lane_bt);
    __ vector_mask_operation(this->ideal_Opcode(), $dst$$Register, $mask$$KRegister,
                             $tmp$$Register, lane_cnt, mask_bytes, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
23545 
// VectorMaskFirstTrue / VectorMaskLastTrue for a mask that is not a
// predicate-vector mask (vector register input). The ideal opcode forwarded
// to vector_mask_operation distinguishes the two nodes.
instruct vmask_first_or_last_true_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_pvectmask() == nullptr);
  match(Set dst (VectorMaskFirstTrue mask));
  match(Set dst (VectorMaskLastTrue mask));
  effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
  format %{ "vector_mask_first_or_last_true_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
  ins_encode %{
    int vec_enc = vector_length_encoding(this, $mask);
    int lane_cnt = Matcher::vector_length(this, $mask);
    BasicType lane_bt = Matcher::vector_element_basic_type(this, $mask);
    int ideal_opc = this->ideal_Opcode();
    __ vector_mask_operation(ideal_opc, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $tmp$$Register, lane_cnt, lane_bt, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
23562 
// VectorMaskFirstTrue / VectorMaskLastTrue applied directly to
// (VectorStoreMask mask size), where the VectorStoreMask input is not a
// predicate-vector mask. The $size immediate is matched but not read by the
// encoding.
instruct vmask_first_or_last_true_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->in(1)->bottom_type()->isa_pvectmask() == nullptr);
  match(Set dst (VectorMaskFirstTrue (VectorStoreMask mask size)));
  match(Set dst (VectorMaskLastTrue (VectorStoreMask mask size)));
  effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
  format %{ "vector_mask_first_or_last_true_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
  ins_encode %{
    BasicType lane_bt = Matcher::vector_element_basic_type(this, $mask);
    int lane_cnt = Matcher::vector_length(this, $mask);
    int vec_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(this->ideal_Opcode(), $dst$$Register, $mask$$XMMRegister,
                             $xtmp$$XMMRegister, $tmp$$Register, lane_cnt, lane_bt, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
23579 
23580 // --------------------------------- Compress/Expand Operations ---------------------------
// CompressV / ExpandV for vectors of at most 32 bytes when AVX512VL is not
// available; the mask is supplied in a vector register. Two GPR TEMPs and two
// vector TEMPs are reserved and the flags register is declared killed.
// The format names both operations because the rule matches both ideal nodes,
// and uses the "\t! using" marker spelling found in neighbouring rules.
instruct vcompress_reg_avx(vec dst, vec src, vec mask, rRegI rtmp, rRegL rscratch, vec perm, vec xtmp, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
  match(Set dst (CompressV src mask));
  match(Set dst (ExpandV src mask));
  effect(TEMP_DEF dst, TEMP perm, TEMP xtmp, TEMP rtmp, TEMP rscratch, KILL cr);
  format %{ "vector_compress_expand_avx $dst, $src, $mask \t! using $xtmp, $rtmp, $rscratch and $perm as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();  // selects compress vs. expand inside the helper
    int vlen_enc = vector_length_encoding(this);
    BasicType bt  = Matcher::vector_element_basic_type(this);
    __ vector_compress_expand_avx2(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$XMMRegister, $rtmp$$Register,
                                   $rscratch$$Register, $perm$$XMMRegister, $xtmp$$XMMRegister, bt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23596 
// CompressV / ExpandV with the mask in an opmask register. Selected when
// AVX512VL is supported or the vector is 64 bytes wide. No TEMPs are needed.
instruct vcompress_expand_reg_evex(vec dst, vec src, kReg mask) %{
  predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
  match(Set dst (CompressV src mask));
  match(Set dst (ExpandV src mask));
  format %{ "vector_compress_expand $dst, $src, $mask" %}
  ins_encode %{
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    int vec_enc = vector_length_encoding(this);
    // The ideal opcode distinguishes CompressV from ExpandV for the helper.
    __ vector_compress_expand(this->ideal_Opcode(), $dst$$XMMRegister, $src$$XMMRegister,
                              $mask$$KRegister, false, elem_bt, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
23610 
// CompressM: opmask-to-opmask compression. Reserves two GPR TEMPs and
// declares the flags register killed.
instruct vcompress_mask_reg_evex(kReg dst, kReg mask, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
  match(Set dst (CompressM mask));
  effect(TEMP rtmp1, TEMP rtmp2, KILL cr);
  format %{ "mask_compress_evex $dst, $mask\t! using $rtmp1 and $rtmp2 as TEMP" %}
  ins_encode %{
    // The input must be typed as a predicate-vector mask.
    assert(this->in(1)->bottom_type()->isa_pvectmask(), "");
    int lane_cnt = Matcher::vector_length(this);
    __ vector_mask_compress($dst$$KRegister, $mask$$KRegister,
                            $rtmp1$$Register, $rtmp2$$Register, lane_cnt);
  %}
  ins_pipe( pipe_slow );
%}
23622 
23623 // -------------------------------- Bit and Byte Reversal Vector Operations ------------------------
23624 
// ReverseV when GFNI is not supported. Delegates to vector_reverse_bit with
// two vector TEMPs and one GPR TEMP.
// Format fix: the comment marker was written "!\t"; it is now "\t!" like the
// other rules in this file.
instruct vreverse_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
  predicate(!VM_Version::supports_gfni());
  match(Set dst (ReverseV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
  format %{ "vector_reverse_bit_evex $dst, $src\t! using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
  ins_encode %{
    int vec_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ vector_reverse_bit(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                          $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
23638 
// ReverseV when GFNI is supported. A 64-bit constant is placed in the constant
// table and its address is handed to vector_reverse_bit_gfni together with
// one vector TEMP.
// Format fix: the comment marker was written "!\t"; it is now "\t!" like the
// other rules in this file.
instruct vreverse_reg_gfni(vec dst, vec src, vec xtmp) %{
  predicate(VM_Version::supports_gfni());
  match(Set dst (ReverseV src));
  effect(TEMP dst, TEMP xtmp);
  format %{ "vector_reverse_bit_gfni $dst, $src\t! using $xtmp as TEMP" %}
  ins_encode %{
    int vec_enc = vector_length_encoding(this);
    BasicType bt  = Matcher::vector_element_basic_type(this);
    // NOTE(review): 0x8040201008040201 is presumably the GF2P8AFFINEQB
    // bit-reversal matrix -- confirm against vector_reverse_bit_gfni.
    InternalAddress addr = $constantaddress(jlong(0x8040201008040201));
    __ vector_reverse_bit_gfni(bt, $dst$$XMMRegister, $src$$XMMRegister, addr, vec_enc,
                               $xtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
23653 
// ReverseBytesV when AVX512BW is supported or the vector is narrower than
// 64 bytes. $dst is declared TEMP; no further scratch registers are used.
instruct vreverse_byte_reg(vec dst, vec src) %{
  predicate(VM_Version::supports_avx512bw() || Matcher::vector_length_in_bytes(n) < 64);
  match(Set dst (ReverseBytesV src));
  effect(TEMP dst);
  format %{ "vector_reverse_byte $dst, $src" %}
  ins_encode %{
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    int vlen_enc = vector_length_encoding(this);
    __ vector_reverse_byte(elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23666 
// ReverseBytesV for 64-byte vectors when AVX512BW is not supported. Delegates
// to vector_reverse_byte64 with two vector TEMPs and one GPR TEMP.
// Format fix: the comment marker was written "!\t"; it is now "\t!" like the
// other rules in this file.
instruct vreverse_byte64_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
  predicate(!VM_Version::supports_avx512bw() && Matcher::vector_length_in_bytes(n) == 64);
  match(Set dst (ReverseBytesV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
  format %{ "vector_reverse_byte $dst, $src\t! using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
  ins_encode %{
    int vec_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ vector_reverse_byte64(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                             $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
23680 
23681 // ---------------------------------- Vector Count Leading Zeros -----------------------------------
23682 
// CountLeadingZerosV, unmasked, for inputs accepted by
// is_clz_non_subword_predicate_evex. No scratch registers are passed to the
// helper: every temp slot is xnoreg/noreg and the opmask slot is k0.
instruct vcount_leading_zeros_IL_reg_evex(vec dst, vec src) %{
  predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
                                              Matcher::vector_length_in_bytes(n->in(1))));
  match(Set dst (CountLeadingZerosV src));
  format %{ "vector_count_leading_zeros $dst, $src" %}
  ins_encode %{
    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    int vec_enc = vector_length_encoding(this, $src);
    __ vector_count_leading_zeros_evex(elem_bt, $dst$$XMMRegister, $src$$XMMRegister,
                                       xnoreg, xnoreg, xnoreg, k0, noreg, true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
23696 
// CountLeadingZerosV under an opmask, for inputs accepted by
// is_clz_non_subword_predicate_evex. $src is first copied into $dst, then the
// count is emitted with $mask in the helper's opmask slot.
instruct vcount_leading_zeros_IL_reg_evex_masked(vec dst, vec src, kReg mask) %{
  predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
                                              Matcher::vector_length_in_bytes(n->in(1))));
  match(Set dst (CountLeadingZerosV src mask));
  format %{ "vector_count_leading_zeros $dst, $src, $mask" %}
  ins_encode %{
    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    int vec_enc = vector_length_encoding(this, $src);
    // Seed the destination with the source lanes before the masked operation.
    __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vec_enc);
    __ vector_count_leading_zeros_evex(elem_bt, $dst$$XMMRegister, $src$$XMMRegister,
                                       xnoreg, xnoreg, xnoreg, $mask$$KRegister,
                                       noreg, true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
23711 
// CountLeadingZerosV for T_SHORT lanes. Requires AVX512CD plus either
// AVX512VL or a 64-byte vector. Two vector TEMPs are passed to the helper;
// the remaining scratch slots are xnoreg/k0/noreg.
// Format fix: the comment marker was written "!\t"; it is now "\t!" like the
// other rules in this file.
instruct vcount_leading_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
            VM_Version::supports_avx512cd() &&
            (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
  match(Set dst (CountLeadingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                       $xtmp2$$XMMRegister, xnoreg, k0, noreg, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23727 
// CountLeadingZerosV for T_BYTE lanes when AVX512VL+BW is supported. Passes
// three vector TEMPs, one opmask TEMP and one GPR TEMP to the helper.
// Format fix: the comment marker was written "!\t"; it is now "\t!" like the
// other rules in this file.
instruct vcount_leading_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegP rtmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
  match(Set dst (CountLeadingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
  format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, $ktmp and $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                       $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $ktmp$$KRegister,
                                       $rtmp$$Register, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
23742 
// CountLeadingZerosV for T_INT lanes without AVX512VL, input narrower than
// 64 bytes. Three vector TEMPs are used; the helper's GPR slot is noreg.
instruct vcount_leading_zeros_int_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_INT &&
            !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
  match(Set dst (CountLeadingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
  format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
  ins_encode %{
    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    int vec_enc = vector_length_encoding(this, $src);
    __ vector_count_leading_zeros_avx(elem_bt, $dst$$XMMRegister, $src$$XMMRegister,
                                      $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
                                      $xtmp3$$XMMRegister, noreg, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
23757 
// CountLeadingZerosV for every lane type other than T_INT without AVX512VL,
// input narrower than 64 bytes. Uses three vector TEMPs and one GPR TEMP.
instruct vcount_leading_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) != T_INT &&
            !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
  match(Set dst (CountLeadingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
  format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
  ins_encode %{
    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    int vec_enc = vector_length_encoding(this, $src);
    __ vector_count_leading_zeros_avx(elem_bt, $dst$$XMMRegister, $src$$XMMRegister,
                                      $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
                                      $xtmp3$$XMMRegister, $rtmp$$Register, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
23772 
23773 // ---------------------------------- Vector Masked Operations ------------------------------------
23774 
// Predicated AddVB/AddVS/AddVI/AddVL/AddVF/AddVD with the second operand in a
// vector register. $dst is both the first input and the result.
instruct vadd_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (AddVB (Binary dst src2) mask));
  match(Set dst (AddVS (Binary dst src2) mask));
  match(Set dst (AddVI (Binary dst src2) mask));
  match(Set dst (AddVL (Binary dst src2) mask));
  match(Set dst (AddVF (Binary dst src2) mask));
  match(Set dst (AddVD (Binary dst src2) mask));
  format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
  ins_encode %{
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    int vec_enc = vector_length_encoding(this);
    // The ideal opcode tells evmasked_op which of the matched nodes this is.
    __ evmasked_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
23792 
// Predicated AddVB/AddVS/AddVI/AddVL/AddVF/AddVD with the second operand
// folded in from a LoadVector. $dst is both the first input and the result.
instruct vadd_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (AddVB (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVS (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVI (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVL (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVF (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVD (Binary dst (LoadVector src2)) mask));
  format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
  ins_encode %{
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    int vec_enc = vector_length_encoding(this);
    // The ideal opcode tells evmasked_op which of the matched nodes this is.
    __ evmasked_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
23810 
// Predicated XorV with the second operand in a vector register; $dst is both
// the first input and the result.
instruct vxor_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (XorV (Binary dst src2) mask));
  format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
  ins_encode %{
    int ideal_opc = this->ideal_Opcode();
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
23823 
// Predicated XorV with the second operand folded in from a LoadVector; $dst
// is both the first input and the result.
instruct vxor_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (XorV (Binary dst (LoadVector src2)) mask));
  format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
  ins_encode %{
    int ideal_opc = this->ideal_Opcode();
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
23836 
// Predicated OrV with the second operand in a vector register; $dst is both
// the first input and the result.
instruct vor_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (OrV (Binary dst src2) mask));
  format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
  ins_encode %{
    int ideal_opc = this->ideal_Opcode();
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
23849 
// Predicated OrV with the second operand folded in from a LoadVector; $dst is
// both the first input and the result.
instruct vor_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (OrV (Binary dst (LoadVector src2)) mask));
  format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
  ins_encode %{
    int ideal_opc = this->ideal_Opcode();
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
23862 
// Predicated AndV with the second operand in a vector register; $dst is both
// the first input and the result.
instruct vand_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (AndV (Binary dst src2) mask));
  format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
  ins_encode %{
    int ideal_opc = this->ideal_Opcode();
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
23875 
// Predicated AndV with the second operand folded in from a LoadVector; $dst
// is both the first input and the result.
instruct vand_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (AndV (Binary dst (LoadVector src2)) mask));
  format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
  ins_encode %{
    int ideal_opc = this->ideal_Opcode();
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
23888 
// Predicated SubVB/SubVS/SubVI/SubVL/SubVF/SubVD with the second operand in a
// vector register. $dst is both the first input and the result.
instruct vsub_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (SubVB (Binary dst src2) mask));
  match(Set dst (SubVS (Binary dst src2) mask));
  match(Set dst (SubVI (Binary dst src2) mask));
  match(Set dst (SubVL (Binary dst src2) mask));
  match(Set dst (SubVF (Binary dst src2) mask));
  match(Set dst (SubVD (Binary dst src2) mask));
  format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
  ins_encode %{
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    int vec_enc = vector_length_encoding(this);
    // The ideal opcode tells evmasked_op which of the matched nodes this is.
    __ evmasked_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
23906 
// Predicated SubVB/SubVS/SubVI/SubVL/SubVF/SubVD with the second operand
// folded in from a LoadVector. $dst is both the first input and the result.
instruct vsub_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (SubVB (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVS (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVI (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVL (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVF (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVD (Binary dst (LoadVector src2)) mask));
  format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
  ins_encode %{
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    int vec_enc = vector_length_encoding(this);
    // The ideal opcode tells evmasked_op which of the matched nodes this is.
    __ evmasked_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
23924 
// Predicated MulVS/MulVI/MulVL/MulVF/MulVD (no byte variant is matched) with
// the second operand in a vector register. $dst is both the first input and
// the result.
instruct vmul_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (MulVS (Binary dst src2) mask));
  match(Set dst (MulVI (Binary dst src2) mask));
  match(Set dst (MulVL (Binary dst src2) mask));
  match(Set dst (MulVF (Binary dst src2) mask));
  match(Set dst (MulVD (Binary dst src2) mask));
  format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
  ins_encode %{
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    int vec_enc = vector_length_encoding(this);
    // The ideal opcode tells evmasked_op which of the matched nodes this is.
    __ evmasked_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
23941 
// Predicated MulVS/MulVI/MulVL/MulVF/MulVD (no byte variant is matched) with
// the second operand folded in from a LoadVector. $dst is both the first
// input and the result.
instruct vmul_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (MulVS (Binary dst (LoadVector src2)) mask));
  match(Set dst (MulVI (Binary dst (LoadVector src2)) mask));
  match(Set dst (MulVL (Binary dst (LoadVector src2)) mask));
  match(Set dst (MulVF (Binary dst (LoadVector src2)) mask));
  match(Set dst (MulVD (Binary dst (LoadVector src2)) mask));
  format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
  ins_encode %{
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    int vec_enc = vector_length_encoding(this);
    // The ideal opcode tells evmasked_op which of the matched nodes this is.
    __ evmasked_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
23958 
// Predicated SqrtVF/SqrtVD performed in place: $dst is the only vector
// operand and is supplied to evmasked_op in all three vector positions.
instruct vsqrt_reg_masked(vec dst, kReg mask) %{
  match(Set dst (SqrtVF dst mask));
  match(Set dst (SqrtVD dst mask));
  format %{ "vpsqrt_masked $dst, $mask\t! sqrt masked operation" %}
  ins_encode %{
    int ideal_opc = this->ideal_Opcode();
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
23972 
// Predicated DivVF/DivVD with the divisor in a vector register; $dst is both
// the first input and the result.
instruct vdiv_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (DivVF (Binary dst src2) mask));
  match(Set dst (DivVD (Binary dst src2) mask));
  format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
  ins_encode %{
    int ideal_opc = this->ideal_Opcode();
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
23986 
// Predicated DivVF/DivVD with the divisor folded in from a LoadVector; $dst
// is both the first input and the result.
instruct vdiv_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (DivVF (Binary dst (LoadVector src2)) mask));
  match(Set dst (DivVD (Binary dst (LoadVector src2)) mask));
  format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
  ins_encode %{
    int ideal_opc = this->ideal_Opcode();
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24000 
24001 
// Predicated RotateLeftV / RotateRightV by an 8-bit immediate. Despite the
// "vrol" name, both rotate directions are matched; the ideal opcode passed to
// evmasked_op identifies which one.
instruct vrol_imm_masked(vec dst, immI8 shift, kReg mask) %{
  match(Set dst (RotateLeftV (Binary dst shift) mask));
  match(Set dst (RotateRightV (Binary dst shift) mask));
  format %{ "vprotate_imm_masked $dst, $dst, $shift, $mask\t! rotate masked operation" %}
  ins_encode %{
    int ideal_opc = this->ideal_Opcode();
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $shift$$constant, true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24015 
// Predicated RotateLeftV / RotateRightV with the rotate amount in a vector
// register. Both directions are matched; the ideal opcode passed to
// evmasked_op identifies which one.
instruct vrol_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (RotateLeftV (Binary dst src2) mask));
  match(Set dst (RotateRightV (Binary dst src2) mask));
  format %{ "vrotate_masked $dst, $dst, $src2, $mask\t! rotate masked operation" %}
  ins_encode %{
    int ideal_opc = this->ideal_Opcode();
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    int vec_enc = vector_length_encoding(this);
    __ evmasked_op(ideal_opc, elem_bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24029 
// Predicated LShiftVS/LShiftVI/LShiftVL where the shift count is an 8-bit
// immediate wrapped in LShiftCntV. $dst is both the first input and the
// result.
instruct vlshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
  match(Set dst (LShiftVS (Binary dst (LShiftCntV shift)) mask));
  match(Set dst (LShiftVI (Binary dst (LShiftCntV shift)) mask));
  match(Set dst (LShiftVL (Binary dst (LShiftCntV shift)) mask));
  format %{ "vplshift_imm_masked $dst, $dst, $shift, $mask\t! lshift masked operation" %}
  ins_encode %{
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    int vec_enc = vector_length_encoding(this);
    // The constant shift count is passed straight through to evmasked_op.
    __ evmasked_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $shift$$constant, true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
24044 
// Predicated LShiftVS/LShiftVI/LShiftVL with the count in a vector register,
// selected only when the node is NOT a variable shift. The trailing `false`
// handed to evmasked_op corresponds to that !is_var_shift() predicate.
instruct vlshift_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst (LShiftVS (Binary dst src2) mask));
  match(Set dst (LShiftVI (Binary dst src2) mask));
  match(Set dst (LShiftVL (Binary dst src2) mask));
  format %{ "vplshift_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
  ins_encode %{
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    int vec_enc = vector_length_encoding(this);
    __ evmasked_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vec_enc, false);
  %}
  ins_pipe( pipe_slow );
%}
24060 
// Predicated LShiftVS/LShiftVI/LShiftVL with the count in a vector register,
// selected only when the node IS a variable shift. The trailing `true` handed
// to evmasked_op corresponds to that is_var_shift() predicate.
instruct vlshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(n->as_ShiftV()->is_var_shift());
  match(Set dst (LShiftVS (Binary dst src2) mask));
  match(Set dst (LShiftVI (Binary dst src2) mask));
  match(Set dst (LShiftVL (Binary dst src2) mask));
  format %{ "vplshiftv_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
  ins_encode %{
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    int vec_enc = vector_length_encoding(this);
    __ evmasked_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vec_enc, true);
  %}
  ins_pipe( pipe_slow );
%}
24076 
24077 instruct vrshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
24078   match(Set dst (RShiftVS (Binary dst (RShiftCntV shift)) mask));
24079   match(Set dst (RShiftVI (Binary dst (RShiftCntV shift)) mask));
24080   match(Set dst (RShiftVL (Binary dst (RShiftCntV shift)) mask));
24081   format %{ "vprshift_imm_masked $dst, $dst, $shift, $mask\t! rshift masked operation" %}
24082   ins_encode %{
24083     int vlen_enc = vector_length_encoding(this);
24084     BasicType bt = Matcher::vector_element_basic_type(this);
24085     int opc = this->ideal_Opcode();
24086     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24087                    $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24088   %}
24089   ins_pipe( pipe_slow );
24090 %}
24091 
24092 instruct vrshift_reg_masked(vec dst, vec src2, kReg mask) %{
24093   predicate(!n->as_ShiftV()->is_var_shift());
24094   match(Set dst (RShiftVS (Binary dst src2) mask));
24095   match(Set dst (RShiftVI (Binary dst src2) mask));
24096   match(Set dst (RShiftVL (Binary dst src2) mask));
24097   format %{ "vprshift_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
24098   ins_encode %{
24099     int vlen_enc = vector_length_encoding(this);
24100     BasicType bt = Matcher::vector_element_basic_type(this);
24101     int opc = this->ideal_Opcode();
24102     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24103                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
24104   %}
24105   ins_pipe( pipe_slow );
24106 %}
24107 
24108 instruct vrshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
24109   predicate(n->as_ShiftV()->is_var_shift());
24110   match(Set dst (RShiftVS (Binary dst src2) mask));
24111   match(Set dst (RShiftVI (Binary dst src2) mask));
24112   match(Set dst (RShiftVL (Binary dst src2) mask));
24113   format %{ "vprshiftv_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
24114   ins_encode %{
24115     int vlen_enc = vector_length_encoding(this);
24116     BasicType bt = Matcher::vector_element_basic_type(this);
24117     int opc = this->ideal_Opcode();
24118     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24119                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
24120   %}
24121   ins_pipe( pipe_slow );
24122 %}
24123 
24124 instruct vurshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
24125   match(Set dst (URShiftVS (Binary dst (RShiftCntV shift)) mask));
24126   match(Set dst (URShiftVI (Binary dst (RShiftCntV shift)) mask));
24127   match(Set dst (URShiftVL (Binary dst (RShiftCntV shift)) mask));
24128   format %{ "vpurshift_imm_masked $dst, $dst, $shift, $mask\t! urshift masked operation" %}
24129   ins_encode %{
24130     int vlen_enc = vector_length_encoding(this);
24131     BasicType bt = Matcher::vector_element_basic_type(this);
24132     int opc = this->ideal_Opcode();
24133     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24134                    $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24135   %}
24136   ins_pipe( pipe_slow );
24137 %}
24138 
24139 instruct vurshift_reg_masked(vec dst, vec src2, kReg mask) %{
24140   predicate(!n->as_ShiftV()->is_var_shift());
24141   match(Set dst (URShiftVS (Binary dst src2) mask));
24142   match(Set dst (URShiftVI (Binary dst src2) mask));
24143   match(Set dst (URShiftVL (Binary dst src2) mask));
24144   format %{ "vpurshift_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
24145   ins_encode %{
24146     int vlen_enc = vector_length_encoding(this);
24147     BasicType bt = Matcher::vector_element_basic_type(this);
24148     int opc = this->ideal_Opcode();
24149     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24150                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
24151   %}
24152   ins_pipe( pipe_slow );
24153 %}
24154 
24155 instruct vurshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
24156   predicate(n->as_ShiftV()->is_var_shift());
24157   match(Set dst (URShiftVS (Binary dst src2) mask));
24158   match(Set dst (URShiftVI (Binary dst src2) mask));
24159   match(Set dst (URShiftVL (Binary dst src2) mask));
24160   format %{ "vpurshiftv_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
24161   ins_encode %{
24162     int vlen_enc = vector_length_encoding(this);
24163     BasicType bt = Matcher::vector_element_basic_type(this);
24164     int opc = this->ideal_Opcode();
24165     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24166                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
24167   %}
24168   ins_pipe( pipe_slow );
24169 %}
24170 
24171 instruct vmaxv_reg_masked(vec dst, vec src2, kReg mask) %{
24172   match(Set dst (MaxV (Binary dst src2) mask));
24173   format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
24174   ins_encode %{
24175     int vlen_enc = vector_length_encoding(this);
24176     BasicType bt = Matcher::vector_element_basic_type(this);
24177     int opc = this->ideal_Opcode();
24178     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24179                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24180   %}
24181   ins_pipe( pipe_slow );
24182 %}
24183 
24184 instruct vmaxv_mem_masked(vec dst, memory src2, kReg mask) %{
24185   match(Set dst (MaxV (Binary dst (LoadVector src2)) mask));
24186   format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
24187   ins_encode %{
24188     int vlen_enc = vector_length_encoding(this);
24189     BasicType bt = Matcher::vector_element_basic_type(this);
24190     int opc = this->ideal_Opcode();
24191     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24192                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24193   %}
24194   ins_pipe( pipe_slow );
24195 %}
24196 
24197 instruct vminv_reg_masked(vec dst, vec src2, kReg mask) %{
24198   match(Set dst (MinV (Binary dst src2) mask));
24199   format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
24200   ins_encode %{
24201     int vlen_enc = vector_length_encoding(this);
24202     BasicType bt = Matcher::vector_element_basic_type(this);
24203     int opc = this->ideal_Opcode();
24204     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24205                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24206   %}
24207   ins_pipe( pipe_slow );
24208 %}
24209 
24210 instruct vminv_mem_masked(vec dst, memory src2, kReg mask) %{
24211   match(Set dst (MinV (Binary dst (LoadVector src2)) mask));
24212   format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
24213   ins_encode %{
24214     int vlen_enc = vector_length_encoding(this);
24215     BasicType bt = Matcher::vector_element_basic_type(this);
24216     int opc = this->ideal_Opcode();
24217     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24218                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24219   %}
24220   ins_pipe( pipe_slow );
24221 %}
24222 
24223 instruct vrearrangev_reg_masked(vec dst, vec src2, kReg mask) %{
24224   match(Set dst (VectorRearrange (Binary dst src2) mask));
24225   format %{ "vprearrange_masked $dst, $dst, $src2, $mask\t! rearrange masked operation" %}
24226   ins_encode %{
24227     int vlen_enc = vector_length_encoding(this);
24228     BasicType bt = Matcher::vector_element_basic_type(this);
24229     int opc = this->ideal_Opcode();
24230     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24231                    $dst$$XMMRegister, $src2$$XMMRegister, false, vlen_enc);
24232   %}
24233   ins_pipe( pipe_slow );
24234 %}
24235 
24236 instruct vabs_masked(vec dst, kReg mask) %{
24237   match(Set dst (AbsVB dst mask));
24238   match(Set dst (AbsVS dst mask));
24239   match(Set dst (AbsVI dst mask));
24240   match(Set dst (AbsVL dst mask));
24241   format %{ "vabs_masked $dst, $mask \t! vabs masked operation" %}
24242   ins_encode %{
24243     int vlen_enc = vector_length_encoding(this);
24244     BasicType bt = Matcher::vector_element_basic_type(this);
24245     int opc = this->ideal_Opcode();
24246     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24247                    $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
24248   %}
24249   ins_pipe( pipe_slow );
24250 %}
24251 
24252 instruct vfma_reg_masked(vec dst, vec src2, vec src3, kReg mask) %{
24253   match(Set dst (FmaVF (Binary dst src2) (Binary src3 mask)));
24254   match(Set dst (FmaVD (Binary dst src2) (Binary src3 mask)));
24255   format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
24256   ins_encode %{
24257     assert(UseFMA, "Needs FMA instructions support.");
24258     int vlen_enc = vector_length_encoding(this);
24259     BasicType bt = Matcher::vector_element_basic_type(this);
24260     int opc = this->ideal_Opcode();
24261     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24262                    $src2$$XMMRegister, $src3$$XMMRegister, true, vlen_enc);
24263   %}
24264   ins_pipe( pipe_slow );
24265 %}
24266 
24267 instruct vfma_mem_masked(vec dst, vec src2, memory src3, kReg mask) %{
24268   match(Set dst (FmaVF (Binary dst src2) (Binary (LoadVector src3) mask)));
24269   match(Set dst (FmaVD (Binary dst src2) (Binary (LoadVector src3) mask)));
24270   format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
24271   ins_encode %{
24272     assert(UseFMA, "Needs FMA instructions support.");
24273     int vlen_enc = vector_length_encoding(this);
24274     BasicType bt = Matcher::vector_element_basic_type(this);
24275     int opc = this->ideal_Opcode();
24276     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24277                    $src2$$XMMRegister, $src3$$Address, true, vlen_enc);
24278   %}
24279   ins_pipe( pipe_slow );
24280 %}
24281 
24282 instruct evcmp_masked(kReg dst, vec src1, vec src2, immI8 cond, kReg mask) %{
24283   match(Set dst (VectorMaskCmp (Binary src1 src2) (Binary cond mask)));
24284   format %{ "vcmp_masked $dst, $src1, $src2, $cond, $mask" %}
24285   ins_encode %{
24286     assert(bottom_type()->isa_pvectmask(), "TypePVectMask expected");
24287     int vlen_enc = vector_length_encoding(this, $src1);
24288     BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
24289 
24290     // Comparison i
24291     switch (src1_elem_bt) {
24292       case T_BYTE: {
24293         bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24294         Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24295         __ evpcmpb($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24296         break;
24297       }
24298       case T_SHORT: {
24299         bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24300         Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24301         __ evpcmpw($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24302         break;
24303       }
24304       case T_INT: {
24305         bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24306         Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24307         __ evpcmpd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24308         break;
24309       }
24310       case T_LONG: {
24311         bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24312         Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24313         __ evpcmpq($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24314         break;
24315       }
24316       case T_FLOAT: {
24317         Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
24318         __ evcmpps($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
24319         break;
24320       }
24321       case T_DOUBLE: {
24322         Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
24323         __ evcmppd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
24324         break;
24325       }
24326       default: assert(false, "%s", type2name(src1_elem_bt)); break;
24327     }
24328   %}
24329   ins_pipe( pipe_slow );
24330 %}
24331 
24332 instruct mask_all_evexI_LE32(kReg dst, rRegI src) %{
24333   predicate(Matcher::vector_length(n) <= 32);
24334   match(Set dst (MaskAll src));
24335   format %{ "mask_all_evexI_LE32 $dst, $src \t" %}
24336   ins_encode %{
24337     int mask_len = Matcher::vector_length(this);
24338     __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
24339   %}
24340   ins_pipe( pipe_slow );
24341 %}
24342 
24343 instruct mask_not_immLT8(kReg dst, kReg src, rRegI rtmp, kReg ktmp, immI_M1 cnt) %{
24344   predicate(Matcher::vector_length(n) < 8 && VM_Version::supports_avx512dq());
24345   match(Set dst (XorVMask src (MaskAll cnt)));
24346   effect(TEMP_DEF dst, TEMP rtmp, TEMP ktmp);
24347   format %{ "mask_not_LT8 $dst, $src, $cnt \t!using $ktmp and $rtmp as TEMP" %}
24348   ins_encode %{
24349     uint masklen = Matcher::vector_length(this);
24350     __ knot(masklen, $dst$$KRegister, $src$$KRegister, $ktmp$$KRegister, $rtmp$$Register);
24351   %}
24352   ins_pipe( pipe_slow );
24353 %}
24354 
24355 instruct mask_not_imm(kReg dst, kReg src, immI_M1 cnt) %{
24356   predicate((Matcher::vector_length(n) == 8 && VM_Version::supports_avx512dq()) ||
24357             (Matcher::vector_length(n) == 16) ||
24358             (Matcher::vector_length(n) > 16 && VM_Version::supports_avx512bw()));
24359   match(Set dst (XorVMask src (MaskAll cnt)));
24360   format %{ "mask_not $dst, $src, $cnt \t! mask not operation" %}
24361   ins_encode %{
24362     uint masklen = Matcher::vector_length(this);
24363     __ knot(masklen, $dst$$KRegister, $src$$KRegister);
24364   %}
24365   ins_pipe( pipe_slow );
24366 %}
24367 
24368 instruct long_to_maskLE8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2) %{
24369   predicate(n->bottom_type()->isa_pvectmask() == nullptr && Matcher::vector_length(n) <= 8);
24370   match(Set dst (VectorLongToMask src));
24371   effect(TEMP dst, TEMP rtmp1, TEMP rtmp2);
24372   format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2" %}
24373   ins_encode %{
24374     int mask_len = Matcher::vector_length(this);
24375     int vec_enc  = vector_length_encoding(mask_len);
24376     __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
24377                               $rtmp2$$Register, xnoreg, mask_len, vec_enc);
24378   %}
24379   ins_pipe( pipe_slow );
24380 %}
24381 
24382 
24383 instruct long_to_maskGT8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp1, rFlagsReg cr) %{
24384   predicate(n->bottom_type()->isa_pvectmask() == nullptr && Matcher::vector_length(n) > 8);
24385   match(Set dst (VectorLongToMask src));
24386   effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp1, KILL cr);
24387   format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp1, as TEMP" %}
24388   ins_encode %{
24389     int mask_len = Matcher::vector_length(this);
24390     assert(mask_len <= 32, "invalid mask length");
24391     int vec_enc  = vector_length_encoding(mask_len);
24392     __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
24393                               $rtmp2$$Register, $xtmp1$$XMMRegister, mask_len, vec_enc);
24394   %}
24395   ins_pipe( pipe_slow );
24396 %}
24397 
24398 instruct long_to_mask_evex(kReg dst, rRegL src) %{
24399   predicate(n->bottom_type()->isa_pvectmask());
24400   match(Set dst (VectorLongToMask src));
24401   format %{ "long_to_mask_evex $dst, $src\t!" %}
24402   ins_encode %{
24403     __ kmov($dst$$KRegister, $src$$Register);
24404   %}
24405   ins_pipe( pipe_slow );
24406 %}
24407 
24408 instruct mask_opers_evex(kReg dst, kReg src1, kReg src2, kReg kscratch) %{
24409   match(Set dst (AndVMask src1 src2));
24410   match(Set dst (OrVMask src1 src2));
24411   match(Set dst (XorVMask src1 src2));
24412   effect(TEMP kscratch);
24413   format %{ "mask_opers_evex $dst, $src1, $src2\t! using $kscratch as TEMP" %}
24414   ins_encode %{
24415     const MachNode* mask1 = static_cast<const MachNode*>(this->in(this->operand_index($src1)));
24416     const MachNode* mask2 = static_cast<const MachNode*>(this->in(this->operand_index($src2)));
24417     assert(Type::equals(mask1->bottom_type(), mask2->bottom_type()), "Mask types must be equal");
24418     uint masklen = Matcher::vector_length(this);
24419     masklen = (masklen < 16 && !VM_Version::supports_avx512dq()) ? 16 : masklen;
24420     __ masked_op(this->ideal_Opcode(), masklen, $dst$$KRegister, $src1$$KRegister, $src2$$KRegister);
24421   %}
24422   ins_pipe( pipe_slow );
24423 %}
24424 
24425 instruct vternlog_reg_masked(vec dst, vec src2, vec src3, immU8 func, kReg mask) %{
24426   match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
24427   format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
24428   ins_encode %{
24429     int vlen_enc = vector_length_encoding(this);
24430     BasicType bt = Matcher::vector_element_basic_type(this);
24431     __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
24432                   $src2$$XMMRegister, $src3$$XMMRegister, true, bt, vlen_enc);
24433   %}
24434   ins_pipe( pipe_slow );
24435 %}
24436 
24437 instruct vternlogd_mem_masked(vec dst, vec src2, memory src3, immU8 func, kReg mask) %{
24438   match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
24439   format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
24440   ins_encode %{
24441     int vlen_enc = vector_length_encoding(this);
24442     BasicType bt = Matcher::vector_element_basic_type(this);
24443     __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
24444                   $src2$$XMMRegister, $src3$$Address, true, bt, vlen_enc);
24445   %}
24446   ins_pipe( pipe_slow );
24447 %}
24448 
24449 instruct castMM(kReg dst)
24450 %{
24451   match(Set dst (CastVV dst));
24452 
24453   size(0);
24454   format %{ "# castVV of $dst" %}
24455   ins_encode(/* empty encoding */);
24456   ins_cost(0);
24457   ins_pipe(empty);
24458 %}
24459 
24460 instruct castVV(vec dst)
24461 %{
24462   match(Set dst (CastVV dst));
24463 
24464   size(0);
24465   format %{ "# castVV of $dst" %}
24466   ins_encode(/* empty encoding */);
24467   ins_cost(0);
24468   ins_pipe(empty);
24469 %}
24470 
24471 instruct castVVLeg(legVec dst)
24472 %{
24473   match(Set dst (CastVV dst));
24474 
24475   size(0);
24476   format %{ "# castVV of $dst" %}
24477   ins_encode(/* empty encoding */);
24478   ins_cost(0);
24479   ins_pipe(empty);
24480 %}
24481 
24482 instruct FloatClassCheck_reg_reg_vfpclass(rRegI dst, regF src, kReg ktmp, rFlagsReg cr)
24483 %{
24484   match(Set dst (IsInfiniteF src));
24485   effect(TEMP ktmp, KILL cr);
24486   format %{ "float_class_check $dst, $src" %}
24487   ins_encode %{
24488     __ vfpclassss($ktmp$$KRegister, $src$$XMMRegister, 0x18);
24489     __ kmovbl($dst$$Register, $ktmp$$KRegister);
24490   %}
24491   ins_pipe(pipe_slow);
24492 %}
24493 
24494 instruct DoubleClassCheck_reg_reg_vfpclass(rRegI dst, regD src, kReg ktmp, rFlagsReg cr)
24495 %{
24496   match(Set dst (IsInfiniteD src));
24497   effect(TEMP ktmp, KILL cr);
24498   format %{ "double_class_check $dst, $src" %}
24499   ins_encode %{
24500     __ vfpclasssd($ktmp$$KRegister, $src$$XMMRegister, 0x18);
24501     __ kmovbl($dst$$Register, $ktmp$$KRegister);
24502   %}
24503   ins_pipe(pipe_slow);
24504 %}
24505 
24506 instruct vector_addsub_saturating_subword_reg(vec dst, vec src1, vec src2)
24507 %{
24508   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
24509             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
24510   match(Set dst (SaturatingAddV src1 src2));
24511   match(Set dst (SaturatingSubV src1 src2));
24512   format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
24513   ins_encode %{
24514     int vlen_enc = vector_length_encoding(this);
24515     BasicType elem_bt = Matcher::vector_element_basic_type(this);
24516     __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
24517                             $src1$$XMMRegister, $src2$$XMMRegister, false, vlen_enc);
24518   %}
24519   ins_pipe(pipe_slow);
24520 %}
24521 
24522 instruct vector_addsub_saturating_unsigned_subword_reg(vec dst, vec src1, vec src2)
24523 %{
24524   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
24525             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
24526   match(Set dst (SaturatingAddV src1 src2));
24527   match(Set dst (SaturatingSubV src1 src2));
24528   format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
24529   ins_encode %{
24530     int vlen_enc = vector_length_encoding(this);
24531     BasicType elem_bt = Matcher::vector_element_basic_type(this);
24532     __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
24533                             $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24534   %}
24535   ins_pipe(pipe_slow);
24536 %}
24537 
24538 instruct vector_addsub_saturating_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2)
24539 %{
24540   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
24541             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
24542             (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
24543   match(Set dst (SaturatingAddV src1 src2));
24544   match(Set dst (SaturatingSubV src1 src2));
24545   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2);
24546   format %{ "vector_addsub_saturating_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
24547   ins_encode %{
24548     int vlen_enc = vector_length_encoding(this);
24549     BasicType elem_bt = Matcher::vector_element_basic_type(this);
24550     __ vector_addsub_dq_saturating_evex(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
24551                                         $src1$$XMMRegister, $src2$$XMMRegister,
24552                                         $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
24553                                         $ktmp1$$KRegister, $ktmp2$$KRegister, vlen_enc);
24554   %}
24555   ins_pipe(pipe_slow);
24556 %}
24557 
24558 instruct vector_addsub_saturating_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4)
24559 %{
24560   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
24561             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
24562             Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
24563   match(Set dst (SaturatingAddV src1 src2));
24564   match(Set dst (SaturatingSubV src1 src2));
24565   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4);
24566   format %{ "vector_addsub_saturating_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
24567   ins_encode %{
24568     int vlen_enc = vector_length_encoding(this);
24569     BasicType elem_bt = Matcher::vector_element_basic_type(this);
24570     __ vector_addsub_dq_saturating_avx(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister, $src1$$XMMRegister,
24571                                        $src2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
24572                                        $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, vlen_enc);
24573   %}
24574   ins_pipe(pipe_slow);
24575 %}
24576 
24577 instruct vector_add_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp)
24578 %{
24579   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
24580             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
24581             (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
24582   match(Set dst (SaturatingAddV src1 src2));
24583   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp);
24584   format %{ "vector_add_saturating_unsigned_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $ktmp as TEMP" %}
24585   ins_encode %{
24586     int vlen_enc = vector_length_encoding(this);
24587     BasicType elem_bt = Matcher::vector_element_basic_type(this);
24588     __ vector_add_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
24589                                               $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp$$KRegister, vlen_enc);
24590   %}
24591   ins_pipe(pipe_slow);
24592 %}
24593 
24594 instruct vector_add_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3)
24595 %{
24596   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
24597             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
24598             Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
24599   match(Set dst (SaturatingAddV src1 src2));
24600   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
24601   format %{ "vector_add_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
24602   ins_encode %{
24603     int vlen_enc = vector_length_encoding(this);
24604     BasicType elem_bt = Matcher::vector_element_basic_type(this);
24605     __ vector_add_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
24606                                              $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, vlen_enc);
24607   %}
24608   ins_pipe(pipe_slow);
24609 %}
24610 
24611 instruct vector_sub_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, kReg ktmp)
24612 %{
24613   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
24614             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
24615             (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
24616   match(Set dst (SaturatingSubV src1 src2));
24617   effect(TEMP ktmp);
24618   format %{ "vector_sub_saturating_unsigned_evex $dst, $src1, $src2 \t! using $ktmp as TEMP" %}
24619   ins_encode %{
24620     int vlen_enc = vector_length_encoding(this);
24621     BasicType elem_bt = Matcher::vector_element_basic_type(this);
24622     __ vector_sub_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister,
24623                                               $src2$$XMMRegister, $ktmp$$KRegister, vlen_enc);
24624   %}
24625   ins_pipe(pipe_slow);
24626 %}
24627 
24628 instruct vector_sub_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2)
24629 %{
24630   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
24631             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
24632             Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
24633   match(Set dst (SaturatingSubV src1 src2));
24634   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
24635   format %{ "vector_sub_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1 and $xtmp2 as TEMP" %}
24636   ins_encode %{
24637     int vlen_enc = vector_length_encoding(this);
24638     BasicType elem_bt = Matcher::vector_element_basic_type(this);
24639     __ vector_sub_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
24640                                              $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
24641   %}
24642   ins_pipe(pipe_slow);
24643 %}
24644 
24645 instruct vector_addsub_saturating_subword_mem(vec dst, vec src1, memory src2)
24646 %{
24647   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
24648             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
24649   match(Set dst (SaturatingAddV src1 (LoadVector src2)));
24650   match(Set dst (SaturatingSubV src1 (LoadVector src2)));
24651   format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
24652   ins_encode %{
24653     int vlen_enc = vector_length_encoding(this);
24654     BasicType elem_bt = Matcher::vector_element_basic_type(this);
24655     __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
24656                             $src1$$XMMRegister, $src2$$Address, false, vlen_enc);
24657   %}
24658   ins_pipe(pipe_slow);
24659 %}
24660 
24661 instruct vector_addsub_saturating_unsigned_subword_mem(vec dst, vec src1, memory src2)
24662 %{
24663   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
24664             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
24665   match(Set dst (SaturatingAddV src1 (LoadVector src2)));
24666   match(Set dst (SaturatingSubV src1 (LoadVector src2)));
24667   format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
24668   ins_encode %{
24669     int vlen_enc = vector_length_encoding(this);
24670     BasicType elem_bt = Matcher::vector_element_basic_type(this);
24671     __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
24672                             $src1$$XMMRegister, $src2$$Address, true, vlen_enc);
24673   %}
24674   ins_pipe(pipe_slow);
24675 %}
24676 
24677 instruct vector_addsub_saturating_subword_masked_reg(vec dst, vec src, kReg mask) %{
24678   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
24679             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
24680   match(Set dst (SaturatingAddV (Binary dst src) mask));
24681   match(Set dst (SaturatingSubV (Binary dst src) mask));
24682   format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
24683   ins_encode %{
24684     int vlen_enc = vector_length_encoding(this);
24685     BasicType elem_bt = Matcher::vector_element_basic_type(this);
24686     __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
24687                               $dst$$XMMRegister, $src$$XMMRegister, false, true, vlen_enc);
24688   %}
24689   ins_pipe( pipe_slow );
24690 %}
24691 
// Masked saturating add/subtract for unsigned sub-word vector lanes, register
// source form. dst is both the first input and the result.
// NOTE(review): the two boolean arguments (true, true) presumably select
// unsigned arithmetic and merge-masking -- confirm against evmasked_saturating_op.
instruct vector_addsub_saturating_unsigned_subword_masked_reg(vec dst, vec src, kReg mask) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() &&
            n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV (Binary dst src) mask));
  match(Set dst (SaturatingSubV (Binary dst src) mask));
  format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType lane_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_saturating_op(ideal_opc, lane_bt, $mask$$KRegister,
                              $dst$$XMMRegister, $dst$$XMMRegister, $src$$XMMRegister,
                              true, true, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
24706 
// Masked saturating add/subtract for signed sub-word vector lanes where the
// second input is folded in as a memory operand (LoadVector src).
// dst is both the first input and the result.
// NOTE(review): the two boolean arguments (false, true) presumably select
// signed arithmetic and merge-masking -- confirm against evmasked_saturating_op.
instruct vector_addsub_saturating_subword_masked_mem(vec dst, memory src, kReg mask) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() &&
            !n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
  match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
  format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType lane_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_saturating_op(ideal_opc, lane_bt, $mask$$KRegister,
                              $dst$$XMMRegister, $dst$$XMMRegister, $src$$Address,
                              false, true, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
24721 
// Masked saturating add/subtract for unsigned sub-word vector lanes where the
// second input is folded in as a memory operand (LoadVector src).
// dst is both the first input and the result.
// NOTE(review): the two boolean arguments (true, true) presumably select
// unsigned arithmetic and merge-masking -- confirm against evmasked_saturating_op.
instruct vector_addsub_saturating_unsigned_subword_masked_mem(vec dst, memory src, kReg mask) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() &&
            n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
  match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
  format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType lane_bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    __ evmasked_saturating_op(ideal_opc, lane_bt, $mask$$KRegister,
                              $dst$$XMMRegister, $dst$$XMMRegister, $src$$Address,
                              true, true, vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
24736 
// SelectFromTwoVector: the index vector register is also the destination
// (Set index ...), with src1 and src2 as the two source vectors.
instruct vector_selectfrom_twovectors_reg_evex(vec index, vec src1, vec src2) %{
  match(Set index (SelectFromTwoVector (Binary index src1) src2));
  format %{ "select_from_two_vector $index, $src1, $src2 \t!" %}
  ins_encode %{
    __ select_from_two_vectors_evex(Matcher::vector_element_basic_type(this),
                                    $index$$XMMRegister,
                                    $src1$$XMMRegister,
                                    $src2$$XMMRegister,
                                    vector_length_encoding(this));
  %}
  ins_pipe( pipe_slow );
%}
24748 
// ReinterpretS2HF: transfers the value from a general purpose register into
// an XMM register with a single evmovw.
instruct reinterpretS2HF(regF dst, rRegI src) %{
  match(Set dst (ReinterpretS2HF src));
  format %{ "evmovw $dst, $src" %}
  ins_encode %{
    XMMRegister hf_dst = $dst$$XMMRegister;
    Register short_src = $src$$Register;
    __ evmovw(hf_dst, short_src);
  %}
  ins_pipe( pipe_slow );
%}
24758 
// ReinterpretHF2S: transfers the value from an XMM register into a general
// purpose register with evmovw, then narrows the result as T_SHORT.
// NOTE(review): narrow_subword_type presumably sign-extends the low 16 bits
// -- confirm against the macro assembler.
instruct reinterpretHF2S(rRegI dst, regF src) %{
  match(Set dst (ReinterpretHF2S src));
  format %{ "evmovw $dst, $src" %}
  ins_encode %{
    Register short_dst = $dst$$Register;
    __ evmovw(short_dst, $src$$XMMRegister);
    __ narrow_subword_type(short_dst, T_SHORT);
  %}
  ins_pipe( pipe_slow );
%}
24769 
// Fused form of ConvF2HF followed by ReinterpretS2HF: the conversion result
// stays in an XMM register, so a single vcvtps2ph is emitted.
instruct convF2HFAndS2HF(regF dst, regF src) %{
  match(Set dst (ReinterpretS2HF (ConvF2HF src)));
  format %{ "convF2HFAndS2HF $dst, $src" %}
  ins_encode %{
    // Immediate control byte handed to vcvtps2ph.
    const int imm8 = 0x04;
    __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, imm8, Assembler::AVX_128bit);
  %}
  ins_pipe( pipe_slow );
%}
24779 
// Fused form of ReinterpretHF2S followed by ConvHF2F: source and result both
// live in XMM registers, so a single vcvtph2ps is emitted.
instruct convHF2SAndHF2F(regF dst, regF src) %{
  match(Set dst (ConvHF2F (ReinterpretHF2S src)));
  format %{ "convHF2SAndHF2F $dst, $src" %}
  ins_encode %{
    XMMRegister float_dst = $dst$$XMMRegister;
    XMMRegister hf_src = $src$$XMMRegister;
    __ vcvtph2ps(float_dst, hf_src, Assembler::AVX_128bit);
  %}
  ins_pipe( pipe_slow );
%}
24789 
// Scalar half-float square root (SqrtHF), register to register, via vsqrtsh.
instruct scalar_sqrt_HF_reg(regF dst, regF src) %{
  match(Set dst (SqrtHF src));
  format %{ "scalar_sqrt_fp16 $dst, $src" %}
  ins_encode %{
    XMMRegister root = $dst$$XMMRegister;
    XMMRegister operand = $src$$XMMRegister;
    __ vsqrtsh(root, operand);
  %}
  ins_pipe( pipe_slow );
%}
24799 
// Scalar half-float binary arithmetic (AddHF, DivHF, MulHF, SubHF). The ideal
// opcode of the matched node is forwarded to efp16sh, which picks the operation.
instruct scalar_binOps_HF_reg(regF dst, regF src1, regF src2) %{
  match(Set dst (AddHF src1 src2));
  match(Set dst (DivHF src1 src2));
  match(Set dst (MulHF src1 src2));
  match(Set dst (SubHF src1 src2));
  format %{ "scalar_binop_fp16 $dst, $src1, $src2" %}
  ins_encode %{
    __ efp16sh(this->ideal_Opcode(),
               $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
24813 
// Scalar half-float min/max (MaxHF, MinHF), selected only when
// VM_Version::supports_avx10_2() holds. No temporaries are declared.
// NOTE(review): k0 is passed as the last argument, presumably meaning
// "no masking" -- confirm against sminmax_fp16_avx10_2.
instruct scalar_minmax_HF_reg_avx10_2(regF dst, regF src1, regF src2) %{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (MaxHF src1 src2));
  match(Set dst (MinHF src1 src2));
  format %{ "scalar_min_max_fp16 $dst, $src1, $src2" %}
  ins_encode %{
    __ sminmax_fp16_avx10_2(this->ideal_Opcode(),
                            $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
                            k0);
  %}
  ins_pipe(pipe_slow);
%}
24827 
// Scalar half-float min/max (MaxHF, MinHF) for CPUs without AVX10.2 support.
// Needs one opmask and two XMM temporaries; dst is declared TEMP_DEF.
instruct scalar_minmax_HF_reg(regF dst, regF src1, regF src2, kReg ktmp, regF xtmp1, regF xtmp2) %{
  predicate(!VM_Version::supports_avx10_2());
  match(Set dst (MaxHF src1 src2));
  match(Set dst (MinHF src1 src2));
  effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);
  format %{ "scalar_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    __ sminmax_fp16(this->ideal_Opcode(),
                    $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
                    $ktmp$$KRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
24843 
// Scalar half-float fused multiply-add (FmaHF). dst is one of the
// multiplicands and also receives the result; per the format string
// dst = dst * src1 + src2. Note that src2 is handed to vfmadd132sh before src1.
instruct scalar_fma_HF_reg(regF dst, regF src1, regF src2) %{
  match(Set dst (FmaHF src2 (Binary dst src1)));
  effect(DEF dst);
  format %{ "scalar_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
  ins_encode %{
    XMMRegister acc = $dst$$XMMRegister;
    XMMRegister addend = $src2$$XMMRegister;
    XMMRegister factor = $src1$$XMMRegister;
    __ vfmadd132sh(acc, addend, factor);
  %}
  ins_pipe(pipe_slow);
%}
24854 
24855 
// Packed half-float square root (SqrtVHF), register source, via evsqrtph.
instruct vector_sqrt_HF_reg(vec dst, vec src) %{
  match(Set dst (SqrtVHF src));
  format %{ "vector_sqrt_fp16 $dst, $src" %}
  ins_encode %{
    __ evsqrtph($dst$$XMMRegister, $src$$XMMRegister, vector_length_encoding(this));
  %}
  ins_pipe( pipe_slow );
%}
24866 
// Packed half-float square root (SqrtVHF) with the input folded in as a
// memory operand; matches a VectorReinterpret of a LoadVector.
instruct vector_sqrt_HF_mem(vec dst, memory src) %{
  match(Set dst (SqrtVHF (VectorReinterpret (LoadVector src))));
  format %{ "vector_sqrt_fp16_mem $dst, $src" %}
  ins_encode %{
    __ evsqrtph($dst$$XMMRegister, $src$$Address, vector_length_encoding(this));
  %}
  ins_pipe( pipe_slow );
%}
24877 
// Packed half-float binary arithmetic (AddVHF, DivVHF, MulVHF, SubVHF) on
// registers. The ideal opcode is forwarded to evfp16ph, which picks the
// operation.
instruct vector_binOps_HF_reg(vec dst, vec src1, vec src2) %{
  match(Set dst (AddVHF src1 src2));
  match(Set dst (DivVHF src1 src2));
  match(Set dst (MulVHF src1 src2));
  match(Set dst (SubVHF src1 src2));
  format %{ "vector_binop_fp16 $dst, $src1, $src2" %}
  ins_encode %{
    __ evfp16ph(this->ideal_Opcode(),
                $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
                vector_length_encoding(this));
  %}
  ins_pipe( pipe_slow );
%}
24892 
24893 
// Packed half-float binary arithmetic (AddVHF, DivVHF, MulVHF, SubVHF) with
// the second input folded in as a memory operand (VectorReinterpret of a
// LoadVector). The ideal opcode is forwarded to evfp16ph.
instruct vector_binOps_HF_mem(vec dst, vec src1, memory src2) %{
  match(Set dst (AddVHF src1 (VectorReinterpret (LoadVector src2))));
  match(Set dst (DivVHF src1 (VectorReinterpret (LoadVector src2))));
  match(Set dst (MulVHF src1 (VectorReinterpret (LoadVector src2))));
  match(Set dst (SubVHF src1 (VectorReinterpret (LoadVector src2))));
  format %{ "vector_binop_fp16_mem $dst, $src1, $src2" %}
  ins_encode %{
    __ evfp16ph(this->ideal_Opcode(),
                $dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address,
                vector_length_encoding(this));
  %}
  ins_pipe( pipe_slow );
%}
24908 
// Packed half-float fused multiply-add (FmaVHF) on registers. dst is one of
// the multiplicands and also receives the result; per the format string
// dst = dst * src1 + src2. Note that src2 is handed to evfmadd132ph before src1.
instruct vector_fma_HF_reg(vec dst, vec src1, vec src2) %{
  match(Set dst (FmaVHF src2 (Binary dst src1)));
  format %{ "vector_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
  ins_encode %{
    XMMRegister acc = $dst$$XMMRegister;
    XMMRegister addend = $src2$$XMMRegister;
    XMMRegister factor = $src1$$XMMRegister;
    __ evfmadd132ph(acc, addend, factor, vector_length_encoding(this));
  %}
  ins_pipe(pipe_slow);
%}
24919 
// Packed half-float fused multiply-add (FmaVHF) where the second multiplicand
// is folded in as a memory operand (VectorReinterpret of a LoadVector).
// Per the format string dst = dst * src1 + src2; src2 is handed to
// evfmadd132ph before the src1 address.
instruct vector_fma_HF_mem(vec dst, memory src1, vec src2) %{
  match(Set dst (FmaVHF src2 (Binary dst (VectorReinterpret (LoadVector src1)))));
  format %{ "vector_fma_fp16_mem $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
  ins_encode %{
    XMMRegister acc = $dst$$XMMRegister;
    XMMRegister addend = $src2$$XMMRegister;
    __ evfmadd132ph(acc, addend, $src1$$Address, vector_length_encoding(this));
  %}
  ins_pipe(pipe_slow);
%}
24930 
// Packed half-float min/max (MinVHF, MaxVHF) with the second input folded in
// as a memory operand; selected only when VM_Version::supports_avx10_2() holds.
// NOTE(review): k0 is passed ahead of the vector length, presumably meaning
// "no masking" -- confirm against vminmax_fp16_avx10_2.
instruct vector_minmax_HF_mem_avx10_2(vec dst, vec src1, memory src2) %{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (MinVHF src1 (VectorReinterpret (LoadVector src2))));
  match(Set dst (MaxVHF src1 (VectorReinterpret (LoadVector src2))));
  format %{ "vector_min_max_fp16_mem $dst, $src1, $src2" %}
  ins_encode %{
    __ vminmax_fp16_avx10_2(this->ideal_Opcode(),
                            $dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address,
                            k0, vector_length_encoding(this));
  %}
  ins_pipe(pipe_slow);
%}
24945 
// Packed half-float min/max (MinVHF, MaxVHF) on registers; selected only when
// VM_Version::supports_avx10_2() holds. No temporaries are declared.
// NOTE(review): k0 is passed ahead of the vector length, presumably meaning
// "no masking" -- confirm against vminmax_fp16_avx10_2.
instruct vector_minmax_HF_reg_avx10_2(vec dst, vec src1, vec src2) %{
  predicate(VM_Version::supports_avx10_2());
  match(Set dst (MinVHF src1 src2));
  match(Set dst (MaxVHF src1 src2));
  format %{ "vector_min_max_fp16 $dst, $src1, $src2" %}
  ins_encode %{
    __ vminmax_fp16_avx10_2(this->ideal_Opcode(),
                            $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
                            k0, vector_length_encoding(this));
  %}
  ins_pipe(pipe_slow);
%}
24960 
// Packed half-float min/max (MinVHF, MaxVHF) for CPUs without AVX10.2 support.
// Needs one opmask and two vector temporaries; dst is declared TEMP_DEF.
// Note that src2 is handed to vminmax_fp16 ahead of src1.
instruct vector_minmax_HF_reg(vec dst, vec src1, vec src2, kReg ktmp, vec xtmp1, vec xtmp2) %{
  predicate(!VM_Version::supports_avx10_2());
  match(Set dst (MinVHF src1 src2));
  match(Set dst (MaxVHF src1 src2));
  effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    __ vminmax_fp16(this->ideal_Opcode(),
                    $dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister,
                    $ktmp$$KRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
                    vector_length_encoding(this));
  %}
  ins_pipe(pipe_slow);
%}
24976 
24977 //----------PEEPHOLE RULES-----------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instruction definitions.
24980 //
24981 // peeppredicate ( rule_predicate );
// // the predicate that must hold; otherwise the peephole rule is ignored
24983 //
24984 // peepmatch ( root_instr_name [preceding_instruction]* );
24985 //
24986 // peepprocedure ( procedure_name );
// // provide a procedure name to perform the optimization. The procedure should
// // reside in the architecture dependent peephole file. The method has the
// // signature of MachNode* (Block*, int, PhaseRegAlloc*, (MachNode*)(*)(), int...)
// // with the arguments being the basic block, the current node index inside the
// // block, the register allocator, the functions which, when invoked, return a
// // new node defined in peepreplace, and the rules of the nodes appearing in the
// // corresponding peepmatch. The function returns true if successful, else
// // returns false
24995 //
24996 // peepconstraint %{
24997 // (instruction_number.operand_name relational_op instruction_number.operand_name
24998 //  [, ...] );
24999 // // instruction numbers are zero-based using left to right order in peepmatch
25000 //
25001 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
25002 // // provide an instruction_number.operand_name for each operand that appears
25003 // // in the replacement instruction's match rule
25004 //
25005 // ---------VM FLAGS---------------------------------------------------------
25006 //
25007 // All peephole optimizations can be turned off using -XX:-OptoPeephole
25008 //
25009 // Each peephole rule is given an identifying number starting with zero and
25010 // increasing by one in the order seen by the parser.  An individual peephole
25011 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
25012 // on the command-line.
25013 //
25014 // ---------CURRENT LIMITATIONS----------------------------------------------
25015 //
// Only transformations inside a basic block (do we need more for peephole?)
25017 //
25018 // ---------EXAMPLE----------------------------------------------------------
25019 //
25020 // // pertinent parts of existing instructions in architecture description
25021 // instruct movI(rRegI dst, rRegI src)
25022 // %{
25023 //   match(Set dst (CopyI src));
25024 // %}
25025 //
25026 // instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
25027 // %{
25028 //   match(Set dst (AddI dst src));
25029 //   effect(KILL cr);
25030 // %}
25031 //
25032 // instruct leaI_rReg_immI(rRegI dst, immI_1 src)
25033 // %{
25034 //   match(Set dst (AddI dst src));
25035 // %}
25036 //
25037 // 1. Simple replacement
25038 // - Only match adjacent instructions in same basic block
25039 // - Only equality constraints
25040 // - Only constraints between operands, not (0.dest_reg == RAX_enc)
25041 // - Only one replacement instruction
25042 //
25043 // // Change (inc mov) to lea
25044 // peephole %{
25045 //   // lea should only be emitted when beneficial
25046 //   peeppredicate( VM_Version::supports_fast_2op_lea() );
25047 //   // increment preceded by register-register move
25048 //   peepmatch ( incI_rReg movI );
25049 //   // require that the destination register of the increment
25050 //   // match the destination register of the move
25051 //   peepconstraint ( 0.dst == 1.dst );
25052 //   // construct a replacement instruction that sets
25053 //   // the destination to ( move's source register + one )
25054 //   peepreplace ( leaI_rReg_immI( 0.dst 1.src 0.src ) );
25055 // %}
25056 //
25057 // 2. Procedural replacement
// - More flexible finding relevant nodes
25059 // - More flexible constraints
25060 // - More flexible transformations
25061 // - May utilise architecture-dependent API more effectively
25062 // - Currently only one replacement instruction due to adlc parsing capabilities
25063 //
25064 // // Change (inc mov) to lea
25065 // peephole %{
25066 //   // lea should only be emitted when beneficial
25067 //   peeppredicate( VM_Version::supports_fast_2op_lea() );
25068 //   // the rule numbers of these nodes inside are passed into the function below
25069 //   peepmatch ( incI_rReg movI );
25070 //   // the method that takes the responsibility of transformation
25071 //   peepprocedure ( inc_mov_to_lea );
//   // the replacement is a leaI_rReg_immI; a lambda that creates this node
//   // when invoked is passed into the function above
25074 //   peepreplace ( leaI_rReg_immI() );
25075 // %}
25076 
// These instructions are not matched by the matcher but are used by the peephole
// 32-bit register+register add expressed as leal. predicate(false) keeps the
// matcher from ever selecting it; it is named by peepreplace in a peephole rule.
// The operand that is neither rbp nor r13 is placed in the base position.
// NOTE(review): presumably because rbp/r13 as a base register need an explicit
// displacement in the encoding -- confirm.
instruct leaI_rReg_rReg_peep(rRegI dst, rRegI src1, rRegI src2) %{
  predicate(false);
  match(Set dst (AddI src1 src2));
  format %{ "leal    $dst, [$src1 + $src2]" %}
  ins_encode %{
    Register result = $dst$$Register;
    Register lhs = $src1$$Register;
    Register rhs = $src2$$Register;
    if (lhs == rbp || lhs == r13) {
      // Swap the roles so that rhs becomes the base register.
      assert(rhs != rbp && rhs != r13, "");
      __ leal(result, Address(rhs, lhs, Address::times_1));
    } else {
      __ leal(result, Address(lhs, rhs, Address::times_1));
    }
  %}
  ins_pipe( ialu_reg_reg );
%}
25096 
// 32-bit register+immediate add expressed as leal. predicate(false) keeps the
// matcher from ever selecting it; it is named by peepreplace in peephole rules.
instruct leaI_rReg_immI_peep(rRegI dst, rRegI src1, immI src2) %{
  predicate(false);
  match(Set dst (AddI src1 src2));
  format %{ "leal    $dst, [$src1 + $src2]" %}
  ins_encode %{
    Register result = $dst$$Register;
    Register base = $src1$$Register;
    __ leal(result, Address(base, $src2$$constant));
  %}
  ins_pipe( ialu_reg_reg );
%}
25107 
// 32-bit left shift by a small constant expressed as leal. predicate(false)
// keeps the matcher from ever selecting it; it is named by peepreplace in a
// peephole rule. A scale of times_2 is emitted as [src + src] unless src is
// rbp or r13; every other case uses a scaled index with no base register.
instruct leaI_rReg_immI2_peep(rRegI dst, rRegI src, immI2 shift) %{
  predicate(false);
  match(Set dst (LShiftI src shift));
  format %{ "leal    $dst, [$src << $shift]" %}
  ins_encode %{
    Register reg = $src$$Register;
    Address::ScaleFactor factor = static_cast<Address::ScaleFactor>($shift$$constant);
    if (factor != Address::times_2 || reg == rbp || reg == r13) {
      __ leal($dst$$Register, Address(noreg, reg, factor));
    } else {
      __ leal($dst$$Register, Address(reg, reg, Address::times_1));
    }
  %}
  ins_pipe( ialu_reg_reg );
%}
25124 
// 64-bit register+register add expressed as leaq. predicate(false) keeps the
// matcher from ever selecting it; it is named by peepreplace in a peephole rule.
// The operand that is neither rbp nor r13 is placed in the base position.
// NOTE(review): presumably because rbp/r13 as a base register need an explicit
// displacement in the encoding -- confirm.
instruct leaL_rReg_rReg_peep(rRegL dst, rRegL src1, rRegL src2) %{
  predicate(false);
  match(Set dst (AddL src1 src2));
  format %{ "leaq    $dst, [$src1 + $src2]" %}
  ins_encode %{
    Register result = $dst$$Register;
    Register lhs = $src1$$Register;
    Register rhs = $src2$$Register;
    if (lhs == rbp || lhs == r13) {
      // Swap the roles so that rhs becomes the base register.
      assert(rhs != rbp && rhs != r13, "");
      __ leaq(result, Address(rhs, lhs, Address::times_1));
    } else {
      __ leaq(result, Address(lhs, rhs, Address::times_1));
    }
  %}
  ins_pipe( ialu_reg_reg );
%}
25143 
// 64-bit register+immediate add expressed as leaq. predicate(false) keeps the
// matcher from ever selecting it; it is named by peepreplace in peephole rules.
instruct leaL_rReg_immL32_peep(rRegL dst, rRegL src1, immL32 src2) %{
  predicate(false);
  match(Set dst (AddL src1 src2));
  format %{ "leaq    $dst, [$src1 + $src2]" %}
  ins_encode %{
    Register result = $dst$$Register;
    Register base = $src1$$Register;
    __ leaq(result, Address(base, $src2$$constant));
  %}
  ins_pipe( ialu_reg_reg );
%}
25154 
// 64-bit left shift by a small constant expressed as leaq. predicate(false)
// keeps the matcher from ever selecting it; it is named by peepreplace in a
// peephole rule. A scale of times_2 is emitted as [src + src] unless src is
// rbp or r13; every other case uses a scaled index with no base register.
instruct leaL_rReg_immI2_peep(rRegL dst, rRegL src, immI2 shift) %{
  predicate(false);
  match(Set dst (LShiftL src shift));
  format %{ "leaq    $dst, [$src << $shift]" %}
  ins_encode %{
    Register reg = $src$$Register;
    Address::ScaleFactor factor = static_cast<Address::ScaleFactor>($shift$$constant);
    if (factor != Address::times_2 || reg == rbp || reg == r13) {
      __ leaq($dst$$Register, Address(noreg, reg, factor));
    } else {
      __ leaq($dst$$Register, Address(reg, reg, Address::times_1));
    }
  %}
  ins_pipe( ialu_reg_reg );
%}
25171 
25172 // These peephole rules replace mov + I pairs (where I is one of {add, inc, dec,
25173 // sal}) with lea instructions. The {add, sal} rules are beneficial in
25174 // processors with at least partial ALU support for lea
25175 // (supports_fast_2op_lea()), whereas the {inc, dec} rules are only generally
25176 // beneficial for processors with full ALU support
25177 // (VM_Version::supports_fast_3op_lea()) and Intel Cascade Lake.
25178 
// int add (register, register): handed to lea_coalesce_reg, which may replace
// it with leaI_rReg_rReg_peep. Enabled when supports_fast_2op_lea() holds.
peephole %{
  peeppredicate(VM_Version::supports_fast_2op_lea());
  peepmatch(addI_rReg);
  peepprocedure(lea_coalesce_reg);
  peepreplace(leaI_rReg_rReg_peep());
%}
25186 
// int add (register, immediate): handed to lea_coalesce_imm, which may replace
// it with leaI_rReg_immI_peep. Enabled when supports_fast_2op_lea() holds.
peephole %{
  peeppredicate(VM_Version::supports_fast_2op_lea());
  peepmatch(addI_rReg_imm);
  peepprocedure(lea_coalesce_imm);
  peepreplace(leaI_rReg_immI_peep());
%}
25194 
// int increment: handed to lea_coalesce_imm, which may replace it with
// leaI_rReg_immI_peep. Enabled when supports_fast_3op_lea() holds or the CPU
// is Intel Cascade Lake.
peephole %{
  peeppredicate(VM_Version::supports_fast_3op_lea() || VM_Version::is_intel_cascade_lake());
  peepmatch(incI_rReg);
  peepprocedure(lea_coalesce_imm);
  peepreplace(leaI_rReg_immI_peep());
%}
25203 
// int decrement: handed to lea_coalesce_imm, which may replace it with
// leaI_rReg_immI_peep. Enabled when supports_fast_3op_lea() holds or the CPU
// is Intel Cascade Lake.
peephole %{
  peeppredicate(VM_Version::supports_fast_3op_lea() || VM_Version::is_intel_cascade_lake());
  peepmatch(decI_rReg);
  peepprocedure(lea_coalesce_imm);
  peepreplace(leaI_rReg_immI_peep());
%}
25212 
// int shift-left by a small constant: handed to lea_coalesce_imm, which may
// replace it with leaI_rReg_immI2_peep. Enabled when supports_fast_2op_lea()
// holds.
peephole %{
  peeppredicate(VM_Version::supports_fast_2op_lea());
  peepmatch(salI_rReg_immI2);
  peepprocedure(lea_coalesce_imm);
  peepreplace(leaI_rReg_immI2_peep());
%}
25220 
// long add (register, register): handed to lea_coalesce_reg, which may replace
// it with leaL_rReg_rReg_peep. Enabled when supports_fast_2op_lea() holds.
peephole %{
  peeppredicate(VM_Version::supports_fast_2op_lea());
  peepmatch(addL_rReg);
  peepprocedure(lea_coalesce_reg);
  peepreplace(leaL_rReg_rReg_peep());
%}
25228 
// long add (register, immediate): handed to lea_coalesce_imm, which may
// replace it with leaL_rReg_immL32_peep. Enabled when supports_fast_2op_lea()
// holds.
peephole %{
  peeppredicate(VM_Version::supports_fast_2op_lea());
  peepmatch(addL_rReg_imm);
  peepprocedure(lea_coalesce_imm);
  peepreplace(leaL_rReg_immL32_peep());
%}
25236 
// long increment: handed to lea_coalesce_imm, which may replace it with
// leaL_rReg_immL32_peep. Enabled when supports_fast_3op_lea() holds or the CPU
// is Intel Cascade Lake.
peephole %{
  peeppredicate(VM_Version::supports_fast_3op_lea() || VM_Version::is_intel_cascade_lake());
  peepmatch(incL_rReg);
  peepprocedure(lea_coalesce_imm);
  peepreplace(leaL_rReg_immL32_peep());
%}
25245 
// long decrement: handed to lea_coalesce_imm, which may replace it with
// leaL_rReg_immL32_peep. Enabled when supports_fast_3op_lea() holds or the CPU
// is Intel Cascade Lake.
peephole %{
  peeppredicate(VM_Version::supports_fast_3op_lea() || VM_Version::is_intel_cascade_lake());
  peepmatch(decL_rReg);
  peepprocedure(lea_coalesce_imm);
  peepreplace(leaL_rReg_immL32_peep());
%}
25254 
// long shift-left by a small constant: handed to lea_coalesce_imm, which may
// replace it with leaL_rReg_immI2_peep. Enabled when supports_fast_2op_lea()
// holds.
peephole %{
  peeppredicate(VM_Version::supports_fast_2op_lea());
  peepmatch(salL_rReg_immI2);
  peepprocedure(lea_coalesce_imm);
  peepreplace(leaL_rReg_immI2_peep());
%}
25262 
// leaPCompressedOopOffset is handed to lea_remove_redundant. The rule has no
// predicate and names no replacement instruction.
peephole %{
  peepmatch(leaPCompressedOopOffset);
  peepprocedure(lea_remove_redundant);
%}
25268 
// leaP8Narrow is handed to lea_remove_redundant. The rule has no predicate
// and names no replacement instruction.
peephole %{
  peepmatch(leaP8Narrow);
  peepprocedure(lea_remove_redundant);
%}
25274 
// leaP32Narrow is handed to lea_remove_redundant. The rule has no predicate
// and names no replacement instruction.
peephole %{
  peepmatch(leaP32Narrow);
  peepprocedure(lea_remove_redundant);
%}
25280 
// These peephole rules match instructions which set flags and are followed by a testI/L_reg.
// The test instruction is redundant in case the downstream instructions (like JCC or CMOV) only use flags that are already set by the previous instruction.
25283 
// int variant: testI_reg is handed to test_may_remove. The rule has no
// predicate and names no replacement instruction.
peephole %{
  peepmatch(testI_reg);
  peepprocedure(test_may_remove);
%}
25290 
// long variant: testL_reg is handed to test_may_remove. The rule has no
// predicate and names no replacement instruction.
peephole %{
  peepmatch(testL_reg);
  peepprocedure(test_may_remove);
%}
25297 
25298 
25299 //----------SMARTSPILL RULES---------------------------------------------------
25300 // These must follow all instruction definitions as they use the names
25301 // defined in the instructions definitions.