1 //
    2 // Copyright (c) 2011, 2026, Oracle and/or its affiliates. All rights reserved.
    3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
    4 //
    5 // This code is free software; you can redistribute it and/or modify it
    6 // under the terms of the GNU General Public License version 2 only, as
    7 // published by the Free Software Foundation.
    8 //
    9 // This code is distributed in the hope that it will be useful, but WITHOUT
   10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   12 // version 2 for more details (a copy is included in the LICENSE file that
   13 // accompanied this code).
   14 //
   15 // You should have received a copy of the GNU General Public License version
   16 // 2 along with this work; if not, write to the Free Software Foundation,
   17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
   18 //
   19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
   20 // or visit www.oracle.com if you need additional information or have any
   21 // questions.
   22 //
   23 //
   24 
   25 // X86 AMD64 Architecture Description File
   26 
   27 //----------REGISTER DEFINITION BLOCK------------------------------------------
   28 // This information is used by the matcher and the register allocator to
   29 // describe individual registers and classes of registers within the target
   30 // architecture.
   31 
   32 register %{
   33 //----------Architecture Description Register Definitions----------------------
   34 // General Registers
   35 // "reg_def"  name ( register save type, C convention save type,
   36 //                   ideal register type, encoding, VMReg );
   37 // Register Save Types:
   38 //
   39 // NS  = No-Save:       The register allocator assumes that these registers
   40 //                      can be used without saving upon entry to the method, &
   41 //                      that they do not need to be saved at call sites.
   42 //
   43 // SOC = Save-On-Call:  The register allocator assumes that these registers
   44 //                      can be used without saving upon entry to the method,
   45 //                      but that they must be saved at call sites.
   46 //
   47 // SOE = Save-On-Entry: The register allocator assumes that these registers
   48 //                      must be saved before using them upon entry to the
   49 //                      method, but they do not need to be saved at call
   50 //                      sites.
   51 //
   52 // AS  = Always-Save:   The register allocator assumes that these registers
   53 //                      must be saved before using them upon entry to the
   54 //                      method, & that they must be saved at call sites.
   55 //
   56 // Ideal Register Type is used to determine how to save & restore a
   57 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
   58 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
   59 //
   60 // The encoding number is the actual bit-pattern placed into the opcodes.
   61 
   62 // General Registers
   63 // R8-R15 must be encoded with REX.  (RSP, RBP, RSI, RDI need REX when
   64 // used as byte registers)
   65 
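   66 // Previously RBX, RSI, and RDI were set as save-on-entry (SOE) for Java code.
   67 // SOE was then turned off in Java code due to the frequent use of uncommon traps.
   68 // Now that the allocator is better, turn on RSI and RDI as SOE registers.
      // NOTE(review): the definitions below give RSI and RDI SOC as their first
      // (register) save type; only the C convention type is SOE, and only under
      // _WIN64.  The history above looks stale -- confirm.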
   69 
   // General registers RAX, RCX, RDX, RBX, RSP and RBP (encodings 0-5).
   // Each register gets two reg_def entries: NAME maps to the register's VMReg
   // and NAME_H to the VMReg slot that follows it (->next()).  Both entries of
   // a pair carry the same save types, ideal type and encoding.
   // Save types (register / C convention): RAX, RCX and RDX are SOC/SOC, RBX is
   // SOC/SOE, RSP is NS/NS and RBP is NS/SOE.
   70 reg_def RAX  (SOC, SOC, Op_RegI,  0, rax->as_VMReg());
   71 reg_def RAX_H(SOC, SOC, Op_RegI,  0, rax->as_VMReg()->next());
   72 
   73 reg_def RCX  (SOC, SOC, Op_RegI,  1, rcx->as_VMReg());
   74 reg_def RCX_H(SOC, SOC, Op_RegI,  1, rcx->as_VMReg()->next());
   75 
   76 reg_def RDX  (SOC, SOC, Op_RegI,  2, rdx->as_VMReg());
   77 reg_def RDX_H(SOC, SOC, Op_RegI,  2, rdx->as_VMReg()->next());
   78 
   79 reg_def RBX  (SOC, SOE, Op_RegI,  3, rbx->as_VMReg());
   80 reg_def RBX_H(SOC, SOE, Op_RegI,  3, rbx->as_VMReg()->next());
   81 
   82 reg_def RSP  (NS,  NS,  Op_RegI,  4, rsp->as_VMReg());
   83 reg_def RSP_H(NS,  NS,  Op_RegI,  4, rsp->as_VMReg()->next());
   84 
   85 // Now that adapter frames are gone, RBP is always saved and restored by the prolog/epilog code.
   86 reg_def RBP  (NS, SOE, Op_RegI,  5, rbp->as_VMReg());
   87 reg_def RBP_H(NS, SOE, Op_RegI,  5, rbp->as_VMReg()->next());
   88 
   // RSI and RDI (encodings 6 and 7), each with its _H companion slot.
   // Only the C convention save type differs between the two branches: it is
   // SOE when _WIN64 is defined and SOC otherwise.  The first (register) save
   // type is SOC in both branches.
   89 #ifdef _WIN64
   90 
   91 reg_def RSI  (SOC, SOE, Op_RegI,  6, rsi->as_VMReg());
   92 reg_def RSI_H(SOC, SOE, Op_RegI,  6, rsi->as_VMReg()->next());
   93 
   94 reg_def RDI  (SOC, SOE, Op_RegI,  7, rdi->as_VMReg());
   95 reg_def RDI_H(SOC, SOE, Op_RegI,  7, rdi->as_VMReg()->next());
   96 
   97 #else
   98 
   99 reg_def RSI  (SOC, SOC, Op_RegI,  6, rsi->as_VMReg());
  100 reg_def RSI_H(SOC, SOC, Op_RegI,  6, rsi->as_VMReg()->next());
  101 
  102 reg_def RDI  (SOC, SOC, Op_RegI,  7, rdi->as_VMReg());
  103 reg_def RDI_H(SOC, SOC, Op_RegI,  7, rdi->as_VMReg()->next());
  104 
  105 #endif
  106 
  // R8-R15 (encodings 8-15), each with its _H companion slot (->next()).
  // Save types (register / C convention): R8-R11 are SOC/SOC, R12-R15 are
  // SOC/SOE.
  107 reg_def R8   (SOC, SOC, Op_RegI,  8, r8->as_VMReg());
  108 reg_def R8_H (SOC, SOC, Op_RegI,  8, r8->as_VMReg()->next());
  109 
  110 reg_def R9   (SOC, SOC, Op_RegI,  9, r9->as_VMReg());
  111 reg_def R9_H (SOC, SOC, Op_RegI,  9, r9->as_VMReg()->next());
  112 
  113 reg_def R10  (SOC, SOC, Op_RegI, 10, r10->as_VMReg());
  114 reg_def R10_H(SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());
  115 
  116 reg_def R11  (SOC, SOC, Op_RegI, 11, r11->as_VMReg());
  117 reg_def R11_H(SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());
  118 
  119 reg_def R12  (SOC, SOE, Op_RegI, 12, r12->as_VMReg());
  120 reg_def R12_H(SOC, SOE, Op_RegI, 12, r12->as_VMReg()->next());
  121 
  122 reg_def R13  (SOC, SOE, Op_RegI, 13, r13->as_VMReg());
  123 reg_def R13_H(SOC, SOE, Op_RegI, 13, r13->as_VMReg()->next());
  124 
  125 reg_def R14  (SOC, SOE, Op_RegI, 14, r14->as_VMReg());
  126 reg_def R14_H(SOC, SOE, Op_RegI, 14, r14->as_VMReg()->next());
  127 
  128 reg_def R15  (SOC, SOE, Op_RegI, 15, r15->as_VMReg());
  129 reg_def R15_H(SOC, SOE, Op_RegI, 15, r15->as_VMReg()->next());
  130 
  // R16-R31 (encodings 16-31), each with its _H companion slot (->next()).
  // All of them are SOC for both the register and the C convention save type.
  // NOTE(review): these look like the Intel APX extended general registers --
  // confirm against the assembler's register definitions.
  131 reg_def R16  (SOC, SOC, Op_RegI, 16, r16->as_VMReg());
  132 reg_def R16_H(SOC, SOC, Op_RegI, 16, r16->as_VMReg()->next());
  133 
  134 reg_def R17  (SOC, SOC, Op_RegI, 17, r17->as_VMReg());
  135 reg_def R17_H(SOC, SOC, Op_RegI, 17, r17->as_VMReg()->next());
  136 
  137 reg_def R18  (SOC, SOC, Op_RegI, 18, r18->as_VMReg());
  138 reg_def R18_H(SOC, SOC, Op_RegI, 18, r18->as_VMReg()->next());
  139 
  140 reg_def R19  (SOC, SOC, Op_RegI, 19, r19->as_VMReg());
  141 reg_def R19_H(SOC, SOC, Op_RegI, 19, r19->as_VMReg()->next());
  142 
  143 reg_def R20  (SOC, SOC, Op_RegI, 20, r20->as_VMReg());
  144 reg_def R20_H(SOC, SOC, Op_RegI, 20, r20->as_VMReg()->next());
  145 
  146 reg_def R21  (SOC, SOC, Op_RegI, 21, r21->as_VMReg());
  147 reg_def R21_H(SOC, SOC, Op_RegI, 21, r21->as_VMReg()->next());
  148 
  149 reg_def R22  (SOC, SOC, Op_RegI, 22, r22->as_VMReg());
  150 reg_def R22_H(SOC, SOC, Op_RegI, 22, r22->as_VMReg()->next());
  151 
  152 reg_def R23  (SOC, SOC, Op_RegI, 23, r23->as_VMReg());
  153 reg_def R23_H(SOC, SOC, Op_RegI, 23, r23->as_VMReg()->next());
  154 
  155 reg_def R24  (SOC, SOC, Op_RegI, 24, r24->as_VMReg());
  156 reg_def R24_H(SOC, SOC, Op_RegI, 24, r24->as_VMReg()->next());
  157 
  158 reg_def R25  (SOC, SOC, Op_RegI, 25, r25->as_VMReg());
  159 reg_def R25_H(SOC, SOC, Op_RegI, 25, r25->as_VMReg()->next());
  160 
  161 reg_def R26  (SOC, SOC, Op_RegI, 26, r26->as_VMReg());
  162 reg_def R26_H(SOC, SOC, Op_RegI, 26, r26->as_VMReg()->next());
  163 
  164 reg_def R27  (SOC, SOC, Op_RegI, 27, r27->as_VMReg());
  165 reg_def R27_H(SOC, SOC, Op_RegI, 27, r27->as_VMReg()->next());
  166 
  167 reg_def R28  (SOC, SOC, Op_RegI, 28, r28->as_VMReg());
  168 reg_def R28_H(SOC, SOC, Op_RegI, 28, r28->as_VMReg()->next());
  169 
  170 reg_def R29  (SOC, SOC, Op_RegI, 29, r29->as_VMReg());
  171 reg_def R29_H(SOC, SOC, Op_RegI, 29, r29->as_VMReg()->next());
  172 
  173 reg_def R30  (SOC, SOC, Op_RegI, 30, r30->as_VMReg());
  174 reg_def R30_H(SOC, SOC, Op_RegI, 30, r30->as_VMReg()->next());
  175 
  176 reg_def R31  (SOC, SOC, Op_RegI, 31, r31->as_VMReg());
  177 reg_def R31_H(SOC, SOC, Op_RegI, 31, r31->as_VMReg()->next());
  178 
  179 // Floating Point Registers
  180 
  181 // Specify priority of register selection within phases of register
  182 // allocation.  Highest priority is first.  A useful heuristic is to
  183 // give registers a low priority when they are required by machine
  184 // instructions, like EAX and EDX on I486, and choose no-save registers
  185 // before save-on-call, & save-on-call before save-on-entry.  Registers
  186 // which participate in fixed calling sequences should come last.
  187 // Registers which are used as pairs must fall on an even boundary.
  188 
  // chunk0: the allocation class for the general registers.  Each of the 32
  // general registers defined above appears exactly once, immediately followed
  // by its _H companion, so every pair occupies two adjacent positions.
  // The list starts with R10/R11 and ends with RSP.
  189 alloc_class chunk0(R10,         R10_H,
  190                    R11,         R11_H,
  191                    R8,          R8_H,
  192                    R9,          R9_H,
  193                    R12,         R12_H,
  194                    RCX,         RCX_H,
  195                    RBX,         RBX_H,
  196                    RDI,         RDI_H,
  197                    RDX,         RDX_H,
  198                    RSI,         RSI_H,
  199                    RAX,         RAX_H,
  200                    RBP,         RBP_H,
  201                    R13,         R13_H,
  202                    R14,         R14_H,
  203                    R15,         R15_H,
  204                    R16,         R16_H,
  205                    R17,         R17_H,
  206                    R18,         R18_H,
  207                    R19,         R19_H,
  208                    R20,         R20_H,
  209                    R21,         R21_H,
  210                    R22,         R22_H,
  211                    R23,         R23_H,
  212                    R24,         R24_H,
  213                    R25,         R25_H,
  214                    R26,         R26_H,
  215                    R27,         R27_H,
  216                    R28,         R28_H,
  217                    R29,         R29_H,
  218                    R30,         R30_H,
  219                    R31,         R31_H,
  220                    RSP,         RSP_H);
  221 
  222 // XMM registers.  512-bit registers of 16 32-bit words each, labeled (a)-p.
  223 // Word a in each register holds a Float, words ab hold a Double.
  224 // The whole registers are used in SSE4.2 version intrinsics,
  225 // array copy stubs and superword operations (see UseSSE42Intrinsics,
  226 // UseXMMForArrayCopy and UseSuperword flags).
  227 // For pre EVEX enabled architectures:
  228 //      XMM8-XMM15 must be encoded with REX (VEX for UseAVX)
  229 // For EVEX enabled architectures:
  230 //      XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
  231 //
  232 // Linux ABI:   No register preserved across function calls
  233 //              XMM0-XMM7 might hold parameters
  234 // Windows ABI: XMM6-XMM15 preserved across function calls
  235 //              XMM0-XMM3 might hold parameters
  236 
  // XMM0: sixteen reg_def slots, XMM0 plus XMM0b..XMM0p.  All are SOC/SOC,
  // Op_RegF, encoding 0.  The unsuffixed slot maps to xmm0->as_VMReg(); the
  // suffixed slot at position k (b = 1 .. p = 15) maps to ->next(k).
  237 reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
  238 reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
  239 reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
  240 reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
  241 reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
  242 reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
  243 reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
  244 reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
  245 reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8));
  246 reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9));
  247 reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10));
  248 reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11));
  249 reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12));
  250 reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13));
  251 reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14));
  252 reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15));
  253 
  // XMM1: sixteen reg_def slots, XMM1 plus XMM1b..XMM1p.  All are SOC/SOC,
  // Op_RegF, encoding 1.  The unsuffixed slot maps to xmm1->as_VMReg(); the
  // suffixed slot at position k (b = 1 .. p = 15) maps to ->next(k).
  254 reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
  255 reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
  256 reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
  257 reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
  258 reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
  259 reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
  260 reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
  261 reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
  262 reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8));
  263 reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9));
  264 reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10));
  265 reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11));
  266 reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12));
  267 reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13));
  268 reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14));
  269 reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15));
  270 
  // XMM2: sixteen reg_def slots, XMM2 plus XMM2b..XMM2p.  All are SOC/SOC,
  // Op_RegF, encoding 2.  The unsuffixed slot maps to xmm2->as_VMReg(); the
  // suffixed slot at position k (b = 1 .. p = 15) maps to ->next(k).
  271 reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
  272 reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
  273 reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
  274 reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
  275 reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
  276 reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
  277 reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
  278 reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
  279 reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8));
  280 reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9));
  281 reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10));
  282 reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11));
  283 reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12));
  284 reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13));
  285 reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14));
  286 reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15));
  287 
  // XMM3: sixteen reg_def slots, XMM3 plus XMM3b..XMM3p.  All are SOC/SOC,
  // Op_RegF, encoding 3.  The unsuffixed slot maps to xmm3->as_VMReg(); the
  // suffixed slot at position k (b = 1 .. p = 15) maps to ->next(k).
  288 reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
  289 reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
  290 reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
  291 reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
  292 reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
  293 reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
  294 reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
  295 reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
  296 reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8));
  297 reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9));
  298 reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10));
  299 reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11));
  300 reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12));
  301 reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13));
  302 reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14));
  303 reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15));
  304 
  // XMM4: sixteen reg_def slots, XMM4 plus XMM4b..XMM4p.  All are SOC/SOC,
  // Op_RegF, encoding 4.  The unsuffixed slot maps to xmm4->as_VMReg(); the
  // suffixed slot at position k (b = 1 .. p = 15) maps to ->next(k).
  305 reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
  306 reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
  307 reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
  308 reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
  309 reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
  310 reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
  311 reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
  312 reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
  313 reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8));
  314 reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9));
  315 reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10));
  316 reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11));
  317 reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12));
  318 reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13));
  319 reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14));
  320 reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15));
  321 
  // XMM5: sixteen reg_def slots, XMM5 plus XMM5b..XMM5p.  All are SOC/SOC,
  // Op_RegF, encoding 5.  The unsuffixed slot maps to xmm5->as_VMReg(); the
  // suffixed slot at position k (b = 1 .. p = 15) maps to ->next(k).
  322 reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
  323 reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
  324 reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
  325 reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
  326 reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
  327 reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
  328 reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
  329 reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
  330 reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8));
  331 reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9));
  332 reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10));
  333 reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11));
  334 reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12));
  335 reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13));
  336 reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14));
  337 reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15));
  338 
  // XMM6: sixteen reg_def slots, XMM6 plus XMM6b..XMM6p.  All are SOC/SOC,
  // Op_RegF, encoding 6.  The unsuffixed slot maps to xmm6->as_VMReg(); the
  // suffixed slot at position k (b = 1 .. p = 15) maps to ->next(k).
  339 reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
  340 reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
  341 reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
  342 reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
  343 reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
  344 reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
  345 reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
  346 reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
  347 reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8));
  348 reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9));
  349 reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10));
  350 reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11));
  351 reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12));
  352 reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13));
  353 reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14));
  354 reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15));
  355 
  // XMM7: sixteen reg_def slots, XMM7 plus XMM7b..XMM7p.  All are SOC/SOC,
  // Op_RegF, encoding 7.  The unsuffixed slot maps to xmm7->as_VMReg(); the
  // suffixed slot at position k (b = 1 .. p = 15) maps to ->next(k).
  356 reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
  357 reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
  358 reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
  359 reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
  360 reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
  361 reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
  362 reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
  363 reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
  364 reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8));
  365 reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9));
  366 reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10));
  367 reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11));
  368 reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12));
  369 reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13));
  370 reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14));
  371 reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15));
  372 
  // XMM8: sixteen reg_def slots, XMM8 plus XMM8b..XMM8p.  All are SOC/SOC,
  // Op_RegF, encoding 8.  The unsuffixed slot maps to xmm8->as_VMReg(); the
  // suffixed slot at position k (b = 1 .. p = 15) maps to ->next(k).
  373 reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
  374 reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
  375 reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
  376 reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
  377 reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
  378 reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
  379 reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
  380 reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
  381 reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8));
  382 reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9));
  383 reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10));
  384 reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11));
  385 reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12));
  386 reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13));
  387 reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14));
  388 reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15));
  389 
  // XMM9: sixteen reg_def slots, XMM9 plus XMM9b..XMM9p.  All are SOC/SOC,
  // Op_RegF, encoding 9.  The unsuffixed slot maps to xmm9->as_VMReg(); the
  // suffixed slot at position k (b = 1 .. p = 15) maps to ->next(k).
  390 reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
  391 reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
  392 reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
  393 reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
  394 reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
  395 reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
  396 reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
  397 reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
  398 reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8));
  399 reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9));
  400 reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10));
  401 reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11));
  402 reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12));
  403 reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13));
  404 reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14));
  405 reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15));
  406 
  // XMM10: sixteen reg_def slots, XMM10 plus XMM10b..XMM10p.  All are SOC/SOC,
  // Op_RegF, encoding 10.  The unsuffixed slot maps to xmm10->as_VMReg(); the
  // suffixed slot at position k (b = 1 .. p = 15) maps to ->next(k).
  407 reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
  408 reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
  409 reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
  410 reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
  411 reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
  412 reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
  413 reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
  414 reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
  415 reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8));
  416 reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9));
  417 reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10));
  418 reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11));
  419 reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12));
  420 reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13));
  421 reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14));
  422 reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15));
  423 
  // XMM11: sixteen reg_def slots, XMM11 plus XMM11b..XMM11p.  All are SOC/SOC,
  // Op_RegF, encoding 11.  The unsuffixed slot maps to xmm11->as_VMReg(); the
  // suffixed slot at position k (b = 1 .. p = 15) maps to ->next(k).
  424 reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
  425 reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
  426 reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
  427 reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
  428 reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
  429 reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
  430 reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
  431 reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
  432 reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8));
  433 reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9));
  434 reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10));
  435 reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11));
  436 reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12));
  437 reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13));
  438 reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14));
  439 reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15));
  440 
  // XMM12: sixteen reg_def slots, XMM12 plus XMM12b..XMM12p.  All are SOC/SOC,
  // Op_RegF, encoding 12.  The unsuffixed slot maps to xmm12->as_VMReg(); the
  // suffixed slot at position k (b = 1 .. p = 15) maps to ->next(k).
  441 reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
  442 reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
  443 reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
  444 reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
  445 reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
  446 reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
  447 reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
  448 reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
  449 reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8));
  450 reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9));
  451 reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10));
  452 reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11));
  453 reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12));
  454 reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13));
  455 reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14));
  456 reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15));
  457 
  // XMM13: sixteen reg_def slots, XMM13 plus XMM13b..XMM13p.  All are SOC/SOC,
  // Op_RegF, encoding 13.  The unsuffixed slot maps to xmm13->as_VMReg(); the
  // suffixed slot at position k (b = 1 .. p = 15) maps to ->next(k).
  458 reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
  459 reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
  460 reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
  461 reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
  462 reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
  463 reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
  464 reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
  465 reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
  466 reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8));
  467 reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9));
  468 reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10));
  469 reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11));
  470 reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12));
  471 reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13));
  472 reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14));
  473 reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15));
  474 
  // XMM14: sixteen reg_def slots, XMM14 plus XMM14b..XMM14p.  All are SOC/SOC,
  // Op_RegF, encoding 14.  The unsuffixed slot maps to xmm14->as_VMReg(); the
  // suffixed slot at position k (b = 1 .. p = 15) maps to ->next(k).
  475 reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
  476 reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
  477 reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
  478 reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
  479 reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
  480 reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
  481 reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
  482 reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
  483 reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8));
  484 reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9));
  485 reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10));
  486 reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11));
  487 reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12));
  488 reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13));
  489 reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14));
  490 reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15));
  491 
  // XMM15: sixteen reg_def slots, XMM15 plus XMM15b..XMM15p.  All are SOC/SOC,
  // Op_RegF, encoding 15.  The unsuffixed slot maps to xmm15->as_VMReg(); the
  // suffixed slot at position k (b = 1 .. p = 15) maps to ->next(k).
  492 reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
  493 reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
  494 reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
  495 reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
  496 reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
  497 reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
  498 reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
  499 reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
  500 reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8));
  501 reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9));
  502 reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10));
  503 reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11));
  504 reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12));
  505 reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13));
  506 reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14));
  507 reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15));
  508 
  // XMM16: sixteen reg_def slots, XMM16 plus XMM16b..XMM16p.  All are SOC/SOC,
  // Op_RegF, encoding 16.  The unsuffixed slot maps to xmm16->as_VMReg(); the
  // suffixed slot at position k (b = 1 .. p = 15) maps to ->next(k).
  509 reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg());
  510 reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1));
  511 reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2));
  512 reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3));
  513 reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4));
  514 reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5));
  515 reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6));
  516 reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7));
  517 reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8));
  518 reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9));
  519 reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10));
  520 reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11));
  521 reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12));
  522 reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13));
  523 reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14));
  524 reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15));
  525 
  // XMM17: sixteen reg_def slots, XMM17 plus XMM17b..XMM17p.  All are SOC/SOC,
  // Op_RegF, encoding 17.  The unsuffixed slot maps to xmm17->as_VMReg(); the
  // suffixed slot at position k (b = 1 .. p = 15) maps to ->next(k).
  526 reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg());
  527 reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1));
  528 reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2));
  529 reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3));
  530 reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4));
  531 reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5));
  532 reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6));
  533 reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7));
  534 reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8));
  535 reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9));
  536 reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10));
  537 reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11));
  538 reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12));
  539 reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13));
  540 reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14));
  541 reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15));
  542 
      // XMM18: 16 entries (XMM18, XMM18b..XMM18p), all with encoding 18.
      // The first maps to xmm18->as_VMReg(), the rest to ->next(1)..->next(15).
      // Every entry is SOC for both save types with ideal type Op_RegF.
  543 reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg());
  544 reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1));
  545 reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2));
  546 reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3));
  547 reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4));
  548 reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5));
  549 reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6));
  550 reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7));
  551 reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8));
  552 reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9));
  553 reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10));
  554 reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11));
  555 reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12));
  556 reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13));
  557 reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14));
  558 reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15));
  559 
      // XMM19: 16 entries (XMM19, XMM19b..XMM19p), all with encoding 19.
      // The first maps to xmm19->as_VMReg(), the rest to ->next(1)..->next(15).
      // Every entry is SOC for both save types with ideal type Op_RegF.
  560 reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg());
  561 reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1));
  562 reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2));
  563 reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3));
  564 reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4));
  565 reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5));
  566 reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6));
  567 reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7));
  568 reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8));
  569 reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9));
  570 reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10));
  571 reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11));
  572 reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12));
  573 reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13));
  574 reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14));
  575 reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15));
  576 
      // XMM20: 16 entries (XMM20, XMM20b..XMM20p), all with encoding 20.
      // The first maps to xmm20->as_VMReg(), the rest to ->next(1)..->next(15).
      // Every entry is SOC for both save types with ideal type Op_RegF.
  577 reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg());
  578 reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1));
  579 reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2));
  580 reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3));
  581 reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4));
  582 reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5));
  583 reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6));
  584 reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7));
  585 reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8));
  586 reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9));
  587 reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10));
  588 reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11));
  589 reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12));
  590 reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13));
  591 reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14));
  592 reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15));
  593 
      // XMM21: 16 entries (XMM21, XMM21b..XMM21p), all with encoding 21.
      // The first maps to xmm21->as_VMReg(), the rest to ->next(1)..->next(15).
      // Every entry is SOC for both save types with ideal type Op_RegF.
  594 reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg());
  595 reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1));
  596 reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2));
  597 reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3));
  598 reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4));
  599 reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5));
  600 reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6));
  601 reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7));
  602 reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8));
  603 reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9));
  604 reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10));
  605 reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11));
  606 reg_def XMM21m( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12));
  607 reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13));
  608 reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14));
  609 reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15));
  610 
      // XMM22: 16 entries (XMM22, XMM22b..XMM22p), all with encoding 22.
      // The first maps to xmm22->as_VMReg(), the rest to ->next(1)..->next(15).
      // Every entry is SOC for both save types with ideal type Op_RegF.
  611 reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg());
  612 reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1));
  613 reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2));
  614 reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3));
  615 reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4));
  616 reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5));
  617 reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6));
  618 reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7));
  619 reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8));
  620 reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9));
  621 reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10));
  622 reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11));
  623 reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12));
  624 reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13));
  625 reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14));
  626 reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15));
  627 
      // XMM23: 16 entries (XMM23, XMM23b..XMM23p), all with encoding 23.
      // The first maps to xmm23->as_VMReg(), the rest to ->next(1)..->next(15).
      // Every entry is SOC for both save types with ideal type Op_RegF.
  628 reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg());
  629 reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1));
  630 reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2));
  631 reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3));
  632 reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4));
  633 reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5));
  634 reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6));
  635 reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7));
  636 reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8));
  637 reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9));
  638 reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10));
  639 reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11));
  640 reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12));
  641 reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13));
  642 reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14));
  643 reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15));
  644 
      // XMM24: 16 entries (XMM24, XMM24b..XMM24p), all with encoding 24.
      // The first maps to xmm24->as_VMReg(), the rest to ->next(1)..->next(15).
      // Every entry is SOC for both save types with ideal type Op_RegF.
  645 reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg());
  646 reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1));
  647 reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2));
  648 reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3));
  649 reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4));
  650 reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5));
  651 reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6));
  652 reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7));
  653 reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8));
  654 reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9));
  655 reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10));
  656 reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11));
  657 reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12));
  658 reg_def XMM24n( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13));
  659 reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14));
  660 reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15));
  661 
      // XMM25: 16 entries (XMM25, XMM25b..XMM25p), all with encoding 25.
      // The first maps to xmm25->as_VMReg(), the rest to ->next(1)..->next(15).
      // Every entry is SOC for both save types with ideal type Op_RegF.
  662 reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg());
  663 reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1));
  664 reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2));
  665 reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3));
  666 reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4));
  667 reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5));
  668 reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6));
  669 reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7));
  670 reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8));
  671 reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9));
  672 reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10));
  673 reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11));
  674 reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12));
  675 reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13));
  676 reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14));
  677 reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15));
  678 
      // XMM26: 16 entries (XMM26, XMM26b..XMM26p), all with encoding 26.
      // The first maps to xmm26->as_VMReg(), the rest to ->next(1)..->next(15).
      // Every entry is SOC for both save types with ideal type Op_RegF.
  679 reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg());
  680 reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1));
  681 reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2));
  682 reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3));
  683 reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4));
  684 reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5));
  685 reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6));
  686 reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7));
  687 reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8));
  688 reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9));
  689 reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10));
  690 reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11));
  691 reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12));
  692 reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13));
  693 reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14));
  694 reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15));
  695 
      // XMM27: 16 entries (XMM27, XMM27b..XMM27p), all with encoding 27.
      // The first maps to xmm27->as_VMReg(), the rest to ->next(1)..->next(15).
      // Every entry is SOC for both save types with ideal type Op_RegF.
  696 reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg());
  697 reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1));
  698 reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2));
  699 reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3));
  700 reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4));
  701 reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5));
  702 reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6));
  703 reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7));
  704 reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8));
  705 reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9));
  706 reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10));
  707 reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11));
  708 reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12));
  709 reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13));
  710 reg_def XMM27o( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14));
  711 reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15));
  712 
      // XMM28: 16 entries (XMM28, XMM28b..XMM28p), all with encoding 28.
      // The first maps to xmm28->as_VMReg(), the rest to ->next(1)..->next(15).
      // Every entry is SOC for both save types with ideal type Op_RegF.
  713 reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg());
  714 reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1));
  715 reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2));
  716 reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3));
  717 reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4));
  718 reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5));
  719 reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6));
  720 reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7));
  721 reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8));
  722 reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9));
  723 reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10));
  724 reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11));
  725 reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12));
  726 reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13));
  727 reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14));
  728 reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15));
  729 
      // XMM29: 16 entries (XMM29, XMM29b..XMM29p), all with encoding 29.
      // The first maps to xmm29->as_VMReg(), the rest to ->next(1)..->next(15).
      // Every entry is SOC for both save types with ideal type Op_RegF.
  730 reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg());
  731 reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1));
  732 reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2));
  733 reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3));
  734 reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4));
  735 reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5));
  736 reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6));
  737 reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7));
  738 reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8));
  739 reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9));
  740 reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10));
  741 reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11));
  742 reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12));
  743 reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13));
  744 reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14));
  745 reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15));
  746 
      // XMM30: 16 entries (XMM30, XMM30b..XMM30p), all with encoding 30.
      // The first maps to xmm30->as_VMReg(), the rest to ->next(1)..->next(15).
      // Every entry is SOC for both save types with ideal type Op_RegF.
  747 reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg());
  748 reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1));
  749 reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2));
  750 reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3));
  751 reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4));
  752 reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5));
  753 reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6));
  754 reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7));
  755 reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8));
  756 reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9));
  757 reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10));
  758 reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11));
  759 reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12));
  760 reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13));
  761 reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14));
  762 reg_def XMM30p( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15));
  763 
      // XMM31: 16 entries (XMM31, XMM31b..XMM31p), all with encoding 31.
      // The first maps to xmm31->as_VMReg(), the rest to ->next(1)..->next(15).
      // Every entry is SOC for both save types with ideal type Op_RegF.
  764 reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg());
  765 reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1));
  766 reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2));
  767 reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3));
  768 reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4));
  769 reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5));
  770 reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6));
  771 reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7));
  772 reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8));
  773 reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9));
  774 reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10));
  775 reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11));
  776 reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12));
  777 reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13));
  778 reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14));
  779 reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15));
  780 
      // Flags register: ideal type 0, encoding 16, and VMRegImpl::Bad() as its
      // concrete register. SOC for both save types.
  781 reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
  782 
  783 // AVX3 Mask Registers.
      // K1 .. K7, each defined as a pair: Kn maps to kn->as_VMReg() and Kn_H to
      // kn->as_VMReg()->next(). All entries are SOC for both save types, have
      // ideal type Op_RegI, and use the register number (1 .. 7) as encoding.
  784 reg_def K1   (SOC, SOC, Op_RegI,  1, k1->as_VMReg());
  785 reg_def K1_H (SOC, SOC, Op_RegI,  1, k1->as_VMReg()->next());
  786 
  787 reg_def K2   (SOC, SOC, Op_RegI,  2, k2->as_VMReg());
  788 reg_def K2_H (SOC, SOC, Op_RegI,  2, k2->as_VMReg()->next());
  789 
  790 reg_def K3   (SOC, SOC, Op_RegI,  3, k3->as_VMReg());
  791 reg_def K3_H (SOC, SOC, Op_RegI,  3, k3->as_VMReg()->next());
  792 
  793 reg_def K4   (SOC, SOC, Op_RegI,  4, k4->as_VMReg());
  794 reg_def K4_H (SOC, SOC, Op_RegI,  4, k4->as_VMReg()->next());
  795 
  796 reg_def K5   (SOC, SOC, Op_RegI,  5, k5->as_VMReg());
  797 reg_def K5_H (SOC, SOC, Op_RegI,  5, k5->as_VMReg()->next());
  798 
  799 reg_def K6   (SOC, SOC, Op_RegI,  6, k6->as_VMReg());
  800 reg_def K6_H (SOC, SOC, Op_RegI,  6, k6->as_VMReg()->next());
  801 
  802 reg_def K7   (SOC, SOC, Op_RegI,  7, k7->as_VMReg());
  803 reg_def K7_H (SOC, SOC, Op_RegI,  7, k7->as_VMReg()->next());
  804 
  805 
  806 //----------Architecture Description Register Classes--------------------------
  807 // Several register classes are automatically defined based upon information in
  808 // this architecture description.
  809 // 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
  810 // 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
  811 //
  812 
  813 // Empty register class: no_reg is declared with an empty register list.
  814 reg_class no_reg();
  815 
  816 // Class for all pointer/long registers including APX extended GPRs.
      // Every register is listed as a base/_H pair: RAX .. R15 (RSP included),
      // followed by R16 .. R31.
  817 reg_class all_reg(RAX, RAX_H,
  818                   RDX, RDX_H,
  819                   RBP, RBP_H,
  820                   RDI, RDI_H,
  821                   RSI, RSI_H,
  822                   RCX, RCX_H,
  823                   RBX, RBX_H,
  824                   RSP, RSP_H,
  825                   R8,  R8_H,
  826                   R9,  R9_H,
  827                   R10, R10_H,
  828                   R11, R11_H,
  829                   R12, R12_H,
  830                   R13, R13_H,
  831                   R14, R14_H,
  832                   R15, R15_H,
  833                   R16, R16_H,
  834                   R17, R17_H,
  835                   R18, R18_H,
  836                   R19, R19_H,
  837                   R20, R20_H,
  838                   R21, R21_H,
  839                   R22, R22_H,
  840                   R23, R23_H,
  841                   R24, R24_H,
  842                   R25, R25_H,
  843                   R26, R26_H,
  844                   R27, R27_H,
  845                   R28, R28_H,
  846                   R29, R29_H,
  847                   R30, R30_H,
  848                   R31, R31_H);
  849 
  850 // Class for all int registers including APX extended GPRs.
      // Lists base registers only (no _H halves). RSP and R15 do not appear in
      // this list, although all_reg includes both.
      // NOTE(review): presumably omitted because they serve as the stack pointer
      // and the TLS pointer (see ptr_rsp_reg / ptr_r15_reg) - confirm.
  851 reg_class all_int_reg(RAX,
  852                       RDX,
  853                       RBP,
  854                       RDI,
  855                       RSI,
  856                       RCX,
  857                       RBX,
  858                       R8,
  859                       R9,
  860                       R10,
  861                       R11,
  862                       R12,
  863                       R13,
  864                       R14,
  865                       R16,
  866                       R17,
  867                       R18,
  868                       R19,
  869                       R20,
  870                       R21,
  871                       R22,
  872                       R23,
  873                       R24,
  874                       R25,
  875                       R26,
  876                       R27,
  877                       R28,
  878                       R29,
  879                       R30,
  880                       R31);
  881 
  882 // Class for all pointer registers
      // No register list is given: the body returns _ANY_REG_mask, which is
      // defined outside this section.
  883 reg_class any_reg %{
  884   return _ANY_REG_mask;
  885 %}
  886 
  887 // Class for all pointer registers (excluding RSP)
      // The body returns _PTR_REG_mask; the exclusion is a property of that
      // mask, which is built outside this section.
  888 reg_class ptr_reg %{
  889   return _PTR_REG_mask;
  890 %}
  891 
  892 // Class for all pointer registers (excluding RSP and RBP)
      // The body returns _PTR_REG_NO_RBP_mask; the exclusions are a property of
      // that mask, which is built outside this section.
  893 reg_class ptr_reg_no_rbp %{
  894   return _PTR_REG_NO_RBP_mask;
  895 %}
  896 
  897 // Class for all pointer registers (excluding RAX and RSP)
      // The body returns _PTR_NO_RAX_REG_mask; the exclusions are a property of
      // that mask, which is built outside this section.
  898 reg_class ptr_no_rax_reg %{
  899   return _PTR_NO_RAX_REG_mask;
  900 %}
  901 
  902 // Class for all pointer registers (excluding RAX, RBX, and RSP)
      // The body returns _PTR_NO_RAX_RBX_REG_mask; the exclusions are a property
      // of that mask, which is built outside this section.
  903 reg_class ptr_no_rax_rbx_reg %{
  904   return _PTR_NO_RAX_RBX_REG_mask;
  905 %}
  906 
  907 // Class for all long registers (excluding RSP)
      // The body returns _LONG_REG_mask; the exclusion is a property of that
      // mask, which is built outside this section.
  908 reg_class long_reg %{
  909   return _LONG_REG_mask;
  910 %}
  911 
  912 // Class for all long registers (excluding RAX, RDX and RSP)
      // The body returns _LONG_NO_RAX_RDX_REG_mask; the exclusions are a
      // property of that mask, which is built outside this section.
  913 reg_class long_no_rax_rdx_reg %{
  914   return _LONG_NO_RAX_RDX_REG_mask;
  915 %}
  916 
  917 // Class for all long registers (excluding RCX and RSP)
      // The body returns _LONG_NO_RCX_REG_mask; the exclusions are a property of
      // that mask, which is built outside this section.
  918 reg_class long_no_rcx_reg %{
  919   return _LONG_NO_RCX_REG_mask;
  920 %}
  921 
  922 // Class for all long registers (excluding RBP and R13)
      // The body returns _LONG_NO_RBP_R13_REG_mask; the exclusions are a
      // property of that mask, which is built outside this section.
  923 reg_class long_no_rbp_r13_reg %{
  924   return _LONG_NO_RBP_R13_REG_mask;
  925 %}
  926 
  927 // Class for all int registers (excluding RSP)
      // The body returns _INT_REG_mask; the exclusion is a property of that
      // mask, which is built outside this section.
  928 reg_class int_reg %{
  929   return _INT_REG_mask;
  930 %}
  931 
  932 // Class for all int registers (excluding RAX, RDX, and RSP)
      // The body returns _INT_NO_RAX_RDX_REG_mask; the exclusions are a property
      // of that mask, which is built outside this section.
  933 reg_class int_no_rax_rdx_reg %{
  934   return _INT_NO_RAX_RDX_REG_mask;
  935 %}
  936 
  937 // Class for all int registers (excluding RCX and RSP)
      // The body returns _INT_NO_RCX_REG_mask; the exclusions are a property of
      // that mask, which is built outside this section.
  938 reg_class int_no_rcx_reg %{
  939   return _INT_NO_RCX_REG_mask;
  940 %}
  941 
  942 // Class for all int registers (excluding RBP and R13)
      // The body returns _INT_NO_RBP_R13_REG_mask; the exclusions are a property
      // of that mask, which is built outside this section.
  943 reg_class int_no_rbp_r13_reg %{
  944   return _INT_NO_RBP_R13_REG_mask;
  945 %}
  946 
  947 // Singleton class for RAX pointer register: the pair RAX, RAX_H only.
  948 reg_class ptr_rax_reg(RAX, RAX_H);
  949 
  950 // Singleton class for RBX pointer register: the pair RBX, RBX_H only.
  951 reg_class ptr_rbx_reg(RBX, RBX_H);
  952 
  953 // Singleton class for RSI pointer register: the pair RSI, RSI_H only.
  954 reg_class ptr_rsi_reg(RSI, RSI_H);
  955 
  956 // Singleton class for RBP pointer register: the pair RBP, RBP_H only.
  957 reg_class ptr_rbp_reg(RBP, RBP_H);
  958 
  959 // Singleton class for RDI pointer register: the pair RDI, RDI_H only.
  960 reg_class ptr_rdi_reg(RDI, RDI_H);
  961 
  962 // Singleton class for stack pointer: the pair RSP, RSP_H only.
  963 reg_class ptr_rsp_reg(RSP, RSP_H);
  964 
  965 // Singleton class for TLS pointer: the pair R15, R15_H only.
  966 reg_class ptr_r15_reg(R15, R15_H);
  967 
  968 // Singleton class for RAX long register: the pair RAX, RAX_H only
      // (the same register list as ptr_rax_reg).
  969 reg_class long_rax_reg(RAX, RAX_H);
  970 
  971 // Singleton class for RCX long register: the pair RCX, RCX_H only.
  972 reg_class long_rcx_reg(RCX, RCX_H);
  973 
  974 // Singleton class for RDX long register: the pair RDX, RDX_H only.
  975 reg_class long_rdx_reg(RDX, RDX_H);
  976 
  977 // Singleton class for R11 long register: the pair R11, R11_H only.
  978 reg_class long_r11_reg(R11, R11_H);
  979 
  980 // Singleton class for RAX int register: RAX alone, without RAX_H.
  981 reg_class int_rax_reg(RAX);
  982 
  983 // Singleton class for RBX int register: RBX alone, without RBX_H.
  984 reg_class int_rbx_reg(RBX);
  985 
  986 // Singleton class for RCX int register: RCX alone, without RCX_H.
  987 reg_class int_rcx_reg(RCX);
  988 
  989 // Singleton class for RDX int register: RDX alone, without RDX_H.
  990 reg_class int_rdx_reg(RDX);
  991 
  992 // Singleton class for RDI int register: RDI alone, without RDI_H.
  993 reg_class int_rdi_reg(RDI);
  994 
  995 // Singleton class for instruction pointer
  996 // reg_class ip_reg(RIP);
  997 
      // Allocation class chunk1: every XMM entry, XMM0 .. XMM31, one row per
      // register with its 16 entries in base, b .. p order.
      // NOTE(review): the order of entries in an alloc_class is understood to
      // matter to the register allocator - confirm before reordering.
  998 alloc_class chunk1(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
  999                    XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
 1000                    XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
 1001                    XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
 1002                    XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
 1003                    XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
 1004                    XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
 1005                    XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p,
 1006                    XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
 1007                    XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
 1008                    XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
 1009                    XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
 1010                    XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
 1011                    XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
 1012                    XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
 1013                    XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
 1014                    XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
 1015                    XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
 1016                    XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
 1017                    XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
 1018                    XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
 1019                    XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
 1020                    XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
 1021                    XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
 1022                    XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
 1023                    XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
 1024                    XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
 1025                    XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
 1026                    XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
 1027                    XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
 1028                    XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
 1029                    XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);
 1030 
      // Allocation class chunk2: the mask register pairs, listed from K7 down
      // to K1 (the reverse of the order in which they are defined).
 1031 alloc_class chunk2(K7, K7_H,
 1032                    K6, K6_H,
 1033                    K5, K5_H,
 1034                    K4, K4_H,
 1035                    K3, K3_H,
 1036                    K2, K2_H,
 1037                    K1, K1_H);
 1038 
      // Register class holding every mask register pair, K1 through K7.
 1039 reg_class vectmask_reg(K1, K1_H,
 1040                        K2, K2_H,
 1041                        K3, K3_H,
 1042                        K4, K4_H,
 1043                        K5, K5_H,
 1044                        K6, K6_H,
 1045                        K7, K7_H);
 1046 
      // One singleton class per mask register: vectmask_reg_Kn contains only
      // the pair Kn, Kn_H.
 1047 reg_class vectmask_reg_K1(K1, K1_H);
 1048 reg_class vectmask_reg_K2(K2, K2_H);
 1049 reg_class vectmask_reg_K3(K3, K3_H);
 1050 reg_class vectmask_reg_K4(K4, K4_H);
 1051 reg_class vectmask_reg_K5(K5, K5_H);
 1052 reg_class vectmask_reg_K6(K6, K6_H);
 1053 reg_class vectmask_reg_K7(K7, K7_H);
 1054 
 1055 // flags allocation class should be last. chunk3 contains RFLAGS only.
 1056 alloc_class chunk3(RFLAGS);
 1057 
 1058 // Singleton class for condition codes: RFLAGS only.
 1059 reg_class int_flags(RFLAGS);
 1060 
 1061 // Pre-EVEX float class: the base entry of each of XMM0 through XMM15.
 1062 reg_class float_reg_legacy(XMM0,
 1063                            XMM1,
 1064                            XMM2,
 1065                            XMM3,
 1066                            XMM4,
 1067                            XMM5,
 1068                            XMM6,
 1069                            XMM7,
 1070                            XMM8,
 1071                            XMM9,
 1072                            XMM10,
 1073                            XMM11,
 1074                            XMM12,
 1075                            XMM13,
 1076                            XMM14,
 1077                            XMM15);
 1078 
 1079 // EVEX float class: the base entry of each of XMM0 through XMM31.
 1080 reg_class float_reg_evex(XMM0,
 1081                          XMM1,
 1082                          XMM2,
 1083                          XMM3,
 1084                          XMM4,
 1085                          XMM5,
 1086                          XMM6,
 1087                          XMM7,
 1088                          XMM8,
 1089                          XMM9,
 1090                          XMM10,
 1091                          XMM11,
 1092                          XMM12,
 1093                          XMM13,
 1094                          XMM14,
 1095                          XMM15,
 1096                          XMM16,
 1097                          XMM17,
 1098                          XMM18,
 1099                          XMM19,
 1100                          XMM20,
 1101                          XMM21,
 1102                          XMM22,
 1103                          XMM23,
 1104                          XMM24,
 1105                          XMM25,
 1106                          XMM26,
 1107                          XMM27,
 1108                          XMM28,
 1109                          XMM29,
 1110                          XMM30,
 1111                          XMM31);
 1112 
      // Dynamic float classes: each names float_reg_evex, float_reg_legacy and
      // a predicate, in that order. float_reg tests supports_evex();
      // float_reg_vl additionally requires supports_avx512vl().
      // NOTE(review): ADLC is understood to select the first class when the
      // predicate holds and the second otherwise - confirm.
 1113 reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} );
 1114 reg_class_dynamic float_reg_vl(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
 1115 
 1116 // Pre-EVEX double class: the first two entries (XMMn, XMMnb) of each of
      // XMM0 through XMM15.
 1117 reg_class double_reg_legacy(XMM0,  XMM0b,
 1118                             XMM1,  XMM1b,
 1119                             XMM2,  XMM2b,
 1120                             XMM3,  XMM3b,
 1121                             XMM4,  XMM4b,
 1122                             XMM5,  XMM5b,
 1123                             XMM6,  XMM6b,
 1124                             XMM7,  XMM7b,
 1125                             XMM8,  XMM8b,
 1126                             XMM9,  XMM9b,
 1127                             XMM10, XMM10b,
 1128                             XMM11, XMM11b,
 1129                             XMM12, XMM12b,
 1130                             XMM13, XMM13b,
 1131                             XMM14, XMM14b,
 1132                             XMM15, XMM15b);
 1133 
 1134 // Class for evex double registers
 1135 reg_class double_reg_evex(XMM0,  XMM0b,
 1136                      XMM1,  XMM1b,
 1137                      XMM2,  XMM2b,
 1138                      XMM3,  XMM3b,
 1139                      XMM4,  XMM4b,
 1140                      XMM5,  XMM5b,
 1141                      XMM6,  XMM6b,
 1142                      XMM7,  XMM7b,
 1143                      XMM8,  XMM8b,
 1144                      XMM9,  XMM9b,
 1145                      XMM10, XMM10b,
 1146                      XMM11, XMM11b,
 1147                      XMM12, XMM12b,
 1148                      XMM13, XMM13b,
 1149                      XMM14, XMM14b,
 1150                      XMM15, XMM15b,
 1151                      XMM16, XMM16b,
 1152                      XMM17, XMM17b,
 1153                      XMM18, XMM18b,
 1154                      XMM19, XMM19b,
 1155                      XMM20, XMM20b,
 1156                      XMM21, XMM21b,
 1157                      XMM22, XMM22b,
 1158                      XMM23, XMM23b,
 1159                      XMM24, XMM24b,
 1160                      XMM25, XMM25b,
 1161                      XMM26, XMM26b,
 1162                      XMM27, XMM27b,
 1163                      XMM28, XMM28b,
 1164                      XMM29, XMM29b,
 1165                      XMM30, XMM30b,
 1166                      XMM31, XMM31b);
 1167 
// Run-time selected double classes: each one resolves to double_reg_evex when
// the predicate inside %{ %} holds and to double_reg_legacy otherwise.
// double_reg_vl additionally requires AVX512VL support.
reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic double_reg_vl(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
 1170 
 1171 // Class for pre evex 32bit vector registers
 1172 reg_class vectors_reg_legacy(XMM0,
 1173                       XMM1,
 1174                       XMM2,
 1175                       XMM3,
 1176                       XMM4,
 1177                       XMM5,
 1178                       XMM6,
 1179                       XMM7,
 1180                       XMM8,
 1181                       XMM9,
 1182                       XMM10,
 1183                       XMM11,
 1184                       XMM12,
 1185                       XMM13,
 1186                       XMM14,
 1187                       XMM15);
 1188 
 1189 // Class for evex 32bit vector registers
 1190 reg_class vectors_reg_evex(XMM0,
 1191                       XMM1,
 1192                       XMM2,
 1193                       XMM3,
 1194                       XMM4,
 1195                       XMM5,
 1196                       XMM6,
 1197                       XMM7,
 1198                       XMM8,
 1199                       XMM9,
 1200                       XMM10,
 1201                       XMM11,
 1202                       XMM12,
 1203                       XMM13,
 1204                       XMM14,
 1205                       XMM15,
 1206                       XMM16,
 1207                       XMM17,
 1208                       XMM18,
 1209                       XMM19,
 1210                       XMM20,
 1211                       XMM21,
 1212                       XMM22,
 1213                       XMM23,
 1214                       XMM24,
 1215                       XMM25,
 1216                       XMM26,
 1217                       XMM27,
 1218                       XMM28,
 1219                       XMM29,
 1220                       XMM30,
 1221                       XMM31);
 1222 
// Run-time selected 32bit vector classes: the evex list is used when the
// predicate inside %{ %} holds, the legacy list otherwise. The _vlbwdq
// variant is keyed on VM_Version::supports_avx512vlbwdq() instead of
// supports_evex().
reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectors_reg_vlbwdq(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
 1225 
// Class for pre evex 64bit vector registers
 1227 reg_class vectord_reg_legacy(XMM0,  XMM0b,
 1228                       XMM1,  XMM1b,
 1229                       XMM2,  XMM2b,
 1230                       XMM3,  XMM3b,
 1231                       XMM4,  XMM4b,
 1232                       XMM5,  XMM5b,
 1233                       XMM6,  XMM6b,
 1234                       XMM7,  XMM7b,
 1235                       XMM8,  XMM8b,
 1236                       XMM9,  XMM9b,
 1237                       XMM10, XMM10b,
 1238                       XMM11, XMM11b,
 1239                       XMM12, XMM12b,
 1240                       XMM13, XMM13b,
 1241                       XMM14, XMM14b,
 1242                       XMM15, XMM15b);
 1243 
// Class for evex 64bit vector registers
 1245 reg_class vectord_reg_evex(XMM0,  XMM0b,
 1246                       XMM1,  XMM1b,
 1247                       XMM2,  XMM2b,
 1248                       XMM3,  XMM3b,
 1249                       XMM4,  XMM4b,
 1250                       XMM5,  XMM5b,
 1251                       XMM6,  XMM6b,
 1252                       XMM7,  XMM7b,
 1253                       XMM8,  XMM8b,
 1254                       XMM9,  XMM9b,
 1255                       XMM10, XMM10b,
 1256                       XMM11, XMM11b,
 1257                       XMM12, XMM12b,
 1258                       XMM13, XMM13b,
 1259                       XMM14, XMM14b,
 1260                       XMM15, XMM15b,
 1261                       XMM16, XMM16b,
 1262                       XMM17, XMM17b,
 1263                       XMM18, XMM18b,
 1264                       XMM19, XMM19b,
 1265                       XMM20, XMM20b,
 1266                       XMM21, XMM21b,
 1267                       XMM22, XMM22b,
 1268                       XMM23, XMM23b,
 1269                       XMM24, XMM24b,
 1270                       XMM25, XMM25b,
 1271                       XMM26, XMM26b,
 1272                       XMM27, XMM27b,
 1273                       XMM28, XMM28b,
 1274                       XMM29, XMM29b,
 1275                       XMM30, XMM30b,
 1276                       XMM31, XMM31b);
 1277 
// Run-time selected 64bit vector classes: the evex list is used when the
// predicate inside %{ %} holds, the legacy list otherwise. The _vlbwdq
// variant is keyed on VM_Version::supports_avx512vlbwdq() instead of
// supports_evex().
reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectord_reg_vlbwdq(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
 1280 
// Class for pre evex 128bit vector registers
 1282 reg_class vectorx_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,
 1283                       XMM1,  XMM1b,  XMM1c,  XMM1d,
 1284                       XMM2,  XMM2b,  XMM2c,  XMM2d,
 1285                       XMM3,  XMM3b,  XMM3c,  XMM3d,
 1286                       XMM4,  XMM4b,  XMM4c,  XMM4d,
 1287                       XMM5,  XMM5b,  XMM5c,  XMM5d,
 1288                       XMM6,  XMM6b,  XMM6c,  XMM6d,
 1289                       XMM7,  XMM7b,  XMM7c,  XMM7d,
 1290                       XMM8,  XMM8b,  XMM8c,  XMM8d,
 1291                       XMM9,  XMM9b,  XMM9c,  XMM9d,
 1292                       XMM10, XMM10b, XMM10c, XMM10d,
 1293                       XMM11, XMM11b, XMM11c, XMM11d,
 1294                       XMM12, XMM12b, XMM12c, XMM12d,
 1295                       XMM13, XMM13b, XMM13c, XMM13d,
 1296                       XMM14, XMM14b, XMM14c, XMM14d,
 1297                       XMM15, XMM15b, XMM15c, XMM15d);
 1298 
// Class for evex 128bit vector registers
 1300 reg_class vectorx_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,
 1301                       XMM1,  XMM1b,  XMM1c,  XMM1d,
 1302                       XMM2,  XMM2b,  XMM2c,  XMM2d,
 1303                       XMM3,  XMM3b,  XMM3c,  XMM3d,
 1304                       XMM4,  XMM4b,  XMM4c,  XMM4d,
 1305                       XMM5,  XMM5b,  XMM5c,  XMM5d,
 1306                       XMM6,  XMM6b,  XMM6c,  XMM6d,
 1307                       XMM7,  XMM7b,  XMM7c,  XMM7d,
 1308                       XMM8,  XMM8b,  XMM8c,  XMM8d,
 1309                       XMM9,  XMM9b,  XMM9c,  XMM9d,
 1310                       XMM10, XMM10b, XMM10c, XMM10d,
 1311                       XMM11, XMM11b, XMM11c, XMM11d,
 1312                       XMM12, XMM12b, XMM12c, XMM12d,
 1313                       XMM13, XMM13b, XMM13c, XMM13d,
 1314                       XMM14, XMM14b, XMM14c, XMM14d,
 1315                       XMM15, XMM15b, XMM15c, XMM15d,
 1316                       XMM16, XMM16b, XMM16c, XMM16d,
 1317                       XMM17, XMM17b, XMM17c, XMM17d,
 1318                       XMM18, XMM18b, XMM18c, XMM18d,
 1319                       XMM19, XMM19b, XMM19c, XMM19d,
 1320                       XMM20, XMM20b, XMM20c, XMM20d,
 1321                       XMM21, XMM21b, XMM21c, XMM21d,
 1322                       XMM22, XMM22b, XMM22c, XMM22d,
 1323                       XMM23, XMM23b, XMM23c, XMM23d,
 1324                       XMM24, XMM24b, XMM24c, XMM24d,
 1325                       XMM25, XMM25b, XMM25c, XMM25d,
 1326                       XMM26, XMM26b, XMM26c, XMM26d,
 1327                       XMM27, XMM27b, XMM27c, XMM27d,
 1328                       XMM28, XMM28b, XMM28c, XMM28d,
 1329                       XMM29, XMM29b, XMM29c, XMM29d,
 1330                       XMM30, XMM30b, XMM30c, XMM30d,
 1331                       XMM31, XMM31b, XMM31c, XMM31d);
 1332 
// Run-time selected 128bit vector classes: the evex list is used when the
// predicate inside %{ %} holds, the legacy list otherwise. The _vlbwdq
// variant is keyed on VM_Version::supports_avx512vlbwdq() instead of
// supports_evex().
reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectorx_reg_vlbwdq(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
 1335 
// Class for pre evex 256bit vector registers
 1337 reg_class vectory_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
 1338                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
 1339                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
 1340                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
 1341                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
 1342                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
 1343                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
 1344                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,
 1345                       XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
 1346                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
 1347                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
 1348                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
 1349                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
 1350                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
 1351                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
 1352                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h);
 1353 
// Class for evex 256bit vector registers
 1355 reg_class vectory_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
 1356                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
 1357                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
 1358                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
 1359                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
 1360                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
 1361                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
 1362                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,
 1363                       XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
 1364                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
 1365                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
 1366                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
 1367                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
 1368                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
 1369                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
 1370                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h,
 1371                       XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h,
 1372                       XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h,
 1373                       XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h,
 1374                       XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h,
 1375                       XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h,
 1376                       XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h,
 1377                       XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h,
 1378                       XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h,
 1379                       XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h,
 1380                       XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h,
 1381                       XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h,
 1382                       XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h,
 1383                       XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h,
 1384                       XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h,
 1385                       XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h,
 1386                       XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h);
 1387 
// Run-time selected 256bit vector classes: the evex list is used when the
// predicate inside %{ %} holds, the legacy list otherwise. The _vlbwdq
// variant is keyed on VM_Version::supports_avx512vlbwdq() instead of
// supports_evex().
reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectory_reg_vlbwdq(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
 1390 
// Class for all 512bit vector registers (XMM0-XMM31)
 1392 reg_class vectorz_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
 1393                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
 1394                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
 1395                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
 1396                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
 1397                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
 1398                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
 1399                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p,
 1400                       XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
 1401                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
 1402                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
 1403                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
 1404                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
 1405                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
 1406                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
 1407                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p,
 1408                       XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
 1409                       XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
 1410                       XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
 1411                       XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
 1412                       XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
 1413                       XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
 1414                       XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
 1415                       XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
 1416                       XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
 1417                       XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
 1418                       XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
 1419                       XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
 1420                       XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
 1421                       XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
 1422                       XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
 1423                       XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);
 1424 
// Class for restricted 512bit vector registers (XMM0-XMM15 only)
 1426 reg_class vectorz_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
 1427                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
 1428                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
 1429                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
 1430                       XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
 1431                       XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
 1432                       XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
 1433                       XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p,
 1434                       XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
 1435                       XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
 1436                       XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
 1437                       XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
 1438                       XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
 1439                       XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
 1440                       XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
 1441                       XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p);
 1442 
// Run-time selected 512bit vector classes: the full XMM0-XMM31 list is used
// when the predicate inside %{ %} holds, the restricted XMM0-XMM15 list
// otherwise. vectorz_reg_vl additionally requires AVX512VL support.
reg_class_dynamic vectorz_reg   (vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectorz_reg_vl(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
 1445 
// Class containing only XMM0, listed with the same four slots per register
// that the 128bit vector classes use.
reg_class xmm0_reg(XMM0, XMM0b, XMM0c, XMM0d);
 1447 
 1448 %}
 1449 
 1450 
 1451 //----------SOURCE BLOCK-------------------------------------------------------
 1452 // This is a block of C++ code which provides values, functions, and
 1453 // definitions necessary in the rest of the architecture description
 1454 
 1455 source_hpp %{
 1456 
 1457 #include "peephole_x86_64.hpp"
 1458 
// Returns true when both bounds of the CastLL node's long type are either
// unbounded (min_jlong / max_jlong) or fit a signed 32-bit immediate.
// Defined in the source block that follows.
bool castLL_is_imm32(const Node* n);
 1460 
 1461 %}
 1462 
 1463 source %{
 1464 
 1465 bool castLL_is_imm32(const Node* n) {
 1466   assert(n->is_CastLL(), "must be a CastLL");
 1467   const TypeLong* t = n->bottom_type()->is_long();
 1468   return (t->_lo == min_jlong || Assembler::is_simm32(t->_lo)) && (t->_hi == max_jlong || Assembler::is_simm32(t->_hi));
 1469 }
 1470 
 1471 %}
 1472 
 1473 // Register masks
 1474 source_hpp %{
 1475 
// Register masks that are filled in at run time by reg_mask_init() (defined
// in the source block below) rather than being fixed by adlc.

// General purpose register masks (pointer, long and int flavours).
extern RegMask _ANY_REG_mask;
extern RegMask _PTR_REG_mask;
extern RegMask _PTR_REG_NO_RBP_mask;
extern RegMask _PTR_NO_RAX_REG_mask;
extern RegMask _PTR_NO_RAX_RBX_REG_mask;
extern RegMask _LONG_REG_mask;
extern RegMask _LONG_NO_RAX_RDX_REG_mask;
extern RegMask _LONG_NO_RCX_REG_mask;
extern RegMask _LONG_NO_RBP_R13_REG_mask;
extern RegMask _INT_REG_mask;
extern RegMask _INT_NO_RAX_RDX_REG_mask;
extern RegMask _INT_NO_RCX_REG_mask;
extern RegMask _INT_NO_RBP_R13_REG_mask;
// Copy of the evex or legacy float mask, chosen by VM_Version::supports_evex().
extern RegMask _FLOAT_REG_mask;

// The corresponding register mask or'ed with STACK_OR_STACK_SLOTS_mask().
extern RegMask _STACK_OR_PTR_REG_mask;
extern RegMask _STACK_OR_LONG_REG_mask;
extern RegMask _STACK_OR_INT_REG_mask;

// Read-only accessors for the stack-or-register masks.
inline const RegMask& STACK_OR_PTR_REG_mask()  { return _STACK_OR_PTR_REG_mask;  }
inline const RegMask& STACK_OR_LONG_REG_mask() { return _STACK_OR_LONG_REG_mask; }
inline const RegMask& STACK_OR_INT_REG_mask()  { return _STACK_OR_INT_REG_mask;  }
 1498 
 1499 %}
 1500 
 1501 source %{
// Short names for the Assembler operand kinds imm_operand and disp32_operand.
#define   RELOC_IMM64    Assembler::imm_operand
#define   RELOC_DISP32   Assembler::disp32_operand

// Assembler shorthand: "__ foo(...)" expands to "masm->foo(...)", so it
// requires a variable named masm in scope.
#define __ masm->
 1506 
// Storage for the run-time register masks; all of them are populated by
// reg_mask_init() below.
RegMask _ANY_REG_mask;
RegMask _PTR_REG_mask;
RegMask _PTR_REG_NO_RBP_mask;
RegMask _PTR_NO_RAX_REG_mask;
RegMask _PTR_NO_RAX_RBX_REG_mask;
RegMask _LONG_REG_mask;
RegMask _LONG_NO_RAX_RDX_REG_mask;
RegMask _LONG_NO_RCX_REG_mask;
RegMask _LONG_NO_RBP_R13_REG_mask;
RegMask _INT_REG_mask;
RegMask _INT_NO_RAX_RDX_REG_mask;
RegMask _INT_NO_RCX_REG_mask;
RegMask _INT_NO_RBP_R13_REG_mask;
RegMask _FLOAT_REG_mask;
RegMask _STACK_OR_PTR_REG_mask;
RegMask _STACK_OR_LONG_REG_mask;
RegMask _STACK_OR_INT_REG_mask;
 1524 
 1525 static bool need_r12_heapbase() {
 1526   return UseCompressedOops;
 1527 }
 1528 
 1529 void reg_mask_init() {
 1530   constexpr Register egprs[] = {r16, r17, r18, r19, r20, r21, r22, r23, r24, r25, r26, r27, r28, r29, r30, r31};
 1531 
 1532   // _ALL_REG_mask is generated by adlc from the all_reg register class below.
 1533   // We derive a number of subsets from it.
 1534   _ANY_REG_mask.assignFrom(_ALL_REG_mask);
 1535 
 1536   if (PreserveFramePointer) {
 1537     _ANY_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1538     _ANY_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
 1539   }
 1540   if (need_r12_heapbase()) {
 1541     _ANY_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()));
 1542     _ANY_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()->next()));
 1543   }
 1544 
 1545   _PTR_REG_mask.assignFrom(_ANY_REG_mask);
 1546   _PTR_REG_mask.remove(OptoReg::as_OptoReg(rsp->as_VMReg()));
 1547   _PTR_REG_mask.remove(OptoReg::as_OptoReg(rsp->as_VMReg()->next()));
 1548   _PTR_REG_mask.remove(OptoReg::as_OptoReg(r15->as_VMReg()));
 1549   _PTR_REG_mask.remove(OptoReg::as_OptoReg(r15->as_VMReg()->next()));
 1550   if (!UseAPX) {
 1551     for (uint i = 0; i < sizeof(egprs)/sizeof(Register); i++) {
 1552       _PTR_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
 1553       _PTR_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()->next()));
 1554     }
 1555   }
 1556 
 1557   _STACK_OR_PTR_REG_mask.assignFrom(_PTR_REG_mask);
 1558   _STACK_OR_PTR_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
 1559 
 1560   _PTR_REG_NO_RBP_mask.assignFrom(_PTR_REG_mask);
 1561   _PTR_REG_NO_RBP_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1562   _PTR_REG_NO_RBP_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
 1563 
 1564   _PTR_NO_RAX_REG_mask.assignFrom(_PTR_REG_mask);
 1565   _PTR_NO_RAX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
 1566   _PTR_NO_RAX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
 1567 
 1568   _PTR_NO_RAX_RBX_REG_mask.assignFrom(_PTR_NO_RAX_REG_mask);
 1569   _PTR_NO_RAX_RBX_REG_mask.remove(OptoReg::as_OptoReg(rbx->as_VMReg()));
 1570   _PTR_NO_RAX_RBX_REG_mask.remove(OptoReg::as_OptoReg(rbx->as_VMReg()->next()));
 1571 
 1572 
 1573   _LONG_REG_mask.assignFrom(_PTR_REG_mask);
 1574   _STACK_OR_LONG_REG_mask.assignFrom(_LONG_REG_mask);
 1575   _STACK_OR_LONG_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
 1576 
 1577   _LONG_NO_RAX_RDX_REG_mask.assignFrom(_LONG_REG_mask);
 1578   _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
 1579   _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
 1580   _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
 1581   _LONG_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()->next()));
 1582 
 1583   _LONG_NO_RCX_REG_mask.assignFrom(_LONG_REG_mask);
 1584   _LONG_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
 1585   _LONG_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()->next()));
 1586 
 1587   _LONG_NO_RBP_R13_REG_mask.assignFrom(_LONG_REG_mask);
 1588   _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1589   _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
 1590   _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()));
 1591   _LONG_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()->next()));
 1592 
 1593   _INT_REG_mask.assignFrom(_ALL_INT_REG_mask);
 1594   if (!UseAPX) {
 1595     for (uint i = 0; i < sizeof(egprs)/sizeof(Register); i++) {
 1596       _INT_REG_mask.remove(OptoReg::as_OptoReg(egprs[i]->as_VMReg()));
 1597     }
 1598   }
 1599 
 1600   if (PreserveFramePointer) {
 1601     _INT_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1602   }
 1603   if (need_r12_heapbase()) {
 1604     _INT_REG_mask.remove(OptoReg::as_OptoReg(r12->as_VMReg()));
 1605   }
 1606 
 1607   _STACK_OR_INT_REG_mask.assignFrom(_INT_REG_mask);
 1608   _STACK_OR_INT_REG_mask.or_with(STACK_OR_STACK_SLOTS_mask());
 1609 
 1610   _INT_NO_RAX_RDX_REG_mask.assignFrom(_INT_REG_mask);
 1611   _INT_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rax->as_VMReg()));
 1612   _INT_NO_RAX_RDX_REG_mask.remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
 1613 
 1614   _INT_NO_RCX_REG_mask.assignFrom(_INT_REG_mask);
 1615   _INT_NO_RCX_REG_mask.remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
 1616 
 1617   _INT_NO_RBP_R13_REG_mask.assignFrom(_INT_REG_mask);
 1618   _INT_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
 1619   _INT_NO_RBP_R13_REG_mask.remove(OptoReg::as_OptoReg(r13->as_VMReg()));
 1620 
 1621   // _FLOAT_REG_LEGACY_mask/_FLOAT_REG_EVEX_mask is generated by adlc
 1622   // from the float_reg_legacy/float_reg_evex register class.
 1623   _FLOAT_REG_mask.assignFrom(VM_Version::supports_evex() ? _FLOAT_REG_EVEX_mask : _FLOAT_REG_LEGACY_mask);
 1624 }
 1625 
 1626 static bool generate_vzeroupper(Compile* C) {
 1627   return (VM_Version::supports_vzeroupper() && (C->max_vector_size() > 16 || C->clear_upper_avx() == true)) ? true: false;  // Generate vzeroupper
 1628 }
 1629 
 1630 static int clear_avx_size() {
 1631   return generate_vzeroupper(Compile::current()) ? 3: 0;  // vzeroupper
 1632 }
 1633 
 1634 // !!!!! Special hack to get all types of calls to specify the byte offset
 1635 //       from the start of the call to the point where the return address
 1636 //       will point.
 1637 int MachCallStaticJavaNode::ret_addr_offset()
 1638 {
 1639   int offset = 5; // 5 bytes from start of call to where return address points
 1640   offset += clear_avx_size();
 1641   return offset;
 1642 }
 1643 
 1644 int MachCallDynamicJavaNode::ret_addr_offset()
 1645 {
 1646   int offset = 15; // 15 bytes from start of call to where return address points
 1647   offset += clear_avx_size();
 1648   return offset;
 1649 }
 1650 
 1651 int MachCallRuntimeNode::ret_addr_offset() {
 1652   int offset = 13; // movq r10,#addr; callq (r10)
 1653   if (this->ideal_Opcode() != Op_CallLeafVector) {
 1654     offset += clear_avx_size();
 1655   }
 1656   return offset;
 1657 }
 1658 //
 1659 // Compute padding required for nodes which need alignment
 1660 //
 1661 
 1662 // The address of the call instruction needs to be 4-byte aligned to
 1663 // ensure that it does not span a cache line so that it can be patched.
 1664 int CallStaticJavaDirectNode::compute_padding(int current_offset) const
 1665 {
 1666   current_offset += clear_avx_size(); // skip vzeroupper
 1667   current_offset += 1; // skip call opcode byte
 1668   return align_up(current_offset, alignment_required()) - current_offset;
 1669 }
 1670 
 1671 // The address of the call instruction needs to be 4-byte aligned to
 1672 // ensure that it does not span a cache line so that it can be patched.
 1673 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
 1674 {
 1675   current_offset += clear_avx_size(); // skip vzeroupper
 1676   current_offset += 11; // skip movq instruction + call opcode byte
 1677   return align_up(current_offset, alignment_required()) - current_offset;
 1678 }
 1679 
 1680 // This could be in MacroAssembler but it's fairly C2 specific
 1681 static void emit_cmpfp_fixup(MacroAssembler* masm) {
 1682   Label exit;
 1683   __ jccb(Assembler::noParity, exit);
 1684   __ pushf();
 1685   //
 1686   // comiss/ucomiss instructions set ZF,PF,CF flags and
 1687   // zero OF,AF,SF for NaN values.
 1688   // Fixup flags by zeroing ZF,PF so that compare of NaN
 1689   // values returns 'less than' result (CF is set).
 1690   // Leave the rest of flags unchanged.
 1691   //
 1692   //    7 6 5 4 3 2 1 0
 1693   //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
 1694   //    0 0 1 0 1 0 1 1   (0x2B)
 1695   //
 1696   __ andq(Address(rsp, 0), 0xffffff2b);
 1697   __ popf();
 1698   __ bind(exit);
 1699 }
 1700 
 1701 static void emit_cmpfp3(MacroAssembler* masm, Register dst) {
 1702   // If any floating point comparison instruction is used, unordered case always triggers jump
 1703   // for below condition, CF=1 is true when at least one input is NaN
 1704   Label done;
 1705   __ movl(dst, -1);
 1706   __ jcc(Assembler::below, done);
 1707   __ setcc(Assembler::notEqual, dst);
 1708   __ bind(done);
 1709 }
 1710 
 1711 enum FP_PREC {
 1712   fp_prec_hlf,
 1713   fp_prec_flt,
 1714   fp_prec_dbl
 1715 };
 1716 
 1717 static inline void emit_fp_ucom(MacroAssembler* masm, enum FP_PREC pt,
 1718                                 XMMRegister p, XMMRegister q) {
 1719   if (pt == fp_prec_hlf) {
 1720     __ evucomish(p, q);
 1721   } else if (pt == fp_prec_flt) {
 1722     __ ucomiss(p, q);
 1723   } else {
 1724     __ ucomisd(p, q);
 1725   }
 1726 }
 1727 
 1728 static inline void movfp(MacroAssembler* masm, enum FP_PREC pt,
 1729                          XMMRegister dst, XMMRegister src, Register scratch) {
 1730   if (pt == fp_prec_hlf) {
 1731     __ movhlf(dst, src, scratch);
 1732   } else if (pt == fp_prec_flt) {
 1733     __ movflt(dst, src);
 1734   } else {
 1735     __ movdbl(dst, src);
 1736   }
 1737 }
 1738 
// Math.min()          # Math.max()
// -----------------------------
// (v)ucomis[h/s/d]    #
// ja   -> b           # a
// jp   -> NaN         # NaN
// jb   -> a           # b
// je                  #
// |-jz -> a | b       # a & b
// |    -> a           #
//
// Emits a scalar floating-point min (min == true) or max (min == false) of
// 'a' and 'b' into 'dst', following the decision table above.
//
//   dst  - register that receives the result
//   a, b - the two operands
//   xmmt - XMM temporary; zeroed and compared against 'a' on the a == b path
//   rt   - general-purpose temporary; loaded with the NaN bit pattern on the
//          NaN path and handed to movfp() as its scratch register
//   min  - selects min (true) or max (false)
//   pt   - operand precision (half, float or double)
static void emit_fp_min_max(MacroAssembler* masm, XMMRegister dst,
                            XMMRegister a, XMMRegister b,
                            XMMRegister xmmt, Register rt,
                            bool min, enum FP_PREC pt) {

  Label nan, zero, below, above, done;

  emit_fp_ucom(masm, pt, a, b);

  // a > b: the result is b for min and a for max. If dst already is that
  // register there is nothing to move, so branch straight to 'done'.
  if (dst->encoding() != (min ? b : a)->encoding()) {
    __ jccb(Assembler::above, above); // CF=0 & ZF=0
  } else {
    __ jccb(Assembler::above, done);
  }

  __ jccb(Assembler::parity, nan);  // PF=1
  __ jccb(Assembler::below, below); // CF=1

  // equal
  // Compare 'a' against zero to tell the a == b == +/-0.0 case apart from
  // the ordinary equal case.
  __ vpxor(xmmt, xmmt, xmmt, Assembler::AVX_128bit);
  emit_fp_ucom(masm, pt, a, xmmt);

  __ jccb(Assembler::equal, zero);
  // Equal and non-zero: either operand is the result, take 'a'.
  movfp(masm, pt, dst, a, rt);

  __ jmp(done);

  __ bind(zero);
  // Both operands compare equal to zero. Combine their bit patterns:
  // OR keeps a set sign bit (min), AND keeps it only if both have it (max).
  if (min) {
    __ vpor(dst, a, b, Assembler::AVX_128bit);
  } else {
    __ vpand(dst, a, b, Assembler::AVX_128bit);
  }

  __ jmp(done);

  __ bind(above);
  movfp(masm, pt, dst, min ? b : a, rt);

  __ jmp(done);

  __ bind(nan);
  // Unordered compare: load the NaN bit pattern for the precision through rt.
  if (pt == fp_prec_hlf) {
    __ movl(rt, 0x00007e00); // Float16.NaN
    __ evmovw(dst, rt);
  } else if (pt == fp_prec_flt) {
    __ movl(rt, 0x7fc00000); // Float.NaN
    __ movdl(dst, rt);
  } else {
    __ mov64(rt, 0x7ff8000000000000L); // Double.NaN
    __ movdq(dst, rt);
  }
  __ jmp(done);

  __ bind(below);
  // a < b: the result is a for min and b for max.
  movfp(masm, pt, dst, min ? a : b, rt);

  __ bind(done);
}
 1807 
 1808 //=============================================================================
// Definition of MachConstantBaseNode's static output register mask; it is
// bound to the empty register mask.
const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::EMPTY;
 1810 
 1811 int ConstantTable::calculate_table_base_offset() const {
 1812   return 0;  // absolute addressing, no offset
 1813 }
 1814 
 1815 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
// Not expected to be called: requires_postalloc_expand() returns false for
// this node, and reaching this body is reported via ShouldNotReachHere().
void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  ShouldNotReachHere();
}
 1819 
// Emits nothing: the constant base node has no machine code of its own
// (size() reports 0 bytes accordingly).
void MachConstantBaseNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const {
  // Empty encoding
}
 1823 
 1824 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
 1825   return 0;
 1826 }
 1827 
#ifndef PRODUCT
// Listing for non-PRODUCT builds: prints a single marker line, since the
// node has an empty encoding.
void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  st->print("# MachConstantBaseNode (empty encoding)");
}
#endif
 1833 
 1834 
 1835 //=============================================================================
 1836 #ifndef PRODUCT
 1837 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 1838   Compile* C = ra_->C;
 1839 
 1840   int framesize = C->output()->frame_size_in_bytes();
 1841   int bangsize = C->output()->bang_size_in_bytes();
 1842   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 1843   // Remove wordSize for return addr which is already pushed.
 1844   framesize -= wordSize;
 1845 
 1846   if (C->output()->need_stack_bang(bangsize)) {
 1847     framesize -= wordSize;
 1848     st->print("# stack bang (%d bytes)", bangsize);
 1849     st->print("\n\t");
 1850     st->print("pushq   rbp\t# Save rbp");
 1851     if (PreserveFramePointer) {
 1852         st->print("\n\t");
 1853         st->print("movq    rbp, rsp\t# Save the caller's SP into rbp");
 1854     }
 1855     if (framesize) {
 1856       st->print("\n\t");
 1857       st->print("subq    rsp, #%d\t# Create frame",framesize);
 1858     }
 1859   } else {
 1860     st->print("subq    rsp, #%d\t# Create frame",framesize);
 1861     st->print("\n\t");
 1862     framesize -= wordSize;
 1863     st->print("movq    [rsp + #%d], rbp\t# Save rbp",framesize);
 1864     if (PreserveFramePointer) {
 1865       st->print("\n\t");
 1866       st->print("movq    rbp, rsp\t# Save the caller's SP into rbp");
 1867       if (framesize > 0) {
 1868         st->print("\n\t");
 1869         st->print("addq    rbp, #%d", framesize);
 1870       }
 1871     }
 1872   }
 1873 
 1874   if (VerifyStackAtCalls) {
 1875     st->print("\n\t");
 1876     framesize -= wordSize;
 1877     st->print("movq    [rsp + #%d], 0xbadb100d\t# Majik cookie for stack depth check",framesize);
 1878 #ifdef ASSERT
 1879     st->print("\n\t");
 1880     st->print("# stack alignment check");
 1881 #endif
 1882   }
 1883   if (C->stub_function() != nullptr) {
 1884     st->print("\n\t");
 1885     st->print("cmpl    [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
 1886     st->print("\n\t");
 1887     st->print("je      fast_entry\t");
 1888     st->print("\n\t");
 1889     st->print("call    #nmethod_entry_barrier_stub\t");
 1890     st->print("\n\tfast_entry:");
 1891   }
 1892   st->cr();
 1893 }
 1894 #endif
 1895 
 1896 void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
 1897   Compile* C = ra_->C;
 1898 
 1899   int framesize = C->output()->frame_size_in_bytes();
 1900   int bangsize = C->output()->bang_size_in_bytes();
 1901 
 1902   if (C->clinit_barrier_on_entry()) {
 1903     assert(VM_Version::supports_fast_class_init_checks(), "sanity");
 1904     assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");
 1905 
 1906     Label L_skip_barrier;
 1907     Register klass = rscratch1;
 1908 
 1909     __ mov_metadata(klass, C->method()->holder()->constant_encoding());
 1910     __ clinit_barrier(klass, &L_skip_barrier /*L_fast_path*/);
 1911 
 1912     __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path
 1913 
 1914     __ bind(L_skip_barrier);
 1915   }
 1916 
 1917   __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, false, C->stub_function() != nullptr);
 1918 
 1919   C->output()->set_frame_complete(__ offset());
 1920 
 1921   if (C->has_mach_constant_base_node()) {
 1922     // NOTE: We set the table base offset here because users might be
 1923     // emitted before MachConstantBaseNode.
 1924     ConstantTable& constant_table = C->output()->constant_table();
 1925     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
 1926   }
 1927 }
 1928 
 1929 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
 1930 {
 1931   return MachNode::size(ra_); // too many variables; just compute it
 1932                               // the hard way
 1933 }
 1934 
 1935 int MachPrologNode::reloc() const
 1936 {
 1937   return 0; // a large enough number
 1938 }
 1939 
 1940 //=============================================================================
 1941 #ifndef PRODUCT
 1942 void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 1943 {
 1944   Compile* C = ra_->C;
 1945   if (generate_vzeroupper(C)) {
 1946     st->print("vzeroupper");
 1947     st->cr(); st->print("\t");
 1948   }
 1949 
 1950   int framesize = C->output()->frame_size_in_bytes();
 1951   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 1952   // Remove word for return adr already pushed
 1953   // and RBP
 1954   framesize -= 2*wordSize;
 1955 
 1956   if (framesize) {
 1957     st->print_cr("addq    rsp, %d\t# Destroy frame", framesize);
 1958     st->print("\t");
 1959   }
 1960 
 1961   st->print_cr("popq    rbp");
 1962   if (do_polling() && C->is_method_compilation()) {
 1963     st->print("\t");
 1964     st->print_cr("cmpq    rsp, poll_offset[r15_thread] \n\t"
 1965                  "ja      #safepoint_stub\t"
 1966                  "# Safepoint: poll for GC");
 1967   }
 1968 }
 1969 #endif
 1970 
 1971 void MachEpilogNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 1972 {
 1973   Compile* C = ra_->C;
 1974 
 1975   if (generate_vzeroupper(C)) {
 1976     // Clear upper bits of YMM registers when current compiled code uses
 1977     // wide vectors to avoid AVX <-> SSE transition penalty during call.
 1978     __ vzeroupper();
 1979   }
 1980 
 1981   int framesize = C->output()->frame_size_in_bytes();
 1982   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 1983   // Remove word for return adr already pushed
 1984   // and RBP
 1985   framesize -= 2*wordSize;
 1986 
 1987   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
 1988 
 1989   if (framesize) {
 1990     __ addq(rsp, framesize);
 1991   }
 1992 
 1993   __ popq(rbp);
 1994 
 1995   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
 1996     __ reserved_stack_check();
 1997   }
 1998 
 1999   if (do_polling() && C->is_method_compilation()) {
 2000     Label dummy_label;
 2001     Label* code_stub = &dummy_label;
 2002     if (!C->output()->in_scratch_emit_size()) {
 2003       C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
 2004       C->output()->add_stub(stub);
 2005       code_stub = &stub->entry();
 2006     }
 2007     __ relocate(relocInfo::poll_return_type);
 2008     __ safepoint_poll(*code_stub, true /* at_return */, true /* in_nmethod */);
 2009   }
 2010 }
 2011 
 2012 uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
 2013 {
 2014   return MachNode::size(ra_); // too many variables; just compute it
 2015                               // the hard way
 2016 }
 2017 
 2018 int MachEpilogNode::reloc() const
 2019 {
 2020   return 2; // a large enough number
 2021 }
 2022 
 2023 const Pipeline* MachEpilogNode::pipeline() const
 2024 {
 2025   return MachNode::pipeline_class();
 2026 }
 2027 
 2028 //=============================================================================
 2029 
 2030 enum RC {
 2031   rc_bad,
 2032   rc_int,
 2033   rc_kreg,
 2034   rc_float,
 2035   rc_stack
 2036 };
 2037 
 2038 static enum RC rc_class(OptoReg::Name reg)
 2039 {
 2040   if( !OptoReg::is_valid(reg)  ) return rc_bad;
 2041 
 2042   if (OptoReg::is_stack(reg)) return rc_stack;
 2043 
 2044   VMReg r = OptoReg::as_VMReg(reg);
 2045 
 2046   if (r->is_Register()) return rc_int;
 2047 
 2048   if (r->is_KRegister()) return rc_kreg;
 2049 
 2050   assert(r->is_XMMRegister(), "must be");
 2051   return rc_float;
 2052 }
 2053 
// Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.

// Vector register-to-register move; called below for XMM -> XMM vector spill
// copies with the source/destination register names and the ideal register
// kind (ireg).
static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
                          int src_hi, int dst_hi, uint ireg, outputStream* st);

// Vector move between an XMM register and a stack slot at stack_offset;
// called below with is_load == true for stack -> register and
// is_load == false for register -> stack.
void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
                     int stack_offset, int reg, uint ireg, outputStream* st);
 2060 
// Copies a vector value from one stack slot to another (mem -> mem spill).
//
//   masm       - when non-null, the copy is emitted as machine code
//   src_offset - rsp-relative offset of the source slot
//   dst_offset - rsp-relative offset of the destination slot
//   ireg       - ideal register kind (Op_VecS/D/X/Y/Z) selecting the width
//   st         - when masm is null (non-PRODUCT builds only), the same
//                sequence is printed to this stream instead of being emitted
//
// Any other ireg value ends in ShouldNotReachHere().
static void vec_stack_to_stack_helper(C2_MacroAssembler *masm, int src_offset,
                                      int dst_offset, uint ireg, outputStream* st) {
  if (masm) {
    switch (ireg) {
    case Op_VecS:
      // 32-bit: go through rax, which is saved to and restored from the
      // slot just below rsp.
      __ movq(Address(rsp, -8), rax);
      __ movl(rax, Address(rsp, src_offset));
      __ movl(Address(rsp, dst_offset), rax);
      __ movq(rax, Address(rsp, -8));
      break;
    case Op_VecD:
      // 64-bit: a single push/pop pair moves the value without a register.
      __ pushq(Address(rsp, src_offset));
      __ popq (Address(rsp, dst_offset));
      break;
    case Op_VecX:
      // 128-bit: two push/pop pairs, one per 8-byte half.
      __ pushq(Address(rsp, src_offset));
      __ popq (Address(rsp, dst_offset));
      __ pushq(Address(rsp, src_offset+8));
      __ popq (Address(rsp, dst_offset+8));
      break;
    case Op_VecY:
      // 256-bit: go through xmm0, saved to and restored from below rsp.
      __ vmovdqu(Address(rsp, -32), xmm0);
      __ vmovdqu(xmm0, Address(rsp, src_offset));
      __ vmovdqu(Address(rsp, dst_offset), xmm0);
      __ vmovdqu(xmm0, Address(rsp, -32));
      break;
    case Op_VecZ:
      // 512-bit: same scheme as VecY using evmovdquq.
      // NOTE(review): the trailing 2 is presumably the 512-bit vector length
      // encoding - confirm against Assembler::AvxVectorLen.
      __ evmovdquq(Address(rsp, -64), xmm0, 2);
      __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
      __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
      __ evmovdquq(xmm0, Address(rsp, -64), 2);
      break;
    default:
      ShouldNotReachHere();
    }
#ifndef PRODUCT
  } else {
    // Listing only: print the sequence the emit branch above would produce.
    switch (ireg) {
    case Op_VecS:
      st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
                "movl    rax, [rsp + #%d]\n\t"
                "movl    [rsp + #%d], rax\n\t"
                "movq    rax, [rsp - #8]",
                src_offset, dst_offset);
      break;
    case Op_VecD:
      st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
                "popq    [rsp + #%d]",
                src_offset, dst_offset);
      break;
     case Op_VecX:
      st->print("pushq   [rsp + #%d]\t# 128-bit mem-mem spill\n\t"
                "popq    [rsp + #%d]\n\t"
                "pushq   [rsp + #%d]\n\t"
                "popq    [rsp + #%d]",
                src_offset, dst_offset, src_offset+8, dst_offset+8);
      break;
    case Op_VecY:
      st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
                "vmovdqu xmm0, [rsp + #%d]\n\t"
                "vmovdqu [rsp + #%d], xmm0\n\t"
                "vmovdqu xmm0, [rsp - #32]",
                src_offset, dst_offset);
      break;
    case Op_VecZ:
      // NOTE(review): the listing prints "vmovdqu" while the emitted
      // instruction for VecZ is evmovdquq.
      st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
                "vmovdqu xmm0, [rsp + #%d]\n\t"
                "vmovdqu [rsp + #%d], xmm0\n\t"
                "vmovdqu xmm0, [rsp - #64]",
                src_offset, dst_offset);
      break;
    default:
      ShouldNotReachHere();
    }
#endif
  }
}
 2138 
 2139 uint MachSpillCopyNode::implementation(C2_MacroAssembler* masm,
 2140                                        PhaseRegAlloc* ra_,
 2141                                        bool do_size,
 2142                                        outputStream* st) const {
 2143   assert(masm != nullptr || st  != nullptr, "sanity");
 2144   // Get registers to move
 2145   OptoReg::Name src_second = ra_->get_reg_second(in(1));
 2146   OptoReg::Name src_first = ra_->get_reg_first(in(1));
 2147   OptoReg::Name dst_second = ra_->get_reg_second(this);
 2148   OptoReg::Name dst_first = ra_->get_reg_first(this);
 2149 
 2150   enum RC src_second_rc = rc_class(src_second);
 2151   enum RC src_first_rc = rc_class(src_first);
 2152   enum RC dst_second_rc = rc_class(dst_second);
 2153   enum RC dst_first_rc = rc_class(dst_first);
 2154 
 2155   assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first),
 2156          "must move at least 1 register" );
 2157 
 2158   if (src_first == dst_first && src_second == dst_second) {
 2159     // Self copy, no move
 2160     return 0;
 2161   }
 2162   if (bottom_type()->isa_vect() != nullptr && bottom_type()->isa_pvectmask() == nullptr) {
 2163     uint ireg = ideal_reg();
 2164     assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
 2165     assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
 2166     if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
 2167       // mem -> mem
 2168       int src_offset = ra_->reg2offset(src_first);
 2169       int dst_offset = ra_->reg2offset(dst_first);
 2170       vec_stack_to_stack_helper(masm, src_offset, dst_offset, ireg, st);
 2171     } else if (src_first_rc == rc_float && dst_first_rc == rc_float ) {
 2172       vec_mov_helper(masm, src_first, dst_first, src_second, dst_second, ireg, st);
 2173     } else if (src_first_rc == rc_float && dst_first_rc == rc_stack ) {
 2174       int stack_offset = ra_->reg2offset(dst_first);
 2175       vec_spill_helper(masm, false, stack_offset, src_first, ireg, st);
 2176     } else if (src_first_rc == rc_stack && dst_first_rc == rc_float ) {
 2177       int stack_offset = ra_->reg2offset(src_first);
 2178       vec_spill_helper(masm, true,  stack_offset, dst_first, ireg, st);
 2179     } else {
 2180       ShouldNotReachHere();
 2181     }
 2182     return 0;
 2183   }
 2184   if (src_first_rc == rc_stack) {
 2185     // mem ->
 2186     if (dst_first_rc == rc_stack) {
 2187       // mem -> mem
 2188       assert(src_second != dst_first, "overlap");
 2189       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2190           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2191         // 64-bit
 2192         int src_offset = ra_->reg2offset(src_first);
 2193         int dst_offset = ra_->reg2offset(dst_first);
 2194         if (masm) {
 2195           __ pushq(Address(rsp, src_offset));
 2196           __ popq (Address(rsp, dst_offset));
 2197 #ifndef PRODUCT
 2198         } else {
 2199           st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
 2200                     "popq    [rsp + #%d]",
 2201                      src_offset, dst_offset);
 2202 #endif
 2203         }
 2204       } else {
 2205         // 32-bit
 2206         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2207         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2208         // No pushl/popl, so:
 2209         int src_offset = ra_->reg2offset(src_first);
 2210         int dst_offset = ra_->reg2offset(dst_first);
 2211         if (masm) {
 2212           __ movq(Address(rsp, -8), rax);
 2213           __ movl(rax, Address(rsp, src_offset));
 2214           __ movl(Address(rsp, dst_offset), rax);
 2215           __ movq(rax, Address(rsp, -8));
 2216 #ifndef PRODUCT
 2217         } else {
 2218           st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
 2219                     "movl    rax, [rsp + #%d]\n\t"
 2220                     "movl    [rsp + #%d], rax\n\t"
 2221                     "movq    rax, [rsp - #8]",
 2222                      src_offset, dst_offset);
 2223 #endif
 2224         }
 2225       }
 2226       return 0;
 2227     } else if (dst_first_rc == rc_int) {
 2228       // mem -> gpr
 2229       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2230           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2231         // 64-bit
 2232         int offset = ra_->reg2offset(src_first);
 2233         if (masm) {
 2234           __ movq(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2235 #ifndef PRODUCT
 2236         } else {
 2237           st->print("movq    %s, [rsp + #%d]\t# spill",
 2238                      Matcher::regName[dst_first],
 2239                      offset);
 2240 #endif
 2241         }
 2242       } else {
 2243         // 32-bit
 2244         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2245         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2246         int offset = ra_->reg2offset(src_first);
 2247         if (masm) {
 2248           __ movl(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2249 #ifndef PRODUCT
 2250         } else {
 2251           st->print("movl    %s, [rsp + #%d]\t# spill",
 2252                      Matcher::regName[dst_first],
 2253                      offset);
 2254 #endif
 2255         }
 2256       }
 2257       return 0;
 2258     } else if (dst_first_rc == rc_float) {
 2259       // mem-> xmm
 2260       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2261           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2262         // 64-bit
 2263         int offset = ra_->reg2offset(src_first);
 2264         if (masm) {
 2265           __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2266 #ifndef PRODUCT
 2267         } else {
 2268           st->print("%s  %s, [rsp + #%d]\t# spill",
 2269                      UseXmmLoadAndClearUpper ? "movsd " : "movlpd",
 2270                      Matcher::regName[dst_first],
 2271                      offset);
 2272 #endif
 2273         }
 2274       } else {
 2275         // 32-bit
 2276         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2277         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2278         int offset = ra_->reg2offset(src_first);
 2279         if (masm) {
 2280           __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2281 #ifndef PRODUCT
 2282         } else {
 2283           st->print("movss   %s, [rsp + #%d]\t# spill",
 2284                      Matcher::regName[dst_first],
 2285                      offset);
 2286 #endif
 2287         }
 2288       }
 2289       return 0;
 2290     } else if (dst_first_rc == rc_kreg) {
 2291       // mem -> kreg
 2292       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2293           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2294         // 64-bit
 2295         int offset = ra_->reg2offset(src_first);
 2296         if (masm) {
 2297           __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 2298 #ifndef PRODUCT
 2299         } else {
 2300           st->print("kmovq   %s, [rsp + #%d]\t# spill",
 2301                      Matcher::regName[dst_first],
 2302                      offset);
 2303 #endif
 2304         }
 2305       }
 2306       return 0;
 2307     }
 2308   } else if (src_first_rc == rc_int) {
 2309     // gpr ->
 2310     if (dst_first_rc == rc_stack) {
 2311       // gpr -> mem
 2312       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2313           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2314         // 64-bit
 2315         int offset = ra_->reg2offset(dst_first);
 2316         if (masm) {
 2317           __ movq(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
 2318 #ifndef PRODUCT
 2319         } else {
 2320           st->print("movq    [rsp + #%d], %s\t# spill",
 2321                      offset,
 2322                      Matcher::regName[src_first]);
 2323 #endif
 2324         }
 2325       } else {
 2326         // 32-bit
 2327         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2328         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2329         int offset = ra_->reg2offset(dst_first);
 2330         if (masm) {
 2331           __ movl(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
 2332 #ifndef PRODUCT
 2333         } else {
 2334           st->print("movl    [rsp + #%d], %s\t# spill",
 2335                      offset,
 2336                      Matcher::regName[src_first]);
 2337 #endif
 2338         }
 2339       }
 2340       return 0;
 2341     } else if (dst_first_rc == rc_int) {
 2342       // gpr -> gpr
 2343       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2344           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2345         // 64-bit
 2346         if (masm) {
 2347           __ movq(as_Register(Matcher::_regEncode[dst_first]),
 2348                   as_Register(Matcher::_regEncode[src_first]));
 2349 #ifndef PRODUCT
 2350         } else {
 2351           st->print("movq    %s, %s\t# spill",
 2352                      Matcher::regName[dst_first],
 2353                      Matcher::regName[src_first]);
 2354 #endif
 2355         }
 2356         return 0;
 2357       } else {
 2358         // 32-bit
 2359         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2360         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2361         if (masm) {
 2362           __ movl(as_Register(Matcher::_regEncode[dst_first]),
 2363                   as_Register(Matcher::_regEncode[src_first]));
 2364 #ifndef PRODUCT
 2365         } else {
 2366           st->print("movl    %s, %s\t# spill",
 2367                      Matcher::regName[dst_first],
 2368                      Matcher::regName[src_first]);
 2369 #endif
 2370         }
 2371         return 0;
 2372       }
 2373     } else if (dst_first_rc == rc_float) {
 2374       // gpr -> xmm
 2375       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2376           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2377         // 64-bit
 2378         if (masm) {
 2379           __ movdq( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
 2380 #ifndef PRODUCT
 2381         } else {
 2382           st->print("movdq   %s, %s\t# spill",
 2383                      Matcher::regName[dst_first],
 2384                      Matcher::regName[src_first]);
 2385 #endif
 2386         }
 2387       } else {
 2388         // 32-bit
 2389         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2390         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2391         if (masm) {
 2392           __ movdl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
 2393 #ifndef PRODUCT
 2394         } else {
 2395           st->print("movdl   %s, %s\t# spill",
 2396                      Matcher::regName[dst_first],
 2397                      Matcher::regName[src_first]);
 2398 #endif
 2399         }
 2400       }
 2401       return 0;
 2402     } else if (dst_first_rc == rc_kreg) {
 2403       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2404           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2405         // 64-bit
 2406         if (masm) {
 2407           __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
 2408   #ifndef PRODUCT
 2409         } else {
 2410            st->print("kmovq   %s, %s\t# spill",
 2411                        Matcher::regName[dst_first],
 2412                        Matcher::regName[src_first]);
 2413   #endif
 2414         }
 2415       }
 2416       Unimplemented();
 2417       return 0;
 2418     }
 2419   } else if (src_first_rc == rc_float) {
 2420     // xmm ->
 2421     if (dst_first_rc == rc_stack) {
 2422       // xmm -> mem
 2423       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2424           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2425         // 64-bit
 2426         int offset = ra_->reg2offset(dst_first);
 2427         if (masm) {
 2428           __ movdbl( Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
 2429 #ifndef PRODUCT
 2430         } else {
 2431           st->print("movsd   [rsp + #%d], %s\t# spill",
 2432                      offset,
 2433                      Matcher::regName[src_first]);
 2434 #endif
 2435         }
 2436       } else {
 2437         // 32-bit
 2438         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2439         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2440         int offset = ra_->reg2offset(dst_first);
 2441         if (masm) {
 2442           __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
 2443 #ifndef PRODUCT
 2444         } else {
 2445           st->print("movss   [rsp + #%d], %s\t# spill",
 2446                      offset,
 2447                      Matcher::regName[src_first]);
 2448 #endif
 2449         }
 2450       }
 2451       return 0;
 2452     } else if (dst_first_rc == rc_int) {
 2453       // xmm -> gpr
 2454       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2455           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2456         // 64-bit
 2457         if (masm) {
 2458           __ movdq( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 2459 #ifndef PRODUCT
 2460         } else {
 2461           st->print("movdq   %s, %s\t# spill",
 2462                      Matcher::regName[dst_first],
 2463                      Matcher::regName[src_first]);
 2464 #endif
 2465         }
 2466       } else {
 2467         // 32-bit
 2468         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2469         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2470         if (masm) {
 2471           __ movdl( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 2472 #ifndef PRODUCT
 2473         } else {
 2474           st->print("movdl   %s, %s\t# spill",
 2475                      Matcher::regName[dst_first],
 2476                      Matcher::regName[src_first]);
 2477 #endif
 2478         }
 2479       }
 2480       return 0;
 2481     } else if (dst_first_rc == rc_float) {
 2482       // xmm -> xmm
 2483       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2484           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2485         // 64-bit
 2486         if (masm) {
 2487           __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 2488 #ifndef PRODUCT
 2489         } else {
 2490           st->print("%s  %s, %s\t# spill",
 2491                      UseXmmRegToRegMoveAll ? "movapd" : "movsd ",
 2492                      Matcher::regName[dst_first],
 2493                      Matcher::regName[src_first]);
 2494 #endif
 2495         }
 2496       } else {
 2497         // 32-bit
 2498         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
 2499         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
 2500         if (masm) {
 2501           __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 2502 #ifndef PRODUCT
 2503         } else {
 2504           st->print("%s  %s, %s\t# spill",
 2505                      UseXmmRegToRegMoveAll ? "movaps" : "movss ",
 2506                      Matcher::regName[dst_first],
 2507                      Matcher::regName[src_first]);
 2508 #endif
 2509         }
 2510       }
 2511       return 0;
 2512     } else if (dst_first_rc == rc_kreg) {
 2513       assert(false, "Illegal spilling");
 2514       return 0;
 2515     }
 2516   } else if (src_first_rc == rc_kreg) {
 2517     if (dst_first_rc == rc_stack) {
      // kreg -> mem
 2519       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2520           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2521         // 64-bit
 2522         int offset = ra_->reg2offset(dst_first);
 2523         if (masm) {
 2524           __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first]));
 2525 #ifndef PRODUCT
 2526         } else {
 2527           st->print("kmovq   [rsp + #%d] , %s\t# spill",
 2528                      offset,
 2529                      Matcher::regName[src_first]);
 2530 #endif
 2531         }
 2532       }
 2533       return 0;
 2534     } else if (dst_first_rc == rc_int) {
 2535       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2536           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2537         // 64-bit
 2538         if (masm) {
 2539           __ kmov(as_Register(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
 2540 #ifndef PRODUCT
 2541         } else {
 2542          st->print("kmovq   %s, %s\t# spill",
 2543                      Matcher::regName[dst_first],
 2544                      Matcher::regName[src_first]);
 2545 #endif
 2546         }
 2547       }
 2548       Unimplemented();
 2549       return 0;
 2550     } else if (dst_first_rc == rc_kreg) {
 2551       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
 2552           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
 2553         // 64-bit
 2554         if (masm) {
 2555           __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
 2556 #ifndef PRODUCT
 2557         } else {
 2558          st->print("kmovq   %s, %s\t# spill",
 2559                      Matcher::regName[dst_first],
 2560                      Matcher::regName[src_first]);
 2561 #endif
 2562         }
 2563       }
 2564       return 0;
 2565     } else if (dst_first_rc == rc_float) {
 2566       assert(false, "Illegal spill");
 2567       return 0;
 2568     }
 2569   }
 2570 
 2571   assert(0," foo ");
 2572   Unimplemented();
 2573   return 0;
 2574 }
 2575 
 2576 #ifndef PRODUCT
 2577 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
 2578   implementation(nullptr, ra_, false, st);
 2579 }
 2580 #endif
 2581 
 2582 void MachSpillCopyNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
 2583   implementation(masm, ra_, false, nullptr);
 2584 }
 2585 
 2586 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
 2587   return MachNode::size(ra_);
 2588 }
 2589 
 2590 //=============================================================================
 2591 #ifndef PRODUCT
 2592 void BoxLockNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2593 {
 2594   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2595   int reg = ra_->get_reg_first(this);
 2596   st->print("leaq    %s, [rsp + #%d]\t# box lock",
 2597             Matcher::regName[reg], offset);
 2598 }
 2599 #endif
 2600 
 2601 void BoxLockNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 2602 {
 2603   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2604   int reg = ra_->get_encode(this);
 2605 
 2606   __ lea(as_Register(reg), Address(rsp, offset));
 2607 }
 2608 
 2609 uint BoxLockNode::size(PhaseRegAlloc *ra_) const
 2610 {
 2611   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 2612   if (ra_->get_encode(this) > 15) {
 2613     return (offset < 0x80) ? 6 : 9; // REX2
 2614   } else {
 2615     return (offset < 0x80) ? 5 : 8; // REX
 2616   }
 2617 }
 2618 
 2619 //=============================================================================
 2620 #ifndef PRODUCT
 2621 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 2622 {
 2623   st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
 2624   st->print_cr("\tcmpl    rscratch1, [rax + CompiledICData::speculated_klass_offset()]\t # Inline cache check");
 2625   st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
 2626 }
 2627 #endif
 2628 
 2629 void MachUEPNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const
 2630 {
 2631   __ ic_check(InteriorEntryAlignment);
 2632 }
 2633 
 2634 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
 2635 {
 2636   return MachNode::size(ra_); // too many variables; just compute it
 2637                               // the hard way
 2638 }
 2639 
 2640 
 2641 //=============================================================================
 2642 
 2643 bool Matcher::supports_vector_calling_convention(void) {
 2644   return EnableVectorSupport;
 2645 }
 2646 
 2647 static bool is_ndd_demotable_opr1(const MachNode* mdef) {
 2648   return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr1) != 0);
 2649 }
 2650 
 2651 static bool is_ndd_demotable_opr2(const MachNode* mdef) {
 2652   return ((mdef->flags() & Node::PD::Flag_ndd_demotable_opr2) != 0);
 2653 }
 2654 
 2655 #ifdef ASSERT
 2656 static bool is_ndd_demotable(const MachNode* mdef) {
 2657   return (is_ndd_demotable_opr1(mdef) || is_ndd_demotable_opr2(mdef));
 2658 }
 2659 #endif
 2660 
 2661 bool Matcher::is_register_biasing_candidate(const MachNode* mdef,
 2662                                             int oper_index) {
 2663   if (mdef == nullptr) {
 2664     return false;
 2665   }
 2666 
 2667   if (mdef->num_opnds() <= oper_index || mdef->operand_index(oper_index) < 0 ||
 2668       mdef->in(mdef->operand_index(oper_index)) == nullptr) {
 2669     assert(oper_index != 1 || !is_ndd_demotable_opr1(mdef), "%s", mdef->Name());
 2670     assert(oper_index != 2 || !is_ndd_demotable_opr2(mdef), "%s", mdef->Name());
 2671     return false;
 2672   }
 2673 
 2674   // Complex memory operand covers multiple incoming edges needed for
 2675   // address computation. Biasing def towards any address component will not
 2676   // result in NDD demotion by assembler.
 2677   if (mdef->operand_num_edges(oper_index) != 1) {
 2678     return false;
 2679   }
 2680 
 2681   // Demotion candidate must be register mask compatible with definition.
 2682   const RegMask& oper_mask = mdef->in_RegMask(mdef->operand_index(oper_index));
 2683   if (!oper_mask.overlap(mdef->out_RegMask())) {
 2684     assert(!is_ndd_demotable(mdef), "%s", mdef->Name());
 2685     return false;
 2686   }
 2687 
 2688   switch (oper_index) {
 2689   // First operand of MachNode corresponding to Intel APX NDD selection
 2690   // pattern can share its assigned register with definition operand if
 2691   // their live ranges do not overlap. In such a scenario we can demote
 2692   // it to legacy map0/map1 instruction by replacing its 4-byte extended
 2693   // EVEX prefix with shorter REX/REX2 encoding. Demotion candidates
 2694   // are decorated with a special flag by instruction selector.
 2695   case 1:
 2696     return is_ndd_demotable_opr1(mdef);
 2697 
 2698   // Definition operand of commutative operation can be biased towards second
 2699   // operand.
 2700   case 2:
 2701     return is_ndd_demotable_opr2(mdef);
 2702 
 2703   // Current scheme only selects up to two biasing candidates
 2704   default:
 2705     assert(false, "unhandled operand index: %s", mdef->Name());
 2706     break;
 2707   }
 2708 
 2709   return false;
 2710 }
 2711 
 2712 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
 2713   assert(EnableVectorSupport, "sanity");
 2714   int lo = XMM0_num;
 2715   int hi = XMM0b_num;
 2716   if (ideal_reg == Op_VecX) hi = XMM0d_num;
 2717   else if (ideal_reg == Op_VecY) hi = XMM0h_num;
 2718   else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
 2719   return OptoRegPair(hi, lo);
 2720 }
 2721 
 2722 // Is this branch offset short enough that a short branch can be used?
 2723 //
 2724 // NOTE: If the platform does not provide any short branch variants, then
 2725 //       this method should return false for offset 0.
 2726 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
 2727   // The passed offset is relative to address of the branch.
 2728   // On 86 a branch displacement is calculated relative to address
 2729   // of a next instruction.
 2730   offset -= br_size;
 2731 
 2732   // the short version of jmpConUCF2 contains multiple branches,
 2733   // making the reach slightly less
 2734   if (rule == jmpConUCF2_rule)
 2735     return (-126 <= offset && offset <= 125);
 2736   return (-128 <= offset && offset <= 127);
 2737 }
 2738 
 2739 #ifdef ASSERT
 2740 // Return whether or not this register is ever used as an argument.
 2741 bool Matcher::can_be_java_arg(int reg)
 2742 {
 2743   return
 2744     reg ==  RDI_num || reg == RDI_H_num ||
 2745     reg ==  RSI_num || reg == RSI_H_num ||
 2746     reg ==  RDX_num || reg == RDX_H_num ||
 2747     reg ==  RCX_num || reg == RCX_H_num ||
 2748     reg ==   R8_num || reg ==  R8_H_num ||
 2749     reg ==   R9_num || reg ==  R9_H_num ||
 2750     reg ==  R12_num || reg == R12_H_num ||
 2751     reg == XMM0_num || reg == XMM0b_num ||
 2752     reg == XMM1_num || reg == XMM1b_num ||
 2753     reg == XMM2_num || reg == XMM2b_num ||
 2754     reg == XMM3_num || reg == XMM3b_num ||
 2755     reg == XMM4_num || reg == XMM4b_num ||
 2756     reg == XMM5_num || reg == XMM5b_num ||
 2757     reg == XMM6_num || reg == XMM6b_num ||
 2758     reg == XMM7_num || reg == XMM7b_num;
 2759 }
 2760 #endif
 2761 
 2762 uint Matcher::int_pressure_limit()
 2763 {
 2764   return (INTPRESSURE == -1) ? _INT_REG_mask.size() : INTPRESSURE;
 2765 }
 2766 
 2767 uint Matcher::float_pressure_limit()
 2768 {
 2769   // After experiment around with different values, the following default threshold
 2770   // works best for LCM's register pressure scheduling on x64.
 2771   uint dec_count  = VM_Version::supports_evex() ? 4 : 2;
 2772   uint default_float_pressure_threshold = _FLOAT_REG_mask.size() - dec_count;
 2773   return (FLOATPRESSURE == -1) ? default_float_pressure_threshold : FLOATPRESSURE;
 2774 }
 2775 
 2776 // Register for DIVI projection of divmodI
 2777 const RegMask& Matcher::divI_proj_mask() {
 2778   return INT_RAX_REG_mask();
 2779 }
 2780 
 2781 // Register for MODI projection of divmodI
 2782 const RegMask& Matcher::modI_proj_mask() {
 2783   return INT_RDX_REG_mask();
 2784 }
 2785 
 2786 // Register for DIVL projection of divmodL
 2787 const RegMask& Matcher::divL_proj_mask() {
 2788   return LONG_RAX_REG_mask();
 2789 }
 2790 
 2791 // Register for MODL projection of divmodL
 2792 const RegMask& Matcher::modL_proj_mask() {
 2793   return LONG_RDX_REG_mask();
 2794 }
 2795 
 2796 %}
 2797 
 2798 source_hpp %{
 2799 // Header information of the source block.
 2800 // Method declarations/definitions which are used outside
 2801 // the ad-scope can conveniently be defined here.
 2802 //
 2803 // To keep related declarations/definitions/uses close together,
// we switch between source %{ %} and source_hpp %{ %} freely as needed.
 2805 
 2806 #include "runtime/vm_version.hpp"
 2807 
 2808 class NativeJump;
 2809 
 2810 class CallStubImpl {
 2811 
 2812   //--------------------------------------------------------------
 2813   //---<  Used for optimization in Compile::shorten_branches  >---
 2814   //--------------------------------------------------------------
 2815 
 2816  public:
 2817   // Size of call trampoline stub.
 2818   static uint size_call_trampoline() {
 2819     return 0; // no call trampolines on this platform
 2820   }
 2821 
 2822   // number of relocations needed by a call trampoline stub
 2823   static uint reloc_call_trampoline() {
 2824     return 0; // no call trampolines on this platform
 2825   }
 2826 };
 2827 
 2828 class HandlerImpl {
 2829 
 2830  public:
 2831 
 2832   static int emit_deopt_handler(C2_MacroAssembler* masm);
 2833 
 2834   static uint size_deopt_handler() {
 2835     // one call and one jmp.
 2836     return 7;
 2837   }
 2838 };
 2839 
 2840 inline Assembler::AvxVectorLen vector_length_encoding(int bytes) {
 2841   switch(bytes) {
 2842     case  4: // fall-through
 2843     case  8: // fall-through
 2844     case 16: return Assembler::AVX_128bit;
 2845     case 32: return Assembler::AVX_256bit;
 2846     case 64: return Assembler::AVX_512bit;
 2847 
 2848     default: {
 2849       ShouldNotReachHere();
 2850       return Assembler::AVX_NoVec;
 2851     }
 2852   }
 2853 }
 2854 
 2855 static inline Assembler::AvxVectorLen vector_length_encoding(const Node* n) {
 2856   return vector_length_encoding(Matcher::vector_length_in_bytes(n));
 2857 }
 2858 
 2859 static inline Assembler::AvxVectorLen vector_length_encoding(const MachNode* use, MachOper* opnd) {
 2860   uint def_idx = use->operand_index(opnd);
 2861   Node* def = use->in(def_idx);
 2862   return vector_length_encoding(def);
 2863 }
 2864 
 2865 static inline bool is_vector_popcount_predicate(BasicType bt) {
 2866   return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
 2867          (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
 2868 }
 2869 
 2870 static inline bool is_clz_non_subword_predicate_evex(BasicType bt, int vlen_bytes) {
 2871   return is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd() &&
 2872            (VM_Version::supports_avx512vl() || vlen_bytes == 64);
 2873 }
 2874 
// Platform-dependent node flags. Each flag is Node::_last_flag shifted left
// by a distinct amount, so the flags occupy the bit positions directly above
// the last shared node flag.
class Node::PD {
public:
  enum NodeFlags : uint64_t {
    // Tested by MachNode::compute_padding to decide whether padding has to
    // be computed for a node.
    Flag_intel_jcc_erratum    = Node::_last_flag << 1,
    // NOTE(review): the sets_*/clears_* flags presumably record a node's
    // effect on the individual condition flags - confirm against the users.
    Flag_sets_carry_flag      = Node::_last_flag << 2,
    Flag_sets_parity_flag     = Node::_last_flag << 3,
    Flag_sets_zero_flag       = Node::_last_flag << 4,
    Flag_sets_overflow_flag   = Node::_last_flag << 5,
    Flag_sets_sign_flag       = Node::_last_flag << 6,
    Flag_clears_carry_flag    = Node::_last_flag << 7,
    Flag_clears_parity_flag   = Node::_last_flag << 8,
    Flag_clears_zero_flag     = Node::_last_flag << 9,
    Flag_clears_overflow_flag = Node::_last_flag << 10,
    Flag_clears_sign_flag     = Node::_last_flag << 11,
    // Queried by is_ndd_demotable_opr1 / is_ndd_demotable_opr2 when
    // selecting register biasing candidates.
    Flag_ndd_demotable_opr1   = Node::_last_flag << 12,
    Flag_ndd_demotable_opr2   = Node::_last_flag << 13,
    // Highest platform-dependent flag; keep in sync when adding flags.
    _last_flag                = Flag_ndd_demotable_opr2
  };
};
 2894 
 2895 %} // end source_hpp
 2896 
 2897 source %{
 2898 
 2899 #include "opto/addnode.hpp"
 2900 #include "c2_intelJccErratum_x86.hpp"
 2901 
 2902 void PhaseOutput::pd_perform_mach_node_analysis() {
 2903   if (VM_Version::has_intel_jcc_erratum()) {
 2904     int extra_padding = IntelJccErratum::tag_affected_machnodes(C, C->cfg(), C->regalloc());
 2905     _buf_sizes._code += extra_padding;
 2906   }
 2907 }
 2908 
 2909 int MachNode::pd_alignment_required() const {
 2910   if (VM_Version::has_intel_jcc_erratum() && IntelJccErratum::is_jcc_erratum_branch(this)) {
 2911     // Conservatively add worst case padding. We assume that relocInfo::addr_unit() is 1 on x86.
 2912     return IntelJccErratum::largest_jcc_size() + 1;
 2913   } else {
 2914     return 1;
 2915   }
 2916 }
 2917 
 2918 int MachNode::compute_padding(int current_offset) const {
 2919   if (flags() & Node::PD::Flag_intel_jcc_erratum) {
 2920     Compile* C = Compile::current();
 2921     PhaseOutput* output = C->output();
 2922     Block* block = output->block();
 2923     int index = output->index();
 2924     return IntelJccErratum::compute_padding(current_offset, this, block, index, C->regalloc());
 2925   } else {
 2926     return 0;
 2927   }
 2928 }
 2929 
 2930 // Emit deopt handler code.
 2931 int HandlerImpl::emit_deopt_handler(C2_MacroAssembler* masm) {
 2932 
 2933   // Note that the code buffer's insts_mark is always relative to insts.
 2934   // That's why we must use the macroassembler to generate a handler.
 2935   address base = __ start_a_stub(size_deopt_handler());
 2936   if (base == nullptr) {
 2937     ciEnv::current()->record_failure("CodeCache is full");
 2938     return 0;  // CodeBuffer::expand failed
 2939   }
 2940   int offset = __ offset();
 2941 
 2942   Label start;
 2943   __ bind(start);
 2944 
 2945   __ call(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
 2946 
 2947   int entry_offset = __ offset();
 2948 
 2949   __ jmp(start);
 2950 
 2951   assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow %d", (__ offset() - offset));
 2952   assert(__ offset() - entry_offset >= NativePostCallNop::first_check_size,
 2953          "out of bounds read in post-call NOP check");
 2954   __ end_a_stub();
 2955   return entry_offset;
 2956 }
 2957 
 2958 static Assembler::Width widthForType(BasicType bt) {
 2959   if (bt == T_BYTE) {
 2960     return Assembler::B;
 2961   } else if (bt == T_SHORT) {
 2962     return Assembler::W;
 2963   } else if (bt == T_INT) {
 2964     return Assembler::D;
 2965   } else {
 2966     assert(bt == T_LONG, "not a long: %s", type2name(bt));
 2967     return Assembler::Q;
 2968   }
 2969 }
 2970 
 2971 //=============================================================================
 2972 
 2973   // Float masks come from different places depending on platform.
 2974   static address float_signmask()  { return StubRoutines::x86::float_sign_mask(); }
 2975   static address float_signflip()  { return StubRoutines::x86::float_sign_flip(); }
 2976   static address double_signmask() { return StubRoutines::x86::double_sign_mask(); }
 2977   static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
 2978   static address vector_short_to_byte_mask() { return StubRoutines::x86::vector_short_to_byte_mask(); }
 2979   static address vector_int_to_byte_mask() { return StubRoutines::x86::vector_int_to_byte_mask(); }
 2980   static address vector_byte_perm_mask() { return StubRoutines::x86::vector_byte_perm_mask(); }
 2981   static address vector_long_sign_mask() { return StubRoutines::x86::vector_long_sign_mask(); }
 2982   static address vector_all_bits_set() { return StubRoutines::x86::vector_all_bits_set(); }
 2983   static address vector_int_mask_cmp_bits() { return StubRoutines::x86::vector_int_mask_cmp_bits(); }
 2984   static address vector_int_to_short_mask() { return StubRoutines::x86::vector_int_to_short_mask(); }
 2985   static address vector_byte_shufflemask() { return StubRoutines::x86::vector_byte_shuffle_mask(); }
 2986   static address vector_short_shufflemask() { return StubRoutines::x86::vector_short_shuffle_mask(); }
 2987   static address vector_int_shufflemask() { return StubRoutines::x86::vector_int_shuffle_mask(); }
 2988   static address vector_long_shufflemask() { return StubRoutines::x86::vector_long_shuffle_mask(); }
 2989   static address vector_32_bit_mask() { return StubRoutines::x86::vector_32_bit_mask(); }
 2990   static address vector_64_bit_mask() { return StubRoutines::x86::vector_64_bit_mask(); }
 2991   static address vector_float_signflip() { return StubRoutines::x86::vector_float_sign_flip();}
 2992   static address vector_double_signflip() { return StubRoutines::x86::vector_double_sign_flip();}
 2993 
 2994 //=============================================================================
 2995 bool Matcher::match_rule_supported(int opcode) {
 2996   if (!has_match_rule(opcode)) {
 2997     return false; // no match rule present
 2998   }
 2999   switch (opcode) {
 3000     case Op_AbsVL:
 3001     case Op_StoreVectorScatter:
 3002       if (UseAVX < 3) {
 3003         return false;
 3004       }
 3005       break;
 3006     case Op_PopCountI:
 3007     case Op_PopCountL:
 3008       if (!UsePopCountInstruction) {
 3009         return false;
 3010       }
 3011       break;
 3012     case Op_PopCountVI:
 3013       if (UseAVX < 2) {
 3014         return false;
 3015       }
 3016       break;
 3017     case Op_CompressV:
 3018     case Op_ExpandV:
 3019     case Op_PopCountVL:
 3020       if (UseAVX < 2) {
 3021         return false;
 3022       }
 3023       break;
 3024     case Op_MulVI:
 3025       if ((UseSSE < 4) && (UseAVX < 1)) { // only with SSE4_1 or AVX
 3026         return false;
 3027       }
 3028       break;
 3029     case Op_MulVL:
 3030       if (UseSSE < 4) { // only with SSE4_1 or AVX
 3031         return false;
 3032       }
 3033       break;
 3034     case Op_MulReductionVL:
 3035       if (VM_Version::supports_avx512dq() == false) {
 3036         return false;
 3037       }
 3038       break;
 3039     case Op_AbsVB:
 3040     case Op_AbsVS:
 3041     case Op_AbsVI:
 3042     case Op_AddReductionVI:
 3043     case Op_AndReductionV:
 3044     case Op_OrReductionV:
 3045     case Op_XorReductionV:
 3046       if (UseSSE < 3) { // requires at least SSSE3
 3047         return false;
 3048       }
 3049       break;
 3050     case Op_MaxHF:
 3051     case Op_MinHF:
 3052       if (!VM_Version::supports_avx512vlbw()) {
 3053         return false;
 3054       }  // fallthrough
 3055     case Op_AddHF:
 3056     case Op_DivHF:
 3057     case Op_FmaHF:
 3058     case Op_MulHF:
 3059     case Op_ReinterpretS2HF:
 3060     case Op_ReinterpretHF2S:
 3061     case Op_SubHF:
 3062     case Op_SqrtHF:
 3063       if (!VM_Version::supports_avx512_fp16()) {
 3064         return false;
 3065       }
 3066       break;
 3067     case Op_VectorLoadShuffle:
 3068     case Op_VectorRearrange:
 3069     case Op_MulReductionVI:
 3070       if (UseSSE < 4) { // requires at least SSE4
 3071         return false;
 3072       }
 3073       break;
 3074     case Op_IsInfiniteF:
 3075     case Op_IsInfiniteD:
 3076       if (!VM_Version::supports_avx512dq()) {
 3077         return false;
 3078       }
 3079       break;
 3080     case Op_SqrtVD:
 3081     case Op_SqrtVF:
 3082     case Op_VectorMaskCmp:
 3083     case Op_VectorCastB2X:
 3084     case Op_VectorCastS2X:
 3085     case Op_VectorCastI2X:
 3086     case Op_VectorCastL2X:
 3087     case Op_VectorCastF2X:
 3088     case Op_VectorCastD2X:
 3089     case Op_VectorUCastB2X:
 3090     case Op_VectorUCastS2X:
 3091     case Op_VectorUCastI2X:
 3092     case Op_VectorMaskCast:
 3093       if (UseAVX < 1) { // enabled for AVX only
 3094         return false;
 3095       }
 3096       break;
 3097     case Op_PopulateIndex:
 3098       if (UseAVX < 2) {
 3099         return false;
 3100       }
 3101       break;
 3102     case Op_RoundVF:
 3103       if (UseAVX < 2) { // enabled for AVX2 only
 3104         return false;
 3105       }
 3106       break;
 3107     case Op_RoundVD:
 3108       if (UseAVX < 3) {
 3109         return false;  // enabled for AVX3 only
 3110       }
 3111       break;
 3112     case Op_CompareAndSwapL:
 3113     case Op_CompareAndSwapP:
 3114       break;
 3115     case Op_StrIndexOf:
 3116       if (!UseSSE42Intrinsics) {
 3117         return false;
 3118       }
 3119       break;
 3120     case Op_StrIndexOfChar:
 3121       if (!UseSSE42Intrinsics) {
 3122         return false;
 3123       }
 3124       break;
 3125     case Op_OnSpinWait:
 3126       if (VM_Version::supports_on_spin_wait() == false) {
 3127         return false;
 3128       }
 3129       break;
 3130     case Op_MulVB:
 3131     case Op_LShiftVB:
 3132     case Op_RShiftVB:
 3133     case Op_URShiftVB:
 3134     case Op_VectorInsert:
 3135     case Op_VectorLoadMask:
 3136     case Op_VectorStoreMask:
 3137     case Op_VectorBlend:
 3138       if (UseSSE < 4) {
 3139         return false;
 3140       }
 3141       break;
 3142     case Op_MaxD:
 3143     case Op_MaxF:
 3144     case Op_MinD:
 3145     case Op_MinF:
 3146       if (UseAVX < 1) { // enabled for AVX only
 3147         return false;
 3148       }
 3149       break;
 3150     case Op_CacheWB:
 3151     case Op_CacheWBPreSync:
 3152     case Op_CacheWBPostSync:
 3153       if (!VM_Version::supports_data_cache_line_flush()) {
 3154         return false;
 3155       }
 3156       break;
 3157     case Op_ExtractB:
 3158     case Op_ExtractL:
 3159     case Op_ExtractI:
 3160     case Op_RoundDoubleMode:
 3161       if (UseSSE < 4) {
 3162         return false;
 3163       }
 3164       break;
 3165     case Op_RoundDoubleModeV:
 3166       if (VM_Version::supports_avx() == false) {
 3167         return false; // 128bit vroundpd is not available
 3168       }
 3169       break;
 3170     case Op_LoadVectorGather:
 3171     case Op_LoadVectorGatherMasked:
 3172       if (UseAVX < 2) {
 3173         return false;
 3174       }
 3175       break;
 3176     case Op_FmaF:
 3177     case Op_FmaD:
 3178     case Op_FmaVD:
 3179     case Op_FmaVF:
 3180       if (!UseFMA) {
 3181         return false;
 3182       }
 3183       break;
 3184     case Op_MacroLogicV:
 3185       if (UseAVX < 3 || !UseVectorMacroLogic) {
 3186         return false;
 3187       }
 3188       break;
 3189 
 3190     case Op_VectorCmpMasked:
 3191     case Op_VectorMaskGen:
 3192       if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
 3193         return false;
 3194       }
 3195       break;
 3196     case Op_VectorMaskFirstTrue:
 3197     case Op_VectorMaskLastTrue:
 3198     case Op_VectorMaskTrueCount:
 3199     case Op_VectorMaskToLong:
 3200       if (UseAVX < 1) {
 3201          return false;
 3202       }
 3203       break;
 3204     case Op_RoundF:
 3205     case Op_RoundD:
 3206       break;
 3207     case Op_CopySignD:
 3208     case Op_CopySignF:
 3209       if (UseAVX < 3)  {
 3210         return false;
 3211       }
 3212       if (!VM_Version::supports_avx512vl()) {
 3213         return false;
 3214       }
 3215       break;
 3216     case Op_CompressBits:
 3217     case Op_ExpandBits:
 3218       if (!VM_Version::supports_bmi2()) {
 3219         return false;
 3220       }
 3221       break;
 3222     case Op_CompressM:
 3223       if (!VM_Version::supports_avx512vl() || !VM_Version::supports_bmi2()) {
 3224         return false;
 3225       }
 3226       break;
 3227     case Op_ConvF2HF:
 3228     case Op_ConvHF2F:
 3229       if (!VM_Version::supports_float16()) {
 3230         return false;
 3231       }
 3232       break;
 3233     case Op_VectorCastF2HF:
 3234     case Op_VectorCastHF2F:
 3235       if (!VM_Version::supports_f16c() && !VM_Version::supports_evex()) {
 3236         return false;
 3237       }
 3238       break;
 3239   }
 3240   return true;  // Match rules are supported by default.
 3241 }
 3242 
 3243 //------------------------------------------------------------------------
 3244 
 3245 static inline bool is_pop_count_instr_target(BasicType bt) {
 3246   return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
 3247          (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
 3248 }
 3249 
 3250 bool Matcher::match_rule_supported_auto_vectorization(int opcode, int vlen, BasicType bt) {
 3251   return match_rule_supported_vector(opcode, vlen, bt);
 3252 }
 3253 
 3254 // Identify extra cases that we might want to provide match rules for vector nodes and
 3255 // other intrinsics guarded with vector length (vlen) and element type (bt).
 3256 bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
 3257   if (!match_rule_supported(opcode)) {
 3258     return false;
 3259   }
 3260   // Matcher::vector_size_supported() restricts vector sizes in the following way (see Matcher::vector_width_in_bytes):
 3261   //   * SSE2 supports 128bit vectors for all types;
 3262   //   * AVX1 supports 256bit vectors only for FLOAT and DOUBLE types;
 3263   //   * AVX2 supports 256bit vectors for all types;
 3264   //   * AVX512F supports 512bit vectors only for INT, FLOAT, and DOUBLE types;
 3265   //   * AVX512BW supports 512bit vectors for BYTE, SHORT, and CHAR types.
 3266   // There's also a limit on minimum vector size supported: 2 elements (or 4 bytes for BYTE).
 3267   // And MaxVectorSize is taken into account as well.
 3268   if (!vector_size_supported(bt, vlen)) {
 3269     return false;
 3270   }
 3271   // Special cases which require vector length follow:
 3272   //   * implementation limitations
 3273   //   * some 512bit vector operations on FLOAT and DOUBLE types require AVX512DQ
 3274   //   * 128bit vroundpd instruction is present only in AVX1
 3275   int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
 3276   switch (opcode) {
 3277     case Op_MaxVHF:
 3278     case Op_MinVHF:
 3279       if (!VM_Version::supports_avx512bw()) {
 3280         return false;
 3281       }
 3282     case Op_AddVHF:
 3283     case Op_DivVHF:
 3284     case Op_FmaVHF:
 3285     case Op_MulVHF:
 3286     case Op_SubVHF:
 3287     case Op_SqrtVHF:
 3288       if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3289         return false;
 3290       }
 3291       if (!VM_Version::supports_avx512_fp16()) {
 3292         return false;
 3293       }
 3294       break;
 3295     case Op_AbsVF:
 3296     case Op_NegVF:
 3297       if ((vlen == 16) && (VM_Version::supports_avx512dq() == false)) {
 3298         return false; // 512bit vandps and vxorps are not available
 3299       }
 3300       break;
 3301     case Op_AbsVD:
 3302     case Op_NegVD:
 3303       if ((vlen == 8) && (VM_Version::supports_avx512dq() == false)) {
 3304         return false; // 512bit vpmullq, vandpd and vxorpd are not available
 3305       }
 3306       break;
 3307     case Op_RotateRightV:
 3308     case Op_RotateLeftV:
 3309       if (bt != T_INT && bt != T_LONG) {
 3310         return false;
 3311       } // fallthrough
 3312     case Op_MacroLogicV:
 3313       if (!VM_Version::supports_evex() ||
 3314           ((size_in_bits != 512) && !VM_Version::supports_avx512vl())) {
 3315         return false;
 3316       }
 3317       break;
 3318     case Op_ClearArray:
 3319     case Op_VectorMaskGen:
 3320     case Op_VectorCmpMasked:
 3321       if (!VM_Version::supports_avx512bw()) {
 3322         return false;
 3323       }
 3324       if ((size_in_bits != 512) && !VM_Version::supports_avx512vl()) {
 3325         return false;
 3326       }
 3327       break;
 3328     case Op_LoadVectorMasked:
 3329     case Op_StoreVectorMasked:
 3330       if (!VM_Version::supports_avx512bw() && (is_subword_type(bt) || UseAVX < 1)) {
 3331         return false;
 3332       }
 3333       break;
 3334     case Op_UMinV:
 3335     case Op_UMaxV:
 3336       if (UseAVX == 0) {
 3337         return false;
 3338       }
 3339       break;
 3340     case Op_UMinReductionV:
 3341     case Op_UMaxReductionV:
 3342       if (UseAVX == 0) {
 3343         return false;
 3344       }
 3345       if (bt == T_LONG && !VM_Version::supports_avx512vl()) {
 3346         return false;
 3347       }
 3348       if (UseAVX > 2 && size_in_bits == 512 && !VM_Version::supports_avx512vl()) {
 3349         return false;
 3350       }
 3351       break;
 3352     case Op_MaxV:
 3353     case Op_MinV:
 3354       if (UseSSE < 4 && is_integral_type(bt)) {
 3355         return false;
 3356       }
 3357       if ((bt == T_FLOAT || bt == T_DOUBLE)) {
 3358           // Float/Double intrinsics are enabled for AVX family currently.
 3359           if (UseAVX == 0) {
 3360             return false;
 3361           }
 3362           if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) { // 512 bit Float/Double intrinsics need AVX512DQ
 3363             return false;
 3364           }
 3365       }
 3366       break;
 3367     case Op_CallLeafVector:
 3368       if (size_in_bits == 512 && !VM_Version::supports_avx512vlbwdq()) {
 3369         return false;
 3370       }
 3371       break;
 3372     case Op_AddReductionVI:
 3373       if (bt == T_INT && (UseSSE < 3 || !VM_Version::supports_ssse3())) {
 3374         return false;
 3375       }
 3376       // fallthrough
 3377     case Op_AndReductionV:
 3378     case Op_OrReductionV:
 3379     case Op_XorReductionV:
 3380       if (is_subword_type(bt) && (UseSSE < 4)) {
 3381         return false;
 3382       }
 3383       break;
 3384     case Op_MinReductionV:
 3385     case Op_MaxReductionV:
 3386       if ((bt == T_INT || is_subword_type(bt)) && UseSSE < 4) {
 3387         return false;
 3388       } else if (bt == T_LONG && (UseAVX < 3 || !VM_Version::supports_avx512vlbwdq())) {
 3389         return false;
 3390       }
 3391       // Float/Double intrinsics enabled for AVX family.
 3392       if (UseAVX == 0 && (bt == T_FLOAT || bt == T_DOUBLE)) {
 3393         return false;
 3394       }
 3395       if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) {
 3396         return false;
 3397       }
 3398       break;
 3399     case Op_VectorBlend:
 3400       if (UseAVX == 0 && size_in_bits < 128) {
 3401         return false;
 3402       }
 3403       break;
 3404     case Op_VectorTest:
 3405       if (UseSSE < 4) {
 3406         return false; // Implementation limitation
 3407       } else if (size_in_bits < 32) {
 3408         return false; // Implementation limitation
 3409       }
 3410       break;
 3411     case Op_VectorLoadShuffle:
 3412     case Op_VectorRearrange:
 3413       if(vlen == 2) {
 3414         return false; // Implementation limitation due to how shuffle is loaded
 3415       } else if (size_in_bits == 256 && UseAVX < 2) {
 3416         return false; // Implementation limitation
 3417       }
 3418       break;
 3419     case Op_VectorLoadMask:
 3420     case Op_VectorMaskCast:
 3421       if (size_in_bits == 256 && UseAVX < 2) {
 3422         return false; // Implementation limitation
 3423       }
 3424       // fallthrough
 3425     case Op_VectorStoreMask:
 3426       if (vlen == 2) {
 3427         return false; // Implementation limitation
 3428       }
 3429       break;
 3430     case Op_PopulateIndex:
 3431       if (size_in_bits > 256 && !VM_Version::supports_avx512bw()) {
 3432         return false;
 3433       }
 3434       break;
 3435     case Op_VectorCastB2X:
 3436     case Op_VectorCastS2X:
 3437     case Op_VectorCastI2X:
 3438       if (bt != T_DOUBLE && size_in_bits == 256 && UseAVX < 2) {
 3439         return false;
 3440       }
 3441       break;
 3442     case Op_VectorCastL2X:
 3443       if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) {
 3444         return false;
 3445       } else if (!is_integral_type(bt) && !VM_Version::supports_avx512dq()) {
 3446         return false;
 3447       }
 3448       break;
 3449     case Op_VectorCastF2X: {
 3450         // As per JLS section 5.1.3 narrowing conversion to sub-word types
 3451         // happen after intermediate conversion to integer and special handling
 3452         // code needs AVX2 vpcmpeqd instruction for 256 bit vectors.
 3453         int src_size_in_bits = type2aelembytes(T_FLOAT) * vlen * BitsPerByte;
 3454         if (is_integral_type(bt) && src_size_in_bits == 256 && UseAVX < 2) {
 3455           return false;
 3456         }
 3457       }
 3458       // fallthrough
 3459     case Op_VectorCastD2X:
 3460       if (bt == T_LONG && !VM_Version::supports_avx512dq()) {
 3461         return false;
 3462       }
 3463       break;
 3464     case Op_VectorCastF2HF:
 3465     case Op_VectorCastHF2F:
 3466       if (!VM_Version::supports_f16c() &&
 3467          ((!VM_Version::supports_evex() ||
 3468          ((size_in_bits != 512) && !VM_Version::supports_avx512vl())))) {
 3469         return false;
 3470       }
 3471       break;
 3472     case Op_RoundVD:
 3473       if (!VM_Version::supports_avx512dq()) {
 3474         return false;
 3475       }
 3476       break;
 3477     case Op_MulReductionVI:
 3478       if (bt == T_BYTE && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
 3479         return false;
 3480       }
 3481       break;
 3482     case Op_LoadVectorGatherMasked:
 3483       if (!is_subword_type(bt) && size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3484         return false;
 3485       }
 3486       if (is_subword_type(bt) &&
 3487          ((size_in_bits > 256 && !VM_Version::supports_avx512bw()) ||
 3488           (size_in_bits < 64)                                      ||
 3489           (bt == T_SHORT && !VM_Version::supports_bmi2()))) {
 3490         return false;
 3491       }
 3492       break;
 3493     case Op_StoreVectorScatterMasked:
 3494     case Op_StoreVectorScatter:
 3495       if (is_subword_type(bt)) {
 3496         return false;
 3497       } else if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3498         return false;
 3499       }
 3500       // fallthrough
 3501     case Op_LoadVectorGather:
 3502       if (!is_subword_type(bt) && size_in_bits == 64) {
 3503         return false;
 3504       }
 3505       if (is_subword_type(bt) && size_in_bits < 64) {
 3506         return false;
 3507       }
 3508       break;
 3509     case Op_SaturatingAddV:
 3510     case Op_SaturatingSubV:
 3511       if (UseAVX < 1) {
 3512         return false; // Implementation limitation
 3513       }
 3514       if (is_subword_type(bt) && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
 3515         return false;
 3516       }
 3517       break;
 3518     case Op_SelectFromTwoVector:
 3519        if (size_in_bits < 128) {
 3520          return false;
 3521        }
 3522        if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3523          return false;
 3524        }
 3525        if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
 3526          return false;
 3527        }
 3528        if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
 3529          return false;
 3530        }
 3531        if ((bt == T_INT || bt == T_FLOAT || bt == T_DOUBLE) && !VM_Version::supports_evex()) {
 3532          return false;
 3533        }
 3534        break;
 3535     case Op_MaskAll:
 3536       if (!VM_Version::supports_evex()) {
 3537         return false;
 3538       }
 3539       if ((vlen > 16 || is_subword_type(bt)) && !VM_Version::supports_avx512bw()) {
 3540         return false;
 3541       }
 3542       if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3543         return false;
 3544       }
 3545       break;
 3546     case Op_VectorMaskCmp:
 3547       if (vlen < 2 || size_in_bits < 32) {
 3548         return false;
 3549       }
 3550       break;
 3551     case Op_CompressM:
 3552       if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
 3553         return false;
 3554       }
 3555       break;
 3556     case Op_CompressV:
 3557     case Op_ExpandV:
 3558       if (is_subword_type(bt) && !VM_Version::supports_avx512_vbmi2()) {
 3559         return false;
 3560       }
 3561       if (size_in_bits < 128 ) {
 3562         return false;
 3563       }
 3564     case Op_VectorLongToMask:
 3565       if (UseAVX < 1) {
 3566         return false;
 3567       }
 3568       if (UseAVX < 3 && !VM_Version::supports_bmi2()) {
 3569         return false;
 3570       }
 3571       break;
 3572     case Op_SignumVD:
 3573     case Op_SignumVF:
 3574       if (UseAVX < 1) {
 3575         return false;
 3576       }
 3577       break;
 3578     case Op_PopCountVI:
 3579     case Op_PopCountVL: {
 3580         if (!is_pop_count_instr_target(bt) &&
 3581             (size_in_bits == 512) && !VM_Version::supports_avx512bw()) {
 3582           return false;
 3583         }
 3584       }
 3585       break;
 3586     case Op_ReverseV:
 3587     case Op_ReverseBytesV:
 3588       if (UseAVX < 2) {
 3589         return false;
 3590       }
 3591       break;
 3592     case Op_CountTrailingZerosV:
 3593     case Op_CountLeadingZerosV:
 3594       if (UseAVX < 2) {
 3595         return false;
 3596       }
 3597       break;
 3598   }
 3599   return true;  // Per default match rules are supported.
 3600 }
 3601 
 3602 bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) {
 3603   // ADLC based match_rule_supported routine checks for the existence of pattern based
 3604   // on IR opcode. Most of the unary/binary/ternary masked operation share the IR nodes
 3605   // of their non-masked counterpart with mask edge being the differentiator.
 3606   // This routine does a strict check on the existence of masked operation patterns
 3607   // by returning a default false value for all the other opcodes apart from the
 3608   // ones whose masked instruction patterns are defined in this file.
 3609   if (!match_rule_supported_vector(opcode, vlen, bt)) {
 3610     return false;
 3611   }
 3612 
 3613   int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
 3614   if (size_in_bits != 512 && !VM_Version::supports_avx512vl()) {
 3615     return false;
 3616   }
 3617   switch(opcode) {
 3618     // Unary masked operations
 3619     case Op_AbsVB:
 3620     case Op_AbsVS:
 3621       if(!VM_Version::supports_avx512bw()) {
 3622         return false;  // Implementation limitation
 3623       }
 3624     case Op_AbsVI:
 3625     case Op_AbsVL:
 3626       return true;
 3627 
 3628     // Ternary masked operations
 3629     case Op_FmaVF:
 3630     case Op_FmaVD:
 3631       return true;
 3632 
 3633     case Op_MacroLogicV:
 3634       if(bt != T_INT && bt != T_LONG) {
 3635         return false;
 3636       }
 3637       return true;
 3638 
 3639     // Binary masked operations
 3640     case Op_AddVB:
 3641     case Op_AddVS:
 3642     case Op_SubVB:
 3643     case Op_SubVS:
 3644     case Op_MulVS:
 3645     case Op_LShiftVS:
 3646     case Op_RShiftVS:
 3647     case Op_URShiftVS:
 3648       assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
 3649       if (!VM_Version::supports_avx512bw()) {
 3650         return false;  // Implementation limitation
 3651       }
 3652       return true;
 3653 
 3654     case Op_MulVL:
 3655       assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
 3656       if (!VM_Version::supports_avx512dq()) {
 3657         return false;  // Implementation limitation
 3658       }
 3659       return true;
 3660 
 3661     case Op_AndV:
 3662     case Op_OrV:
 3663     case Op_XorV:
 3664     case Op_RotateRightV:
 3665     case Op_RotateLeftV:
 3666       if (bt != T_INT && bt != T_LONG) {
 3667         return false; // Implementation limitation
 3668       }
 3669       return true;
 3670 
 3671     case Op_VectorLoadMask:
 3672       assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
 3673       if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
 3674         return false;
 3675       }
 3676       return true;
 3677 
 3678     case Op_AddVI:
 3679     case Op_AddVL:
 3680     case Op_AddVF:
 3681     case Op_AddVD:
 3682     case Op_SubVI:
 3683     case Op_SubVL:
 3684     case Op_SubVF:
 3685     case Op_SubVD:
 3686     case Op_MulVI:
 3687     case Op_MulVF:
 3688     case Op_MulVD:
 3689     case Op_DivVF:
 3690     case Op_DivVD:
 3691     case Op_SqrtVF:
 3692     case Op_SqrtVD:
 3693     case Op_LShiftVI:
 3694     case Op_LShiftVL:
 3695     case Op_RShiftVI:
 3696     case Op_RShiftVL:
 3697     case Op_URShiftVI:
 3698     case Op_URShiftVL:
 3699     case Op_LoadVectorMasked:
 3700     case Op_StoreVectorMasked:
 3701     case Op_LoadVectorGatherMasked:
 3702     case Op_StoreVectorScatterMasked:
 3703       return true;
 3704 
 3705     case Op_UMinV:
 3706     case Op_UMaxV:
 3707       if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 3708         return false;
 3709       } // fallthrough
 3710     case Op_MaxV:
 3711     case Op_MinV:
 3712       if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
 3713         return false; // Implementation limitation
 3714       }
 3715       if (is_floating_point_type(bt) && !VM_Version::supports_avx10_2()) {
 3716         return false; // Implementation limitation
 3717       }
 3718       return true;
 3719     case Op_SaturatingAddV:
 3720     case Op_SaturatingSubV:
 3721       if (!is_subword_type(bt)) {
 3722         return false;
 3723       }
 3724       if (size_in_bits < 128 || !VM_Version::supports_avx512bw()) {
 3725         return false; // Implementation limitation
 3726       }
 3727       return true;
 3728 
 3729     case Op_VectorMaskCmp:
 3730       if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
 3731         return false; // Implementation limitation
 3732       }
 3733       return true;
 3734 
 3735     case Op_VectorRearrange:
 3736       if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
 3737         return false; // Implementation limitation
 3738       }
 3739       if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
 3740         return false; // Implementation limitation
 3741       } else if ((bt == T_INT || bt == T_FLOAT) && size_in_bits < 256) {
 3742         return false; // Implementation limitation
 3743       }
 3744       return true;
 3745 
 3746     // Binary Logical operations
 3747     case Op_AndVMask:
 3748     case Op_OrVMask:
 3749     case Op_XorVMask:
 3750       if (vlen > 16 && !VM_Version::supports_avx512bw()) {
 3751         return false; // Implementation limitation
 3752       }
 3753       return true;
 3754 
 3755     case Op_PopCountVI:
 3756     case Op_PopCountVL:
 3757       if (!is_pop_count_instr_target(bt)) {
 3758         return false;
 3759       }
 3760       return true;
 3761 
 3762     case Op_MaskAll:
 3763       return true;
 3764 
 3765     case Op_CountLeadingZerosV:
 3766       if (is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd()) {
 3767         return true;
 3768       }
 3769     default:
 3770       return false;
 3771   }
 3772 }
 3773 
 3774 bool Matcher::vector_needs_partial_operations(Node* node, const TypeVect* vt) {
 3775   return false;
 3776 }
 3777 
 3778 // Return true if Vector::rearrange needs preparation of the shuffle argument
 3779 bool Matcher::vector_rearrange_requires_load_shuffle(BasicType elem_bt, int vlen) {
 3780   switch (elem_bt) {
 3781     case T_BYTE:  return false;
 3782     case T_SHORT: return !VM_Version::supports_avx512bw();
 3783     case T_INT:   return !VM_Version::supports_avx();
 3784     case T_LONG:  return vlen < 8 && !VM_Version::supports_avx512vl();
 3785     default:
 3786       ShouldNotReachHere();
 3787       return false;
 3788   }
 3789 }
 3790 
 3791 bool Matcher::mask_op_prefers_predicate(int opcode, const TypeVect* vt) {
 3792   // Prefer predicate if the mask type is "TypePVectMask".
 3793   return vt->isa_pvectmask() != nullptr;
 3794 }
 3795 
 3796 MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* generic_opnd, uint ideal_reg, bool is_temp) {
 3797   assert(Matcher::is_generic_vector(generic_opnd), "not generic");
 3798   bool legacy = (generic_opnd->opcode() == LEGVEC);
 3799   if (!VM_Version::supports_avx512vlbwdq() && // KNL
 3800       is_temp && !legacy && (ideal_reg == Op_VecZ)) {
 3801     // Conservatively specialize 512bit vec TEMP operands to legVecZ (zmm0-15) on KNL.
 3802     return new legVecZOper();
 3803   }
 3804   if (legacy) {
 3805     switch (ideal_reg) {
 3806       case Op_VecS: return new legVecSOper();
 3807       case Op_VecD: return new legVecDOper();
 3808       case Op_VecX: return new legVecXOper();
 3809       case Op_VecY: return new legVecYOper();
 3810       case Op_VecZ: return new legVecZOper();
 3811     }
 3812   } else {
 3813     switch (ideal_reg) {
 3814       case Op_VecS: return new vecSOper();
 3815       case Op_VecD: return new vecDOper();
 3816       case Op_VecX: return new vecXOper();
 3817       case Op_VecY: return new vecYOper();
 3818       case Op_VecZ: return new vecZOper();
 3819     }
 3820   }
 3821   ShouldNotReachHere();
 3822   return nullptr;
 3823 }
 3824 
 3825 bool Matcher::is_reg2reg_move(MachNode* m) {
 3826   switch (m->rule()) {
 3827     case MoveVec2Leg_rule:
 3828     case MoveLeg2Vec_rule:
 3829     case MoveF2VL_rule:
 3830     case MoveF2LEG_rule:
 3831     case MoveVL2F_rule:
 3832     case MoveLEG2F_rule:
 3833     case MoveD2VL_rule:
 3834     case MoveD2LEG_rule:
 3835     case MoveVL2D_rule:
 3836     case MoveLEG2D_rule:
 3837       return true;
 3838     default:
 3839       return false;
 3840   }
 3841 }
 3842 
 3843 bool Matcher::is_generic_vector(MachOper* opnd) {
 3844   switch (opnd->opcode()) {
 3845     case VEC:
 3846     case LEGVEC:
 3847       return true;
 3848     default:
 3849       return false;
 3850   }
 3851 }
 3852 
 3853 //------------------------------------------------------------------------
 3854 
 3855 const RegMask* Matcher::predicate_reg_mask(void) {
 3856   return &_VECTMASK_REG_mask;
 3857 }
 3858 
 3859 // Max vector size in bytes. 0 if not supported.
 3860 int Matcher::vector_width_in_bytes(BasicType bt) {
 3861   assert(is_java_primitive(bt), "only primitive type vectors");
 3862   // SSE2 supports 128bit vectors for all types.
 3863   // AVX2 supports 256bit vectors for all types.
 3864   // AVX2/EVEX supports 512bit vectors for all types.
 3865   int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16;
 3866   // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
 3867   if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
 3868     size = (UseAVX > 2) ? 64 : 32;
 3869   if (UseAVX > 2 && (bt == T_BYTE || bt == T_SHORT || bt == T_CHAR))
 3870     size = (VM_Version::supports_avx512bw()) ? 64 : 32;
 3871   // Use flag to limit vector size.
 3872   size = MIN2(size,(int)MaxVectorSize);
 3873   // Minimum 2 values in vector (or 4 for bytes).
 3874   switch (bt) {
 3875   case T_DOUBLE:
 3876   case T_LONG:
 3877     if (size < 16) return 0;
 3878     break;
 3879   case T_FLOAT:
 3880   case T_INT:
 3881     if (size < 8) return 0;
 3882     break;
 3883   case T_BOOLEAN:
 3884     if (size < 4) return 0;
 3885     break;
 3886   case T_CHAR:
 3887     if (size < 4) return 0;
 3888     break;
 3889   case T_BYTE:
 3890     if (size < 4) return 0;
 3891     break;
 3892   case T_SHORT:
 3893     if (size < 4) return 0;
 3894     break;
 3895   default:
 3896     ShouldNotReachHere();
 3897   }
 3898   return size;
 3899 }
 3900 
 3901 // Limits on vector size (number of elements) loaded into vector.
 3902 int Matcher::max_vector_size(const BasicType bt) {
 3903   return vector_width_in_bytes(bt)/type2aelembytes(bt);
 3904 }
 3905 int Matcher::min_vector_size(const BasicType bt) {
 3906   int max_size = max_vector_size(bt);
 3907   // Min size which can be loaded into vector is 4 bytes.
 3908   int size = (type2aelembytes(bt) == 1) ? 4 : 2;
 3909   // Support for calling svml double64 vectors
 3910   if (bt == T_DOUBLE) {
 3911     size = 1;
 3912   }
 3913   return MIN2(size,max_size);
 3914 }
 3915 
 3916 int Matcher::max_vector_size_auto_vectorization(const BasicType bt) {
 3917   // Limit the max vector size for auto vectorization to 256 bits (32 bytes)
 3918   // by default on Cascade Lake
 3919   if (VM_Version::is_default_intel_cascade_lake()) {
 3920     return MIN2(Matcher::max_vector_size(bt), 32 / type2aelembytes(bt));
 3921   }
 3922   return Matcher::max_vector_size(bt);
 3923 }
 3924 
 3925 int Matcher::scalable_vector_reg_size(const BasicType bt) {
 3926   return -1;
 3927 }
 3928 
 3929 // Vector ideal reg corresponding to specified size in bytes
 3930 uint Matcher::vector_ideal_reg(int size) {
 3931   assert(MaxVectorSize >= size, "");
 3932   switch(size) {
 3933     case  4: return Op_VecS;
 3934     case  8: return Op_VecD;
 3935     case 16: return Op_VecX;
 3936     case 32: return Op_VecY;
 3937     case 64: return Op_VecZ;
 3938   }
 3939   ShouldNotReachHere();
 3940   return 0;
 3941 }
 3942 
 3943 // Check for shift by small constant as well
 3944 static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) {
 3945   if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() &&
 3946       shift->in(2)->get_int() <= 3 &&
 3947       // Are there other uses besides address expressions?
 3948       !matcher->is_visited(shift)) {
 3949     address_visited.set(shift->_idx); // Flag as address_visited
 3950     mstack.push(shift->in(2), Matcher::Visit);
 3951     Node *conv = shift->in(1);
 3952     // Allow Matcher to match the rule which bypass
 3953     // ConvI2L operation for an array index on LP64
 3954     // if the index value is positive.
 3955     if (conv->Opcode() == Op_ConvI2L &&
 3956         conv->as_Type()->type()->is_long()->_lo >= 0 &&
 3957         // Are there other uses besides address expressions?
 3958         !matcher->is_visited(conv)) {
 3959       address_visited.set(conv->_idx); // Flag as address_visited
 3960       mstack.push(conv->in(1), Matcher::Pre_Visit);
 3961     } else {
 3962       mstack.push(conv, Matcher::Pre_Visit);
 3963     }
 3964     return true;
 3965   }
 3966   return false;
 3967 }
 3968 
 3969 // This function identifies sub-graphs in which a 'load' node is
 3970 // input to two different nodes, and such that it can be matched
 3971 // with BMI instructions like blsi, blsr, etc.
 3972 // Example : for b = -a[i] & a[i] can be matched to blsi r32, m32.
 3973 // The graph is (AndL (SubL Con0 LoadL*) LoadL*), where LoadL*
 3974 // refers to the same node.
 3975 //
 3976 // Match the generic fused operations pattern (op1 (op2 Con{ConType} mop) mop)
 3977 // This is a temporary solution until we make DAGs expressible in ADL.
 3978 template<typename ConType>
 3979 class FusedPatternMatcher {
 3980   Node* _op1_node;
 3981   Node* _mop_node;
 3982   int _con_op;
 3983 
 3984   static int match_next(Node* n, int next_op, int next_op_idx) {
 3985     if (n->in(1) == nullptr || n->in(2) == nullptr) {
 3986       return -1;
 3987     }
 3988 
 3989     if (next_op_idx == -1) { // n is commutative, try rotations
 3990       if (n->in(1)->Opcode() == next_op) {
 3991         return 1;
 3992       } else if (n->in(2)->Opcode() == next_op) {
 3993         return 2;
 3994       }
 3995     } else {
 3996       assert(next_op_idx > 0 && next_op_idx <= 2, "Bad argument index");
 3997       if (n->in(next_op_idx)->Opcode() == next_op) {
 3998         return next_op_idx;
 3999       }
 4000     }
 4001     return -1;
 4002   }
 4003 
 4004  public:
 4005   FusedPatternMatcher(Node* op1_node, Node* mop_node, int con_op) :
 4006     _op1_node(op1_node), _mop_node(mop_node), _con_op(con_op) { }
 4007 
 4008   bool match(int op1, int op1_op2_idx,  // op1 and the index of the op1->op2 edge, -1 if op1 is commutative
 4009              int op2, int op2_con_idx,  // op2 and the index of the op2->con edge, -1 if op2 is commutative
 4010              typename ConType::NativeType con_value) {
 4011     if (_op1_node->Opcode() != op1) {
 4012       return false;
 4013     }
 4014     if (_mop_node->outcnt() > 2) {
 4015       return false;
 4016     }
 4017     op1_op2_idx = match_next(_op1_node, op2, op1_op2_idx);
 4018     if (op1_op2_idx == -1) {
 4019       return false;
 4020     }
 4021     // Memory operation must be the other edge
 4022     int op1_mop_idx = (op1_op2_idx & 1) + 1;
 4023 
 4024     // Check that the mop node is really what we want
 4025     if (_op1_node->in(op1_mop_idx) == _mop_node) {
 4026       Node* op2_node = _op1_node->in(op1_op2_idx);
 4027       if (op2_node->outcnt() > 1) {
 4028         return false;
 4029       }
 4030       assert(op2_node->Opcode() == op2, "Should be");
 4031       op2_con_idx = match_next(op2_node, _con_op, op2_con_idx);
 4032       if (op2_con_idx == -1) {
 4033         return false;
 4034       }
 4035       // Memory operation must be the other edge
 4036       int op2_mop_idx = (op2_con_idx & 1) + 1;
 4037       // Check that the memory operation is the same node
 4038       if (op2_node->in(op2_mop_idx) == _mop_node) {
 4039         // Now check the constant
 4040         const Type* con_type = op2_node->in(op2_con_idx)->bottom_type();
 4041         if (con_type != Type::TOP && ConType::as_self(con_type)->get_con() == con_value) {
 4042           return true;
 4043         }
 4044       }
 4045     }
 4046     return false;
 4047   }
 4048 };
 4049 
 4050 static bool is_bmi_pattern(Node* n, Node* m) {
 4051   assert(VM_Version::supports_bmi1() && VM_Version::supports_avx(), "sanity");
 4052   if (n != nullptr && m != nullptr) {
 4053     if (m->Opcode() == Op_LoadI) {
 4054       FusedPatternMatcher<TypeInt> bmii(n, m, Op_ConI);
 4055       return bmii.match(Op_AndI, -1, Op_SubI,  1,  0)  ||
 4056              bmii.match(Op_AndI, -1, Op_AddI, -1, -1)  ||
 4057              bmii.match(Op_XorI, -1, Op_AddI, -1, -1);
 4058     } else if (m->Opcode() == Op_LoadL) {
 4059       FusedPatternMatcher<TypeLong> bmil(n, m, Op_ConL);
 4060       return bmil.match(Op_AndL, -1, Op_SubL,  1,  0) ||
 4061              bmil.match(Op_AndL, -1, Op_AddL, -1, -1) ||
 4062              bmil.match(Op_XorL, -1, Op_AddL, -1, -1);
 4063     }
 4064   }
 4065   return false;
 4066 }
 4067 
 4068 // Should the matcher clone input 'm' of node 'n'?
 4069 bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) {
 4070   // If 'n' and 'm' are part of a graph for BMI instruction, clone the input 'm'.
 4071   if (VM_Version::supports_bmi1() && VM_Version::supports_avx() && is_bmi_pattern(n, m)) {
 4072     mstack.push(m, Visit);
 4073     return true;
 4074   }
 4075   if (is_vshift_con_pattern(n, m)) { // ShiftV src (ShiftCntV con)
 4076     mstack.push(m, Visit);           // m = ShiftCntV
 4077     return true;
 4078   }
 4079   if (is_encode_and_store_pattern(n, m)) {
 4080     mstack.push(m, Visit);
 4081     return true;
 4082   }
 4083   return false;
 4084 }
 4085 
 4086 // Should the Matcher clone shifts on addressing modes, expecting them
 4087 // to be subsumed into complex addressing expressions or compute them
 4088 // into registers?
 4089 bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
 4090   Node *off = m->in(AddPNode::Offset);
 4091   if (off->is_Con()) {
 4092     address_visited.test_set(m->_idx); // Flag as address_visited
 4093     Node *adr = m->in(AddPNode::Address);
 4094 
 4095     // Intel can handle 2 adds in addressing mode, with one of them using an immediate offset.
 4096     // AtomicAdd is not an addressing expression.
 4097     // Cheap to find it by looking for screwy base.
 4098     if (adr->is_AddP() &&
 4099         !adr->in(AddPNode::Base)->is_top() &&
 4100         !adr->in(AddPNode::Offset)->is_Con() &&
 4101         off->get_long() == (int) (off->get_long()) && // immL32
 4102         // Are there other uses besides address expressions?
 4103         !is_visited(adr)) {
 4104       address_visited.set(adr->_idx); // Flag as address_visited
 4105       Node *shift = adr->in(AddPNode::Offset);
 4106       if (!clone_shift(shift, this, mstack, address_visited)) {
 4107         mstack.push(shift, Pre_Visit);
 4108       }
 4109       mstack.push(adr->in(AddPNode::Address), Pre_Visit);
 4110       mstack.push(adr->in(AddPNode::Base), Pre_Visit);
 4111     } else {
 4112       mstack.push(adr, Pre_Visit);
 4113     }
 4114 
 4115     // Clone X+offset as it also folds into most addressing expressions
 4116     mstack.push(off, Visit);
 4117     mstack.push(m->in(AddPNode::Base), Pre_Visit);
 4118     return true;
 4119   } else if (clone_shift(off, this, mstack, address_visited)) {
 4120     address_visited.test_set(m->_idx); // Flag as address_visited
 4121     mstack.push(m->in(AddPNode::Address), Pre_Visit);
 4122     mstack.push(m->in(AddPNode::Base), Pre_Visit);
 4123     return true;
 4124   }
 4125   return false;
 4126 }
 4127 
 4128 static inline Assembler::ComparisonPredicate booltest_pred_to_comparison_pred(int bt) {
 4129   switch (bt) {
 4130     case BoolTest::eq:
 4131       return Assembler::eq;
 4132     case BoolTest::ne:
 4133       return Assembler::neq;
 4134     case BoolTest::le:
 4135     case BoolTest::ule:
 4136       return Assembler::le;
 4137     case BoolTest::ge:
 4138     case BoolTest::uge:
 4139       return Assembler::nlt;
 4140     case BoolTest::lt:
 4141     case BoolTest::ult:
 4142       return Assembler::lt;
 4143     case BoolTest::gt:
 4144     case BoolTest::ugt:
 4145       return Assembler::nle;
 4146     default : ShouldNotReachHere(); return Assembler::_false;
 4147   }
 4148 }
 4149 
 4150 static inline Assembler::ComparisonPredicateFP booltest_pred_to_comparison_pred_fp(int bt) {
 4151   switch (bt) {
 4152   case BoolTest::eq: return Assembler::EQ_OQ;  // ordered non-signaling
 4153   // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare.
 4154   case BoolTest::ne: return Assembler::NEQ_UQ; // unordered non-signaling
 4155   case BoolTest::le: return Assembler::LE_OQ;  // ordered non-signaling
 4156   case BoolTest::ge: return Assembler::GE_OQ;  // ordered non-signaling
 4157   case BoolTest::lt: return Assembler::LT_OQ;  // ordered non-signaling
 4158   case BoolTest::gt: return Assembler::GT_OQ;  // ordered non-signaling
 4159   default: ShouldNotReachHere(); return Assembler::FALSE_OS;
 4160   }
 4161 }
 4162 
 4163 // Helper methods for MachSpillCopyNode::implementation().
 4164 static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
 4165                           int src_hi, int dst_hi, uint ireg, outputStream* st) {
 4166   assert(ireg == Op_VecS || // 32bit vector
 4167          ((src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
 4168           (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi),
 4169          "no non-adjacent vector moves" );
 4170   if (masm) {
 4171     switch (ireg) {
 4172     case Op_VecS: // copy whole register
 4173     case Op_VecD:
 4174     case Op_VecX:
 4175       if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4176         __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
 4177       } else {
 4178         __ vextractf32x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
 4179      }
 4180       break;
 4181     case Op_VecY:
 4182       if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4183         __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
 4184       } else {
 4185         __ vextractf64x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
 4186      }
 4187       break;
 4188     case Op_VecZ:
 4189       __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2);
 4190       break;
 4191     default:
 4192       ShouldNotReachHere();
 4193     }
 4194 #ifndef PRODUCT
 4195   } else {
 4196     switch (ireg) {
 4197     case Op_VecS:
 4198     case Op_VecD:
 4199     case Op_VecX:
 4200       st->print("movdqu  %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 4201       break;
 4202     case Op_VecY:
 4203     case Op_VecZ:
 4204       st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 4205       break;
 4206     default:
 4207       ShouldNotReachHere();
 4208     }
 4209 #endif
 4210   }
 4211 }
 4212 
 4213 void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
 4214                      int stack_offset, int reg, uint ireg, outputStream* st) {
 4215   if (masm) {
 4216     if (is_load) {
 4217       switch (ireg) {
 4218       case Op_VecS:
 4219         __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
 4220         break;
 4221       case Op_VecD:
 4222         __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
 4223         break;
 4224       case Op_VecX:
 4225         if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4226           __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
 4227         } else {
 4228           __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
 4229           __ vinsertf32x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0);
 4230         }
 4231         break;
 4232       case Op_VecY:
 4233         if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4234           __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
 4235         } else {
 4236           __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
 4237           __ vinsertf64x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0);
 4238         }
 4239         break;
 4240       case Op_VecZ:
 4241         __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2);
 4242         break;
 4243       default:
 4244         ShouldNotReachHere();
 4245       }
 4246     } else { // store
 4247       switch (ireg) {
 4248       case Op_VecS:
 4249         __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
 4250         break;
 4251       case Op_VecD:
 4252         __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
 4253         break;
 4254       case Op_VecX:
 4255         if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4256           __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
 4257         }
 4258         else {
 4259           __ vextractf32x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
 4260         }
 4261         break;
 4262       case Op_VecY:
 4263         if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
 4264           __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
 4265         }
 4266         else {
 4267           __ vextractf64x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
 4268         }
 4269         break;
 4270       case Op_VecZ:
 4271         __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2);
 4272         break;
 4273       default:
 4274         ShouldNotReachHere();
 4275       }
 4276     }
 4277 #ifndef PRODUCT
 4278   } else {
 4279     if (is_load) {
 4280       switch (ireg) {
 4281       case Op_VecS:
 4282         st->print("movd    %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
 4283         break;
 4284       case Op_VecD:
 4285         st->print("movq    %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
 4286         break;
 4287        case Op_VecX:
 4288         st->print("movdqu  %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
 4289         break;
 4290       case Op_VecY:
 4291       case Op_VecZ:
 4292         st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
 4293         break;
 4294       default:
 4295         ShouldNotReachHere();
 4296       }
 4297     } else { // store
 4298       switch (ireg) {
 4299       case Op_VecS:
 4300         st->print("movd    [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
 4301         break;
 4302       case Op_VecD:
 4303         st->print("movq    [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
 4304         break;
 4305        case Op_VecX:
 4306         st->print("movdqu  [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
 4307         break;
 4308       case Op_VecY:
 4309       case Op_VecZ:
 4310         st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
 4311         break;
 4312       default:
 4313         ShouldNotReachHere();
 4314       }
 4315     }
 4316 #endif
 4317   }
 4318 }
 4319 
 4320 template <class T>
 4321 static inline GrowableArray<jbyte>* vreplicate_imm(BasicType bt, T con, int len) {
 4322   int size = type2aelembytes(bt) * len;
 4323   GrowableArray<jbyte>* val = new GrowableArray<jbyte>(size, size, 0);
 4324   for (int i = 0; i < len; i++) {
 4325     int offset = i * type2aelembytes(bt);
 4326     switch (bt) {
 4327       case T_BYTE: val->at(i) = con; break;
 4328       case T_SHORT: {
 4329         jshort c = con;
 4330         memcpy(val->adr_at(offset), &c, sizeof(jshort));
 4331         break;
 4332       }
 4333       case T_INT: {
 4334         jint c = con;
 4335         memcpy(val->adr_at(offset), &c, sizeof(jint));
 4336         break;
 4337       }
 4338       case T_LONG: {
 4339         jlong c = con;
 4340         memcpy(val->adr_at(offset), &c, sizeof(jlong));
 4341         break;
 4342       }
 4343       case T_FLOAT: {
 4344         jfloat c = con;
 4345         memcpy(val->adr_at(offset), &c, sizeof(jfloat));
 4346         break;
 4347       }
 4348       case T_DOUBLE: {
 4349         jdouble c = con;
 4350         memcpy(val->adr_at(offset), &c, sizeof(jdouble));
 4351         break;
 4352       }
 4353       default: assert(false, "%s", type2name(bt));
 4354     }
 4355   }
 4356   return val;
 4357 }
 4358 
 4359 static inline jlong high_bit_set(BasicType bt) {
 4360   switch (bt) {
 4361     case T_BYTE:  return 0x8080808080808080;
 4362     case T_SHORT: return 0x8000800080008000;
 4363     case T_INT:   return 0x8000000080000000;
 4364     case T_LONG:  return 0x8000000000000000;
 4365     default:
 4366       ShouldNotReachHere();
 4367       return 0;
 4368   }
 4369 }
 4370 
// MachNopNode: padding made of nops; _count is the pad size in bytes.
#ifndef PRODUCT
  // Prints the textual form of the pad, including its byte count.
  void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
    st->print("nop \t# %d bytes pad for loops and calls", _count);
  }
#endif

  // Emits the padding through the macro assembler's nop(_count).
  void MachNopNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc*) const {
    __ nop(_count);
  }

  // The node's size is exactly the requested pad size.
  uint MachNopNode::size(PhaseRegAlloc*) const {
    return _count;
  }
 4384 
// MachBreakpointNode: a breakpoint, emitted as int3.
#ifndef PRODUCT
  // Prints the textual form of the node.
  void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const {
    st->print("# breakpoint");
  }
#endif

  // Emits an int3 instruction.
  void MachBreakpointNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc* ra_) const {
    __ int3();
  }

  // No hand-written size; defers to the generic MachNode::size().
  uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const {
    return MachNode::size(ra_);
  }
 4398 
 4399 %}
 4400 
 4401 //----------ENCODING BLOCK-----------------------------------------------------
 4402 // This block specifies the encoding classes used by the compiler to
 4403 // output byte streams.  Encoding classes are parameterized macros
 4404 // used by Machine Instruction Nodes in order to generate the bit
 4405 // encoding of the instruction.  Operands specify their base encoding
// interface with the interface keyword.  Four interfaces are
// currently supported: REG_INTER, CONST_INTER, MEMORY_INTER, and
// COND_INTER.  REG_INTER causes an operand to generate a function
 4409 // which returns its register number when queried.  CONST_INTER causes
 4410 // an operand to generate a function which returns the value of the
 4411 // constant when queried.  MEMORY_INTER causes an operand to generate
 4412 // four functions which return the Base Register, the Index Register,
 4413 // the Scale Value, and the Offset Value of the operand when queried.
 4414 // COND_INTER causes an operand to generate six functions which return
// the encoding code (i.e., the encoding bits for the instruction)
 4416 // associated with each basic boolean condition for a conditional
 4417 // instruction.
 4418 //
 4419 // Instructions specify two basic values for encoding.  Again, a
 4420 // function is available to check if the constant displacement is an
 4421 // oop. They use the ins_encode keyword to specify their encoding
 4422 // classes (which must be a sequence of enc_class names, and their
 4423 // parameters, specified in the encoding block), and they use the
 4424 // opcode keyword to specify, in order, their primary, secondary, and
 4425 // tertiary opcode.  Only the opcode sections which a particular
 4426 // instruction needs for encoding need to be specified.
 4427 encode %{
  enc_class cdql_enc(no_rax_rdx_RegI div)
  %{
    // Full implementation of Java idiv and irem; checks for
    // special case as described in JVM spec., p.243 & p.271.
    //
    //         normal case                           special case
    //
    // input : rax: dividend                         min_int
    //         reg: divisor                          -1
    //
    // output: rax: quotient  (= rax idiv reg)       min_int
    //         rdx: remainder (= rax irem reg)       0
    //
    //  Code sequence:
    //
    //    0:   3d 00 00 00 80          cmp    $0x80000000,%eax
    //    5:   75 07/08                jne    e <normal>
    //    7:   33 d2                   xor    %edx,%edx
    //  [div >= 8 -> offset + 1]
    //  [REX_B]
    //    9:   83 f9 ff                cmp    $0xffffffffffffffff,$div
    //    c:   74 03/04                je     11 <done>
    // 000000000000000e <normal>:
    //    e:   99                      cltd
    //  [div >= 8 -> offset + 1]
    //  [REX_B]
    //    f:   f7 f9                   idiv   $div
    // 0000000000000011 <done>:
    //
    // In the special case the idiv is skipped entirely: rax still holds
    // min_int (the quotient) and rdx has been zeroed (the remainder).
    Label normal;
    Label done;

    // cmp    $0x80000000,%eax
    __ cmpl(as_Register(RAX_enc), 0x80000000);

    // jne    e <normal>
    __ jccb(Assembler::notEqual, normal);

    // xor    %edx,%edx
    __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));

    // cmp    $0xffffffffffffffff,$div
    __ cmpl($div$$Register, -1);

    // je     11 <done>
    __ jccb(Assembler::equal, done);

    // <normal>
    // cltd
    __ bind(normal);
    __ cdql();

    // idiv   $div
    // <done>
    __ idivl($div$$Register);
    __ bind(done);
  %}
 4484 
  enc_class cdqq_enc(no_rax_rdx_RegL div)
  %{
    // Full implementation of Java ldiv and lrem; checks for
    // special case as described in JVM spec., p.243 & p.271.
    //
    //         normal case                           special case
    //
    // input : rax: dividend                         min_long
    //         reg: divisor                          -1
    //
    // output: rax: quotient  (= rax idiv reg)       min_long
    //         rdx: remainder (= rax irem reg)       0
    //
    //  Code sequence:
    //
    //    0:   48 ba 00 00 00 00 00    mov    $0x8000000000000000,%rdx
    //    7:   00 00 80
    //    a:   48 39 d0                cmp    %rdx,%rax
    //    d:   75 08                   jne    17 <normal>
    //    f:   33 d2                   xor    %edx,%edx
    //   11:   48 83 f9 ff             cmp    $0xffffffffffffffff,$div
    //   15:   74 05                   je     1c <done>
    // 0000000000000017 <normal>:
    //   17:   48 99                   cqto
    //   19:   48 f7 f9                idiv   $div
    // 000000000000001c <done>:
    //
    // rdx is used as scratch to hold min_long for the comparison.  In the
    // special case the idiv is skipped entirely: rax still holds min_long
    // (the quotient) and rdx has been zeroed (the remainder).
    Label normal;
    Label done;

    // mov    $0x8000000000000000,%rdx
    __ mov64(as_Register(RDX_enc), 0x8000000000000000);

    // cmp    %rdx,%rax
    __ cmpq(as_Register(RAX_enc), as_Register(RDX_enc));

    // jne    17 <normal>
    __ jccb(Assembler::notEqual, normal);

    // xor    %edx,%edx
    __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));

    // cmp    $0xffffffffffffffff,$div
    __ cmpq($div$$Register, -1);

    // je     1c <done>
    __ jccb(Assembler::equal, done);

    // <normal>
    // cqto
    __ bind(normal);
    __ cdqq();

    // idiv   $div (emitted here, as part of this encoding class)
    // <done>
    __ idivq($div$$Register);
    __ bind(done);
  %}
 4542 
  // Conditionally emits vzeroupper and, in debug builds, checks that the
  // number of bytes emitted equals clear_avx_size().
  enc_class clear_avx %{
    // off0/off1 exist only in debug builds (DEBUG_ONLY); they bracket the
    // code emitted by this encoding class.
    DEBUG_ONLY(int off0 = __ offset());
    if (generate_vzeroupper(Compile::current())) {
      // Clear upper bits of YMM registers when current compiled code uses
      // wide vectors to avoid AVX <-> SSE transition penalty during call.
      __ vzeroupper();
    }
    DEBUG_ONLY(int off1 = __ offset());
    assert(off1 - off0 == clear_avx_size(), "correct size prediction");
  %}
 4554 
  // Call from compiled Java code to a runtime entry point: the target address
  // is loaded into r10, called indirectly through that register, and the call
  // is followed by post_call_nop().
  enc_class Java_To_Runtime(method meth) %{
    __ lea(r10, RuntimeAddress((address)$meth$$method));
    __ call(r10);
    __ post_call_nop();
  %}
 4560 
  // Static (or optimized virtual) Java call.  Three cases:
  //   1. No _method: a plain call to the runtime address $meth.
  //   2. _method is the _ensureMaterializedForStackWalk intrinsic: the call
  //      is elided and replaced by a 5-byte nop.
  //   3. Otherwise: a relocated call plus a to-interpreter stub, either
  //      shared between call sites or emitted for this call site.
  // All paths end with post_call_nop(), except the early return taken when
  // the stub cannot be emitted.
  enc_class Java_Static_Call(method meth)
  %{
    // JAVA STATIC CALL
    // CALL to fixup routine.  Fixup routine uses ScopeDesc info to
    // determine who we intended to call.
    if (!_method) {
      __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, $meth$$method)));
    } else if (_method->intrinsic_id() == vmIntrinsicID::_ensureMaterializedForStackWalk) {
      // The NOP here is purely to ensure that eliding a call to
      // JVM_EnsureMaterializedForStackWalk doesn't change the code size.
      __ nop(5);
      __ block_comment("call JVM_EnsureMaterializedForStackWalk (elided)");
    } else {
      int method_index = resolved_method_index(masm);
      // Pick the relocation kind that matches how the call was resolved.
      RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
                                                  : static_call_Relocation::spec(method_index);
      // Record where the call instruction starts, both as an address (for the
      // per-call stub) and as a buffer offset (for the shared stub).
      address mark = __ pc();
      int call_offset = __ offset();
      __ call(AddressLiteral(CAST_FROM_FN_PTR(address, $meth$$method), rspec));
      if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) {
        // Calls of the same statically bound method can share
        // a stub to the interpreter.
        __ code()->shared_stub_to_interp_for(_method, call_offset);
      } else {
        // Emit stubs for static call.
        address stub = CompiledDirectCall::emit_to_interp_stub(masm, mark);
        __ clear_inst_mark();
        if (stub == nullptr) {
          // Stub emission failed: record the failure and stop emitting.
          ciEnv::current()->record_failure("CodeCache is full");
          return;
        }
      }
    }
    __ post_call_nop();
  %}
 4596 
  // Dynamic Java call: emitted through ic_call() with the target $meth and the
  // resolved method index, followed by post_call_nop().
  enc_class Java_Dynamic_Call(method meth) %{
    __ ic_call((address)$meth$$method, resolved_method_index(masm));
    __ post_call_nop();
  %}
 4601 
  // Code emitted after a call.  Emits nothing unless VerifyStackAtCalls is
  // set; then it compares a stack slot against the cookie 0xbadb100d and
  // traps with int3 when they differ.
  enc_class call_epilog %{
    if (VerifyStackAtCalls) {
      // Check that stack depth is unchanged: find majik cookie on stack
      // The cookie slot is located three words below _old_SP.
      int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
      Label L;
      __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
      __ jccb(Assembler::equal, L);
      // Die if stack mismatch
      __ int3();
      __ bind(L);
    }
  %}
 4614 
 4615 %}
 4616 
 4617 //----------FRAME--------------------------------------------------------------
 4618 // Definition of frame structure and management information.
 4619 //
 4620 //  S T A C K   L A Y O U T    Allocators stack-slot number
 4621 //                             |   (to get allocators register number
 4622 //  G  Owned by    |        |  v    add OptoReg::stack0())
 4623 //  r   CALLER     |        |
 4624 //  o     |        +--------+      pad to even-align allocators stack-slot
 4625 //  w     V        |  pad0  |        numbers; owned by CALLER
 4626 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
 4627 //  h     ^        |   in   |  5
 4628 //        |        |  args  |  4   Holes in incoming args owned by SELF
 4629 //  |     |        |        |  3
 4630 //  |     |        +--------+
 4631 //  V     |        | old out|      Empty on Intel, window on Sparc
 4632 //        |    old |preserve|      Must be even aligned.
 4633 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
 4634 //        |        |   in   |  3   area for Intel ret address
 4635 //     Owned by    |preserve|      Empty on Sparc.
 4636 //       SELF      +--------+
 4637 //        |        |  pad2  |  2   pad to align old SP
 4638 //        |        +--------+  1
 4639 //        |        | locks  |  0
 4640 //        |        +--------+----> OptoReg::stack0(), even aligned
 4641 //        |        |  pad1  | 11   pad to align new SP
 4642 //        |        +--------+
 4643 //        |        |        | 10
 4644 //        |        | spills |  9   spills
 4645 //        V        |        |  8   (pad0 slot for callee)
 4646 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
 4647 //        ^        |  out   |  7
 4648 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
 4649 //     Owned by    +--------+
 4650 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
 4651 //        |    new |preserve|      Must be even-aligned.
 4652 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
 4653 //        |        |        |
 4654 //
 4655 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
 4656 //         known from SELF's arguments and the Java calling convention.
 4657 //         Region 6-7 is determined per call site.
 4658 // Note 2: If the calling convention leaves holes in the incoming argument
 4659 //         area, those holes are owned by SELF.  Holes in the outgoing area
 4660 //         are owned by the CALLEE.  Holes should not be necessary in the
 4661 //         incoming area, as the Java calling convention is completely under
 4662 //         the control of the AD file.  Doubles can be sorted and packed to
 4663 //         avoid holes.  Holes in the outgoing arguments may be necessary for
 4664 //         varargs C calling conventions.
 4665 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
 4666 //         even aligned with pad0 as needed.
 4667 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
 4668 //         region 6-11 is even aligned; it may be padded out more so that
 4669 //         the region from SP to FP meets the minimum stack alignment.
 4670 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
 4671 //         alignment.  Region 11, pad1, may be dynamically extended so that
 4672 //         SP meets the minimum alignment.
 4673 
frame
%{
  // Part of the calling convention between compiled code and the
  // interpreter (only the inline cache register is declared here).
  inline_cache_reg(RAX);                // Inline Cache Register

  // Optional: name the operand used by cisc-spilling to access
  // [stack_pointer + offset]
  cisc_spilling_operand_name(indOffset32);

  // Number of stack slots consumed by locking an object
  sync_stack_slots(2);

  // Compiled code's Frame Pointer
  frame_pointer(RSP);

  // Stack alignment requirement
  stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)

  // Number of outgoing stack slots killed above the out_preserve_stack_slots
  // for calls to C.  Supports the var-args backing area for register parms.
  varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);

  // The after-PROLOG location of the return address.  Location of
  // return address specifies a type (REG or STACK) and a number
  // representing the register number (i.e. - use a register name) or
  // stack slot.
  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
  // Otherwise, it is above the locks and verification slot and alignment word
  return_addr(STACK - 2 +
              align_up((Compile::current()->in_preserve_stack_slots() +
                        Compile::current()->fixed_slots()),
                       stack_alignment_in_slots()));

  // Location of compiled Java return values.  Same as C for now.
  // Returns the (hi, lo) register pair for ideal_reg, looked up in two tables
  // indexed directly by ideal_reg.  OptoReg::Bad in the hi table marks kinds
  // with no high half.
  // NOTE(review): the tables carry an Op_RegN entry at index 2, while the
  // assert's lower bound is Op_RegI (index 3 per the table comments) --
  // confirm whether Op_RegN is meant to be accepted here.
  return_value
  %{
    assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
           "only return normal values");

    static const int lo[Op_RegL + 1] = {
      0,
      0,
      RAX_num,  // Op_RegN
      RAX_num,  // Op_RegI
      RAX_num,  // Op_RegP
      XMM0_num, // Op_RegF
      XMM0_num, // Op_RegD
      RAX_num   // Op_RegL
    };
    static const int hi[Op_RegL + 1] = {
      0,
      0,
      OptoReg::Bad, // Op_RegN
      OptoReg::Bad, // Op_RegI
      RAX_H_num,    // Op_RegP
      OptoReg::Bad, // Op_RegF
      XMM0b_num,    // Op_RegD
      RAX_H_num     // Op_RegL
    };
    // Excluded flags and vector registers.
    assert(ARRAY_SIZE(hi) == _last_machine_leaf - 8, "missing type");
    return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
  %}
%}
 4739 
 4740 //----------ATTRIBUTES---------------------------------------------------------
 4741 //----------Operand Attributes-------------------------------------------------
// Each declaration names an attribute; the value in parentheses is
// presumably the default used by definitions that do not set the attribute
// themselves (several operands below omit op_cost) -- confirm against ADLC.
op_attrib op_cost(0);        // Required cost attribute

//----------Instruction Attributes---------------------------------------------
ins_attrib ins_cost(100);       // Required cost attribute
ins_attrib ins_size(8);         // Required size attribute (in bits)
ins_attrib ins_short_branch(0); // Required flag: is this instruction
                                // a non-matching short branch variant
                                // of some long branch?
ins_attrib ins_alignment(1);    // Required alignment attribute (must
                                // be a power of 2) specifies the
                                // alignment that some part of the
                                // instruction (not necessarily the
                                // start) requires.  If > 1, a
                                // compute_padding() function must be
                                // provided for the instruction

// Whether this node is expanded during code emission into a sequence of
// instructions and the first instruction can perform an implicit null check.
ins_attrib ins_is_late_expanded_null_check_candidate(false);
 4761 
 4762 //----------OPERANDS-----------------------------------------------------------
 4763 // Operand definitions must precede instruction definitions for correct parsing
 4764 // in the ADLC because operands constitute user defined types which are used in
 4765 // instruction definitions.
 4766 
 4767 //----------Simple Operands----------------------------------------------------
 4768 // Immediate Operands
// Integer Immediate
// Matches any ConI constant (no predicate); exposed through CONST_INTER.
operand immI()
%{
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}
 4778 
// Constant for test vs zero
// Matches only the ConI constant 0.
operand immI_0()
%{
  predicate(n->get_int() == 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
 4789 
// Constant for increment
// Matches only the ConI constant 1.
operand immI_1()
%{
  predicate(n->get_int() == 1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
 4800 
// Constant for decrement
// Matches only the ConI constant -1.
operand immI_M1()
%{
  predicate(n->get_int() == -1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
 4811 
// Integer constant 2: matches only the ConI constant 2.
operand immI_2()
%{
  predicate(n->get_int() == 2);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
 4821 
// Integer constant 4: matches only the ConI constant 4.
operand immI_4()
%{
  predicate(n->get_int() == 4);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
 4831 
// Integer constant 8: matches only the ConI constant 8.
operand immI_8()
%{
  predicate(n->get_int() == 8);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
 4841 
// Valid scale values for addressing modes
// Matches ConI constants in the range [0, 3].
operand immI2()
%{
  predicate(0 <= n->get_int() && (n->get_int() <= 3));
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}
 4851 
// Unsigned 7-bit integer immediate: ConI constants in the range [0, 0x7F].
operand immU7()
%{
  predicate((0 <= n->get_int()) && (n->get_int() <= 0x7F));
  match(ConI);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
 4861 
// Signed 8-bit integer immediate: ConI constants in the range [-0x80, 0x7F].
operand immI8()
%{
  predicate((-0x80 <= n->get_int()) && (n->get_int() < 0x80));
  match(ConI);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
 4871 
// Unsigned 8-bit integer immediate: ConI constants in the range [0, 255].
operand immU8()
%{
  predicate((0 <= n->get_int()) && (n->get_int() <= 255));
  match(ConI);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
 4881 
// Signed 16-bit integer immediate: ConI constants in the range
// [-32768, 32767].
operand immI16()
%{
  predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}
 4891 
// Int Immediate non-negative
// Matches ConI constants that are >= 0.
operand immU31()
%{
  predicate(n->get_int() >= 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
 4902 
// Pointer Immediate
// Matches any ConP constant (no predicate).
operand immP()
%{
  match(ConP);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}
 4912 
// Null Pointer Immediate
// Matches only ConP constants whose pointer value is 0.
operand immP0()
%{
  predicate(n->get_ptr() == 0);
  match(ConP);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
 4923 
// Narrow Pointer Immediate
// Matches any ConN constant (no predicate).
operand immN() %{
  match(ConN);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}
 4932 
// Narrow Klass Immediate
// Matches any ConNKlass constant (no predicate).
operand immNKlass() %{
  match(ConNKlass);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}
 4940 
// Narrow Null Pointer Immediate
// Matches only ConN constants whose narrow value (get_narrowcon()) is 0.
operand immN0() %{
  predicate(n->get_narrowcon() == 0);
  match(ConN);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
 4950 
// Pointer Immediate that fits in 31 bits
// Matches ConP constants that carry no relocation (reloc() is
// relocInfo::none) and whose value shifted right by 31 is zero.
operand immP31()
%{
  predicate(n->as_Type()->type()->reloc() == relocInfo::none
            && (n->get_ptr() >> 31) == 0);
  match(ConP);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
 4961 
 4962 
// Long Immediate
// Matches any ConL constant (no predicate).
operand immL()
%{
  match(ConL);

  op_cost(20);
  format %{ %}
  interface(CONST_INTER);
%}
 4972 
// Long Immediate 8-bit
// Matches ConL constants in the signed 8-bit range [-0x80, 0x7F].
operand immL8()
%{
  predicate(-0x80L <= n->get_long() && n->get_long() < 0x80L);
  match(ConL);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
 4983 
// Long Immediate 32-bit unsigned
// Matches ConL constants that are unchanged by truncation to unsigned int,
// i.e. whose upper 32 bits are zero.
operand immUL32()
%{
  predicate(n->get_long() == (unsigned int) (n->get_long()));
  match(ConL);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}
 4994 
// Long Immediate 32-bit signed
// Matches ConL constants that are unchanged by truncation to int, i.e.
// that are representable as a sign-extended 32-bit value.
operand immL32()
%{
  predicate(n->get_long() == (int) (n->get_long()));
  match(ConL);

  op_cost(15);
  format %{ %}
  interface(CONST_INTER);
%}
 5005 
// Long Immediate: power of 2
// Matches ConL constants for which is_power_of_2() holds when the value is
// treated as unsigned (julong).
operand immL_Pow2()
%{
  predicate(is_power_of_2((julong)n->get_long()));
  match(ConL);

  op_cost(15);
  format %{ %}
  interface(CONST_INTER);
%}
 5015 
// Long Immediate: complement of a power of 2
// Matches ConL constants whose bitwise complement, treated as unsigned
// (julong), satisfies is_power_of_2() -- presumably values with exactly one
// bit clear.
operand immL_NotPow2()
%{
  predicate(is_power_of_2((julong)~n->get_long()));
  match(ConL);

  op_cost(15);
  format %{ %}
  interface(CONST_INTER);
%}
 5025 
// Long Immediate zero
// Matches only the ConL constant 0.
operand immL0()
%{
  predicate(n->get_long() == 0L);
  match(ConL);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}
 5036 
// Constant for increment
// Matches only the ConL constant 1.
operand immL1()
%{
  predicate(n->get_long() == 1);
  match(ConL);

  format %{ %}
  interface(CONST_INTER);
%}
 5046 
// Constant for decrement
// Matches only the ConL constant -1.
operand immL_M1()
%{
  predicate(n->get_long() == -1);
  match(ConL);

  format %{ %}
  interface(CONST_INTER);
%}
 5056 
// Long Immediate: low 32-bit mask
// Matches only the ConL constant 0xFFFFFFFF.
operand immL_32bits()
%{
  predicate(n->get_long() == 0xFFFFFFFFL);
  match(ConL);
  op_cost(20);

  format %{ %}
  interface(CONST_INTER);
%}
 5067 
// Int Immediate: 2^n-1, positive
// Matches positive ConI constants whose successor is a power of 2.  The
// increment is done on the value cast to juint, so it is computed in
// unsigned arithmetic.
operand immI_Pow2M1()
%{
  predicate((n->get_int() > 0)
            && is_power_of_2((juint)n->get_int() + 1));
  match(ConI);

  op_cost(20);
  format %{ %}
  interface(CONST_INTER);
%}
 5079 
// Float Immediate zero
// Matches ConF constants for which jint_cast() of the value is 0.
// NOTE(review): jint_cast presumably reinterprets the float's bits, which
// would accept +0.0f but not -0.0f -- confirm.
operand immF0()
%{
  predicate(jint_cast(n->getf()) == 0);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
 5090 
// Float Immediate
// Matches any ConF constant (no predicate).
operand immF()
%{
  match(ConF);

  op_cost(15);
  format %{ %}
  interface(CONST_INTER);
%}
 5100 
// Half Float Immediate
// Matches any ConH constant (no predicate).
operand immH()
%{
  match(ConH);

  op_cost(15);
  format %{ %}
  interface(CONST_INTER);
%}
 5110 
// Double Immediate zero
// Matches ConD constants for which jlong_cast() of the value is 0.
// NOTE(review): jlong_cast presumably reinterprets the double's bits, which
// would accept +0.0 but not -0.0 -- confirm.
operand immD0()
%{
  predicate(jlong_cast(n->getd()) == 0);
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
 5121 
// Double Immediate
// Matches any ConD constant (no predicate).
operand immD()
%{
  match(ConD);

  op_cost(15);
  format %{ %}
  interface(CONST_INTER);
%}
 5131 
 5132 // Immediates for special shifts (sign extend)
 5133 
// Integer constant 16: matches only the ConI constant 16.
operand immI_16()
%{
  predicate(n->get_int() == 16);
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}
 5143 
 5144 operand immI_24()  // Int immediate operand for the single value 24
 5145 %{
 5146   predicate(n->get_int() == 24);  // accept the constant only when it equals 24
 5147   match(ConI);                    // candidates are int constant nodes
 5148
 5149   format %{ %}
 5150   interface(CONST_INTER);
 5151 %}
 5152 
 5153 // Constant for byte-wide masking
 5154 operand immI_255()  // Int immediate operand for the single value 255 (0xFF)
 5155 %{
 5156   predicate(n->get_int() == 255);  // accept the constant only when it equals 255
 5157   match(ConI);                     // candidates are int constant nodes
 5158
 5159   format %{ %}
 5160   interface(CONST_INTER);
 5161 %}
 5162 
 5163 // Constant for short-wide masking
 5164 operand immI_65535()  // Int immediate operand for the single value 65535 (0xFFFF)
 5165 %{
 5166   predicate(n->get_int() == 65535);  // accept the constant only when it equals 65535
 5167   match(ConI);                       // candidates are int constant nodes
 5168
 5169   format %{ %}
 5170   interface(CONST_INTER);
 5171 %}
 5172 
 5173 // Constant for byte-wide masking
 5174 operand immL_255()  // Long immediate operand for the single value 255 (0xFF)
 5175 %{
 5176   predicate(n->get_long() == 255);  // accept the constant only when it equals 255
 5177   match(ConL);                      // candidates are long constant nodes
 5178
 5179   format %{ %}
 5180   interface(CONST_INTER);
 5181 %}
 5182 
 5183 // Constant for short-wide masking
 5184 operand immL_65535()  // Long immediate operand for the single value 65535 (0xFFFF)
 5185 %{
 5186   predicate(n->get_long() == 65535);  // accept the constant only when it equals 65535
 5187   match(ConL);                        // candidates are long constant nodes
 5188
 5189   format %{ %}
 5190   interface(CONST_INTER);
 5191 %}
 5192 
 5193 // AOT Runtime Constants Address
 5194 operand immAOTRuntimeConstantsAddress()  // Pointer immediate accepted only when AOTRuntimeConstants::contains() approves the address
 5195 %{
 5196   // Check if the address is in the range of AOT Runtime Constants
 5197   predicate(AOTRuntimeConstants::contains((address)(n->get_ptr())));  // the constant's pointer value is cast to address for the query
 5198   match(ConP);  // candidates are pointer constant nodes
 5199
 5200   op_cost(0);  // explicit zero cost
 5201   format %{ %}
 5202   interface(CONST_INTER);
 5203 %}
 5204 
 5205 operand kReg()  // Register operand for RegVectMask values
 5206 %{
 5207   constraint(ALLOC_IN_RC(vectmask_reg));  // allocated from register class vectmask_reg
 5208   match(RegVectMask);
 5209   format %{%}
 5210   interface(REG_INTER);
 5211 %}
 5212 
 5213 // Register Operands
 5214 // Integer Register
 5215 operand rRegI()  // General int register operand
 5216 %{
 5217   constraint(ALLOC_IN_RC(int_reg));  // allocated from register class int_reg
 5218   match(RegI);
 5219
 5220   match(rax_RegI);  // an input expecting rRegI also accepts each of the
 5221   match(rbx_RegI);  // fixed-register int operands listed here
 5222   match(rcx_RegI);
 5223   match(rdx_RegI);
 5224   match(rdi_RegI);
 5225
 5226   format %{ %}
 5227   interface(REG_INTER);
 5228 %}
 5229 
 5230 // Special Registers
 5231 operand rax_RegI()  // Int operand restricted to register class int_rax_reg
 5232 %{
 5233   constraint(ALLOC_IN_RC(int_rax_reg));  // allocation limited to int_rax_reg
 5234   match(RegI);
 5235   match(rRegI);  // an input expecting rax_RegI also accepts an rRegI
 5236
 5237   format %{ "RAX" %}  // printed as the fixed name RAX
 5238   interface(REG_INTER);
 5239 %}
 5240 
 5241 // Special Registers
 5242 operand rbx_RegI()  // Int operand restricted to register class int_rbx_reg
 5243 %{
 5244   constraint(ALLOC_IN_RC(int_rbx_reg));  // allocation limited to int_rbx_reg
 5245   match(RegI);
 5246   match(rRegI);  // an input expecting rbx_RegI also accepts an rRegI
 5247
 5248   format %{ "RBX" %}  // printed as the fixed name RBX
 5249   interface(REG_INTER);
 5250 %}
 5251 
 5252 operand rcx_RegI()  // Int operand restricted to register class int_rcx_reg
 5253 %{
 5254   constraint(ALLOC_IN_RC(int_rcx_reg));  // allocation limited to int_rcx_reg
 5255   match(RegI);
 5256   match(rRegI);  // an input expecting rcx_RegI also accepts an rRegI
 5257
 5258   format %{ "RCX" %}  // printed as the fixed name RCX
 5259   interface(REG_INTER);
 5260 %}
 5261 
 5262 operand rdx_RegI()  // Int operand restricted to register class int_rdx_reg
 5263 %{
 5264   constraint(ALLOC_IN_RC(int_rdx_reg));  // allocation limited to int_rdx_reg
 5265   match(RegI);
 5266   match(rRegI);  // an input expecting rdx_RegI also accepts an rRegI
 5267
 5268   format %{ "RDX" %}  // printed as the fixed name RDX
 5269   interface(REG_INTER);
 5270 %}
 5271 
 5272 operand rdi_RegI()  // Int operand restricted to register class int_rdi_reg
 5273 %{
 5274   constraint(ALLOC_IN_RC(int_rdi_reg));  // allocation limited to int_rdi_reg
 5275   match(RegI);
 5276   match(rRegI);  // an input expecting rdi_RegI also accepts an rRegI
 5277
 5278   format %{ "RDI" %}  // printed as the fixed name RDI
 5279   interface(REG_INTER);
 5280 %}
 5281 
 5282 operand no_rax_rdx_RegI()  // Int operand allocated from class int_no_rax_rdx_reg
 5283 %{
 5284   constraint(ALLOC_IN_RC(int_no_rax_rdx_reg));  // by its name, the int registers minus RAX and RDX
 5285   match(RegI);
 5286   match(rbx_RegI);  // of the fixed-register int operands only rbx, rcx and rdi
 5287   match(rcx_RegI);  // are accepted; rax_RegI and rdx_RegI are not listed
 5288   match(rdi_RegI);
 5289
 5290   format %{ %}
 5291   interface(REG_INTER);
 5292 %}
 5293 
 5294 operand no_rbp_r13_RegI()  // Int operand allocated from class int_no_rbp_r13_reg
 5295 %{
 5296   constraint(ALLOC_IN_RC(int_no_rbp_r13_reg));  // by its name, the int registers minus RBP and R13
 5297   match(RegI);
 5298   match(rRegI);     // accepts the general int operand ...
 5299   match(rax_RegI);  // ... and every fixed-register int operand defined in this file
 5300   match(rbx_RegI);
 5301   match(rcx_RegI);
 5302   match(rdx_RegI);
 5303   match(rdi_RegI);
 5304
 5305   format %{ %}
 5306   interface(REG_INTER);
 5307 %}
 5308 
 5309 // Pointer Register
 5310 operand any_RegP()  // Pointer operand allocated from the widest pointer class, any_reg
 5311 %{
 5312   constraint(ALLOC_IN_RC(any_reg));  // allocation class any_reg (rRegP uses ptr_reg instead)
 5313   match(RegP);
 5314   match(rax_RegP);  // accepts the fixed-register pointer operands ...
 5315   match(rbx_RegP);
 5316   match(rdi_RegP);
 5317   match(rsi_RegP);
 5318   match(rbp_RegP);
 5319   match(r15_RegP);
 5320   match(rRegP);     // ... and the general pointer operand
 5321
 5322   format %{ %}
 5323   interface(REG_INTER);
 5324 %}
 5325 
 5326 operand rRegP()  // General pointer register operand
 5327 %{
 5328   constraint(ALLOC_IN_RC(ptr_reg));  // allocated from register class ptr_reg
 5329   match(RegP);
 5330   match(rax_RegP);  // an input expecting rRegP also accepts these fixed-register operands
 5331   match(rbx_RegP);
 5332   match(rdi_RegP);
 5333   match(rsi_RegP);
 5334   match(rbp_RegP);  // See Q&A below about
 5335   match(r15_RegP);  // r15_RegP and rbp_RegP.
 5336
 5337   format %{ %}
 5338   interface(REG_INTER);
 5339 %}
 5340 
 5341 operand rRegN() %{  // General register operand for RegN (narrow) values
 5342   constraint(ALLOC_IN_RC(int_reg));  // shares the int_reg class used by rRegI
 5343   match(RegN);
 5344
 5345   format %{ %}
 5346   interface(REG_INTER);
 5347 %}
 5348 
 5349 // Question: Why is r15_RegP (the read-only TLS register) a match for rRegP?
 5350 // Answer: Operand match rules govern the DFA as it processes instruction inputs.
 5351 // It's fine for an instruction input that expects rRegP to match a r15_RegP.
 5352 // The output of an instruction is controlled by the allocator, which respects
 5353 // register class masks, not match rules.  Unless an instruction mentions
 5354 // r15_RegP or any_RegP explicitly as its output, r15 will not be considered
 5355 // by the allocator as an output.
 5356 // The same logic applies to rbp_RegP being a match for rRegP: If PreserveFramePointer==true,
 5357 // the RBP is used as a proper frame pointer and is not included in ptr_reg. As a
 5358 // result, RBP is not included in the output of the instruction either.
 5359 
 5360 // This operand is not allowed to use RBP even if
 5361 // RBP is not used to hold the frame pointer.
 5362 operand no_rbp_RegP()  // Pointer operand allocated from class ptr_reg_no_rbp
 5363 %{
 5364   constraint(ALLOC_IN_RC(ptr_reg_no_rbp));  // pointer class that leaves out RBP
 5365   match(RegP);
 5366   match(rbx_RegP);  // only the rbx, rsi and rdi fixed-register operands are
 5367   match(rsi_RegP);  // accepted; rbp_RegP is deliberately not listed
 5368   match(rdi_RegP);
 5369
 5370   format %{ %}
 5371   interface(REG_INTER);
 5372 %}
 5373 
 5374 // Special Registers
 5375 // Return a pointer value
 5376 operand rax_RegP()  // Pointer operand restricted to register class ptr_rax_reg
 5377 %{
 5378   constraint(ALLOC_IN_RC(ptr_rax_reg));  // allocation limited to ptr_rax_reg
 5379   match(RegP);
 5380   match(rRegP);  // an input expecting rax_RegP also accepts an rRegP
 5381
 5382   format %{ %}
 5383   interface(REG_INTER);
 5384 %}
 5385 
 5386 // Special Registers
 5387 // Return a compressed pointer value
 5388 operand rax_RegN()  // RegN (narrow) operand restricted to register class int_rax_reg
 5389 %{
 5390   constraint(ALLOC_IN_RC(int_rax_reg));  // same class as rax_RegI
 5391   match(RegN);
 5392   match(rRegN);  // an input expecting rax_RegN also accepts an rRegN
 5393
 5394   format %{ %}
 5395   interface(REG_INTER);
 5396 %}
 5397 
 5398 // Used in AtomicAdd
 5399 operand rbx_RegP()  // Pointer operand restricted to register class ptr_rbx_reg
 5400 %{
 5401   constraint(ALLOC_IN_RC(ptr_rbx_reg));  // allocation limited to ptr_rbx_reg
 5402   match(RegP);
 5403   match(rRegP);  // an input expecting rbx_RegP also accepts an rRegP
 5404
 5405   format %{ %}
 5406   interface(REG_INTER);
 5407 %}
 5408 
 5409 operand rsi_RegP()  // Pointer operand restricted to register class ptr_rsi_reg
 5410 %{
 5411   constraint(ALLOC_IN_RC(ptr_rsi_reg));  // allocation limited to ptr_rsi_reg
 5412   match(RegP);
 5413   match(rRegP);  // an input expecting rsi_RegP also accepts an rRegP
 5414
 5415   format %{ %}
 5416   interface(REG_INTER);
 5417 %}
 5418 
 5419 operand rbp_RegP()  // Pointer operand restricted to register class ptr_rbp_reg
 5420 %{
 5421   constraint(ALLOC_IN_RC(ptr_rbp_reg));  // allocation limited to ptr_rbp_reg
 5422   match(RegP);
 5423   match(rRegP);  // an input expecting rbp_RegP also accepts an rRegP
 5424
 5425   format %{ %}
 5426   interface(REG_INTER);
 5427 %}
 5428 
 5429 // Used in rep stosq
 5430 operand rdi_RegP()  // Pointer operand restricted to register class ptr_rdi_reg
 5431 %{
 5432   constraint(ALLOC_IN_RC(ptr_rdi_reg));  // allocation limited to ptr_rdi_reg
 5433   match(RegP);
 5434   match(rRegP);  // an input expecting rdi_RegP also accepts an rRegP
 5435
 5436   format %{ %}
 5437   interface(REG_INTER);
 5438 %}
 5439 
 5440 operand r15_RegP()  // Pointer operand restricted to register class ptr_r15_reg (the file's Q&A calls r15 the read-only TLS register)
 5441 %{
 5442   constraint(ALLOC_IN_RC(ptr_r15_reg));  // allocation limited to ptr_r15_reg
 5443   match(RegP);
 5444   match(rRegP);  // an input expecting r15_RegP also accepts an rRegP
 5445
 5446   format %{ %}
 5447   interface(REG_INTER);
 5448 %}
 5449 
 5450 operand rRegL()  // General long register operand
 5451 %{
 5452   constraint(ALLOC_IN_RC(long_reg));  // allocated from register class long_reg
 5453   match(RegL);
 5454   match(rax_RegL);  // only the rax and rdx fixed-register long operands are listed here
 5455   match(rdx_RegL);
 5456
 5457   format %{ %}
 5458   interface(REG_INTER);
 5459 %}
 5460 
 5461 // Special Registers
 5462 operand no_rax_rdx_RegL()  // Long operand allocated from class long_no_rax_rdx_reg
 5463 %{
 5464   constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));  // by its name, the long registers minus RAX and RDX
 5465   match(RegL);
 5466   match(rRegL);  // an input expecting this operand also accepts an rRegL
 5467
 5468   format %{ %}
 5469   interface(REG_INTER);
 5470 %}
 5471 
 5472 operand rax_RegL()  // Long operand restricted to register class long_rax_reg
 5473 %{
 5474   constraint(ALLOC_IN_RC(long_rax_reg));  // allocation limited to long_rax_reg
 5475   match(RegL);
 5476   match(rRegL);  // an input expecting rax_RegL also accepts an rRegL
 5477
 5478   format %{ "RAX" %}  // printed as the fixed name RAX (the other fixed long operands use an empty format)
 5479   interface(REG_INTER);
 5480 %}
 5481 
 5482 operand rcx_RegL()  // Long operand restricted to register class long_rcx_reg
 5483 %{
 5484   constraint(ALLOC_IN_RC(long_rcx_reg));  // allocation limited to long_rcx_reg
 5485   match(RegL);
 5486   match(rRegL);  // an input expecting rcx_RegL also accepts an rRegL
 5487
 5488   format %{ %}
 5489   interface(REG_INTER);
 5490 %}
 5491 
 5492 operand rdx_RegL()  // Long operand restricted to register class long_rdx_reg
 5493 %{
 5494   constraint(ALLOC_IN_RC(long_rdx_reg));  // allocation limited to long_rdx_reg
 5495   match(RegL);
 5496   match(rRegL);  // an input expecting rdx_RegL also accepts an rRegL
 5497
 5498   format %{ %}
 5499   interface(REG_INTER);
 5500 %}
 5501 
 5502 operand r11_RegL()  // Long operand restricted to register class long_r11_reg
 5503 %{
 5504   constraint(ALLOC_IN_RC(long_r11_reg));  // allocation limited to long_r11_reg
 5505   match(RegL);
 5506   match(rRegL);  // an input expecting r11_RegL also accepts an rRegL
 5507
 5508   format %{ %}
 5509   interface(REG_INTER);
 5510 %}
 5511 
 5512 operand no_rbp_r13_RegL()  // Long operand allocated from class long_no_rbp_r13_reg
 5513 %{
 5514   constraint(ALLOC_IN_RC(long_no_rbp_r13_reg));  // by its name, the long registers minus RBP and R13
 5515   match(RegL);
 5516   match(rRegL);     // accepts the general long operand ...
 5517   match(rax_RegL);  // ... and the rax, rcx and rdx fixed-register long operands
 5518   match(rcx_RegL);
 5519   match(rdx_RegL);
 5520
 5521   format %{ %}
 5522   interface(REG_INTER);
 5523 %}
 5524 
 5525 // Flags register, used as output of compare instructions
 5526 operand rFlagsReg()  // Flags operand; differs from the other rFlagsReg* operands only in name and format string
 5527 %{
 5528   constraint(ALLOC_IN_RC(int_flags));  // allocated from register class int_flags
 5529   match(RegFlags);
 5530
 5531   format %{ "RFLAGS" %}
 5532   interface(REG_INTER);
 5533 %}
 5534 
 5535 // Flags register, used as output of FLOATING POINT compare instructions
 5536 operand rFlagsRegU()  // Flags operand with the same class and match rule as rFlagsReg, kept as a distinct operand type
 5537 %{
 5538   constraint(ALLOC_IN_RC(int_flags));  // allocated from register class int_flags
 5539   match(RegFlags);
 5540
 5541   format %{ "RFLAGS_U" %}
 5542   interface(REG_INTER);
 5543 %}
 5544 
 5545 operand rFlagsRegUCF() %{  // Flags operand available only when the APX + AVX10.2 combination is NOT in use
 5546   constraint(ALLOC_IN_RC(int_flags));  // allocated from register class int_flags
 5547   match(RegFlags);
 5548   predicate(!UseAPX || !VM_Version::supports_avx10_2());  // exact complement of the rFlagsRegUCFE predicate
 5549
 5550   format %{ "RFLAGS_U_CF" %}
 5551   interface(REG_INTER);
 5552 %}
 5553 
 5554 operand rFlagsRegUCFE() %{  // Flags operand available only when both UseAPX and AVX10.2 support are present
 5555   constraint(ALLOC_IN_RC(int_flags));  // allocated from register class int_flags
 5556   match(RegFlags);
 5557   predicate(UseAPX && VM_Version::supports_avx10_2());  // exact complement of the rFlagsRegUCF predicate
 5558
 5559   format %{ "RFLAGS_U_CFE" %}
 5560   interface(REG_INTER);
 5561 %}
 5562 
 5563 // Float register operands
 5564 operand regF() %{  // Float register operand using the default float class
 5565    constraint(ALLOC_IN_RC(float_reg));  // allocated from register class float_reg
 5566    match(RegF);
 5567
 5568    format %{ %}
 5569    interface(REG_INTER);
 5570 %}
 5571 
 5572 // Float register operands
 5573 operand legRegF() %{  // Float register operand using the legacy float class
 5574    constraint(ALLOC_IN_RC(float_reg_legacy));  // allocated from register class float_reg_legacy
 5575    match(RegF);
 5576
 5577    format %{ %}
 5578    interface(REG_INTER);
 5579 %}
 5580 
 5581 // Float register operands
 5582 operand vlRegF() %{  // Float register operand using the float_reg_vl class
 5583    constraint(ALLOC_IN_RC(float_reg_vl));  // allocated from register class float_reg_vl
 5584    match(RegF);
 5585
 5586    format %{ %}
 5587    interface(REG_INTER);
 5588 %}
 5589 
 5590 // Double register operands
 5591 operand regD() %{  // Double register operand using the default double class
 5592    constraint(ALLOC_IN_RC(double_reg));  // allocated from register class double_reg
 5593    match(RegD);
 5594
 5595    format %{ %}
 5596    interface(REG_INTER);
 5597 %}
 5598 
 5599 // Double register operands
 5600 operand legRegD() %{  // Double register operand using the legacy double class
 5601    constraint(ALLOC_IN_RC(double_reg_legacy));  // allocated from register class double_reg_legacy
 5602    match(RegD);
 5603
 5604    format %{ %}
 5605    interface(REG_INTER);
 5606 %}
 5607 
 5608 // Double register operands
 5609 operand vlRegD() %{  // Double register operand using the double_reg_vl class
 5610    constraint(ALLOC_IN_RC(double_reg_vl));  // allocated from register class double_reg_vl
 5611    match(RegD);
 5612
 5613    format %{ %}
 5614    interface(REG_INTER);
 5615 %}
 5616 
 5617 //----------Memory Operands----------------------------------------------------
 5618 // Direct Memory Operand
 5619 // operand direct(immP addr)
 5620 // %{
 5621 //   match(addr);
 5622 
 5623 //   format %{ "[$addr]" %}
 5624 //   interface(MEMORY_INTER) %{
 5625 //     base(0xFFFFFFFF);
 5626 //     index(0x4);
 5627 //     scale(0x0);
 5628 //     disp($addr);
 5629 //   %}
 5630 // %}
 5631 
 5632 // Indirect Memory Operand
 5633 operand indirect(any_RegP reg)  // Address formed by a pointer register alone: [reg]
 5634 %{
 5635   constraint(ALLOC_IN_RC(ptr_reg));
 5636   match(reg);  // the bare pointer register is the whole address expression
 5637
 5638   format %{ "[$reg]" %}
 5639   interface(MEMORY_INTER) %{
 5640     base($reg);
 5641     index(0x4);  // No index (same value the stackSlot operands label "No Index")
 5642     scale(0x0);  // No scale
 5643     disp(0x0);   // No displacement
 5644   %}
 5645 %}
 5646 
 5647 // Indirect Memory Plus Short Offset Operand
 5648 operand indOffset8(any_RegP reg, immL8 off)  // Address [reg + off] where off is an immL8 constant
 5649 %{
 5650   constraint(ALLOC_IN_RC(ptr_reg));
 5651   match(AddP reg off);  // pointer add of the base register and the constant offset
 5652
 5653   format %{ "[$reg + $off (8-bit)]" %}
 5654   interface(MEMORY_INTER) %{
 5655     base($reg);
 5656     index(0x4);  // No index
 5657     scale(0x0);  // No scale
 5658     disp($off);  // the matched constant becomes the displacement
 5659   %}
 5660 %}
 5661 
 5662 // Indirect Memory Plus Long Offset Operand
 5663 operand indOffset32(any_RegP reg, immL32 off)  // Address [reg + off] where off is an immL32 constant
 5664 %{
 5665   constraint(ALLOC_IN_RC(ptr_reg));
 5666   match(AddP reg off);  // pointer add of the base register and the constant offset
 5667
 5668   format %{ "[$reg + $off (32-bit)]" %}
 5669   interface(MEMORY_INTER) %{
 5670     base($reg);
 5671     index(0x4);  // No index
 5672     scale(0x0);  // No scale
 5673     disp($off);  // the matched constant becomes the displacement
 5674   %}
 5675 %}
 5676 
 5677 // Indirect Memory Plus Index Register Plus Offset Operand
 5678 operand indIndexOffset(any_RegP reg, rRegL lreg, immL32 off)  // Address [reg + lreg + off] with an unscaled long index
 5679 %{
 5680   constraint(ALLOC_IN_RC(ptr_reg));
 5681   match(AddP (AddP reg lreg) off);  // inner add supplies the index, outer add the constant offset
 5682
 5683   op_cost(10);
 5684   format %{"[$reg + $off + $lreg]" %}
 5685   interface(MEMORY_INTER) %{
 5686     base($reg);
 5687     index($lreg);
 5688     scale(0x0);  // No scale
 5689     disp($off);
 5690   %}
 5691 %}
 5692 
 5693 // Indirect Memory Plus Index Register Operand
 5694 operand indIndex(any_RegP reg, rRegL lreg)  // Address [reg + lreg] with an unscaled long index and no offset
 5695 %{
 5696   constraint(ALLOC_IN_RC(ptr_reg));
 5697   match(AddP reg lreg);  // pointer add of the base and the index register
 5698
 5699   op_cost(10);
 5700   format %{"[$reg + $lreg]" %}
 5701   interface(MEMORY_INTER) %{
 5702     base($reg);
 5703     index($lreg);
 5704     scale(0x0);  // No scale
 5705     disp(0x0);   // No displacement
 5706   %}
 5707 %}
 5708 
 5709 // Indirect Memory Times Scale Plus Index Register
 5710 operand indIndexScale(any_RegP reg, rRegL lreg, immI2 scale)  // Address [reg + (lreg << scale)] with no offset
 5711 %{
 5712   constraint(ALLOC_IN_RC(ptr_reg));
 5713   match(AddP reg (LShiftL lreg scale));  // the left shift of the index is folded into the address
 5714
 5715   op_cost(10);
 5716   format %{"[$reg + $lreg << $scale]" %}
 5717   interface(MEMORY_INTER) %{
 5718     base($reg);
 5719     index($lreg);
 5720     scale($scale);  // shift count taken from the matched immI2 constant
 5721     disp(0x0);      // No displacement
 5722   %}
 5723 %}
 5724 
 5725 operand indPosIndexScale(any_RegP reg, rRegI idx, immI2 scale)  // Address [reg + (idx << scale)] with an int index whose converted value is known non-negative
 5726 %{
 5727   constraint(ALLOC_IN_RC(ptr_reg));
 5728   predicate(n->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);  // lower bound of the long type at n->in(3)->in(1) must be >= 0; presumably that node is the ConvI2L of the match rule - confirm input indexing
 5729   match(AddP reg (LShiftL (ConvI2L idx) scale));  // the int-to-long conversion is absorbed; the int register is used as the index
 5730
 5731   op_cost(10);
 5732   format %{"[$reg + pos $idx << $scale]" %}
 5733   interface(MEMORY_INTER) %{
 5734     base($reg);
 5735     index($idx);
 5736     scale($scale);
 5737     disp(0x0);  // No displacement
 5738   %}
 5739 %}
 5740 
 5741 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
 5742 operand indIndexScaleOffset(any_RegP reg, immL32 off, rRegL lreg, immI2 scale)  // Address [reg + (lreg << scale) + off]
 5743 %{
 5744   constraint(ALLOC_IN_RC(ptr_reg));
 5745   match(AddP (AddP reg (LShiftL lreg scale)) off);  // inner add supplies the scaled index, outer add the constant offset
 5746
 5747   op_cost(10);
 5748   format %{"[$reg + $off + $lreg << $scale]" %}
 5749   interface(MEMORY_INTER) %{
 5750     base($reg);
 5751     index($lreg);
 5752     scale($scale);
 5753     disp($off);
 5754   %}
 5755 %}
 5756 
 5757 // Indirect Memory Plus Positive Index Register Plus Offset Operand
 5758 operand indPosIndexOffset(any_RegP reg, immL32 off, rRegI idx)  // Address [reg + idx + off] with an unscaled int index known non-negative
 5759 %{
 5760   constraint(ALLOC_IN_RC(ptr_reg));
 5761   predicate(n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);  // lower bound of the long type at n->in(2)->in(3) must be >= 0; presumably that node is the ConvI2L of the match rule - confirm input indexing
 5762   match(AddP (AddP reg (ConvI2L idx)) off);  // the int-to-long conversion is absorbed; the int register is used as the index
 5763
 5764   op_cost(10);
 5765   format %{"[$reg + $off + $idx]" %}
 5766   interface(MEMORY_INTER) %{
 5767     base($reg);
 5768     index($idx);
 5769     scale(0x0);  // No scale
 5770     disp($off);
 5771   %}
 5772 %}
 5773 
 5774 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
 5775 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)  // Address [reg + (idx << scale) + off] with an int index known non-negative
 5776 %{
 5777   constraint(ALLOC_IN_RC(ptr_reg));
 5778   predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);  // lower bound of the long type at n->in(2)->in(3)->in(1) must be >= 0; presumably that node is the ConvI2L of the match rule - confirm input indexing
 5779   match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);  // conversion and shift are both absorbed into the address
 5780
 5781   op_cost(10);
 5782   format %{"[$reg + $off + $idx << $scale]" %}
 5783   interface(MEMORY_INTER) %{
 5784     base($reg);
 5785     index($idx);
 5786     scale($scale);
 5787     disp($off);
 5788   %}
 5789 %}
 5790 
 5791 // Indirect Narrow Oop Plus Offset Operand
 5792 // Note: x86 architecture doesn't support "scale * index + offset" without a base,
 5793 // so we can't free r12 even with CompressedOops::base() == nullptr.
 5794 operand indCompressedOopOffset(rRegN reg, immL32 off) %{  // Address [R12 + (reg << 3) + off]: the DecodeN is folded into the addressing mode
 5795   predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));  // only when compressed oops are on and their shift equals Address::times_8
 5796   constraint(ALLOC_IN_RC(ptr_reg));
 5797   match(AddP (DecodeN reg) off);  // the narrow register itself becomes the index
 5798
 5799   op_cost(10);
 5800   format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
 5801   interface(MEMORY_INTER) %{
 5802     base(0xc); // R12
 5803     index($reg);
 5804     scale(0x3);  // fixed scale of 3, matching the "<< 3" in the format string
 5805     disp($off);
 5806   %}
 5807 %}
 5808 
 5809 // Indirect Memory Operand
 5810 operand indirectNarrow(rRegN reg)  // Address [reg] where reg holds a narrow value used directly as the base
 5811 %{
 5812   predicate(CompressedOops::shift() == 0);  // only valid when the compressed-oop shift is zero
 5813   constraint(ALLOC_IN_RC(ptr_reg));
 5814   match(DecodeN reg);  // the decode is absorbed; no separate decode result is needed
 5815
 5816   format %{ "[$reg]" %}
 5817   interface(MEMORY_INTER) %{
 5818     base($reg);
 5819     index(0x4);  // No index
 5820     scale(0x0);  // No scale
 5821     disp(0x0);   // No displacement
 5822   %}
 5823 %}
 5824 
 5825 // Indirect Memory Plus Short Offset Operand
 5826 operand indOffset8Narrow(rRegN reg, immL8 off)  // Address [reg + off] with a narrow base register and an immL8 offset
 5827 %{
 5828   predicate(CompressedOops::shift() == 0);  // only valid when the compressed-oop shift is zero
 5829   constraint(ALLOC_IN_RC(ptr_reg));
 5830   match(AddP (DecodeN reg) off);  // the decode is absorbed into the address
 5831
 5832   format %{ "[$reg + $off (8-bit)]" %}
 5833   interface(MEMORY_INTER) %{
 5834     base($reg);
 5835     index(0x4);  // No index
 5836     scale(0x0);  // No scale
 5837     disp($off);
 5838   %}
 5839 %}
 5840 
 5841 // Indirect Memory Plus Long Offset Operand
 5842 operand indOffset32Narrow(rRegN reg, immL32 off)  // Address [reg + off] with a narrow base register and an immL32 offset
 5843 %{
 5844   predicate(CompressedOops::shift() == 0);  // only valid when the compressed-oop shift is zero
 5845   constraint(ALLOC_IN_RC(ptr_reg));
 5846   match(AddP (DecodeN reg) off);  // the decode is absorbed into the address
 5847
 5848   format %{ "[$reg + $off (32-bit)]" %}
 5849   interface(MEMORY_INTER) %{
 5850     base($reg);
 5851     index(0x4);  // No index
 5852     scale(0x0);  // No scale
 5853     disp($off);
 5854   %}
 5855 %}
 5856 
 5857 // Indirect Memory Plus Index Register Plus Offset Operand
 5858 operand indIndexOffsetNarrow(rRegN reg, rRegL lreg, immL32 off)  // Address [reg + lreg + off] with a narrow base register
 5859 %{
 5860   predicate(CompressedOops::shift() == 0);  // only valid when the compressed-oop shift is zero
 5861   constraint(ALLOC_IN_RC(ptr_reg));
 5862   match(AddP (AddP (DecodeN reg) lreg) off);  // decode absorbed; inner add gives the index, outer add the offset
 5863
 5864   op_cost(10);
 5865   format %{"[$reg + $off + $lreg]" %}
 5866   interface(MEMORY_INTER) %{
 5867     base($reg);
 5868     index($lreg);
 5869     scale(0x0);  // No scale
 5870     disp($off);
 5871   %}
 5872 %}
 5873 
 5874 // Indirect Memory Plus Index Register Operand
 5875 operand indIndexNarrow(rRegN reg, rRegL lreg)  // Address [reg + lreg] with a narrow base register and no offset
 5876 %{
 5877   predicate(CompressedOops::shift() == 0);  // only valid when the compressed-oop shift is zero
 5878   constraint(ALLOC_IN_RC(ptr_reg));
 5879   match(AddP (DecodeN reg) lreg);  // the decode is absorbed into the address
 5880
 5881   op_cost(10);
 5882   format %{"[$reg + $lreg]" %}
 5883   interface(MEMORY_INTER) %{
 5884     base($reg);
 5885     index($lreg);
 5886     scale(0x0);  // No scale
 5887     disp(0x0);   // No displacement
 5888   %}
 5889 %}
 5890 
 5891 // Indirect Memory Times Scale Plus Index Register
 5892 operand indIndexScaleNarrow(rRegN reg, rRegL lreg, immI2 scale)  // Address [reg + (lreg << scale)] with a narrow base register
 5893 %{
 5894   predicate(CompressedOops::shift() == 0);  // only valid when the compressed-oop shift is zero
 5895   constraint(ALLOC_IN_RC(ptr_reg));
 5896   match(AddP (DecodeN reg) (LShiftL lreg scale));  // decode and index shift are both absorbed
 5897
 5898   op_cost(10);
 5899   format %{"[$reg + $lreg << $scale]" %}
 5900   interface(MEMORY_INTER) %{
 5901     base($reg);
 5902     index($lreg);
 5903     scale($scale);
 5904     disp(0x0);  // No displacement
 5905   %}
 5906 %}
 5907 
 5908 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
 5909 operand indIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegL lreg, immI2 scale)  // Address [reg + (lreg << scale) + off] with a narrow base register
 5910 %{
 5911   predicate(CompressedOops::shift() == 0);  // only valid when the compressed-oop shift is zero
 5912   constraint(ALLOC_IN_RC(ptr_reg));
 5913   match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);  // decode, index shift and offset add are all absorbed
 5914
 5915   op_cost(10);
 5916   format %{"[$reg + $off + $lreg << $scale]" %}
 5917   interface(MEMORY_INTER) %{
 5918     base($reg);
 5919     index($lreg);
 5920     scale($scale);
 5921     disp($off);
 5922   %}
 5923 %}
 5924 
 5925 // Indirect Memory Plus Positive Index Register Plus Offset Operand
 5926 operand indPosIndexOffsetNarrow(rRegN reg, immL32 off, rRegI idx)  // Address [reg + idx + off] with a narrow base and an unscaled int index known non-negative
 5927 %{
 5928   constraint(ALLOC_IN_RC(ptr_reg));
 5929   predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);  // zero oop shift, and the long type at n->in(2)->in(3) has lower bound >= 0; presumably the ConvI2L - confirm input indexing
 5930   match(AddP (AddP (DecodeN reg) (ConvI2L idx)) off);  // decode and int-to-long conversion are both absorbed
 5931
 5932   op_cost(10);
 5933   format %{"[$reg + $off + $idx]" %}
 5934   interface(MEMORY_INTER) %{
 5935     base($reg);
 5936     index($idx);
 5937     scale(0x0);  // No scale
 5938     disp($off);
 5939   %}
 5940 %}
 5941 
 5942 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
 5943 operand indPosIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegI idx, immI2 scale)  // Address [reg + (idx << scale) + off] with a narrow base and an int index known non-negative
 5944 %{
 5945   constraint(ALLOC_IN_RC(ptr_reg));
 5946   predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);  // zero oop shift, and the long type at n->in(2)->in(3)->in(1) has lower bound >= 0; presumably the ConvI2L - confirm input indexing
 5947   match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L idx) scale)) off);  // decode, conversion and shift are all absorbed
 5948
 5949   op_cost(10);
 5950   format %{"[$reg + $off + $idx << $scale]" %}
 5951   interface(MEMORY_INTER) %{
 5952     base($reg);
 5953     index($idx);
 5954     scale($scale);
 5955     disp($off);
 5956   %}
 5957 %}
 5958 
 5959 //----------Special Memory Operands--------------------------------------------
 5960 // Stack Slot Operand - This operand is used for loading and storing temporary
 5961 //                      values on the stack where a match requires a value to
 5962 //                      flow through memory.
 5963 operand stackSlotP(sRegP reg)  // Memory operand addressing an sRegP stack slot as [RSP + offset]
 5964 %{
 5965   constraint(ALLOC_IN_RC(stack_slots));  // "allocated" from the stack_slots class rather than a machine register class
 5966   // No match rule because this operand is only generated in matching
 5967
 5968   format %{ "[$reg]" %}
 5969   interface(MEMORY_INTER) %{
 5970     base(0x4);   // RSP
 5971     index(0x4);  // No Index
 5972     scale(0x0);  // No Scale
 5973     disp($reg);  // Stack Offset
 5974   %}
 5975 %}
 5976 
 5977 operand stackSlotI(sRegI reg)  // Memory operand addressing an sRegI stack slot as [RSP + offset]
 5978 %{
 5979   constraint(ALLOC_IN_RC(stack_slots));  // "allocated" from the stack_slots class rather than a machine register class
 5980   // No match rule because this operand is only generated in matching
 5981
 5982   format %{ "[$reg]" %}
 5983   interface(MEMORY_INTER) %{
 5984     base(0x4);   // RSP
 5985     index(0x4);  // No Index
 5986     scale(0x0);  // No Scale
 5987     disp($reg);  // Stack Offset
 5988   %}
 5989 %}
 5990 
 5991 operand stackSlotF(sRegF reg)  // Memory operand addressing an sRegF stack slot as [RSP + offset]
 5992 %{
 5993   constraint(ALLOC_IN_RC(stack_slots));  // "allocated" from the stack_slots class rather than a machine register class
 5994   // No match rule because this operand is only generated in matching
 5995
 5996   format %{ "[$reg]" %}
 5997   interface(MEMORY_INTER) %{
 5998     base(0x4);   // RSP
 5999     index(0x4);  // No Index
 6000     scale(0x0);  // No Scale
 6001     disp($reg);  // Stack Offset
 6002   %}
 6003 %}
 6004 
 6005 operand stackSlotD(sRegD reg)  // Memory operand addressing an sRegD stack slot as [RSP + offset]
 6006 %{
 6007   constraint(ALLOC_IN_RC(stack_slots));  // "allocated" from the stack_slots class rather than a machine register class
 6008   // No match rule because this operand is only generated in matching
 6009
 6010   format %{ "[$reg]" %}
 6011   interface(MEMORY_INTER) %{
 6012     base(0x4);   // RSP
 6013     index(0x4);  // No Index
 6014     scale(0x0);  // No Scale
 6015     disp($reg);  // Stack Offset
 6016   %}
 6017 %}
 6018 operand stackSlotL(sRegL reg)  // Memory operand addressing an sRegL stack slot as [RSP + offset]
 6019 %{
 6020   constraint(ALLOC_IN_RC(stack_slots));  // "allocated" from the stack_slots class rather than a machine register class
 6021   // No match rule because this operand is only generated in matching
 6022
 6023   format %{ "[$reg]" %}
 6024   interface(MEMORY_INTER) %{
 6025     base(0x4);   // RSP
 6026     index(0x4);  // No Index
 6027     scale(0x0);  // No Scale
 6028     disp($reg);  // Stack Offset
 6029   %}
 6030 %}
 6031 
 6032 //----------Conditional Branch Operands----------------------------------------
 6033 // Comparison Op  - This is the operation of the comparison, and is limited to
 6034 //                  the following set of codes:
 6035 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
 6036 //
 6037 // Other attributes of the comparison, such as unsignedness, are specified
 6038 // by the comparison instruction that sets a condition code flags register.
 6039 // That result is represented by a flags operand whose subtype is appropriate
 6040 // to the unsignedness (etc.) of the comparison.
 6041 //
 6042 // Later, the instruction which matches both the Comparison Op (a Bool) and
 6043 // the flags (produced by the Cmp) specifies the coding of the comparison op
 6044 // by matching a specific subtype of Bool operand below, such as cmpOpU.
 6045 
 6046 // Comparison Code
 6047 operand cmpOp()  // Comparison operand with no predicate: matches any Bool and uses the l/ge/le/g suffixes
 6048 %{
 6049   match(Bool);
 6050
 6051   format %{ "" %}
 6052   interface(COND_INTER) %{  // each entry pairs a numeric code with a suffix string; presumably the x86 condition-code nibble and its mnemonic suffix
 6053     equal(0x4, "e");
 6054     not_equal(0x5, "ne");
 6055     less(0xc, "l");
 6056     greater_equal(0xd, "ge");
 6057     less_equal(0xe, "le");
 6058     greater(0xf, "g");
 6059     overflow(0x0, "o");
 6060     no_overflow(0x1, "no");
 6061   %}
 6062 %}
 6063 
 6064 // Comparison Code, unsigned compare.  Used by FP also, with
 6065 // C2 (unordered) turned into GT or LT already.  The other bits
 6066 // C0 and C3 are turned into Carry & Zero flags.
 6067 operand cmpOpU()  // Comparison operand with no predicate: matches any Bool and uses the b/ae/be/a suffixes
 6068 %{
 6069   match(Bool);
 6070
 6071   format %{ "" %}
 6072   interface(COND_INTER) %{  // same layout as cmpOp; only the four ordering entries differ
 6073     equal(0x4, "e");
 6074     not_equal(0x5, "ne");
 6075     less(0x2, "b");
 6076     greater_equal(0x3, "ae");
 6077     less_equal(0x6, "be");
 6078     greater(0x7, "a");
 6079     overflow(0x0, "o");
 6080     no_overflow(0x1, "no");
 6081   %}
 6082 %}
 6083 
 6084 
 6085 // Floating comparisons that don't require any fixup for the unordered case,
 6086 // If both inputs of the comparison are the same, ZF is always set so we
 6087 // don't need to use cmpOpUCF2 for eq/ne
 6088 operand cmpOpUCF() %{  // Comparison operand for lt/ge/le/gt tests, or any test whose compare has identical inputs
 6089   match(Bool);
 6090   predicate((!UseAPX || !VM_Version::supports_avx10_2()) &&
 6091             (n->as_Bool()->_test._test == BoolTest::lt ||
 6092              n->as_Bool()->_test._test == BoolTest::ge ||
 6093              n->as_Bool()->_test._test == BoolTest::le ||
 6094              n->as_Bool()->_test._test == BoolTest::gt ||
 6095              n->in(1)->in(1) == n->in(1)->in(2)));  // disabled under APX + AVX10.2; the last clause admits eq/ne when both inputs of n->in(1) are the same node
 6096   format %{ "" %}
 6097   interface(COND_INTER) %{
 6098     equal(0xb, "np");      // unlike the other cmpOp* operands, equal/not_equal
 6099     not_equal(0xa, "p");   // are mapped to the np/p codes here
 6100     less(0x2, "b");
 6101     greater_equal(0x3, "ae");
 6102     less_equal(0x6, "be");
 6103     greater(0x7, "a");
 6104     overflow(0x0, "o");
 6105     no_overflow(0x1, "no");
 6106   %}
 6107 %}
 6108 
 6109 
 6110 // Floating comparisons that can be fixed up with extra conditional jumps
 6111 operand cmpOpUCF2() %{  // Comparison operand for eq/ne tests whose compare has two distinct inputs
 6112   match(Bool);
 6113   predicate((!UseAPX || !VM_Version::supports_avx10_2()) &&
 6114             (n->as_Bool()->_test._test == BoolTest::ne ||
 6115              n->as_Bool()->_test._test == BoolTest::eq) &&
 6116             n->in(1)->in(1) != n->in(1)->in(2));  // disabled under APX + AVX10.2; covers exactly the eq/ne cases that cmpOpUCF rejects
 6117   format %{ "" %}
 6118   interface(COND_INTER) %{  // same code table as cmpOpU
 6119     equal(0x4, "e");
 6120     not_equal(0x5, "ne");
 6121     less(0x2, "b");
 6122     greater_equal(0x3, "ae");
 6123     less_equal(0x6, "be");
 6124     greater(0x7, "a");
 6125     overflow(0x0, "o");
 6126     no_overflow(0x1, "no");
 6127   %}
 6128 %}
 6129 
 6130 
 6131 // Floating point comparisons that set condition flags to test more directly,
 6132 // Unsigned tests are used for G (>) and GE (>=) conditions while signed tests
 6133 // are used for L (<) and LE (<=) conditions. It's important to convert these
 6134 // latter conditions to ones that use unsigned tests before passing into an
 6135 // instruction because the preceding comparison might be based on a three way
 6136 // comparison (CmpF3 or CmpD3) that also assigns unordered outcomes to -1.
 6137 operand cmpOpUCFE()  // Comparison operand used only when UseAPX and AVX10.2 support are both present
 6138 %{
 6139   match(Bool);
 6140   predicate((UseAPX && VM_Version::supports_avx10_2()) &&
 6141             (n->as_Bool()->_test._test == BoolTest::ne ||
 6142              n->as_Bool()->_test._test == BoolTest::eq ||
 6143              n->as_Bool()->_test._test == BoolTest::lt ||
 6144              n->as_Bool()->_test._test == BoolTest::ge ||
 6145              n->as_Bool()->_test._test == BoolTest::le ||
 6146              n->as_Bool()->_test._test == BoolTest::gt));  // accepts all six eq/ne/lt/ge/le/gt tests; complements the feature guard of cmpOpUCF and cmpOpUCF2
 6147
 6148   format %{ "" %}
 6149   interface(COND_INTER) %{  // same code table as cmpOpU
 6150     equal(0x4, "e");
 6151     not_equal(0x5, "ne");
 6152     less(0x2, "b");
 6153     greater_equal(0x3, "ae");
 6154     less_equal(0x6, "be");
 6155     greater(0x7, "a");
 6156     overflow(0x0, "o");
 6157     no_overflow(0x1, "no");
 6158   %}
 6159 %}
 6160 
 6161 // Operands for bound floating point register arguments
 6162 operand rxmm0() %{  // VecX operand pinned to register class xmm0_reg
 6163   constraint(ALLOC_IN_RC(xmm0_reg));  // allocation limited to xmm0_reg
 6164   match(VecX);
 6165   format%{%}
 6166   interface(REG_INTER);
 6167 %}
 6168 
 6169 // Vectors
 6170 
 6171 // Dummy generic vector class. Should be used for all vector operands.
 6172 // Replaced with vec[SDXYZ] during post-selection pass.
 6173 operand vec() %{  // Generic vector operand covering all five vector ideal types
 6174   constraint(ALLOC_IN_RC(dynamic));  // placeholder class "dynamic"; no concrete vector class is named here
 6175   match(VecX);
 6176   match(VecY);
 6177   match(VecZ);
 6178   match(VecS);
 6179   match(VecD);
 6180
 6181   format %{ %}
 6182   interface(REG_INTER);
 6183 %}
 6184 
 6185 // Dummy generic legacy vector class. Should be used for all legacy vector operands.
 6186 // Replaced with legVec[SDXYZ] during post-selection cleanup.
 6187 // Note: legacy register class is used to avoid extra (unneeded in 32-bit VM)
 6188 // runtime code generation via reg_class_dynamic.
 6189 operand legVec() %{  // Generic legacy vector operand covering all five vector ideal types
 6190   constraint(ALLOC_IN_RC(dynamic));  // placeholder class "dynamic", identical to the constraint on vec
 6191   match(VecX);
 6192   match(VecY);
 6193   match(VecZ);
 6194   match(VecS);
 6195   match(VecD);
 6196
 6197   format %{ %}
 6198   interface(REG_INTER);
 6199 %}
 6200 
 6201 // Replaces vec during post-selection cleanup. See above.
 6202 operand vecS() %{  // Concrete VecS operand
 6203   constraint(ALLOC_IN_RC(vectors_reg_vlbwdq));  // allocated from register class vectors_reg_vlbwdq
 6204   match(VecS);
 6205
 6206   format %{ %}
 6207   interface(REG_INTER);
 6208 %}
 6209 
 6210 // Replaces legVec during post-selection cleanup. See above.
 6211 operand legVecS() %{  // Concrete VecS operand using the legacy class
 6212   constraint(ALLOC_IN_RC(vectors_reg_legacy));  // allocated from register class vectors_reg_legacy
 6213   match(VecS);
 6214
 6215   format %{ %}
 6216   interface(REG_INTER);
 6217 %}
 6218 
 6219 // Replaces vec during post-selection cleanup. See above.
 6220 operand vecD() %{  // Concrete VecD operand
 6221   constraint(ALLOC_IN_RC(vectord_reg_vlbwdq));  // allocated from register class vectord_reg_vlbwdq
 6222   match(VecD);
 6223
 6224   format %{ %}
 6225   interface(REG_INTER);
 6226 %}
 6227 
 6228 // Replaces legVec during post-selection cleanup. See above.
 6229 operand legVecD() %{  // Concrete VecD operand using the legacy class
 6230   constraint(ALLOC_IN_RC(vectord_reg_legacy));  // allocated from register class vectord_reg_legacy
 6231   match(VecD);
 6232
 6233   format %{ %}
 6234   interface(REG_INTER);
 6235 %}
 6236 
 6237 // Replaces vec during post-selection cleanup. See above.
 6238 operand vecX() %{  // Concrete VecX operand
 6239   constraint(ALLOC_IN_RC(vectorx_reg_vlbwdq));  // allocated from register class vectorx_reg_vlbwdq
 6240   match(VecX);
 6241
 6242   format %{ %}
 6243   interface(REG_INTER);
 6244 %}
 6245 
 6246 // Replaces legVec during post-selection cleanup. See above.
 6247 operand legVecX() %{  // Concrete VecX operand using the legacy class
 6248   constraint(ALLOC_IN_RC(vectorx_reg_legacy));  // allocated from register class vectorx_reg_legacy
 6249   match(VecX);
 6250
 6251   format %{ %}
 6252   interface(REG_INTER);
 6253 %}
 6254 
 6255 // Replaces vec during post-selection cleanup. See above.
 6256 operand vecY() %{  // Concrete VecY operand
 6257   constraint(ALLOC_IN_RC(vectory_reg_vlbwdq));  // allocated from register class vectory_reg_vlbwdq
 6258   match(VecY);
 6259
 6260   format %{ %}
 6261   interface(REG_INTER);
 6262 %}
 6263 
 6264 // Replaces legVec during post-selection cleanup. See above.
 6265 operand legVecY() %{  // Concrete VecY operand using the legacy class
 6266   constraint(ALLOC_IN_RC(vectory_reg_legacy));  // allocated from register class vectory_reg_legacy
 6267   match(VecY);
 6268
 6269   format %{ %}
 6270   interface(REG_INTER);
 6271 %}
 6272 
 6273 // Replaces vec during post-selection cleanup. See above.
 6274 operand vecZ() %{  // Concrete VecZ operand
 6275   constraint(ALLOC_IN_RC(vectorz_reg));  // allocated from register class vectorz_reg (no _vlbwdq suffix, unlike vecS/D/X/Y)
 6276   match(VecZ);
 6277
 6278   format %{ %}
 6279   interface(REG_INTER);
 6280 %}
 6281 
 6282 // legVecZ: replaces legVec during post-selection cleanup. See above.
      // Matches ideal type VecZ; allocated from register class vectorz_reg_legacy.
 6283 operand legVecZ() %{
 6284   constraint(ALLOC_IN_RC(vectorz_reg_legacy));
 6285   match(VecZ);
 6286 
 6287   format %{ %}
 6288   interface(REG_INTER);
 6289 %}
 6290 
 6291 //----------OPERAND CLASSES----------------------------------------------------
 6292 // Operand Classes are groups of operands that are used to simplify
 6293 // instruction definitions by not requiring the AD writer to specify separate
 6294 // instructions for every form of operand when the instruction accepts
 6295 // multiple operand types with the same basic encoding and format.  The classic
 6296 // case of this is memory operands.
      //
      // "memory" groups every addressing-mode operand listed below (including the
      // *Narrow variants), so an instruct declared with a "memory" operand accepts
      // any of them.
 6297 
 6298 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
 6299                indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
 6300                indCompressedOopOffset,
 6301                indirectNarrow, indOffset8Narrow, indOffset32Narrow,
 6302                indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
 6303                indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
 6304 
 6305 //----------PIPELINE-----------------------------------------------------------
 6306 // Rules which define the behavior of the target architectures pipeline.
 6307 pipeline %{
 6308 
 6309 //----------ATTRIBUTES---------------------------------------------------------
      // Global pipeline parameters used by the pipe classes below.
 6310 attributes %{
 6311   variable_size_instructions;        // Variable (not fixed) size instructions
 6312   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
 6313   instruction_unit_size = 1;         // Instruction size unit is 1 byte
 6314   instruction_fetch_unit_size = 16;  // The processor fetches one line
 6315   instruction_fetch_units = 1;       // of 16 bytes
 6316 %}
 6317 
 6318 //----------RESOURCES----------------------------------------------------------
 6319 // Resources are the functional units available to the machine
 6320 
 6321 // Generic P2/P3 pipeline
 6322 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
 6323 // 3 instructions decoded per cycle.
 6324 // 2 load/store ops per cycle, 1 branch, 1 FPU,
 6325 // 3 ALU op, only ALU0 handles mul instructions.
      //
      // DECODE, MEM and ALU are declared as the union (|) of the individual units;
      // the pipe classes below use them for "any decoder", "any mem", "any alu".
      // NOTE(review): the text above says 2 load/store ops per cycle, but three
      // memory units (MS0, MS1, MS2) are declared below -- confirm which is intended.
 6326 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
 6327            MS0, MS1, MS2, MEM = MS0 | MS1 | MS2,
 6328            BR, FPU,
 6329            ALU0, ALU1, ALU2, ALU = ALU0 | ALU1 | ALU2);
 6330 
 6331 //----------PIPELINE DESCRIPTION-----------------------------------------------
 6332 // Pipeline Description specifies the stages in the machine's pipeline
 6333 
 6334 // Generic P2/P3 pipeline: six stages, S0 through S5. The pipe classes below
      // refer to these stage names for operand reads/writes and resource usage.
 6335 pipe_desc(S0, S1, S2, S3, S4, S5);
 6336 
 6337 //----------PIPELINE CLASSES---------------------------------------------------
 6338 // Pipeline Classes describe the stages in which input and output are
 6339 // referenced by the hardware pipeline.
 6340 
 6341 // Naming convention: ialu or fpu
 6342 // Then: _reg
 6343 // Then: _reg if there is a 2nd register
 6344 // Then: _long if it's a pair of instructions implementing a long
 6345 // Then: _fat if it requires the big decoder
 6346 //   Or: _mem if it requires the big decoder and a memory unit.
 6347 
 6348 // Integer ALU reg operation: dst is both read (S3) and written (S4).
      // Uses any decoder in S0 and any ALU in S3.
 6349 pipe_class ialu_reg(rRegI dst)
 6350 %{
 6351     single_instruction;
 6352     dst    : S4(write);
 6353     dst    : S3(read);
 6354     DECODE : S0;        // any decoder
 6355     ALU    : S3;        // any alu
 6356 %}
 6357 
 6358 // Long ALU reg operation: counted as two instructions; dst is read in S3
      // and written in S4.
 6359 pipe_class ialu_reg_long(rRegL dst)
 6360 %{
 6361     instruction_count(2);
 6362     dst    : S4(write);
 6363     dst    : S3(read);
 6364     DECODE : S0(2);     // any 2 decoders
 6365     ALU    : S3(2);     // any 2 alus (three ALUs are declared)
 6366 %}
 6367 
 6368 // Integer ALU reg operation using big decoder: same operand timing as
      // ialu_reg (dst read in S3, written in S4) but restricted to decoder D0.
 6369 pipe_class ialu_reg_fat(rRegI dst)
 6370 %{
 6371     single_instruction;
 6372     dst    : S4(write);
 6373     dst    : S3(read);
 6374     D0     : S0;        // big decoder only
 6375     ALU    : S3;        // any alu
 6376 %}
 6377 
 6378 // Integer ALU reg-reg operation: src is read in S3, dst is written in S4.
      // Uses any decoder in S0 and any ALU in S3.
 6379 pipe_class ialu_reg_reg(rRegI dst, rRegI src)
 6380 %{
 6381     single_instruction;
 6382     dst    : S4(write);
 6383     src    : S3(read);
 6384     DECODE : S0;        // any decoder
 6385     ALU    : S3;        // any alu
 6386 %}
 6387 
 6388 // Integer ALU operation using big decoder. Despite the reg_reg name, src is
      // declared as a memory operand here; no MEM resource is reserved.
      // NOTE(review): confirm whether src was meant to be a register operand.
 6389 pipe_class ialu_reg_reg_fat(rRegI dst, memory src)
 6390 %{
 6391     single_instruction;
 6392     dst    : S4(write);
 6393     src    : S3(read);
 6394     D0     : S0;        // big decoder only
 6395     ALU    : S3;        // any alu
 6396 %}
 6397 
 6398 // Integer ALU reg-mem operation: mem is read in S3 via a memory unit, the
      // ALU is used in S4 and dst is written in S5. Requires the big decoder (D0).
 6399 pipe_class ialu_reg_mem(rRegI dst, memory mem)
 6400 %{
 6401     single_instruction;
 6402     dst    : S5(write);
 6403     mem    : S3(read);
 6404     D0     : S0;        // big decoder only
 6405     ALU    : S4;        // any alu
 6406     MEM    : S3;        // any mem
 6407 %}
 6408 
 6409 // Integer mem operation (prefetch): only a memory operand, read in S3.
      // Uses the big decoder and a memory unit; no ALU is reserved.
 6410 pipe_class ialu_mem(memory mem)
 6411 %{
 6412     single_instruction;
 6413     mem    : S3(read);
 6414     D0     : S0;        // big decoder only
 6415     MEM    : S3;        // any mem
 6416 %}
 6417 
 6418 // Integer Store to Memory: the address operand (mem) is read in S3 and the
      // stored register (src) is read in S5.
 6419 pipe_class ialu_mem_reg(memory mem, rRegI src)
 6420 %{
 6421     single_instruction;
 6422     mem    : S3(read);
 6423     src    : S5(read);
 6424     D0     : S0;        // big decoder only
 6425     ALU    : S4;        // any alu
 6426     MEM    : S3;        // any mem
 6427 %}
 6428 
 6429 // // Long Store to Memory
 6430 // pipe_class ialu_mem_long_reg(memory mem, rRegL src)
 6431 // %{
 6432 //     instruction_count(2);
 6433 //     mem    : S3(read);
 6434 //     src    : S5(read);
 6435 //     D0     : S0(2);          // big decoder only; twice
 6436 //     ALU    : S4(2);     // any 2 alus
 6437 //     MEM    : S3(2);  // Both mems
 6438 // %}
 6439 
 6440 // Integer Store to Memory with no register source (presumably an immediate,
      // per the _imm suffix): only the address operand is tracked, read in S3.
 6441 pipe_class ialu_mem_imm(memory mem)
 6442 %{
 6443     single_instruction;
 6444     mem    : S3(read);
 6445     D0     : S0;        // big decoder only
 6446     ALU    : S4;        // any alu
 6447     MEM    : S3;        // any mem
 6448 %}
 6449 
 6450 // Integer ALU0 reg-reg operation: like ialu_reg_reg, but restricted to the
      // big decoder (D0) and to ALU0.
 6451 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src)
 6452 %{
 6453     single_instruction;
 6454     dst    : S4(write);
 6455     src    : S3(read);
 6456     D0     : S0;        // Big decoder only
 6457     ALU0   : S3;        // only alu0
 6458 %}
 6459 
 6460 // Integer ALU0 reg-mem operation: like ialu_reg_mem, but the ALU stage (S4)
      // is restricted to ALU0.
 6461 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem)
 6462 %{
 6463     single_instruction;
 6464     dst    : S5(write);
 6465     mem    : S3(read);
 6466     D0     : S0;        // big decoder only
 6467     ALU0   : S4;        // ALU0 only
 6468     MEM    : S3;        // any mem
 6469 %}
 6470 
 6471 // Integer ALU reg-reg operation producing flags: src1 and src2 are read in
      // S3 and the flags register (cr) is written in S4.
 6472 pipe_class ialu_cr_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2)
 6473 %{
 6474     single_instruction;
 6475     cr     : S4(write);
 6476     src1   : S3(read);
 6477     src2   : S3(read);
 6478     DECODE : S0;        // any decoder
 6479     ALU    : S3;        // any alu
 6480 %}
 6481 
 6482 // Integer ALU reg-imm operation producing flags: src1 is read in S3 and the
      // flags register (cr) is written in S4. The immediate is not a tracked operand.
 6483 pipe_class ialu_cr_reg_imm(rFlagsReg cr, rRegI src1)
 6484 %{
 6485     single_instruction;
 6486     cr     : S4(write);
 6487     src1   : S3(read);
 6488     DECODE : S0;        // any decoder
 6489     ALU    : S3;        // any alu
 6490 %}
 6491 
 6492 // Integer ALU reg-mem operation producing flags: src1 and the memory operand
      // src2 are read in S3, the ALU is used in S4 and cr is written in S4.
 6493 pipe_class ialu_cr_reg_mem(rFlagsReg cr, rRegI src1, memory src2)
 6494 %{
 6495     single_instruction;
 6496     cr     : S4(write);
 6497     src1   : S3(read);
 6498     src2   : S3(read);
 6499     D0     : S0;        // big decoder only
 6500     ALU    : S4;        // any alu
 6501     MEM    : S3;        // any mem
 6502 %}
 6503 
 6504 // Four-instruction compare/select sequence over three int registers
      // (presumably the CmpLT-mask idiom, per the name -- confirm against users).
      // All three operands are only read; no ALU resource is reserved.
 6505 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y)
 6506 %{
 6507     instruction_count(4);
 6508     y      : S4(read);
 6509     q      : S3(read);
 6510     p      : S3(read);
 6511     DECODE : S0(4);     // any decoder
 6512 %}
 6513 
 6514 // Conditional move reg-reg: src and the flags (cr) are read in S3, dst is
      // written in S4. Only a decoder is reserved.
 6515 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, rFlagsReg cr)
 6516 %{
 6517     single_instruction;
 6518     dst    : S4(write);
 6519     src    : S3(read);
 6520     cr     : S3(read);
 6521     DECODE : S0;        // any decoder
 6522 %}
 6523 
 6524 // Conditional move reg-mem: the memory source and the flags (cr) are read in
      // S3 using a memory unit, dst is written in S4.
 6525 pipe_class pipe_cmov_mem( rFlagsReg cr, rRegI dst, memory src)
 6526 %{
 6527     single_instruction;
 6528     dst    : S4(write);
 6529     src    : S3(read);
 6530     cr     : S3(read);
 6531     DECODE : S0;        // any decoder
 6532     MEM    : S3;        // any mem
 6533 %}
 6534 
 6535 // Conditional move reg-reg long: same operand timing as pipe_cmov_reg but
      // on rRegL operands. Declared single_instruction while reserving DECODE(2).
 6536 pipe_class pipe_cmov_reg_long( rFlagsReg cr, rRegL dst, rRegL src)
 6537 %{
 6538     single_instruction;
 6539     dst    : S4(write);
 6540     src    : S3(read);
 6541     cr     : S3(read);
 6542     DECODE : S0(2);     // any 2 decoders
 6543 %}
 6544 
 6545 // Float reg operation (single register operand): counted as two
      // instructions; dst is only declared as read (S3). Uses the FPU in S3.
 6546 pipe_class fpu_reg(regD dst)
 6547 %{
 6548     instruction_count(2);
 6549     dst    : S3(read);
 6550     DECODE : S0(2);     // any 2 decoders
 6551     FPU    : S3;
 6552 %}
 6553 
 6554 // Float reg-reg operation: counted as two instructions; src is read in S3,
      // dst is written in S4. Uses the FPU in S3.
 6555 pipe_class fpu_reg_reg(regD dst, regD src)
 6556 %{
 6557     instruction_count(2);
 6558     dst    : S4(write);
 6559     src    : S3(read);
 6560     DECODE : S0(2);     // any 2 decoders
 6561     FPU    : S3;
 6562 %}
 6563 
 6564 // Float reg-reg-reg operation: counted as three instructions; src1 and src2
      // are read in S3, dst is written in S4. Reserves FPU(2) in S3.
 6565 pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2)
 6566 %{
 6567     instruction_count(3);
 6568     dst    : S4(write);
 6569     src1   : S3(read);
 6570     src2   : S3(read);
 6571     DECODE : S0(3);     // any 3 decoders
 6572     FPU    : S3(2);
 6573 %}
 6574 
 6575 // Float operation with three register sources: counted as four instructions;
      // src1..src3 are read in S3, dst is written in S4. Reserves FPU(2) in S3.
 6576 pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3)
 6577 %{
 6578     instruction_count(4);
 6579     dst    : S4(write);
 6580     src1   : S3(read);
 6581     src2   : S3(read);
 6582     src3   : S3(read);
 6583     DECODE : S0(4);     // decode count of 4 (only 3 decoders are declared)
 6584     FPU    : S3(2);
 6585 %}
 6586 
 6587 // Float operation with one memory source and two register sources: counted
      // as four instructions; all sources are read in S3, dst is written in S4.
      // D0 is reserved in S0 and DECODE(3) in S1.
 6588 pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3)
 6589 %{
 6590     instruction_count(4);
 6591     dst    : S4(write);
 6592     src1   : S3(read);
 6593     src2   : S3(read);
 6594     src3   : S3(read);
 6595     DECODE : S1(3);     // any 3 decoders
 6596     D0     : S0;        // Big decoder only
 6597     FPU    : S3(2);
 6598     MEM    : S3;        // any mem
 6599 %}
 6600 
 6601 // Float reg-mem operation: counted as two instructions; mem is read in S3,
      // the FPU is used in S4 and dst is written in S5.
 6602 pipe_class fpu_reg_mem(regD dst, memory mem)
 6603 %{
 6604     instruction_count(2);
 6605     dst    : S5(write);
 6606     mem    : S3(read);
 6607     D0     : S0;        // big decoder only
 6608     DECODE : S1;        // any decoder for FPU POP
 6609     FPU    : S4;
 6610     MEM    : S3;        // any mem
 6611 %}
 6612 
 6613 // Float reg-reg-mem operation: counted as three instructions; src1 and mem
      // are read in S3, the FPU is used in S4 and dst is written in S5.
 6614 pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem)
 6615 %{
 6616     instruction_count(3);
 6617     dst    : S5(write);
 6618     src1   : S3(read);
 6619     mem    : S3(read);
 6620     D0     : S0;        // big decoder only
 6621     DECODE : S1(2);     // any decoder for FPU POP
 6622     FPU    : S4;
 6623     MEM    : S3;        // any mem
 6624 %}
 6625 
 6626 // Float mem-reg operation: counted as two instructions; the address operand
      // (mem) is read in S3 and src is read in S5. No operand is written.
 6627 pipe_class fpu_mem_reg(memory mem, regD src)
 6628 %{
 6629     instruction_count(2);
 6630     src    : S5(read);
 6631     mem    : S3(read);
 6632     DECODE : S0;        // any decoder for FPU PUSH
 6633     D0     : S1;        // big decoder only
 6634     FPU    : S4;
 6635     MEM    : S3;        // any mem
 6636 %}
 6637 
 6638 pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2)
 6639 %{
          // Float mem-reg-reg operation: counted as three instructions; src1,
          // src2 and mem are all read in S3. No operand is written.
 6640     instruction_count(3);
 6641     src1   : S3(read);
 6642     src2   : S3(read);
 6643     mem    : S3(read);
 6644     DECODE : S0(2);     // any decoder for FPU PUSH
 6645     D0     : S1;        // big decoder only
 6646     FPU    : S4;
 6647     MEM    : S3;        // any mem
 6648 %}
 6649 
 6650 pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2)
 6651 %{
          // Float operation with a memory destination address, a register source
          // and a memory source: counted as three instructions; src1 and src2 are
          // read in S3, mem in S4. Reserves D0(2) and MEM(2).
 6652     instruction_count(3);
 6653     src1   : S3(read);
 6654     src2   : S3(read);
 6655     mem    : S4(read);
 6656     DECODE : S0;        // any decoder for FPU PUSH
 6657     D0     : S0(2);     // big decoder only
 6658     FPU    : S4;
 6659     MEM    : S3(2);     // any mem
 6660 %}
 6661 
 6662 pipe_class fpu_mem_mem(memory dst, memory src1)
 6663 %{
          // Float mem-mem operation: counted as two instructions; src1 is read in
          // S3 and the dst address operand in S4. No FPU resource is reserved.
 6664     instruction_count(2);
 6665     src1   : S3(read);
 6666     dst    : S4(read);
 6667     D0     : S0(2);     // big decoder only
 6668     MEM    : S3(2);     // any mem
 6669 %}
 6670 
 6671 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2)
 6672 %{
          // Float mem-mem-mem operation: counted as three instructions; src1 and
          // src2 are read in S3 and the dst address operand in S4.
 6673     instruction_count(3);
 6674     src1   : S3(read);
 6675     src2   : S3(read);
 6676     dst    : S4(read);
 6677     D0     : S0(3);     // big decoder only
 6678     FPU    : S4;
 6679     MEM    : S3(3);     // any mem
 6680 %}
 6681 
 6682 pipe_class fpu_mem_reg_con(memory mem, regD src1)
 6683 %{
          // Float operation with a memory address operand and one register source
          // (plus, presumably, a constant per the _con suffix): counted as three
          // instructions; src1 and mem are both read in S4.
 6684     instruction_count(3);
 6685     src1   : S4(read);
 6686     mem    : S4(read);
 6687     DECODE : S0;        // any decoder for FPU PUSH
 6688     D0     : S0(2);     // big decoder only
 6689     FPU    : S4;
 6690     MEM    : S3(2);     // any mem
 6691 %}
 6692 
 6693 // Float load constant: counted as two instructions; dst is written in S5.
      // A memory unit is reserved in S3 for the load and the FPU in S4.
 6694 pipe_class fpu_reg_con(regD dst)
 6695 %{
 6696     instruction_count(2);
 6697     dst    : S5(write);
 6698     D0     : S0;        // big decoder only for the load
 6699     DECODE : S1;        // any decoder for FPU POP
 6700     FPU    : S4;
 6701     MEM    : S3;        // any mem
 6702 %}
 6703 
 6704 // Float reg-reg operation with a loaded constant: counted as three
      // instructions; src is read in S3 and dst is written in S5.
 6705 pipe_class fpu_reg_reg_con(regD dst, regD src)
 6706 %{
 6707     instruction_count(3);
 6708     dst    : S5(write);
 6709     src    : S3(read);
 6710     D0     : S0;        // big decoder only for the load
 6711     DECODE : S1(2);     // any decoder for FPU POP
 6712     FPU    : S4;
 6713     MEM    : S3;        // any mem
 6714 %}
 6715 
 6716 // Unconditional branch: a single instruction that uses the branch unit (BR)
      // in S3. The label operand has no read/write stage.
 6717 pipe_class pipe_jmp(label labl)
 6718 %{
 6719     single_instruction;
 6720     BR   : S3;
 6721 %}
 6722 
 6723 // Conditional branch: a single instruction that reads the flags (cr) in S1
      // and uses the branch unit (BR) in S3.
 6724 pipe_class pipe_jcc(cmpOp cmp, rFlagsReg cr, label labl)
 6725 %{
 6726     single_instruction;
 6727     cr    : S1(read);
 6728     BR    : S3;
 6729 %}
 6730 
 6731 // Allocation idiom (compare-and-exchange, per the name): one serializing
      // instruction with a fixed latency of 6. heap_ptr is read in S3 and dst is
      // written in S5; decoders, a memory unit, ALUs and the branch unit are reserved.
 6732 pipe_class pipe_cmpxchg(rRegP dst, rRegP heap_ptr)
 6733 %{
 6734     instruction_count(1); force_serialization;
 6735     fixed_latency(6);
 6736     heap_ptr : S3(read);
 6737     DECODE   : S0(3);
 6738     D0       : S2;
 6739     MEM      : S3;
 6740     ALU      : S3(2);
 6741     dst      : S5(write);
 6742     BR       : S5;
 6743 %}
 6744 
 6745 // Generic big/slow expanded idiom: modelled as 10 instructions spanning
      // multiple bundles, serializing, with a fixed latency of 100. Takes no
      // operands, so no per-operand timing is described.
 6746 pipe_class pipe_slow()
 6747 %{
 6748     instruction_count(10); multiple_bundles; force_serialization;
 6749     fixed_latency(100);
 6750     D0  : S0(2);
 6751     MEM : S3(2);
 6752 %}
 6753 
 6754 // The real do-nothing guy: zero instructions, no operands and no resources.
      // Used below as the pipe class of MachNop.
 6755 pipe_class empty()
 6756 %{
 6757     instruction_count(0);
 6758 %}
 6759 
 6760 // Define the class for the Nop node: MachNop uses the "empty" pipe class
      // declared above.
 6761 define
 6762 %{
 6763    MachNop = empty;
 6764 %}
 6765 
 6766 %}
 6767 
 6768 //----------INSTRUCTIONS-------------------------------------------------------
 6769 //
 6770 // match      -- States which machine-independent subtree may be replaced
 6771 //               by this instruction.
 6772 // ins_cost   -- The estimated cost of this instruction is used by instruction
 6773 //               selection to identify a minimum cost tree of machine
 6774 //               instructions that matches a tree of machine-independent
 6775 //               instructions.
 6776 // format     -- A string providing the disassembly for this instruction.
 6777 //               The value of an instruction's operand may be inserted
 6778 //               by referring to it with a '$' prefix.
 6779 // opcode     -- Three instruction opcodes may be provided.  These are referred
 6780 //               to within an encode class as $primary, $secondary, and $tertiary
 6781 //               respectively.  The primary opcode is commonly used to
 6782 //               indicate the type of machine instruction, while secondary
 6783 //               and tertiary are often used for prefix options or addressing
 6784 //               modes.
 6785 // ins_encode -- A list of encode classes with parameters. The encode class
 6786 //               name must have been defined in an 'enc_class' specification
 6787 //               in the encode section of the architecture description.
 6788 
 6789 // ============================================================================
 6790 
 6791 instruct ShouldNotReachHere() %{
        // Matches the ideal Halt node. When is_reachable() holds, emits a stop()
        // with the message obtained from code_string(_halt_reason); otherwise no
        // code is emitted.
 6792   match(Halt);
 6793   format %{ "stop\t# ShouldNotReachHere" %}
 6794   ins_encode %{
 6795     if (is_reachable()) {
 6796       const char* str = __ code_string(_halt_reason);
 6797       __ stop(str);
 6798     }
 6799   %}
 6800   ins_pipe(pipe_slow);
 6801 %}
 6802 
 6803 // ============================================================================
 6804 
 6805 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
 6806 // Dummy float move regF -> vlRegF. Never encoded: ins_encode is
      // ShouldNotReachHere(), so the node must be gone before code emission.
 6807 instruct MoveF2VL(vlRegF dst, regF src) %{
 6808   match(Set dst src);
 6809   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
 6810   ins_encode %{
 6811     ShouldNotReachHere();
 6812   %}
 6813   ins_pipe( fpu_reg_reg );
 6814 %}
 6815 
 6816 // Dummy float move regF -> legRegF. Never encoded: ins_encode is
      // ShouldNotReachHere(), so the node must be gone before code emission.
 6817 instruct MoveF2LEG(legRegF dst, regF src) %{
 6818   match(Set dst src);
 6819   format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
 6820   ins_encode %{
 6821     ShouldNotReachHere();
 6822   %}
 6823   ins_pipe( fpu_reg_reg );
 6824 %}
 6825 
 6826 // Dummy float move vlRegF -> regF. Never encoded: ins_encode is
      // ShouldNotReachHere(), so the node must be gone before code emission.
 6827 instruct MoveVL2F(regF dst, vlRegF src) %{
 6828   match(Set dst src);
 6829   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
 6830   ins_encode %{
 6831     ShouldNotReachHere();
 6832   %}
 6833   ins_pipe( fpu_reg_reg );
 6834 %}
 6835 
 6836 // Dummy float move legRegF -> regF. Never encoded: ins_encode is
      // ShouldNotReachHere(), so the node must be gone before code emission.
 6837 instruct MoveLEG2F(regF dst, legRegF src) %{
 6838   match(Set dst src);
 6839   format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
 6840   ins_encode %{
 6841     ShouldNotReachHere();
 6842   %}
 6843   ins_pipe( fpu_reg_reg );
 6844 %}
 6845 
 6846 // Dummy double move regD -> vlRegD. Never encoded: ins_encode is
      // ShouldNotReachHere(), so the node must be gone before code emission.
 6847 instruct MoveD2VL(vlRegD dst, regD src) %{
 6848   match(Set dst src);
 6849   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
 6850   ins_encode %{
 6851     ShouldNotReachHere();
 6852   %}
 6853   ins_pipe( fpu_reg_reg );
 6854 %}
 6855 
 6856 // Dummy double move regD -> legRegD. Never encoded: ins_encode is
      // ShouldNotReachHere(), so the node must be gone before code emission.
 6857 instruct MoveD2LEG(legRegD dst, regD src) %{
 6858   match(Set dst src);
 6859   format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
 6860   ins_encode %{
 6861     ShouldNotReachHere();
 6862   %}
 6863   ins_pipe( fpu_reg_reg );
 6864 %}
 6865 
 6866 // Dummy double move vlRegD -> regD. Never encoded: ins_encode is
      // ShouldNotReachHere(), so the node must be gone before code emission.
 6867 instruct MoveVL2D(regD dst, vlRegD src) %{
 6868   match(Set dst src);
 6869   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
 6870   ins_encode %{
 6871     ShouldNotReachHere();
 6872   %}
 6873   ins_pipe( fpu_reg_reg );
 6874 %}
 6875 
 6876 // Dummy double move legRegD -> regD. Never encoded: ins_encode is
      // ShouldNotReachHere(), so the node must be gone before code emission.
 6877 instruct MoveLEG2D(regD dst, legRegD src) %{
 6878   match(Set dst src);
 6879   format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
 6880   ins_encode %{
 6881     ShouldNotReachHere();
 6882   %}
 6883   ins_pipe( fpu_reg_reg );
 6884 %}
 6885 
 6886 //----------Load/Store/Move Instructions---------------------------------------
 6887 //----------Load Instructions--------------------------------------------------
 6888 
 6889 // Load Byte (8 bit signed): matches (LoadB mem) and emits a movsbl from
      // memory into an int register.
 6890 instruct loadB(rRegI dst, memory mem)
 6891 %{
 6892   match(Set dst (LoadB mem));
 6893 
 6894   ins_cost(125);
 6895   format %{ "movsbl  $dst, $mem\t# byte" %}
 6896 
 6897   ins_encode %{
 6898     __ movsbl($dst$$Register, $mem$$Address);
 6899   %}
 6900 
 6901   ins_pipe(ialu_reg_mem);
 6902 %}
 6903 
 6904 // Load Byte (8 bit signed) into Long Register: folds ConvI2L(LoadB mem) into
      // a single movsbq from memory.
 6905 instruct loadB2L(rRegL dst, memory mem)
 6906 %{
 6907   match(Set dst (ConvI2L (LoadB mem)));
 6908 
 6909   ins_cost(125);
 6910   format %{ "movsbq  $dst, $mem\t# byte -> long" %}
 6911 
 6912   ins_encode %{
 6913     __ movsbq($dst$$Register, $mem$$Address);
 6914   %}
 6915 
 6916   ins_pipe(ialu_reg_mem);
 6917 %}
 6918 
 6919 // Load Unsigned Byte (8 bit UNsigned): matches (LoadUB mem) and emits a
      // movzbl from memory into an int register.
 6920 instruct loadUB(rRegI dst, memory mem)
 6921 %{
 6922   match(Set dst (LoadUB mem));
 6923 
 6924   ins_cost(125);
 6925   format %{ "movzbl  $dst, $mem\t# ubyte" %}
 6926 
 6927   ins_encode %{
 6928     __ movzbl($dst$$Register, $mem$$Address);
 6929   %}
 6930 
 6931   ins_pipe(ialu_reg_mem);
 6932 %}
 6933 
 6934 // Load Unsigned Byte (8 bit UNsigned) into Long Register: folds
      // ConvI2L(LoadUB mem) into a single movzbq from memory.
 6935 instruct loadUB2L(rRegL dst, memory mem)
 6936 %{
 6937   match(Set dst (ConvI2L (LoadUB mem)));
 6938 
 6939   ins_cost(125);
 6940   format %{ "movzbq  $dst, $mem\t# ubyte -> long" %}
 6941 
 6942   ins_encode %{
 6943     __ movzbq($dst$$Register, $mem$$Address);
 6944   %}
 6945 
 6946   ins_pipe(ialu_reg_mem);
 6947 %}
 6948 
 6949 // Load Unsigned Byte (8 bit UNsigned) with 32-bit mask into Long Register:
      // folds ConvI2L(AndI(LoadUB mem) mask) into movzbq + andl. Only the low
      // 8 bits of the mask are applied (mask & right_n_bits(8)). The andl
      // clobbers the flags, hence KILL cr. No ins_cost is given.
 6950 instruct loadUB2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
 6951   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
 6952   effect(KILL cr);
 6953 
 6954   format %{ "movzbq  $dst, $mem\t# ubyte & 32-bit mask -> long\n\t"
 6955             "andl    $dst, right_n_bits($mask, 8)" %}
 6956   ins_encode %{
 6957     Register Rdst = $dst$$Register;
 6958     __ movzbq(Rdst, $mem$$Address);
 6959     __ andl(Rdst, $mask$$constant & right_n_bits(8));
 6960   %}
 6961   ins_pipe(ialu_reg_mem);
 6962 %}
 6963 
 6964 // Load Short (16 bit signed): matches (LoadS mem) and emits a movswl from
      // memory into an int register.
 6965 instruct loadS(rRegI dst, memory mem)
 6966 %{
 6967   match(Set dst (LoadS mem));
 6968 
 6969   ins_cost(125);
 6970   format %{ "movswl $dst, $mem\t# short" %}
 6971 
 6972   ins_encode %{
 6973     __ movswl($dst$$Register, $mem$$Address);
 6974   %}
 6975 
 6976   ins_pipe(ialu_reg_mem);
 6977 %}
 6978 
 6979 // Load Short (16 bit signed) to Byte (8 bit signed): matches the
      // shift-left / arithmetic-shift-right pair (both by the immI_24 operand)
      // applied to LoadS and replaces it with one movsbl from the same address.
 6980 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 6981   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
 6982 
 6983   ins_cost(125);
 6984   format %{ "movsbl $dst, $mem\t# short -> byte" %}
 6985   ins_encode %{
 6986     __ movsbl($dst$$Register, $mem$$Address);
 6987   %}
 6988   ins_pipe(ialu_reg_mem);
 6989 %}
 6990 
 6991 // Load Short (16 bit signed) into Long Register: folds ConvI2L(LoadS mem)
      // into a single movswq from memory.
 6992 instruct loadS2L(rRegL dst, memory mem)
 6993 %{
 6994   match(Set dst (ConvI2L (LoadS mem)));
 6995 
 6996   ins_cost(125);
 6997   format %{ "movswq $dst, $mem\t# short -> long" %}
 6998 
 6999   ins_encode %{
 7000     __ movswq($dst$$Register, $mem$$Address);
 7001   %}
 7002 
 7003   ins_pipe(ialu_reg_mem);
 7004 %}
 7005 
 7006 // Load Unsigned Short/Char (16 bit UNsigned): matches (LoadUS mem) and emits
      // a movzwl from memory into an int register.
 7007 instruct loadUS(rRegI dst, memory mem)
 7008 %{
 7009   match(Set dst (LoadUS mem));
 7010 
 7011   ins_cost(125);
 7012   format %{ "movzwl  $dst, $mem\t# ushort/char" %}
 7013 
 7014   ins_encode %{
 7015     __ movzwl($dst$$Register, $mem$$Address);
 7016   %}
 7017 
 7018   ins_pipe(ialu_reg_mem);
 7019 %}
 7020 
 7021 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed): matches
      // the shift-left / arithmetic-shift-right pair (both by the immI_24
      // operand) applied to LoadUS and replaces it with one movsbl from memory.
 7022 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 7023   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
 7024 
 7025   ins_cost(125);
 7026   format %{ "movsbl $dst, $mem\t# ushort -> byte" %}
 7027   ins_encode %{
 7028     __ movsbl($dst$$Register, $mem$$Address);
 7029   %}
 7030   ins_pipe(ialu_reg_mem);
 7031 %}
 7032 
 7033 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register: folds
      // ConvI2L(LoadUS mem) into a single movzwq from memory.
 7034 instruct loadUS2L(rRegL dst, memory mem)
 7035 %{
 7036   match(Set dst (ConvI2L (LoadUS mem)));
 7037 
 7038   ins_cost(125);
 7039   format %{ "movzwq  $dst, $mem\t# ushort/char -> long" %}
 7040 
 7041   ins_encode %{
 7042     __ movzwq($dst$$Register, $mem$$Address);
 7043   %}
 7044 
 7045   ins_pipe(ialu_reg_mem);
 7046 %}
 7047 
 7048 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register:
      // folds ConvI2L(AndI(LoadUS mem) 0xFF) into a single movzbq, i.e. a
      // zero-extending byte load replaces the load-and-mask. No ins_cost is given.
 7049 instruct loadUS2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
 7050   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
 7051 
 7052   format %{ "movzbq  $dst, $mem\t# ushort/char & 0xFF -> long" %}
 7053   ins_encode %{
 7054     __ movzbq($dst$$Register, $mem$$Address);
 7055   %}
 7056   ins_pipe(ialu_reg_mem);
 7057 %}
 7058 
 7059 // Load Unsigned Short/Char (16 bit UNsigned) with 32-bit mask into Long Register:
      // folds ConvI2L(AndI(LoadUS mem) mask) into movzwq + andl. Only the low
      // 16 bits of the mask are applied (mask & right_n_bits(16)). The andl
      // clobbers the flags, hence KILL cr. No ins_cost is given.
 7060 instruct loadUS2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
 7061   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
 7062   effect(KILL cr);
 7063 
 7064   format %{ "movzwq  $dst, $mem\t# ushort/char & 32-bit mask -> long\n\t"
 7065             "andl    $dst, right_n_bits($mask, 16)" %}
 7066   ins_encode %{
 7067     Register Rdst = $dst$$Register;
 7068     __ movzwq(Rdst, $mem$$Address);
 7069     __ andl(Rdst, $mask$$constant & right_n_bits(16));
 7070   %}
 7071   ins_pipe(ialu_reg_mem);
 7072 %}
 7073 
 7074 // Load Integer: matches (LoadI mem) and emits a movl from memory into an
      // int register.
 7075 instruct loadI(rRegI dst, memory mem)
 7076 %{
 7077   match(Set dst (LoadI mem));
 7078 
 7079   ins_cost(125);
 7080   format %{ "movl    $dst, $mem\t# int" %}
 7081 
 7082   ins_encode %{
 7083     __ movl($dst$$Register, $mem$$Address);
 7084   %}
 7085 
 7086   ins_pipe(ialu_reg_mem);
 7087 %}
 7088 
 7089 // Load Integer (32 bit signed) to Byte (8 bit signed): matches the
      // shift-left / arithmetic-shift-right pair (both by the immI_24 operand)
      // applied to LoadI and replaces it with one movsbl from the same address.
 7090 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 7091   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
 7092 
 7093   ins_cost(125);
 7094   format %{ "movsbl  $dst, $mem\t# int -> byte" %}
 7095   ins_encode %{
 7096     __ movsbl($dst$$Register, $mem$$Address);
 7097   %}
 7098   ins_pipe(ialu_reg_mem);
 7099 %}
 7100 
 7101 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned): folds
      // AndI(LoadI mem) with the immI_255 mask into a single movzbl from memory.
 7102 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
 7103   match(Set dst (AndI (LoadI mem) mask));
 7104 
 7105   ins_cost(125);
 7106   format %{ "movzbl  $dst, $mem\t# int -> ubyte" %}
 7107   ins_encode %{
 7108     __ movzbl($dst$$Register, $mem$$Address);
 7109   %}
 7110   ins_pipe(ialu_reg_mem);
 7111 %}
 7112 
 7113 // Load Integer (32 bit signed) to Short (16 bit signed): matches the
      // shift-left / arithmetic-shift-right pair (both by the immI_16 operand)
      // applied to LoadI and replaces it with one movswl from the same address.
 7114 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
 7115   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
 7116 
 7117   ins_cost(125);
 7118   format %{ "movswl  $dst, $mem\t# int -> short" %}
 7119   ins_encode %{
 7120     __ movswl($dst$$Register, $mem$$Address);
 7121   %}
 7122   ins_pipe(ialu_reg_mem);
 7123 %}
 7124 
 7125 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned): folds
      // AndI(LoadI mem) with the immI_65535 mask into a single movzwl from memory.
 7126 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
 7127   match(Set dst (AndI (LoadI mem) mask));
 7128 
 7129   ins_cost(125);
 7130   format %{ "movzwl  $dst, $mem\t# int -> ushort/char" %}
 7131   ins_encode %{
 7132     __ movzwl($dst$$Register, $mem$$Address);
 7133   %}
 7134   ins_pipe(ialu_reg_mem);
 7135 %}
 7136 
 7137 // Load Integer into Long Register: folds ConvI2L(LoadI mem) into a single
      // movslq from memory.
 7138 instruct loadI2L(rRegL dst, memory mem)
 7139 %{
 7140   match(Set dst (ConvI2L (LoadI mem)));
 7141 
 7142   ins_cost(125);
 7143   format %{ "movslq  $dst, $mem\t# int -> long" %}
 7144 
 7145   ins_encode %{
 7146     __ movslq($dst$$Register, $mem$$Address);
 7147   %}
 7148 
 7149   ins_pipe(ialu_reg_mem);
 7150 %}
 7151 
 7152 // Load Integer with mask 0xFF into Long Register: folds
      // ConvI2L(AndI(LoadI mem) 0xFF) into a single movzbq. No ins_cost is given.
 7153 instruct loadI2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
 7154   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 7155 
 7156   format %{ "movzbq  $dst, $mem\t# int & 0xFF -> long" %}
 7157   ins_encode %{
 7158     __ movzbq($dst$$Register, $mem$$Address);
 7159   %}
 7160   ins_pipe(ialu_reg_mem);
 7161 %}
 7162 
 7163 // Load Integer with mask 0xFFFF into Long Register: folds
      // ConvI2L(AndI(LoadI mem) 0xFFFF) into a single movzwq. No ins_cost is given.
 7164 instruct loadI2L_immI_65535(rRegL dst, memory mem, immI_65535 mask) %{
 7165   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 7166 
 7167   format %{ "movzwq  $dst, $mem\t# int & 0xFFFF -> long" %}
 7168   ins_encode %{
 7169     __ movzwq($dst$$Register, $mem$$Address);
 7170   %}
 7171   ins_pipe(ialu_reg_mem);
 7172 %}
 7173 
 7174 // Load Integer with a 31-bit mask into Long Register: folds
      // ConvI2L(AndI(LoadI mem) mask) into movl + andl with the full mask. The
      // andl clobbers the flags, hence KILL cr. No ins_cost is given.
 7175 instruct loadI2L_immU31(rRegL dst, memory mem, immU31 mask, rFlagsReg cr) %{
 7176   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 7177   effect(KILL cr);
 7178 
 7179   format %{ "movl    $dst, $mem\t# int & 31-bit mask -> long\n\t"
 7180             "andl    $dst, $mask" %}
 7181   ins_encode %{
 7182     Register Rdst = $dst$$Register;
 7183     __ movl(Rdst, $mem$$Address);
 7184     __ andl(Rdst, $mask$$constant);
 7185   %}
 7186   ins_pipe(ialu_reg_mem);
 7187 %}
 7188 
 7189 // Load Unsigned Integer into Long Register: folds
      // AndL(ConvI2L(LoadI mem), mask) with the immL_32bits mask into a single
      // movl. A 32-bit register write on x86-64 zero-extends into the upper half,
      // so no separate masking instruction is emitted.
 7190 instruct loadUI2L(rRegL dst, memory mem, immL_32bits mask)
 7191 %{
 7192   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
 7193 
 7194   ins_cost(125);
 7195   format %{ "movl    $dst, $mem\t# uint -> long" %}
 7196 
 7197   ins_encode %{
 7198     __ movl($dst$$Register, $mem$$Address);
 7199   %}
 7200 
 7201   ins_pipe(ialu_reg_mem);
 7202 %}
 7203 
 7204 // Load Long: matches (LoadL mem) and emits a movq from memory into a long
      // register.
 7205 instruct loadL(rRegL dst, memory mem)
 7206 %{
 7207   match(Set dst (LoadL mem));
 7208 
 7209   ins_cost(125);
 7210   format %{ "movq    $dst, $mem\t# long" %}
 7211 
 7212   ins_encode %{
 7213     __ movq($dst$$Register, $mem$$Address);
 7214   %}
 7215 
 7216   ins_pipe(ialu_reg_mem); // XXX
 7217 %}
 7218 
 7219 // Load Range: matches (LoadRange mem) and emits a movl from memory into an
      // int register.
 7220 instruct loadRange(rRegI dst, memory mem)
 7221 %{
 7222   match(Set dst (LoadRange mem));
 7223 
 7224   ins_cost(125); // XXX
 7225   format %{ "movl    $dst, $mem\t# range" %}
 7226   ins_encode %{
 7227     __ movl($dst$$Register, $mem$$Address);
 7228   %}
 7229   ins_pipe(ialu_reg_mem);
 7230 %}
 7231 
 7232 // Load Pointer: matches (LoadP mem) and emits a movq. Only selected when the
      // load node carries no barrier data (barrier_data() == 0).
 7233 instruct loadP(rRegP dst, memory mem)
 7234 %{
 7235   match(Set dst (LoadP mem));
 7236   predicate(n->as_Load()->barrier_data() == 0);
 7237 
 7238   ins_cost(125); // XXX
 7239   format %{ "movq    $dst, $mem\t# ptr" %}
 7240   ins_encode %{
 7241     __ movq($dst$$Register, $mem$$Address);
 7242   %}
 7243   ins_pipe(ialu_reg_mem); // XXX
 7244 %}
 7245 
 7246 // Load Compressed Pointer: matches (LoadN mem) and emits a movl. Only
      // selected when the load node carries no barrier data (barrier_data() == 0).
 7247 instruct loadN(rRegN dst, memory mem)
 7248 %{
 7249    predicate(n->as_Load()->barrier_data() == 0);
 7250    match(Set dst (LoadN mem));
 7251 
 7252    ins_cost(125); // XXX
 7253    format %{ "movl    $dst, $mem\t# compressed ptr" %}
 7254    ins_encode %{
 7255      __ movl($dst$$Register, $mem$$Address);
 7256    %}
 7257    ins_pipe(ialu_reg_mem); // XXX
 7258 %}
 7259 
 7260 
 7261 // Load Klass Pointer: matches (LoadKlass mem) and emits a movq from memory
      // into a pointer register.
 7262 instruct loadKlass(rRegP dst, memory mem)
 7263 %{
 7264   match(Set dst (LoadKlass mem));
 7265 
 7266   ins_cost(125); // XXX
 7267   format %{ "movq    $dst, $mem\t# class" %}
 7268   ins_encode %{
 7269     __ movq($dst$$Register, $mem$$Address);
 7270   %}
 7271   ins_pipe(ialu_reg_mem); // XXX
 7272 %}
 7273 
 7274 // Load narrow Klass Pointer: matches (LoadNKlass mem) and emits a plain movl.
      // Used only when UseCompactObjectHeaders is off; loadNKlassCompactHeaders
      // handles the other case.
 7275 instruct loadNKlass(rRegN dst, memory mem)
 7276 %{
 7277   predicate(!UseCompactObjectHeaders);
 7278   match(Set dst (LoadNKlass mem));
 7279 
 7280   ins_cost(125); // XXX
 7281   format %{ "movl    $dst, $mem\t# compressed klass ptr" %}
 7282   ins_encode %{
 7283     __ movl($dst$$Register, $mem$$Address);
 7284   %}
 7285   ins_pipe(ialu_reg_mem); // XXX
 7286 %}
 7287 
 7288 instruct loadNKlassCompactHeaders(rRegN dst, memory mem, rFlagsReg cr)
 7289 %{
        // Load narrow Klass Pointer when UseCompactObjectHeaders is on: the value
        // is loaded from the address rebased by -Type::klass_offset() and then
        // shifted right by markWord::klass_shift. The shift clobbers the flags,
        // hence KILL cr.
 7290   predicate(UseCompactObjectHeaders);
 7291   match(Set dst (LoadNKlass mem));
 7292   effect(KILL cr);
 7293   ins_cost(125);
 7294   format %{
 7295     "movl    $dst, $mem\t# compressed klass ptr, shifted\n\t"
 7296     "shrl    $dst, markWord::klass_shift"
 7297   %}
 7298   ins_encode %{
 7299     // The incoming address is pointing into obj-start + Type::klass_offset(). We need to extract
 7300     // obj-start, so that we can load from the object's mark-word instead.
 7301     Register d = $dst$$Register;
 7302     Address  s = ($mem$$Address).plus_disp(-Type::klass_offset());
 7303     if (UseAPX) {
          // With APX, a single eshrl takes the memory source and register
          // destination, replacing the movl + shrl pair below.
 7304       __ eshrl(d, s, markWord::klass_shift, false);
 7305     } else {
 7306       __ movl(d, s);
 7307       __ shrl(d, markWord::klass_shift);
 7308     }
 7309   %}
 7310   ins_pipe(ialu_reg_mem);
 7311 %}
 7312 
 7313 // Load Float
 7314 instruct loadF(regF dst, memory mem) %{
 7315   // LoadF: movflt from mem into the XMM register dst.
 7316   match(Set dst (LoadF mem));
 7317   ins_cost(145); // XXX
 7318   ins_pipe(pipe_slow); // XXX
 7319   format %{ "movss   $dst, $mem\t# float" %}
 7320   ins_encode %{
 7321     Address float_addr = $mem$$Address;
 7322     __ movflt($dst$$XMMRegister, float_addr);
 7323   %}
 7324 %}
 7325 
 7326 // Load Double
 7327 instruct loadD_partial(regD dst, memory mem) %{
 7328   // LoadD while UseXmmLoadAndClearUpper is off; encoded through movdbl.
 7329   match(Set dst (LoadD mem));
 7330   predicate(!UseXmmLoadAndClearUpper);
 7331   ins_cost(145); // XXX
 7332   ins_pipe(pipe_slow); // XXX
 7333   format %{ "movlpd  $dst, $mem\t# double" %}
 7334   ins_encode %{
 7335     Address double_addr = $mem$$Address;
 7336     __ movdbl($dst$$XMMRegister, double_addr);
 7337   %}
 7338 %}
 7339 
 7340 instruct loadD(regD dst, memory mem) %{
 7341   // LoadD while UseXmmLoadAndClearUpper is on; encoded through movdbl.
 7342   match(Set dst (LoadD mem));
 7343   predicate(UseXmmLoadAndClearUpper);
 7344   ins_cost(145); // XXX
 7345   ins_pipe(pipe_slow); // XXX
 7346   format %{ "movsd   $dst, $mem\t# double" %}
 7347   ins_encode %{
 7348     Address double_addr = $mem$$Address;
 7349     __ movdbl($dst$$XMMRegister, double_addr);
 7350   %}
 7351 %}
 7352 
 7353 instruct loadAOTRCAddress(rRegP dst, immAOTRuntimeConstantsAddress con) %{
 7354   // Places an immAOTRuntimeConstantsAddress constant in dst by passing the
 7355   // constant, cast to address, to load_aotrc_address.
 7356   match(Set dst con);
 7357   ins_pipe(ialu_reg_fat);
 7358   format %{ "leaq  $dst, $con\t# AOT Runtime Constants Address" %}
 7359   ins_encode %{
 7360     address aotrc_addr = (address)$con$$constant;
 7361     Register target = $dst$$Register;
 7362     __ load_aotrc_address(target, aotrc_addr);
 7363   %}
 7364 %}
 7365 
 7366 // min = java.lang.Math.min(float a, float b)
 7367 // max = java.lang.Math.max(float a, float b)
 7368 instruct minmaxF_reg_avx10_2(regF dst, regF a, regF b) %{
 7369   // Non-reduction MaxF/MinF when AVX10.2 is supported: the node's ideal opcode
 7370   // is handed to sminmax_fp_avx10_2 together with T_FLOAT and k0.
 7371   predicate(VM_Version::supports_avx10_2() && !VLoopReductions::is_reduction(n));
 7372   match(Set dst (MaxF a b));
 7373   match(Set dst (MinF a b));
 7374   ins_pipe(pipe_slow);
 7375   format %{ "minmaxF $dst, $a, $b" %}
 7376   ins_encode %{
 7377     __ sminmax_fp_avx10_2(this->ideal_Opcode(), T_FLOAT, $dst$$XMMRegister, k0,
 7378                           $a$$XMMRegister, $b$$XMMRegister);
 7379   %}
 7380 %}
 7381 
 7382 instruct minmaxF_reduction_reg_avx10_2(regF dst, regF a, regF b, regF xtmp, rRegI rtmp, rFlagsReg cr) %{
 7383   // MaxF/MinF that VLoopReductions::is_reduction() reports as a reduction, when
 7384   // AVX10.2 is supported. Emitted via emit_fp_min_max with fp_prec_flt; xtmp
 7385   // and rtmp are temporaries and the flags register is killed.
 7386   predicate(VM_Version::supports_avx10_2() && VLoopReductions::is_reduction(n));
 7387   match(Set dst (MaxF a b));
 7388   match(Set dst (MinF a b));
 7389   effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
 7390   ins_pipe(pipe_slow);
 7391   format %{ "minmaxF_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
 7392   ins_encode %{
 7393     const bool is_min = (this->ideal_Opcode() == Op_MinF);
 7394     emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
 7395                     $xtmp$$XMMRegister, $rtmp$$Register, is_min, fp_prec_flt /*pt*/);
 7396   %}
 7397 %}
 7398 
 7399 // min = java.lang.Math.min(float a, float b)
 7400 // max = java.lang.Math.max(float a, float b)
 7401 instruct minmaxF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{
 7402   // Non-reduction MaxF/MinF for UseAVX > 0 without AVX10.2: the scalar opcode
 7403   // is mapped to Op_MinV/Op_MaxV and vminmax_fp runs at AVX_128bit with T_FLOAT.
 7404   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
 7405   match(Set dst (MaxF a b));
 7406   match(Set dst (MinF a b));
 7407   effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
 7408   ins_pipe(pipe_slow);
 7409   format %{ "minmaxF $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
 7410   ins_encode %{
 7411     const int vec_opcode = (this->ideal_Opcode() == Op_MinF) ? Op_MinV : Op_MaxV;
 7412     __ vminmax_fp(vec_opcode, T_FLOAT,
 7413                   $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
 7414                   $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
 7415   %}
 7416 %}
 7417 
 7418 instruct minmaxF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xtmp, rRegI rtmp, rFlagsReg cr) %{
 7419   // MaxF/MinF that VLoopReductions::is_reduction() reports as a reduction, for
 7420   // UseAVX > 0 without AVX10.2. Emitted via emit_fp_min_max with fp_prec_flt.
 7421   // The format text now prints "! using", matching the sibling reduction rules.
 7422   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
 7423   match(Set dst (MaxF a b));
 7424   match(Set dst (MinF a b));
 7425   effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
 7426   ins_pipe(pipe_slow);
 7427   format %{ "minmaxF_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
 7428   ins_encode %{
 7429     const bool is_min = (this->ideal_Opcode() == Op_MinF);
 7430     emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
 7431                     $xtmp$$XMMRegister, $rtmp$$Register, is_min, fp_prec_flt /*pt*/);
 7432   %}
 7433 %}
 7434 
 7435 // min = java.lang.Math.min(double a, double b)
 7436 // max = java.lang.Math.max(double a, double b)
 7437 instruct minmaxD_reg_avx10_2(regD dst, regD a, regD b) %{
 7438   // Non-reduction MaxD/MinD when AVX10.2 is supported: the node's ideal opcode
 7439   // is handed to sminmax_fp_avx10_2 together with T_DOUBLE and k0.
 7440   predicate(VM_Version::supports_avx10_2() && !VLoopReductions::is_reduction(n));
 7441   match(Set dst (MaxD a b));
 7442   match(Set dst (MinD a b));
 7443   ins_pipe(pipe_slow);
 7444   format %{ "minmaxD $dst, $a, $b" %}
 7445   ins_encode %{
 7446     __ sminmax_fp_avx10_2(this->ideal_Opcode(), T_DOUBLE, $dst$$XMMRegister, k0,
 7447                           $a$$XMMRegister, $b$$XMMRegister);
 7448   %}
 7449 %}
 7450 
 7451 instruct minmaxD_reduction_reg_avx10_2(regD dst, regD a, regD b, regD xtmp, rRegI rtmp, rFlagsReg cr) %{
 7452   // MaxD/MinD that VLoopReductions::is_reduction() reports as a reduction, when
 7453   // AVX10.2 is supported. Emitted via emit_fp_min_max with fp_prec_dbl; xtmp
 7454   // and rtmp are temporaries and the flags register is killed.
 7455   predicate(VM_Version::supports_avx10_2() && VLoopReductions::is_reduction(n));
 7456   match(Set dst (MaxD a b));
 7457   match(Set dst (MinD a b));
 7458   effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
 7459   ins_pipe(pipe_slow);
 7460   format %{ "minmaxD_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
 7461   ins_encode %{
 7462     const bool is_min = (this->ideal_Opcode() == Op_MinD);
 7463     emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
 7464                     $xtmp$$XMMRegister, $rtmp$$Register, is_min, fp_prec_dbl /*pt*/);
 7465   %}
 7466 %}
 7467 
 7468 // min = java.lang.Math.min(double a, double b)
 7469 // max = java.lang.Math.max(double a, double b)
 7470 instruct minmaxD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp) %{
 7471   // Non-reduction MaxD/MinD for UseAVX > 0 without AVX10.2: the scalar opcode
 7472   // is mapped to Op_MinV/Op_MaxV and vminmax_fp runs at AVX_128bit with T_DOUBLE.
 7473   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && !VLoopReductions::is_reduction(n));
 7474   match(Set dst (MaxD a b));
 7475   match(Set dst (MinD a b));
 7476   effect(USE a, USE b, TEMP atmp, TEMP btmp, TEMP tmp);
 7477   ins_pipe(pipe_slow);
 7478   format %{ "minmaxD $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
 7479   ins_encode %{
 7480     const int vec_opcode = (this->ideal_Opcode() == Op_MinD) ? Op_MinV : Op_MaxV;
 7481     __ vminmax_fp(vec_opcode, T_DOUBLE,
 7482                   $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
 7483                   $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
 7484   %}
 7485 %}
 7486 
 7487 instruct minmaxD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xtmp, rRegL rtmp, rFlagsReg cr) %{
 7488   // MaxD/MinD that VLoopReductions::is_reduction() reports as a reduction, for
 7489   // UseAVX > 0 without AVX10.2. Emitted via emit_fp_min_max with fp_prec_dbl;
 7490   // xtmp and rtmp are temporaries and the flags register is killed.
 7491   predicate(!VM_Version::supports_avx10_2() && UseAVX > 0 && VLoopReductions::is_reduction(n));
 7492   match(Set dst (MaxD a b));
 7493   match(Set dst (MinD a b));
 7494   effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);
 7495   ins_pipe(pipe_slow);
 7496   format %{ "minmaxD_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
 7497   ins_encode %{
 7498     const bool is_min = (this->ideal_Opcode() == Op_MinD);
 7499     emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
 7500                     $xtmp$$XMMRegister, $rtmp$$Register, is_min, fp_prec_dbl /*pt*/);
 7501   %}
 7502 %}
 7503 
 7504 // Load Effective Address
 7505 instruct leaP8(rRegP dst, indOffset8 mem) %{
 7506   // Sets dst to the effective address of an indOffset8 operand via leaq.
 7507   match(Set dst mem);
 7508   ins_cost(110); // XXX
 7509   ins_pipe(ialu_reg_reg_fat);
 7510   format %{ "leaq    $dst, $mem\t# ptr 8" %}
 7511   ins_encode %{
 7512     Address effective = $mem$$Address;
 7513     __ leaq($dst$$Register, effective);
 7514   %}
 7515 %}
 7516 
 7517 instruct leaP32(rRegP dst, indOffset32 mem) %{
 7518   // Sets dst to the effective address of an indOffset32 operand via leaq.
 7519   match(Set dst mem);
 7520   ins_cost(110);
 7521   ins_pipe(ialu_reg_reg_fat);
 7522   format %{ "leaq    $dst, $mem\t# ptr 32" %}
 7523   ins_encode %{
 7524     Address effective = $mem$$Address;
 7525     __ leaq($dst$$Register, effective);
 7526   %}
 7527 %}
 7528 
 7529 instruct leaPIdxOff(rRegP dst, indIndexOffset mem) %{
 7530   // Sets dst to the effective address of an indIndexOffset operand via leaq.
 7531   match(Set dst mem);
 7532   ins_cost(110);
 7533   ins_pipe(ialu_reg_reg_fat);
 7534   format %{ "leaq    $dst, $mem\t# ptr idxoff" %}
 7535   ins_encode %{
 7536     Address effective = $mem$$Address;
 7537     __ leaq($dst$$Register, effective);
 7538   %}
 7539 %}
 7540 
 7541 instruct leaPIdxScale(rRegP dst, indIndexScale mem) %{
 7542   // Sets dst to the effective address of an indIndexScale operand via leaq.
 7543   match(Set dst mem);
 7544   ins_cost(110);
 7545   ins_pipe(ialu_reg_reg_fat);
 7546   format %{ "leaq    $dst, $mem\t# ptr idxscale" %}
 7547   ins_encode %{
 7548     Address effective = $mem$$Address;
 7549     __ leaq($dst$$Register, effective);
 7550   %}
 7551 %}
 7552 
 7553 instruct leaPPosIdxScale(rRegP dst, indPosIndexScale mem) %{
 7554   // Sets dst to the effective address of an indPosIndexScale operand via leaq.
 7555   // The format tag reads "posidxscale" so it differs from leaPIdxScale's tag.
 7556   match(Set dst mem);
 7557   ins_cost(110);
 7558   format %{ "leaq    $dst, $mem\t# ptr posidxscale" %}
 7559   ins_encode %{
 7560     __ leaq($dst$$Register, $mem$$Address);
 7561   %}
 7562   ins_pipe(ialu_reg_reg_fat);
 7563 %}
 7564 
 7565 instruct leaPIdxScaleOff(rRegP dst, indIndexScaleOffset mem) %{
 7566   // Sets dst to the effective address of an indIndexScaleOffset operand.
 7567   match(Set dst mem);
 7568   ins_cost(110);
 7569   ins_pipe(ialu_reg_reg_fat);
 7570   format %{ "leaq    $dst, $mem\t# ptr idxscaleoff" %}
 7571   ins_encode %{
 7572     Address effective = $mem$$Address;
 7573     __ leaq($dst$$Register, effective);
 7574   %}
 7575 %}
 7576 
 7577 instruct leaPPosIdxOff(rRegP dst, indPosIndexOffset mem) %{
 7578   // Sets dst to the effective address of an indPosIndexOffset operand.
 7579   match(Set dst mem);
 7580   ins_cost(110);
 7581   ins_pipe(ialu_reg_reg_fat);
 7582   format %{ "leaq    $dst, $mem\t# ptr posidxoff" %}
 7583   ins_encode %{
 7584     Address effective = $mem$$Address;
 7585     __ leaq($dst$$Register, effective);
 7586   %}
 7587 %}
 7588 
 7589 instruct leaPPosIdxScaleOff(rRegP dst, indPosIndexScaleOffset mem) %{
 7590   // Sets dst to the effective address of an indPosIndexScaleOffset operand.
 7591   match(Set dst mem);
 7592   ins_cost(110);
 7593   ins_pipe(ialu_reg_reg_fat);
 7594   format %{ "leaq    $dst, $mem\t# ptr posidxscaleoff" %}
 7595   ins_encode %{
 7596     Address effective = $mem$$Address;
 7597     __ leaq($dst$$Register, effective);
 7598   %}
 7599 %}
 7600 
 7601 // Load Effective Address which uses Narrow (32-bits) oop
 7602 instruct leaPCompressedOopOffset(rRegP dst, indCompressedOopOffset mem) %{
 7603   // leaq of an indCompressedOopOffset operand; needs a non-zero oop shift.
 7604   match(Set dst mem);
 7605   predicate(UseCompressedOops && (CompressedOops::shift() != 0));
 7606   ins_cost(110);
 7607   ins_pipe(ialu_reg_reg_fat);
 7608   format %{ "leaq    $dst, $mem\t# ptr compressedoopoff32" %}
 7609   ins_encode %{
 7610     Address effective = $mem$$Address;
 7611     __ leaq($dst$$Register, effective);
 7612   %}
 7613 %}
 7614 
 7615 instruct leaP8Narrow(rRegP dst, indOffset8Narrow mem) %{
 7616   // leaq of an indOffset8Narrow operand; only when CompressedOops::shift() is 0.
 7617   match(Set dst mem);
 7618   predicate(CompressedOops::shift() == 0);
 7619   ins_cost(110); // XXX
 7620   ins_pipe(ialu_reg_reg_fat);
 7621   format %{ "leaq    $dst, $mem\t# ptr off8narrow" %}
 7622   ins_encode %{
 7623     Address effective = $mem$$Address;
 7624     __ leaq($dst$$Register, effective);
 7625   %}
 7626 %}
 7627 
 7628 instruct leaP32Narrow(rRegP dst, indOffset32Narrow mem) %{
 7629   // leaq of an indOffset32Narrow operand; only when CompressedOops::shift() is 0.
 7630   match(Set dst mem);
 7631   predicate(CompressedOops::shift() == 0);
 7632   ins_cost(110);
 7633   ins_pipe(ialu_reg_reg_fat);
 7634   format %{ "leaq    $dst, $mem\t# ptr off32narrow" %}
 7635   ins_encode %{
 7636     Address effective = $mem$$Address;
 7637     __ leaq($dst$$Register, effective);
 7638   %}
 7639 %}
 7640 
 7641 instruct leaPIdxOffNarrow(rRegP dst, indIndexOffsetNarrow mem) %{
 7642   // leaq of an indIndexOffsetNarrow operand; only when the oop shift is 0.
 7643   match(Set dst mem);
 7644   predicate(CompressedOops::shift() == 0);
 7645   ins_cost(110);
 7646   ins_pipe(ialu_reg_reg_fat);
 7647   format %{ "leaq    $dst, $mem\t# ptr idxoffnarrow" %}
 7648   ins_encode %{
 7649     Address effective = $mem$$Address;
 7650     __ leaq($dst$$Register, effective);
 7651   %}
 7652 %}
 7653 
 7654 instruct leaPIdxScaleNarrow(rRegP dst, indIndexScaleNarrow mem) %{
 7655   // leaq of an indIndexScaleNarrow operand; only when the oop shift is 0.
 7656   match(Set dst mem);
 7657   predicate(CompressedOops::shift() == 0);
 7658   ins_cost(110);
 7659   ins_pipe(ialu_reg_reg_fat);
 7660   format %{ "leaq    $dst, $mem\t# ptr idxscalenarrow" %}
 7661   ins_encode %{
 7662     Address effective = $mem$$Address;
 7663     __ leaq($dst$$Register, effective);
 7664   %}
 7665 %}
 7666 
 7667 instruct leaPIdxScaleOffNarrow(rRegP dst, indIndexScaleOffsetNarrow mem) %{
 7668   // leaq of an indIndexScaleOffsetNarrow operand; only when the oop shift is 0.
 7669   match(Set dst mem);
 7670   predicate(CompressedOops::shift() == 0);
 7671   ins_cost(110);
 7672   ins_pipe(ialu_reg_reg_fat);
 7673   format %{ "leaq    $dst, $mem\t# ptr idxscaleoffnarrow" %}
 7674   ins_encode %{
 7675     Address effective = $mem$$Address;
 7676     __ leaq($dst$$Register, effective);
 7677   %}
 7678 %}
 7679 
 7680 instruct leaPPosIdxOffNarrow(rRegP dst, indPosIndexOffsetNarrow mem) %{
 7681   // leaq of an indPosIndexOffsetNarrow operand; only when the oop shift is 0.
 7682   match(Set dst mem);
 7683   predicate(CompressedOops::shift() == 0);
 7684   ins_cost(110);
 7685   ins_pipe(ialu_reg_reg_fat);
 7686   format %{ "leaq    $dst, $mem\t# ptr posidxoffnarrow" %}
 7687   ins_encode %{
 7688     Address effective = $mem$$Address;
 7689     __ leaq($dst$$Register, effective);
 7690   %}
 7691 %}
 7692 
 7693 instruct leaPPosIdxScaleOffNarrow(rRegP dst, indPosIndexScaleOffsetNarrow mem) %{
 7694   // leaq of an indPosIndexScaleOffsetNarrow operand; only when the shift is 0.
 7695   match(Set dst mem);
 7696   predicate(CompressedOops::shift() == 0);
 7697   ins_cost(110);
 7698   ins_pipe(ialu_reg_reg_fat);
 7699   format %{ "leaq    $dst, $mem\t# ptr posidxscaleoffnarrow" %}
 7700   ins_encode %{
 7701     Address effective = $mem$$Address;
 7702     __ leaq($dst$$Register, effective);
 7703   %}
 7704 %}
 7705 
 7706 instruct loadConI(rRegI dst, immI src) %{
 7707   // Puts an immI constant into dst with movl.
 7708   match(Set dst src);
 7709   ins_pipe(ialu_reg_fat); // XXX
 7710   format %{ "movl    $dst, $src\t# int" %}
 7711   ins_encode %{
 7712     Register target = $dst$$Register;
 7713     __ movl(target, $src$$constant);
 7714   %}
 7715 %}
 7716 
 7717 instruct loadConI0(rRegI dst, immI_0 src, rFlagsReg cr) %{
 7718   // immI_0 constant: xorl of dst with itself; the flags register is killed.
 7719   match(Set dst src);
 7720   effect(KILL cr);
 7721   ins_cost(50);
 7722   ins_pipe(ialu_reg);
 7723   format %{ "xorl    $dst, $dst\t# int" %}
 7724   ins_encode %{
 7725     Register zeroed = $dst$$Register;
 7726     __ xorl(zeroed, zeroed);
 7727   %}
 7728 %}
 7729 
 7730 instruct loadConL(rRegL dst, immL src) %{
 7731   // Puts an immL constant into dst with mov64.
 7732   match(Set dst src);
 7733   ins_cost(150);
 7734   ins_pipe(ialu_reg);
 7735   format %{ "movq    $dst, $src\t# long" %}
 7736   ins_encode %{
 7737     Register target = $dst$$Register;
 7738     __ mov64(target, $src$$constant);
 7739   %}
 7740 %}
 7741 
 7742 instruct loadConL0(rRegL dst, immL0 src, rFlagsReg cr) %{
 7743   // immL0 constant: xorl of dst with itself; the flags register is killed.
 7744   match(Set dst src);
 7745   effect(KILL cr);
 7746   ins_cost(50);
 7747   ins_pipe(ialu_reg); // XXX
 7748   format %{ "xorl    $dst, $dst\t# long" %}
 7749   ins_encode %{
 7750     Register zeroed = $dst$$Register;
 7751     __ xorl(zeroed, zeroed);
 7752   %}
 7753 %}
 7754 
 7755 instruct loadConUL32(rRegL dst, immUL32 src) %{
 7756   // Puts an immUL32 constant into the long register dst with movl.
 7757   match(Set dst src);
 7758   ins_cost(60);
 7759   ins_pipe(ialu_reg);
 7760   format %{ "movl    $dst, $src\t# long (unsigned 32-bit)" %}
 7761   ins_encode %{
 7762     Register target = $dst$$Register;
 7763     __ movl(target, $src$$constant);
 7764   %}
 7765 %}
 7766 
 7767 instruct loadConL32(rRegL dst, immL32 src) %{
 7768   // Puts an immL32 constant into the long register dst with movq.
 7769   match(Set dst src);
 7770   ins_cost(70);
 7771   ins_pipe(ialu_reg);
 7772   format %{ "movq    $dst, $src\t# long (32-bit)" %}
 7773   ins_encode %{
 7774     Register target = $dst$$Register;
 7775     __ movq(target, $src$$constant);
 7776   %}
 7777 %}
 7778 
 7779 instruct loadConP(rRegP dst, immP con) %{
 7780   // Pointer constant: mov64 with the constant's reloc type and RELOC_IMM64.
 7781   match(Set dst con);
 7782   ins_pipe(ialu_reg_fat); // XXX
 7783   format %{ "movq    $dst, $con\t# ptr" %}
 7784   ins_encode %{
 7785     __ mov64($dst$$Register, $con$$constant, $con->constant_reloc(), RELOC_IMM64);
 7786   %}
 7787 %}
 7788 
 7789 instruct loadConP0(rRegP dst, immP0 src, rFlagsReg cr) %{
 7790   // immP0 constant: xorl of dst with itself; the flags register is killed.
 7791   match(Set dst src);
 7792   effect(KILL cr);
 7793   ins_cost(50);
 7794   ins_pipe(ialu_reg);
 7795   format %{ "xorl    $dst, $dst\t# ptr" %}
 7796   ins_encode %{
 7797     Register zeroed = $dst$$Register;
 7798     __ xorl(zeroed, zeroed);
 7799   %}
 7800 %}
 7801 
 7802 instruct loadConP31(rRegP dst, immP31 src, rFlagsReg cr) %{
 7803   // immP31 pointer constant loaded with movl; cr is declared as killed.
 7804   match(Set dst src);
 7805   effect(KILL cr);
 7806   ins_cost(60);
 7807   ins_pipe(ialu_reg);
 7808   format %{ "movl    $dst, $src\t# ptr (positive 32-bit)" %}
 7809   ins_encode %{
 7810     Register target = $dst$$Register;
 7811     __ movl(target, $src$$constant);
 7812   %}
 7813 %}
 7814 
 7815 instruct loadConF(regF dst, immF con) %{
 7816   match(Set dst con);  // immF constant, read via its constant-table address
 7817   ins_cost(125);
 7818   ins_pipe(pipe_slow);
 7819   format %{ "movss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
 7820   ins_encode %{
 7821     __ movflt($dst$$XMMRegister, $constantaddress($con));
 7822   %}
 7823 %}
 7824 
 7825 instruct loadConH(regF dst, immH con) %{
 7826   match(Set dst con);  // immH constant, read via its constant-table address
 7827   ins_cost(125);
 7828   ins_pipe(pipe_slow);
 7829   format %{ "movss   $dst, [$constantaddress]\t# load from constant table: halffloat=$con" %}
 7830   ins_encode %{
 7831     __ movflt($dst$$XMMRegister, $constantaddress($con));
 7832   %}
 7833 %}
 7834 
 7835 instruct loadConN0(rRegN dst, immN0 src, rFlagsReg cr) %{
 7836   match(Set dst src);  // immN0 constant: dst is cleared by xor-ing it with itself
 7837   effect(KILL cr);     // the flags register is declared as killed
 7838   format %{ "xorq    $dst, $dst\t# compressed null pointer" %}
 7839   ins_encode %{
 7840     __ xorq($dst$$Register, $dst$$Register);
 7841   %}
 7842   ins_pipe(ialu_reg);
 7843 %}
 7844 
 7845 instruct loadConN(rRegN dst, immN src) %{
 7846   // Non-null immN constant via set_narrow_oop; a null constant is unexpected.
 7847   match(Set dst src);
 7848   ins_cost(125);
 7849   ins_pipe(ialu_reg_fat); // XXX
 7850   format %{ "movl    $dst, $src\t# compressed ptr" %}
 7851   ins_encode %{
 7852     jobject narrow_con = (jobject)$src$$constant;
 7853     if (narrow_con != nullptr) {
 7854       __ set_narrow_oop($dst$$Register, narrow_con);
 7855     } else {
 7856       ShouldNotReachHere();
 7857     }
 7858   %}
 7859 %}
 7860 
 7861 instruct loadConNKlass(rRegN dst, immNKlass src) %{
 7862   // Non-null immNKlass constant via set_narrow_klass; null is unexpected.
 7863   match(Set dst src);
 7864   ins_cost(125);
 7865   ins_pipe(ialu_reg_fat); // XXX
 7866   format %{ "movl    $dst, $src\t# compressed klass ptr" %}
 7867   ins_encode %{
 7868     Klass* klass_con = (Klass*)$src$$constant;
 7869     if (klass_con != nullptr) {
 7870       __ set_narrow_klass($dst$$Register, klass_con);
 7871     } else {
 7872       ShouldNotReachHere();
 7873     }
 7874   %}
 7875 %}
 7876 
 7877 instruct loadConF0(regF dst, immF0 src) %{
 7878   // immF0 constant: xorps of the XMM register dst with itself.
 7879   match(Set dst src);
 7880   ins_cost(100);
 7881   ins_pipe(pipe_slow);
 7882   format %{ "xorps   $dst, $dst\t# float 0.0" %}
 7883   ins_encode %{
 7884     XMMRegister zeroed = $dst$$XMMRegister;
 7885     __ xorps(zeroed, zeroed);
 7886   %}
 7887 %}
 7888 
 7889 // Use the same format since predicate() can not be used here.
 7890 instruct loadConD(regD dst, immD con) %{
 7891   match(Set dst con);  // immD constant, read via its constant-table address
 7892   ins_cost(125);
 7893   ins_pipe(pipe_slow);
 7894   format %{ "movsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
 7895   ins_encode %{
 7896     __ movdbl($dst$$XMMRegister, $constantaddress($con));
 7897   %}
 7898 %}
 7899 
 7900 instruct loadConD0(regD dst, immD0 src) %{
 7901   // immD0 constant: xorpd of the XMM register dst with itself.
 7902   match(Set dst src);
 7903   ins_cost(100);
 7904   ins_pipe(pipe_slow);
 7905   format %{ "xorpd   $dst, $dst\t# double 0.0" %}
 7906   ins_encode %{
 7907     XMMRegister zeroed = $dst$$XMMRegister;
 7908     __ xorpd(zeroed, zeroed);
 7909   %}
 7910 %}
 7911 
 7912 instruct loadSSI(rRegI dst, stackSlotI src) %{
 7913   // Copies a stackSlotI operand into the int register dst with movl.
 7914   match(Set dst src);
 7915   ins_cost(125);
 7916   ins_pipe(ialu_reg_mem);
 7917   format %{ "movl    $dst, $src\t# int stk" %}
 7918   ins_encode %{
 7919     Address slot = $src$$Address;
 7920     __ movl($dst$$Register, slot);
 7921   %}
 7922 %}
 7923 
 7924 instruct loadSSL(rRegL dst, stackSlotL src) %{
 7925   // Copies a stackSlotL operand into the long register dst with movq.
 7926   match(Set dst src);
 7927   ins_cost(125);
 7928   ins_pipe(ialu_reg_mem);
 7929   format %{ "movq    $dst, $src\t# long stk" %}
 7930   ins_encode %{
 7931     Address slot = $src$$Address;
 7932     __ movq($dst$$Register, slot);
 7933   %}
 7934 %}
 7935 
 7936 instruct loadSSP(rRegP dst, stackSlotP src) %{
 7937   // Copies a stackSlotP operand into the pointer register dst with movq.
 7938   match(Set dst src);
 7939   ins_cost(125);
 7940   ins_pipe(ialu_reg_mem);
 7941   format %{ "movq    $dst, $src\t# ptr stk" %}
 7942   ins_encode %{
 7943     Address slot = $src$$Address;
 7944     __ movq($dst$$Register, slot);
 7945   %}
 7946 %}
 7947 
 7948 instruct loadSSF(regF dst, stackSlotF src) %{
 7949   // movflt into dst from rsp plus the stackSlotF operand's displacement.
 7950   match(Set dst src);
 7951   ins_cost(125);
 7952   ins_pipe(pipe_slow); // XXX
 7953   format %{ "movss   $dst, $src\t# float stk" %}
 7954   ins_encode %{
 7955     Address slot(rsp, $src$$disp);
 7956     __ movflt($dst$$XMMRegister, slot);
 7957   %}
 7958 %}
 7959 
 7960 // Use the same format since predicate() can not be used here.
 7961 instruct loadSSD(regD dst, stackSlotD src) %{
 7962   // movdbl into dst from rsp plus the stackSlotD operand's displacement.
 7963   match(Set dst src);
 7964   ins_cost(125);
 7965   ins_pipe(pipe_slow); // XXX
 7966   format %{ "movsd   $dst, $src\t# double stk" %}
 7967   ins_encode %{
 7968     Address slot(rsp, $src$$disp);
 7969     __ movdbl($dst$$XMMRegister, slot);
 7970   %}
 7971 %}
 7972 
 7973 // Prefetch instructions for allocation.
 7974 // Must be safe to execute with invalid address (cannot fault).
 7975 
 7976 instruct prefetchAlloc(memory mem) %{
 7977   // PrefetchAllocation when AllocatePrefetchInstr is 3: emits prefetchw.
 7978   match(PrefetchAllocation mem);
 7979   predicate(AllocatePrefetchInstr==3);
 7980   ins_cost(125);
 7981   ins_pipe(ialu_mem);
 7982   format %{ "PREFETCHW $mem\t# Prefetch allocation into level 1 cache and mark modified" %}
 7983   ins_encode %{
 7984     __ prefetchw($mem$$Address);
 7985   %}
 7986 %}
 7987 
 7988 instruct prefetchAllocNTA(memory mem) %{
 7989   // PrefetchAllocation when AllocatePrefetchInstr is 0: emits prefetchnta.
 7990   match(PrefetchAllocation mem);
 7991   predicate(AllocatePrefetchInstr==0);
 7992   ins_cost(125);
 7993   ins_pipe(ialu_mem);
 7994   format %{ "PREFETCHNTA $mem\t# Prefetch allocation to non-temporal cache for write" %}
 7995   ins_encode %{
 7996     __ prefetchnta($mem$$Address);
 7997   %}
 7998 %}
 7999 
 8000 instruct prefetchAllocT0(memory mem) %{
 8001   // PrefetchAllocation when AllocatePrefetchInstr is 1: emits prefetcht0.
 8002   match(PrefetchAllocation mem);
 8003   predicate(AllocatePrefetchInstr==1);
 8004   ins_cost(125);
 8005   ins_pipe(ialu_mem);
 8006   format %{ "PREFETCHT0 $mem\t# Prefetch allocation to level 1 and 2 caches for write" %}
 8007   ins_encode %{
 8008     __ prefetcht0($mem$$Address);
 8009   %}
 8010 %}
 8011 
 8012 instruct prefetchAllocT2(memory mem) %{
 8013   // PrefetchAllocation when AllocatePrefetchInstr is 2: emits prefetcht2.
 8014   match(PrefetchAllocation mem);
 8015   predicate(AllocatePrefetchInstr==2);
 8016   ins_cost(125);
 8017   ins_pipe(ialu_mem);
 8018   format %{ "PREFETCHT2 $mem\t# Prefetch allocation to level 2 cache for write" %}
 8019   ins_encode %{
 8020     __ prefetcht2($mem$$Address);
 8021   %}
 8022 %}
 8023 
 8024 //----------Store Instructions-------------------------------------------------
 8025 
 8026 // Store Byte
 8027 instruct storeB(memory mem, rRegI src) %{
 8028   // StoreB: movb of the register src to mem.
 8029   match(Set mem (StoreB mem src));
 8030   ins_cost(125); // XXX
 8031   ins_pipe(ialu_mem_reg);
 8032   format %{ "movb    $mem, $src\t# byte" %}
 8033   ins_encode %{
 8034     Address target = $mem$$Address;
 8035     __ movb(target, $src$$Register);
 8036   %}
 8037 %}
 8038 
 8039 // Store Char/Short
 8040 instruct storeC(memory mem, rRegI src) %{
 8041   // StoreC: movw of the register src to mem.
 8042   match(Set mem (StoreC mem src));
 8043   ins_cost(125); // XXX
 8044   ins_pipe(ialu_mem_reg);
 8045   format %{ "movw    $mem, $src\t# char/short" %}
 8046   ins_encode %{
 8047     Address target = $mem$$Address;
 8048     __ movw(target, $src$$Register);
 8049   %}
 8050 %}
 8051 
 8052 // Store Integer
 8053 instruct storeI(memory mem, rRegI src) %{
 8054   // StoreI: movl of the register src to mem.
 8055   match(Set mem (StoreI mem src));
 8056   ins_cost(125); // XXX
 8057   ins_pipe(ialu_mem_reg);
 8058   format %{ "movl    $mem, $src\t# int" %}
 8059   ins_encode %{
 8060     Address target = $mem$$Address;
 8061     __ movl(target, $src$$Register);
 8062   %}
 8063 %}
 8064 
 8065 // Store Long
 8066 instruct storeL(memory mem, rRegL src) %{
 8067   // StoreL: movq of the register src to mem.
 8068   match(Set mem (StoreL mem src));
 8069   ins_cost(125); // XXX
 8070   ins_pipe(ialu_mem_reg); // XXX
 8071   format %{ "movq    $mem, $src\t# long" %}
 8072   ins_encode %{
 8073     Address target = $mem$$Address;
 8074     __ movq(target, $src$$Register);
 8075   %}
 8076 %}
 8077 
 8078 // Store Pointer
 8079 instruct storeP(memory mem, any_RegP src) %{
 8080   // StoreP whose barrier_data() is 0: movq of the register src to mem.
 8081   match(Set mem (StoreP mem src));
 8082   predicate(n->as_Store()->barrier_data() == 0);
 8083   ins_cost(125); // XXX
 8084   ins_pipe(ialu_mem_reg);
 8085   format %{ "movq    $mem, $src\t# ptr" %}
 8086   ins_encode %{
 8087     Address target = $mem$$Address;
 8088     __ movq(target, $src$$Register);
 8089   %}
 8090 %}
 8091 
 8092 instruct storeImmP0(memory mem, immP0 zero) %{
 8093   // StoreP of immP0 (null oop base, barrier_data() 0): movq of r12 to mem.
 8094   match(Set mem (StoreP mem zero));
 8095   predicate(UseCompressedOops && (CompressedOops::base() == nullptr) && n->as_Store()->barrier_data() == 0);
 8096   ins_cost(125); // XXX
 8097   ins_pipe(ialu_mem_reg);
 8098   format %{ "movq    $mem, R12\t# ptr (R12_heapbase==0)" %}
 8099   ins_encode %{
 8100     Address target = $mem$$Address;
 8101     __ movq(target, r12);
 8102   %}
 8103 %}
 8104 
 8105 // Store Null Pointer, mark word, or other simple pointer constant.
 8106 instruct storeImmP(memory mem, immP31 src) %{
 8107   // StoreP of an immP31 constant (barrier_data() 0): movq of the immediate.
 8108   match(Set mem (StoreP mem src));
 8109   predicate(n->as_Store()->barrier_data() == 0);
 8110   ins_cost(150); // XXX
 8111   ins_pipe(ialu_mem_imm);
 8112   format %{ "movq    $mem, $src\t# ptr" %}
 8113   ins_encode %{
 8114     Address target = $mem$$Address;
 8115     __ movq(target, $src$$constant);
 8116   %}
 8117 %}
 8118 
 8119 // Store Compressed Pointer
 8120 instruct storeN(memory mem, rRegN src) %{
 8121   // StoreN whose barrier_data() is 0: movl of the narrow register src to mem.
 8122   match(Set mem (StoreN mem src));
 8123   predicate(n->as_Store()->barrier_data() == 0);
 8124   ins_cost(125); // XXX
 8125   ins_pipe(ialu_mem_reg);
 8126   format %{ "movl    $mem, $src\t# compressed ptr" %}
 8127   ins_encode %{
 8128     Address target = $mem$$Address;
 8129     __ movl(target, $src$$Register);
 8130   %}
 8131 %}
 8132 
 8133 instruct storeNKlass(memory mem, rRegN src) %{
 8134   // StoreNKlass: movl of the narrow register src to mem.
 8135   match(Set mem (StoreNKlass mem src));
 8136   ins_cost(125); // XXX
 8137   ins_pipe(ialu_mem_reg);
 8138   format %{ "movl    $mem, $src\t# compressed klass ptr" %}
 8139   ins_encode %{
 8140     Address target = $mem$$Address;
 8141     __ movl(target, $src$$Register);
 8142   %}
 8143 %}
 8144 
 8145 instruct storeImmN0(memory mem, immN0 zero) %{
 8146   // StoreN of immN0 (null oop base, barrier_data() 0): movl of r12 to mem.
 8147   match(Set mem (StoreN mem zero));
 8148   predicate(CompressedOops::base() == nullptr && n->as_Store()->barrier_data() == 0);
 8149   ins_cost(125); // XXX
 8150   ins_pipe(ialu_mem_reg);
 8151   format %{ "movl    $mem, R12\t# compressed ptr (R12_heapbase==0)" %}
 8152   ins_encode %{
 8153     Address target = $mem$$Address;
 8154     __ movl(target, r12);
 8155   %}
 8156 %}
 8157 
 8158 instruct storeImmN(memory mem, immN src) %{
 8159   // StoreN of an immN constant (barrier_data() 0): null stores 0 with movl.
 8160   match(Set mem (StoreN mem src));
 8161   predicate(n->as_Store()->barrier_data() == 0);
 8162   ins_cost(150); // XXX
 8163   ins_pipe(ialu_mem_imm);
 8164   format %{ "movl    $mem, $src\t# compressed ptr" %}
 8165   ins_encode %{
 8166     jobject narrow_con = (jobject)$src$$constant;
 8167     Address target = $mem$$Address;
 8168     if (narrow_con != nullptr) {
 8169       __ set_narrow_oop(target, narrow_con);
 8170     } else {
 8171       __ movl(target, 0);
 8172     }
 8173   %}
 8174 %}
 8175 
 8176 instruct storeImmNKlass(memory mem, immNKlass src) %{
 8177   // StoreNKlass of an immNKlass constant, written through set_narrow_klass.
 8178   match(Set mem (StoreNKlass mem src));
 8179   ins_cost(150); // XXX
 8180   ins_pipe(ialu_mem_imm);
 8181   format %{ "movl    $mem, $src\t# compressed klass ptr" %}
 8182   ins_encode %{
 8183     Klass* klass_con = (Klass*)$src$$constant;
 8184     __ set_narrow_klass($mem$$Address, klass_con);
 8185   %}
 8186 %}
 8187 
 8188 // Store Integer Immediate
 8189 instruct storeImmI0(memory mem, immI_0 zero) %{
 8190   // StoreI of immI_0 when the compressed-oop base is null: movl of r12 to mem.
 8191   match(Set mem (StoreI mem zero));
 8192   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8193   ins_cost(125); // XXX
 8194   ins_pipe(ialu_mem_reg);
 8195   format %{ "movl    $mem, R12\t# int (R12_heapbase==0)" %}
 8196   ins_encode %{
 8197     Address target = $mem$$Address;
 8198     __ movl(target, r12);
 8199   %}
 8200 %}
 8201 
 8202 instruct storeImmI(memory mem, immI src) %{
 8203   // StoreI of an immI constant: movl of the immediate to mem.
 8204   match(Set mem (StoreI mem src));
 8205   ins_cost(150);
 8206   ins_pipe(ialu_mem_imm);
 8207   format %{ "movl    $mem, $src\t# int" %}
 8208   ins_encode %{
 8209     Address target = $mem$$Address;
 8210     __ movl(target, $src$$constant);
 8211   %}
 8212 %}
 8213 
 8214 // Store Long Immediate
 8215 instruct storeImmL0(memory mem, immL0 zero) %{
 8216   // StoreL of immL0 when the compressed-oop base is null: movq of r12 to mem.
 8217   match(Set mem (StoreL mem zero));
 8218   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8219   ins_cost(125); // XXX
 8220   ins_pipe(ialu_mem_reg);
 8221   format %{ "movq    $mem, R12\t# long (R12_heapbase==0)" %}
 8222   ins_encode %{
 8223     Address target = $mem$$Address;
 8224     __ movq(target, r12);
 8225   %}
 8226 %}
 8227 
 8228 instruct storeImmL(memory mem, immL32 src) %{
 8229   // StoreL of an immL32 constant: movq of the immediate to mem.
 8230   match(Set mem (StoreL mem src));
 8231   ins_cost(150);
 8232   ins_pipe(ialu_mem_imm);
 8233   format %{ "movq    $mem, $src\t# long" %}
 8234   ins_encode %{
 8235     Address target = $mem$$Address;
 8236     __ movq(target, $src$$constant);
 8237   %}
 8238 %}
 8239 
 8240 // Store Short/Char Immediate
 8241 instruct storeImmC0(memory mem, immI_0 zero) %{
 8242   // StoreC of immI_0 when the compressed-oop base is null: movw of r12 to mem.
 8243   match(Set mem (StoreC mem zero));
 8244   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8245   ins_cost(125); // XXX
 8246   ins_pipe(ialu_mem_reg);
 8247   format %{ "movw    $mem, R12\t# short/char (R12_heapbase==0)" %}
 8248   ins_encode %{
 8249     Address target = $mem$$Address;
 8250     __ movw(target, r12);
 8251   %}
 8252 %}
 8253 
 8254 instruct storeImmI16(memory mem, immI16 src) %{
 8255   // StoreC of an immI16 constant, enabled by UseStoreImmI16: movw immediate.
 8256   match(Set mem (StoreC mem src));
 8257   predicate(UseStoreImmI16);
 8258   ins_cost(150);
 8259   ins_pipe(ialu_mem_imm);
 8260   format %{ "movw    $mem, $src\t# short/char" %}
 8261   ins_encode %{
 8262     Address target = $mem$$Address;
 8263     __ movw(target, $src$$constant);
 8264   %}
 8265 %}
 8266 
 8267 // Store Byte Immediate
 8268 instruct storeImmB0(memory mem, immI_0 zero) %{
 8269   // StoreB of immI_0 when the compressed-oop base is null: movb of r12 to mem.
 8270   // The format comment says "byte" because this rule matches StoreB.
 8271   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8272   match(Set mem (StoreB mem zero));
 8273   ins_cost(125); // XXX
 8274   format %{ "movb    $mem, R12\t# byte (R12_heapbase==0)" %}
 8275   ins_encode %{
 8276     __ movb($mem$$Address, r12);
 8277   %}
 8278   ins_pipe(ialu_mem_reg);
 8279 %}
 8280 
 8281 instruct storeImmB(memory mem, immI8 src) %{
 8282   // StoreB of an immI8 constant: movb of the immediate to mem.
 8283   match(Set mem (StoreB mem src));
 8284   ins_cost(150); // XXX
 8285   ins_pipe(ialu_mem_imm);
 8286   format %{ "movb    $mem, $src\t# byte" %}
 8287   ins_encode %{
 8288     Address target = $mem$$Address;
 8289     __ movb(target, $src$$constant);
 8290   %}
 8291 %}
 8292 
 8293 // Store Float
 8294 instruct storeF(memory mem, regF src) %{
 8295   // StoreF: movflt of the XMM register src to mem.
 8296   match(Set mem (StoreF mem src));
 8297   ins_cost(95); // XXX
 8298   ins_pipe(pipe_slow); // XXX
 8299   format %{ "movss   $mem, $src\t# float" %}
 8300   ins_encode %{
 8301     Address target = $mem$$Address;
 8302     __ movflt(target, $src$$XMMRegister);
 8303   %}
 8304 %}
 8305 
 8306 // Store immediate Float value (it is faster than store from XMM register)
 8307 instruct storeF0(memory mem, immF0 zero) %{
 8308   // StoreF of immF0 when the compressed-oop base is null: movl of r12 to mem.
 8309   match(Set mem (StoreF mem zero));
 8310   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8311   ins_cost(25); // XXX
 8312   ins_pipe(ialu_mem_reg);
 8313   format %{ "movl    $mem, R12\t# float 0. (R12_heapbase==0)" %}
 8314   ins_encode %{
 8315     Address target = $mem$$Address;
 8316     __ movl(target, r12);
 8317   %}
 8318 %}
 8319 
 8320 instruct storeF_imm(memory mem, immF src)
 8321 %{
        // Store a float constant (immF) with a 32-bit integer immediate move.
        // The constant is passed through jint_cast before being handed to movl.
        // NOTE(review): jint_cast is defined outside this chunk; presumably it
        // reinterprets the float's bit pattern as a jint - confirm.
 8322   match(Set mem (StoreF mem src));
 8323 
 8324   ins_cost(50);
 8325   format %{ "movl    $mem, $src\t# float" %}
 8326   ins_encode %{
 8327     __ movl($mem$$Address, jint_cast($src$$constant));
 8328   %}
 8329   ins_pipe(ialu_mem_imm);
 8330 %}
 8331 
 8332 // Store Double
 8333 instruct storeD(memory mem, regD src)
 8334 %{
        // Store a double held in an XMM register (regD) to memory. Emitted through
        // the movdbl assembler helper; the format string prints it as movsd.
 8335   match(Set mem (StoreD mem src));
 8336 
 8337   ins_cost(95); // XXX
 8338   format %{ "movsd   $mem, $src\t# double" %}
 8339   ins_encode %{
 8340     __ movdbl($mem$$Address, $src$$XMMRegister);
 8341   %}
 8342   ins_pipe(pipe_slow); // XXX
 8343 %}
 8344 
 8345 // Store immediate double 0.0 (it is faster than store from XMM register)
 8346 instruct storeD0_imm(memory mem, immD0 src)
 8347 %{
        // Store double 0.0 (immD0) with a 64-bit integer immediate move.
        // The predicate is the exact negation of storeD0's predicate, so this
        // rule covers the configurations where the r12-based storeD0 rule does
        // not apply.
 8348   predicate(!UseCompressedOops || (CompressedOops::base() != nullptr));
 8349   match(Set mem (StoreD mem src));
 8350 
 8351   ins_cost(50);
 8352   format %{ "movq    $mem, $src\t# double 0." %}
 8353   ins_encode %{
 8354     __ movq($mem$$Address, $src$$constant);
 8355   %}
 8356   ins_pipe(ialu_mem_imm);
 8357 %}
 8358 
 8359 instruct storeD0(memory mem, immD0 zero)
 8360 %{
        // Store double 0.0 (immD0) as a 64-bit integer move from r12, avoiding an
        // XMM register. The predicate limits this rule to compressed oops with a
        // null heap base, the case the format string labels R12_heapbase==0.
 8361   predicate(UseCompressedOops && (CompressedOops::base() == nullptr));
 8362   match(Set mem (StoreD mem zero));
 8363 
 8364   ins_cost(25); // XXX
 8365   format %{ "movq    $mem, R12\t# double 0. (R12_heapbase==0)" %}
 8366   ins_encode %{
 8367     __ movq($mem$$Address, r12);
 8368   %}
 8369   ins_pipe(ialu_mem_reg);
 8370 %}
 8371 
 8372 instruct storeSSI(stackSlotI dst, rRegI src)
 8373 %{
        // Move an int register into an int stack slot: matches a bare
        // (Set dst src) with a stackSlotI destination and emits a 32-bit movl to
        // the slot's address.
 8374   match(Set dst src);
 8375 
 8376   ins_cost(100);
 8377   format %{ "movl    $dst, $src\t# int stk" %}
 8378   ins_encode %{
 8379     __ movl($dst$$Address, $src$$Register);
 8380   %}
 8381   ins_pipe( ialu_mem_reg );
 8382 %}
 8383 
 8384 instruct storeSSL(stackSlotL dst, rRegL src)
 8385 %{
        // Move a long register into a long stack slot: matches a bare
        // (Set dst src) with a stackSlotL destination and emits a 64-bit movq to
        // the slot's address.
 8386   match(Set dst src);
 8387 
 8388   ins_cost(100);
 8389   format %{ "movq    $dst, $src\t# long stk" %}
 8390   ins_encode %{
 8391     __ movq($dst$$Address, $src$$Register);
 8392   %}
 8393   ins_pipe(ialu_mem_reg);
 8394 %}
 8395 
 8396 instruct storeSSP(stackSlotP dst, rRegP src)
 8397 %{
        // Move a pointer register into a pointer stack slot: matches a bare
        // (Set dst src) with a stackSlotP destination and emits a 64-bit movq to
        // the slot's address.
 8398   match(Set dst src);
 8399 
 8400   ins_cost(100);
 8401   format %{ "movq    $dst, $src\t# ptr stk" %}
 8402   ins_encode %{
 8403     __ movq($dst$$Address, $src$$Register);
 8404   %}
 8405   ins_pipe(ialu_mem_reg);
 8406 %}
 8407 
 8408 instruct storeSSF(stackSlotF dst, regF src)
 8409 %{
        // Move a float XMM register into a float stack slot. Unlike the integer
        // stack-slot stores, the address is built explicitly as rsp plus the
        // slot's displacement ($dst$$disp).
 8410   match(Set dst src);
 8411 
 8412   ins_cost(95); // XXX
 8413   format %{ "movss   $dst, $src\t# float stk" %}
 8414   ins_encode %{
 8415     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
 8416   %}
 8417   ins_pipe(pipe_slow); // XXX
 8418 %}
 8419 
 8420 instruct storeSSD(stackSlotD dst, regD src)
 8421 %{
        // Move a double XMM register into a double stack slot. The address is
        // built explicitly as rsp plus the slot's displacement ($dst$$disp).
 8422   match(Set dst src);
 8423 
 8424   ins_cost(95); // XXX
 8425   format %{ "movsd   $dst, $src\t# double stk" %}
 8426   ins_encode %{
 8427     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
 8428   %}
 8429   ins_pipe(pipe_slow); // XXX
 8430 %}
 8431 
 8432 instruct cacheWB(indirect addr)
 8433 %{
        // Cache-line writeback of the line addressed by $addr. Only available
        // when VM_Version::supports_data_cache_line_flush() is true.
        // The asserts check that the operand has no index register and a zero
        // displacement, so the address handed to cache_wb is the base register
        // alone.
 8434   predicate(VM_Version::supports_data_cache_line_flush());
 8435   match(CacheWB addr);
 8436 
 8437   ins_cost(100);
 8438   format %{"cache wb $addr" %}
 8439   ins_encode %{
 8440     assert($addr->index_position() < 0, "should be");
 8441     assert($addr$$disp == 0, "should be");
 8442     __ cache_wb(Address($addr$$base$$Register, 0));
 8443   %}
 8444   ins_pipe(pipe_slow); // XXX
 8445 %}
 8446 
 8447 instruct cacheWBPreSync()
 8448 %{
        // Matches the operand-less CacheWBPreSync node and emits
        // cache_wbsync(true). Only available when
        // VM_Version::supports_data_cache_line_flush() is true.
        // NOTE(review): the rule name suggests `true` selects the pre-writeback
        // synchronization; confirm against the cache_wbsync definition.
 8449   predicate(VM_Version::supports_data_cache_line_flush());
 8450   match(CacheWBPreSync);
 8451 
 8452   ins_cost(100);
 8453   format %{"cache wb presync" %}
 8454   ins_encode %{
 8455     __ cache_wbsync(true);
 8456   %}
 8457   ins_pipe(pipe_slow); // XXX
 8458 %}
 8459 
 8460 instruct cacheWBPostSync()
 8461 %{
        // Matches the operand-less CacheWBPostSync node and emits
        // cache_wbsync(false). Only available when
        // VM_Version::supports_data_cache_line_flush() is true.
        // NOTE(review): the rule name suggests `false` selects the
        // post-writeback synchronization; confirm against the cache_wbsync
        // definition.
 8462   predicate(VM_Version::supports_data_cache_line_flush());
 8463   match(CacheWBPostSync);
 8464 
 8465   ins_cost(100);
 8466   format %{"cache wb postsync" %}
 8467   ins_encode %{
 8468     __ cache_wbsync(false);
 8469   %}
 8470   ins_pipe(pipe_slow); // XXX
 8471 %}
 8472 
 8473 //----------BSWAP Instructions-------------------------------------------------
 8474 instruct bytes_reverse_int(rRegI dst) %{
        // Byte-swap a 32-bit value in place: dst is both the input and the output
        // of ReverseBytesI, implemented with one bswapl.
 8475   match(Set dst (ReverseBytesI dst));
 8476 
 8477   format %{ "bswapl  $dst" %}
 8478   ins_encode %{
 8479     __ bswapl($dst$$Register);
 8480   %}
 8481   ins_pipe( ialu_reg );
 8482 %}
 8483 
 8484 instruct bytes_reverse_long(rRegL dst) %{
        // Byte-swap a 64-bit value in place: dst is both the input and the output
        // of ReverseBytesL, implemented with one bswapq.
 8485   match(Set dst (ReverseBytesL dst));
 8486 
 8487   format %{ "bswapq  $dst" %}
 8488   ins_encode %{
 8489     __ bswapq($dst$$Register);
 8490   %}
 8491   ins_pipe( ialu_reg);
 8492 %}
 8493 
 8494 instruct bytes_reverse_unsigned_short(rRegI dst, rFlagsReg cr) %{
        // ReverseBytesUS in place: bswapl moves the swapped low 16 bits into the
        // upper half of the register, then a logical right shift by 16 brings
        // them back down zero-extended. The shift modifies flags, hence KILL cr.
 8495   match(Set dst (ReverseBytesUS dst));
 8496   effect(KILL cr);
 8497 
 8498   format %{ "bswapl  $dst\n\t"
 8499             "shrl    $dst,16\n\t" %}
 8500   ins_encode %{
 8501     __ bswapl($dst$$Register);
 8502     __ shrl($dst$$Register, 16);
 8503   %}
 8504   ins_pipe( ialu_reg );
 8505 %}
 8506 
 8507 instruct bytes_reverse_short(rRegI dst, rFlagsReg cr) %{
        // ReverseBytesS in place: bswapl moves the swapped low 16 bits into the
        // upper half of the register, then an arithmetic right shift by 16 brings
        // them back down sign-extended. The shift modifies flags, hence KILL cr.
        // The format string names the sized mnemonic (sarl) that is emitted.
 8508   match(Set dst (ReverseBytesS dst));
 8509   effect(KILL cr);
 8510 
 8511   format %{ "bswapl  $dst\n\t"
 8512             "sarl    $dst,16\n\t" %}
 8513   ins_encode %{
 8514     __ bswapl($dst$$Register);
 8515     __ sarl($dst$$Register, 16);
 8516   %}
 8517   ins_pipe( ialu_reg );
 8518 %}
 8519 
 8520 //---------- Zeros Count Instructions ------------------------------------------
 8521 
 8522 instruct countLeadingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
        // CountLeadingZerosI, register source, as a single lzcntl. Selected only
        // when UseCountLeadingZerosInstruction is set; flags are killed.
 8523   predicate(UseCountLeadingZerosInstruction);
 8524   match(Set dst (CountLeadingZerosI src));
 8525   effect(KILL cr);
 8526 
 8527   format %{ "lzcntl  $dst, $src\t# count leading zeros (int)" %}
 8528   ins_encode %{
 8529     __ lzcntl($dst$$Register, $src$$Register);
 8530   %}
 8531   ins_pipe(ialu_reg);
 8532 %}
 8533 
 8534 instruct countLeadingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
        // CountLeadingZerosI with the LoadI folded into the instruction: lzcntl
        // reads its operand directly from memory. Selected only when
        // UseCountLeadingZerosInstruction is set; flags are killed.
 8535   predicate(UseCountLeadingZerosInstruction);
 8536   match(Set dst (CountLeadingZerosI (LoadI src)));
 8537   effect(KILL cr);
 8538   ins_cost(175);
 8539   format %{ "lzcntl  $dst, $src\t# count leading zeros (int)" %}
 8540   ins_encode %{
 8541     __ lzcntl($dst$$Register, $src$$Address);
 8542   %}
 8543   ins_pipe(ialu_reg_mem);
 8544 %}
 8545 
 8546 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, rFlagsReg cr) %{
        // Fallback for CountLeadingZerosI when UseCountLeadingZerosInstruction is
        // off. bsrl yields the index of the highest set bit and sets ZF for a zero
        // source; in that case the index is forced to -1. The result is then
        // computed as 31 - index (negl followed by addl), which gives 32 for a
        // zero source.
 8547   predicate(!UseCountLeadingZerosInstruction);
 8548   match(Set dst (CountLeadingZerosI src));
 8549   effect(KILL cr);
 8550 
 8551   format %{ "bsrl    $dst, $src\t# count leading zeros (int)\n\t"
 8552             "jnz     skip\n\t"
 8553             "movl    $dst, -1\n"
 8554       "skip:\n\t"
 8555             "negl    $dst\n\t"
 8556             "addl    $dst, 31" %}
 8557   ins_encode %{
 8558     Register dst_reg = $dst$$Register;
 8559     Register src_reg = $src$$Register;
 8560     Label have_index;
 8561     __ bsrl(dst_reg, src_reg);
 8562     __ jccb(Assembler::notZero, have_index);
 8563     __ movl(dst_reg, -1);
 8564     __ bind(have_index);
 8565     __ negl(dst_reg);
 8566     __ addl(dst_reg, BitsPerInt - 1);
 8567   %}
 8568   ins_pipe(ialu_reg);
 8569 %}
 8570 
 8571 instruct countLeadingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
        // CountLeadingZerosL, register source, as a single lzcntq. The result is
        // an int register (rRegI) although the source is a long. Selected only
        // when UseCountLeadingZerosInstruction is set; flags are killed.
 8572   predicate(UseCountLeadingZerosInstruction);
 8573   match(Set dst (CountLeadingZerosL src));
 8574   effect(KILL cr);
 8575 
 8576   format %{ "lzcntq  $dst, $src\t# count leading zeros (long)" %}
 8577   ins_encode %{
 8578     __ lzcntq($dst$$Register, $src$$Register);
 8579   %}
 8580   ins_pipe(ialu_reg);
 8581 %}
 8582 
 8583 instruct countLeadingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
        // CountLeadingZerosL with the LoadL folded into the instruction: lzcntq
        // reads its operand directly from memory. Selected only when
        // UseCountLeadingZerosInstruction is set; flags are killed.
 8584   predicate(UseCountLeadingZerosInstruction);
 8585   match(Set dst (CountLeadingZerosL (LoadL src)));
 8586   effect(KILL cr);
 8587   ins_cost(175);
 8588   format %{ "lzcntq  $dst, $src\t# count leading zeros (long)" %}
 8589   ins_encode %{
 8590     __ lzcntq($dst$$Register, $src$$Address);
 8591   %}
 8592   ins_pipe(ialu_reg_mem);
 8593 %}
 8594 
 8595 instruct countLeadingZerosL_bsr(rRegI dst, rRegL src, rFlagsReg cr) %{
        // Fallback for CountLeadingZerosL when UseCountLeadingZerosInstruction is
        // off. bsrq yields the index of the highest set bit and sets ZF for a zero
        // source; in that case the index is forced to -1. The result is then
        // computed as 63 - index (negl followed by addl), which gives 64 for a
        // zero source.
 8596   predicate(!UseCountLeadingZerosInstruction);
 8597   match(Set dst (CountLeadingZerosL src));
 8598   effect(KILL cr);
 8599 
 8600   format %{ "bsrq    $dst, $src\t# count leading zeros (long)\n\t"
 8601             "jnz     skip\n\t"
 8602             "movl    $dst, -1\n"
 8603       "skip:\n\t"
 8604             "negl    $dst\n\t"
 8605             "addl    $dst, 63" %}
 8606   ins_encode %{
 8607     Register dst_reg = $dst$$Register;
 8608     Register src_reg = $src$$Register;
 8609     Label have_index;
 8610     __ bsrq(dst_reg, src_reg);
 8611     __ jccb(Assembler::notZero, have_index);
 8612     __ movl(dst_reg, -1);
 8613     __ bind(have_index);
 8614     __ negl(dst_reg);
 8615     __ addl(dst_reg, BitsPerLong - 1);
 8616   %}
 8617   ins_pipe(ialu_reg);
 8618 %}
 8619 
 8620 instruct countTrailingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
        // CountTrailingZerosI, register source, as a single tzcntl. Selected only
        // when UseCountTrailingZerosInstruction is set; flags are killed.
 8621   predicate(UseCountTrailingZerosInstruction);
 8622   match(Set dst (CountTrailingZerosI src));
 8623   effect(KILL cr);
 8624 
 8625   format %{ "tzcntl    $dst, $src\t# count trailing zeros (int)" %}
 8626   ins_encode %{
 8627     __ tzcntl($dst$$Register, $src$$Register);
 8628   %}
 8629   ins_pipe(ialu_reg);
 8630 %}
 8631 
 8632 instruct countTrailingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
        // CountTrailingZerosI with the LoadI folded into the instruction: tzcntl
        // reads its operand directly from memory. Selected only when
        // UseCountTrailingZerosInstruction is set; flags are killed.
 8633   predicate(UseCountTrailingZerosInstruction);
 8634   match(Set dst (CountTrailingZerosI (LoadI src)));
 8635   effect(KILL cr);
 8636   ins_cost(175);
 8637   format %{ "tzcntl    $dst, $src\t# count trailing zeros (int)" %}
 8638   ins_encode %{
 8639     __ tzcntl($dst$$Register, $src$$Address);
 8640   %}
 8641   ins_pipe(ialu_reg_mem);
 8642 %}
 8643 
 8644 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, rFlagsReg cr) %{
        // Fallback for CountTrailingZerosI when UseCountTrailingZerosInstruction
        // is off. bsfl yields the index of the lowest set bit, which is the
        // answer for a non-zero source; it sets ZF for a zero source, in which
        // case the result is loaded explicitly with BitsPerInt (32).
 8645   predicate(!UseCountTrailingZerosInstruction);
 8646   match(Set dst (CountTrailingZerosI src));
 8647   effect(KILL cr);
 8648 
 8649   format %{ "bsfl    $dst, $src\t# count trailing zeros (int)\n\t"
 8650             "jnz     done\n\t"
 8651             "movl    $dst, 32\n"
 8652       "done:" %}
 8653   ins_encode %{
 8654     Register dst_reg = $dst$$Register;
 8655     Label nonzero_src;
 8656     __ bsfl(dst_reg, $src$$Register);
 8657     __ jccb(Assembler::notZero, nonzero_src);
 8658     __ movl(dst_reg, BitsPerInt);
 8659     __ bind(nonzero_src);
 8660   %}
 8661   ins_pipe(ialu_reg);
 8662 %}
 8663 
 8664 instruct countTrailingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
        // CountTrailingZerosL, register source, as a single tzcntq. The result is
        // an int register (rRegI) although the source is a long. Selected only
        // when UseCountTrailingZerosInstruction is set; flags are killed.
 8665   predicate(UseCountTrailingZerosInstruction);
 8666   match(Set dst (CountTrailingZerosL src));
 8667   effect(KILL cr);
 8668 
 8669   format %{ "tzcntq    $dst, $src\t# count trailing zeros (long)" %}
 8670   ins_encode %{
 8671     __ tzcntq($dst$$Register, $src$$Register);
 8672   %}
 8673   ins_pipe(ialu_reg);
 8674 %}
 8675 
 8676 instruct countTrailingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
        // CountTrailingZerosL with the LoadL folded into the instruction: tzcntq
        // reads its operand directly from memory. Selected only when
        // UseCountTrailingZerosInstruction is set; flags are killed.
 8677   predicate(UseCountTrailingZerosInstruction);
 8678   match(Set dst (CountTrailingZerosL (LoadL src)));
 8679   effect(KILL cr);
 8680   ins_cost(175);
 8681   format %{ "tzcntq    $dst, $src\t# count trailing zeros (long)" %}
 8682   ins_encode %{
 8683     __ tzcntq($dst$$Register, $src$$Address);
 8684   %}
 8685   ins_pipe(ialu_reg_mem);
 8686 %}
 8687 
 8688 instruct countTrailingZerosL_bsf(rRegI dst, rRegL src, rFlagsReg cr) %{
        // Fallback for CountTrailingZerosL when UseCountTrailingZerosInstruction
        // is off. bsfq yields the index of the lowest set bit, which is the
        // answer for a non-zero source; it sets ZF for a zero source, in which
        // case the result is loaded explicitly with BitsPerLong (64).
 8689   predicate(!UseCountTrailingZerosInstruction);
 8690   match(Set dst (CountTrailingZerosL src));
 8691   effect(KILL cr);
 8692 
 8693   format %{ "bsfq    $dst, $src\t# count trailing zeros (long)\n\t"
 8694             "jnz     done\n\t"
 8695             "movl    $dst, 64\n"
 8696       "done:" %}
 8697   ins_encode %{
 8698     Register dst_reg = $dst$$Register;
 8699     Label nonzero_src;
 8700     __ bsfq(dst_reg, $src$$Register);
 8701     __ jccb(Assembler::notZero, nonzero_src);
 8702     __ movl(dst_reg, BitsPerLong);
 8703     __ bind(nonzero_src);
 8704   %}
 8705   ins_pipe(ialu_reg);
 8706 %}
 8707 
 8708 //--------------- Reverse Operation Instructions ----------------
 8709 instruct bytes_reversebit_int(rRegI dst, rRegI src, rRegI rtmp, rFlagsReg cr) %{
        // ReverseI on hardware without GFNI: delegates to the reverseI assembler
        // helper with xnoreg for both XMM temporaries and one general-purpose
        // temp. dst is declared TEMP as well as rtmp; flags are killed.
 8710   predicate(!VM_Version::supports_gfni());
 8711   match(Set dst (ReverseI src));
 8712   effect(TEMP dst, TEMP rtmp, KILL cr);
 8713   format %{ "reverse_int $dst $src\t! using $rtmp as TEMP" %}
 8714   ins_encode %{
 8715     __ reverseI($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp$$Register);
 8716   %}
 8717   ins_pipe( ialu_reg );
 8718 %}
 8719 
 8720 instruct bytes_reversebit_int_gfni(rRegI dst, rRegI src, vlRegF xtmp1, vlRegF xtmp2, rRegL rtmp, rFlagsReg cr) %{
        // ReverseI on hardware with GFNI: same reverseI assembler helper as the
        // non-GFNI rule, but supplied with two XMM temporaries in addition to
        // the general-purpose temp. All temps and dst are TEMP; flags are killed.
 8721   predicate(VM_Version::supports_gfni());
 8722   match(Set dst (ReverseI src));
 8723   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
 8724   format %{ "reverse_int $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
 8725   ins_encode %{
 8726     __ reverseI($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register);
 8727   %}
 8728   ins_pipe( ialu_reg );
 8729 %}
 8730 
 8731 instruct bytes_reversebit_long(rRegL dst, rRegL src, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
        // ReverseL on hardware without GFNI: delegates to the reverseL assembler
        // helper with xnoreg for both XMM temporaries and two general-purpose
        // temps. dst and both temps are TEMP; flags are killed.
 8732   predicate(!VM_Version::supports_gfni());
 8733   match(Set dst (ReverseL src));
 8734   effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, KILL cr);
 8735   format %{ "reverse_long $dst $src\t! using $rtmp1 and $rtmp2 as TEMP" %}
 8736   ins_encode %{
 8737     __ reverseL($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp1$$Register, $rtmp2$$Register);
 8738   %}
 8739   ins_pipe( ialu_reg );
 8740 %}
 8741 
 8742 instruct bytes_reversebit_long_gfni(rRegL dst, rRegL src, vlRegD xtmp1, vlRegD xtmp2, rRegL rtmp, rFlagsReg cr) %{
        // ReverseL on hardware with GFNI: same reverseL assembler helper as the
        // non-GFNI rule, but supplied with two XMM temporaries and only one
        // general-purpose temp (noreg is passed for the second). All temps and
        // dst are TEMP; flags are killed.
 8743   predicate(VM_Version::supports_gfni());
 8744   match(Set dst (ReverseL src));
 8745   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
 8746   format %{ "reverse_long $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
 8747   ins_encode %{
 8748     __ reverseL($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register, noreg);
 8749   %}
 8750   ins_pipe( ialu_reg );
 8751 %}
 8752 
 8753 //---------- Population Count Instructions -------------------------------------
 8754 
 8755 instruct popCountI(rRegI dst, rRegI src, rFlagsReg cr) %{
        // PopCountI, register source, as a single popcntl. Selected only when
        // UsePopCountInstruction is set; flags are killed.
 8756   predicate(UsePopCountInstruction);
 8757   match(Set dst (PopCountI src));
 8758   effect(KILL cr);
 8759 
 8760   format %{ "popcnt  $dst, $src" %}
 8761   ins_encode %{
 8762     __ popcntl($dst$$Register, $src$$Register);
 8763   %}
 8764   ins_pipe(ialu_reg);
 8765 %}
 8766 
 8767 instruct popCountI_mem(rRegI dst, memory mem, rFlagsReg cr) %{
        // PopCountI with the LoadI folded into the instruction: popcntl reads its
        // operand directly from memory. Selected only when UsePopCountInstruction
        // is set; flags are killed. Uses the register-memory pipeline class, like
        // the other memory-operand bit-counting rules (e.g.
        // countLeadingZerosI_mem).
 8768   predicate(UsePopCountInstruction);
 8769   match(Set dst (PopCountI (LoadI mem)));
 8770   effect(KILL cr);
 8771 
 8772   format %{ "popcnt  $dst, $mem" %}
 8773   ins_encode %{
 8774     __ popcntl($dst$$Register, $mem$$Address);
 8775   %}
 8776   ins_pipe(ialu_reg_mem);
 8777 %}
 8778 
 8779 // Note: Long.bitCount(long) returns an int.
 8780 instruct popCountL(rRegI dst, rRegL src, rFlagsReg cr) %{
        // PopCountL, register source, as a single popcntq. The destination is an
        // int register (rRegI) even though the source is a long. Selected only
        // when UsePopCountInstruction is set; flags are killed.
 8781   predicate(UsePopCountInstruction);
 8782   match(Set dst (PopCountL src));
 8783   effect(KILL cr);
 8784 
 8785   format %{ "popcnt  $dst, $src" %}
 8786   ins_encode %{
 8787     __ popcntq($dst$$Register, $src$$Register);
 8788   %}
 8789   ins_pipe(ialu_reg);
 8790 %}
 8791 
 8792 // Note: Long.bitCount(long) returns an int.
 8793 instruct popCountL_mem(rRegI dst, memory mem, rFlagsReg cr) %{
        // PopCountL with the LoadL folded into the instruction: popcntq reads its
        // operand directly from memory; the destination is an int register.
        // Selected only when UsePopCountInstruction is set; flags are killed.
        // Uses the register-memory pipeline class, like the other memory-operand
        // bit-counting rules (e.g. countLeadingZerosL_mem).
 8794   predicate(UsePopCountInstruction);
 8795   match(Set dst (PopCountL (LoadL mem)));
 8796   effect(KILL cr);
 8797 
 8798   format %{ "popcnt  $dst, $mem" %}
 8799   ins_encode %{
 8800     __ popcntq($dst$$Register, $mem$$Address);
 8801   %}
 8802   ins_pipe(ialu_reg_mem);
 8803 %}
 8804 
 8805 
 8806 //----------MemBar Instructions-----------------------------------------------
 8807 // Memory barrier flavors
 8808 
 8809 instruct membar_acquire()
 8810 %{
        // Matches both MemBarAcquire and LoadFence. No machine code is emitted:
        // the rule has size 0, cost 0 and an empty ins_encode.
 8811   match(MemBarAcquire);
 8812   match(LoadFence);
 8813   ins_cost(0);
 8814 
 8815   size(0);
 8816   format %{ "MEMBAR-acquire ! (empty encoding)" %}
 8817   ins_encode();
 8818   ins_pipe(empty);
 8819 %}
 8820 
 8821 instruct membar_acquire_lock()
 8822 %{
        // Matches MemBarAcquireLock. No machine code is emitted (size 0, empty
        // ins_encode); the format string gives the rationale: a CMPXCHG in the
        // preceding FastLock already provides the ordering.
 8823   match(MemBarAcquireLock);
 8824   ins_cost(0);
 8825 
 8826   size(0);
 8827   format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
 8828   ins_encode();
 8829   ins_pipe(empty);
 8830 %}
 8831 
 8832 instruct membar_release()
 8833 %{
        // Matches both MemBarRelease and StoreFence. No machine code is emitted:
        // the rule has size 0, cost 0 and an empty ins_encode.
 8834   match(MemBarRelease);
 8835   match(StoreFence);
 8836   ins_cost(0);
 8837 
 8838   size(0);
 8839   format %{ "MEMBAR-release ! (empty encoding)" %}
 8840   ins_encode();
 8841   ins_pipe(empty);
 8842 %}
 8843 
 8844 instruct membar_release_lock()
 8845 %{
        // Matches MemBarReleaseLock. No machine code is emitted (size 0, empty
        // ins_encode); the format string gives the rationale: a FastUnlock
        // follows this barrier.
 8846   match(MemBarReleaseLock);
 8847   ins_cost(0);
 8848 
 8849   size(0);
 8850   format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
 8851   ins_encode();
 8852   ins_pipe(empty);
 8853 %}
 8854 
 8855 instruct membar_storeload(rFlagsReg cr) %{
        // Matches MemBarStoreLoad and emits membar(Assembler::StoreLoad). The
        // format string shows the barrier as a locked add of 0 to [rsp]; flags
        // are declared killed. High cost (400).
 8856   match(MemBarStoreLoad);
 8857   effect(KILL cr);
 8858   ins_cost(400);
 8859 
 8860   format %{
 8861     $$template
 8862     $$emit$$"lock addl [rsp + #0], 0\t! membar_storeload"
 8863   %}
 8864   ins_encode %{
 8865     __ membar(Assembler::StoreLoad);
 8866   %}
 8867   ins_pipe(pipe_slow);
 8868 %}
 8869 
 8870 instruct membar_volatile(rFlagsReg cr) %{
        // Matches MemBarVolatile and emits membar(Assembler::StoreLoad). The
        // format string shows the barrier as a locked add of 0 to [rsp]; flags
        // are declared killed. The zero-cost unnecessary_membar_volatile rule
        // matches the same node when its predicate holds.
 8871   match(MemBarVolatile);
 8872   effect(KILL cr);
 8873   ins_cost(400);
 8874 
 8875   format %{
 8876     $$template
 8877     $$emit$$"lock addl [rsp + #0], 0\t! membar_volatile"
 8878   %}
 8879   ins_encode %{
 8880     __ membar(Assembler::StoreLoad);
 8881   %}
 8882   ins_pipe(pipe_slow);
 8883 %}
 8884 
 8885 instruct unnecessary_membar_volatile()
 8886 %{
        // Elides a MemBarVolatile when Matcher::post_store_load_barrier(n) reports
        // true for the node: no code is emitted (size 0, empty ins_encode) and the
        // cost is 0, versus 400 for membar_volatile.
 8887   match(MemBarVolatile);
 8888   predicate(Matcher::post_store_load_barrier(n));
 8889   ins_cost(0);
 8890 
 8891   size(0);
 8892   format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
 8893   ins_encode();
 8894   ins_pipe(empty);
 8895 %}
 8896 
 8897 instruct membar_full(rFlagsReg cr) %{
        // Matches MemBarFull and emits membar(Assembler::StoreLoad), the same
        // encoding used by membar_storeload and membar_volatile. The format
        // string shows it as a locked add of 0 to [rsp]; flags are killed.
 8898   match(MemBarFull);
 8899   effect(KILL cr);
 8900   ins_cost(400);
 8901 
 8902   format %{
 8903     $$template
 8904     $$emit$$"lock addl [rsp + #0], 0\t! membar_full"
 8905   %}
 8906   ins_encode %{
 8907     __ membar(Assembler::StoreLoad);
 8908   %}
 8909   ins_pipe(pipe_slow);
 8910 %}
 8911 
 8912 instruct membar_storestore() %{
        // Matches both MemBarStoreStore and StoreStoreFence. No machine code is
        // emitted: the rule has size 0, cost 0 and an empty ins_encode.
 8913   match(MemBarStoreStore);
 8914   match(StoreStoreFence);
 8915   ins_cost(0);
 8916 
 8917   size(0);
 8918   format %{ "MEMBAR-storestore (empty encoding)" %}
 8919   ins_encode( );
 8920   ins_pipe(empty);
 8921 %}
 8922 
 8923 //----------Move Instructions--------------------------------------------------
 8924 
 8925 instruct castX2P(rRegP dst, rRegL src)
 8926 %{
        // CastX2P: reinterpret a long register as a pointer. A movptr is emitted
        // only when dst and src ended up in different registers; otherwise the
        // rule produces no code.
 8927   match(Set dst (CastX2P src));
 8928 
 8929   format %{ "movq    $dst, $src\t# long->ptr" %}
 8930   ins_encode %{
 8931     if ($dst$$reg != $src$$reg) {
 8932       __ movptr($dst$$Register, $src$$Register);
 8933     }
 8934   %}
 8935   ins_pipe(ialu_reg_reg); // XXX
 8936 %}
 8937 
 8938 instruct castP2X(rRegL dst, rRegP src)
 8939 %{
        // CastP2X: reinterpret a pointer register as a long. A movptr is emitted
        // only when dst and src ended up in different registers; otherwise the
        // rule produces no code.
 8940   match(Set dst (CastP2X src));
 8941 
 8942   format %{ "movq    $dst, $src\t# ptr -> long" %}
 8943   ins_encode %{
 8944     if ($dst$$reg != $src$$reg) {
 8945       __ movptr($dst$$Register, $src$$Register);
 8946     }
 8947   %}
 8948   ins_pipe(ialu_reg_reg); // XXX
 8949 %}
 8950 
 8951 // Convert oop into int for vectors alignment masking
 8952 instruct convP2I(rRegI dst, rRegP src)
 8953 %{
        // Folds ConvL2I(CastP2X src) into one 32-bit register move, taking the
        // low 32 bits of the pointer. Unlike castP2X, the movl is always emitted.
 8954   match(Set dst (ConvL2I (CastP2X src)));
 8955 
 8956   format %{ "movl    $dst, $src\t# ptr -> int" %}
 8957   ins_encode %{
 8958     __ movl($dst$$Register, $src$$Register);
 8959   %}
 8960   ins_pipe(ialu_reg_reg); // XXX
 8961 %}
 8962 
 8963 // Convert compressed oop into int for vectors alignment masking
 8964 // in case of 32bit oops (heap < 4Gb).
 8965 instruct convN2I(rRegI dst, rRegN src)
 8966 %{
        // Folds ConvL2I(CastP2X(DecodeN src)) into one 32-bit move taken straight
        // from the narrow-oop register, with no decode step. Restricted by the
        // predicate to a compressed-oop shift of 0.
 8967   predicate(CompressedOops::shift() == 0);
 8968   match(Set dst (ConvL2I (CastP2X (DecodeN src))));
 8969 
 8970   format %{ "movl    $dst, $src\t# compressed ptr -> int" %}
 8971   ins_encode %{
 8972     __ movl($dst$$Register, $src$$Register);
 8973   %}
 8974   ins_pipe(ialu_reg_reg); // XXX
 8975 %}
 8976 
 8977 // Convert oop pointer into compressed form
 8978 instruct encodeHeapOop(rRegN dst, rRegP src, rFlagsReg cr) %{
        // EncodeP for a pointer whose type is not known to be non-null (the
        // NotNull case is handled by encodeHeapOop_not_null). The source is
        // copied into dst when the registers differ, then encode_heap_oop
        // compresses it in place. Flags are killed.
 8979   predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
 8980   match(Set dst (EncodeP src));
 8981   effect(KILL cr);
 8982   format %{ "encode_heap_oop $dst,$src" %}
 8983   ins_encode %{
 8984     Register s = $src$$Register;
 8985     Register d = $dst$$Register;
 8986     if (s != d) {
 8987       __ movq(d, s);
 8988     }
 8989     __ encode_heap_oop(d);
 8990   %}
 8991   ins_pipe(ialu_reg_long);
 8992 %}
 8993 
 8994 instruct encodeHeapOop_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
        // EncodeP for a pointer whose type is TypePtr::NotNull. Uses the
        // two-register encode_heap_oop_not_null helper, so no explicit copy from
        // src to dst is needed here. Flags are killed.
 8995   predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
 8996   match(Set dst (EncodeP src));
 8997   effect(KILL cr);
 8998   format %{ "encode_heap_oop_not_null $dst,$src" %}
 8999   ins_encode %{
 9000     __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
 9001   %}
 9002   ins_pipe(ialu_reg_long);
 9003 %}
 9004 
 9005 instruct decodeHeapOop(rRegP dst, rRegN src, rFlagsReg cr) %{
        // DecodeN for a result type that is neither NotNull nor Constant (those
        // two cases are handled by decodeHeapOop_not_null). The source is copied
        // into dst when the registers differ, then decode_heap_oop expands it in
        // place. Flags are killed.
 9006   predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
 9007             n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
 9008   match(Set dst (DecodeN src));
 9009   effect(KILL cr);
 9010   format %{ "decode_heap_oop $dst,$src" %}
 9011   ins_encode %{
 9012     Register s = $src$$Register;
 9013     Register d = $dst$$Register;
 9014     if (s != d) {
 9015       __ movq(d, s);
 9016     }
 9017     __ decode_heap_oop(d);
 9018   %}
 9019   ins_pipe(ialu_reg_long);
 9020 %}
 9021 
 9022 instruct decodeHeapOop_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
        // DecodeN for a result type that is NotNull or Constant. Picks the
        // two-register decode_heap_oop_not_null helper when src and dst differ,
        // and the in-place single-register form when they are the same register.
        // Flags are killed.
 9023   predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
 9024             n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
 9025   match(Set dst (DecodeN src));
 9026   effect(KILL cr);
 9027   format %{ "decode_heap_oop_not_null $dst,$src" %}
 9028   ins_encode %{
 9029     Register s = $src$$Register;
 9030     Register d = $dst$$Register;
 9031     if (s != d) {
 9032       __ decode_heap_oop_not_null(d, s);
 9033     } else {
 9034       __ decode_heap_oop_not_null(d);
 9035     }
 9036   %}
 9037   ins_pipe(ialu_reg_long);
 9038 %}
 9039 
 9040 instruct encodeKlass_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
        // EncodePKlass via the encode_and_move_klass_not_null helper. dst is
        // declared TEMP in addition to being the result; flags are killed.
        // NOTE(review): TEMP dst presumably keeps dst from sharing a register
        // with src because the helper writes dst before it is done with src -
        // confirm against the helper.
 9041   match(Set dst (EncodePKlass src));
 9042   effect(TEMP dst, KILL cr);
 9043   format %{ "encode_and_move_klass_not_null $dst,$src" %}
 9044   ins_encode %{
 9045     __ encode_and_move_klass_not_null($dst$$Register, $src$$Register);
 9046   %}
 9047   ins_pipe(ialu_reg_long);
 9048 %}
 9049 
 9050 instruct decodeKlass_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
        // DecodeNKlass via the decode_and_move_klass_not_null helper. dst is
        // declared TEMP in addition to being the result; flags are killed.
        // NOTE(review): TEMP dst presumably keeps dst from sharing a register
        // with src because the helper writes dst before it is done with src -
        // confirm against the helper.
 9051   match(Set dst (DecodeNKlass src));
 9052   effect(TEMP dst, KILL cr);
 9053   format %{ "decode_and_move_klass_not_null $dst,$src" %}
 9054   ins_encode %{
 9055     __ decode_and_move_klass_not_null($dst$$Register, $src$$Register);
 9056   %}
 9057   ins_pipe(ialu_reg_long);
 9058 %}
 9059 
 9060 //----------Conditional Move---------------------------------------------------
 9061 // Jump
 9062 // dummy instruction for generating temp registers
 9063 instruct jumpXtnd_offset(rRegL switch_val, immI2 shift, rRegI dest) %{
        // Jump-table dispatch on (switch_val << shift). predicate(false) means the
        // matcher never selects this rule as written. The encoding loads the
        // table address ($constantaddress) into the TEMP register dest and jumps
        // through [dest + switch_val * scale], with the scale taken from the
        // shift constant.
 9064   match(Jump (LShiftL switch_val shift));
 9065   ins_cost(350);
 9066   predicate(false);
 9067   effect(TEMP dest);
 9068 
 9069   format %{ "leaq    $dest, [$constantaddress]\n\t"
 9070             "jmp     [$dest + $switch_val << $shift]\n\t" %}
 9071   ins_encode %{
 9072     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
 9073     // to do that and the compiler is using that register as one it can allocate.
 9074     // So we build it all by hand.
 9075     // Address index(noreg, switch_reg, (Address::ScaleFactor)$shift$$constant);
 9076     // ArrayAddress dispatch(table, index);
 9077     Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant);
 9078     __ lea($dest$$Register, $constantaddress);
 9079     __ jmp(dispatch);
 9080   %}
 9081   ins_pipe(pipe_jmp);
 9082 %}
 9083 
 9084 instruct jumpXtnd_addr(rRegL switch_val, immI2 shift, immL32 offset, rRegI dest) %{
        // Jump-table dispatch on ((switch_val << shift) + offset). The encoding
        // loads the table address ($constantaddress) into the TEMP register dest
        // and jumps through [dest + switch_val * scale + offset]; the offset
        // constant is narrowed to int for the displacement.
 9085   match(Jump (AddL (LShiftL switch_val shift) offset));
 9086   ins_cost(350);
 9087   effect(TEMP dest);
 9088 
 9089   format %{ "leaq    $dest, [$constantaddress]\n\t"
 9090             "jmp     [$dest + $switch_val << $shift + $offset]\n\t" %}
 9091   ins_encode %{
 9092     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
 9093     // to do that and the compiler is using that register as one it can allocate.
 9094     // So we build it all by hand.
 9095     // Address index(noreg, switch_reg, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
 9096     // ArrayAddress dispatch(table, index);
 9097     Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
 9098     __ lea($dest$$Register, $constantaddress);
 9099     __ jmp(dispatch);
 9100   %}
 9101   ins_pipe(pipe_jmp);
 9102 %}
 9103 
 9104 instruct jumpXtnd(rRegL switch_val, rRegI dest) %{
        // Jump-table dispatch on an unscaled switch value. The encoding loads the
        // table address ($constantaddress) into the TEMP register dest and jumps
        // through [dest + switch_val] (scale factor times_1).
 9105   match(Jump switch_val);
 9106   ins_cost(350);
 9107   effect(TEMP dest);
 9108 
 9109   format %{ "leaq    $dest, [$constantaddress]\n\t"
 9110             "jmp     [$dest + $switch_val]\n\t" %}
 9111   ins_encode %{
 9112     // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
 9113     // to do that and the compiler is using that register as one it can allocate.
 9114     // So we build it all by hand.
 9115     // Address index(noreg, switch_reg, Address::times_1);
 9116     // ArrayAddress dispatch(table, index);
 9117     Address dispatch($dest$$Register, $switch_val$$Register, Address::times_1);
 9118     __ lea($dest$$Register, $constantaddress);
 9119     __ jmp(dispatch);
 9120   %}
 9121   ins_pipe(pipe_jmp);
 9122 %}
 9123 
 9124 // Conditional move
 9125 instruct cmovI_imm_01(rRegI dst, immI_1 src, rFlagsReg cr, cmpOp cop)
 9126 %{
        // CMoveI (signed flags) whose two values are the constants 1 and 0:
        // src is immI_1, and the predicate requires the other value input (the
        // position bound to dst in the match rule) to be the int constant 0.
        // Instead of a cmov, the result is materialized with setb on the negated
        // condition. Cheaper (100) than the generic register cmov rules (200).
 9127   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
 9128   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 9129 
 9130   ins_cost(100); // XXX
 9131   format %{ "setbn$cop $dst\t# signed, int" %}
 9132   ins_encode %{
 9133     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9134     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9135   %}
 9136   ins_pipe(ialu_reg);
 9137 %}
 9138 
 9139 instruct cmovI_reg(rRegI dst, rRegI src, rFlagsReg cr, cmpOp cop)
 9140 %{
        // Register-register int conditional move on signed flags, two-operand
        // form: dst is both one of the CMoveI inputs and the result. Used when
        // UseAPX is off; cmovI_reg_ndd covers the UseAPX case.
 9141   predicate(!UseAPX);
 9142   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 9143 
 9144   ins_cost(200); // XXX
 9145   format %{ "cmovl$cop $dst, $src\t# signed, int" %}
 9146   ins_encode %{
 9147     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9148   %}
 9149   ins_pipe(pipe_cmov_reg);
 9150 %}
 9151 
 9152 instruct cmovI_reg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr, cmpOp cop)
 9153 %{
        // Register-register int conditional move on signed flags, three-operand
        // form used when UseAPX is on: emits ecmovl with a destination separate
        // from both sources ("ndd" in the format string).
 9154   predicate(UseAPX);
 9155   match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
 9156 
 9157   ins_cost(200);
 9158   format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, int ndd" %}
 9159   ins_encode %{
 9160     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9161   %}
 9162   ins_pipe(pipe_cmov_reg);
 9163 %}
 9164 
 9165 instruct cmovI_imm_01U(rRegI dst, immI_1 src, rFlagsRegU cr, cmpOpU cop)
 9166 %{
        // Unsigned-flags (rFlagsRegU / cmpOpU) variant of cmovI_imm_01: a CMoveI
        // between the constants 1 (src, immI_1) and 0 (checked by the predicate)
        // is materialized with setb on the negated condition.
 9167   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
 9168   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 9169 
 9170   ins_cost(100); // XXX
 9171   format %{ "setbn$cop $dst\t# unsigned, int" %}
 9172   ins_encode %{
 9173     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9174     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9175   %}
 9176   ins_pipe(ialu_reg);
 9177 %}
 9178 
 9179 instruct cmovI_regU(cmpOpU cop, rFlagsRegU cr, rRegI dst, rRegI src) %{
        // Register-register int conditional move on unsigned flags, two-operand
        // form (dst is both input and result). Used when UseAPX is off. The
        // operand order (cop, cr, dst, src) is the order in which cmovI_regUCF
        // passes its operands when it expands to this rule.
 9180   predicate(!UseAPX);
 9181   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 9182 
 9183   ins_cost(200); // XXX
 9184   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
 9185   ins_encode %{
 9186     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9187   %}
 9188   ins_pipe(pipe_cmov_reg);
 9189 %}
 9190 
 9191 instruct cmovI_regU_ndd(rRegI dst, cmpOpU cop, rFlagsRegU cr, rRegI src1, rRegI src2) %{
        // Register-register int conditional move on unsigned flags, three-operand
        // form used when UseAPX is on: emits ecmovl with a destination separate
        // from both sources.
 9192   predicate(UseAPX);
 9193   match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
 9194 
 9195   ins_cost(200);
 9196   format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
 9197   ins_encode %{
 9198     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9199   %}
 9200   ins_pipe(pipe_cmov_reg);
 9201 %}
 9202 
 9203 instruct cmovI_imm_01UCF(rRegI dst, immI_1 src, rFlagsRegUCF cr, cmpOpUCF cop)
 9204 %{
        // rFlagsRegUCF / cmpOpUCF variant of cmovI_imm_01: a CMoveI between the
        // constants 1 (src, immI_1) and 0 (checked by the predicate) is
        // materialized with setb on the negated condition.
 9205   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
 9206   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 9207 
 9208   ins_cost(100); // XXX
 9209   format %{ "setbn$cop $dst\t# unsigned, int" %}
 9210   ins_encode %{
 9211     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9212     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9213   %}
 9214   ins_pipe(ialu_reg);
 9215 %}
 9216 
 9217 instruct cmovI_imm_01UCFE(rRegI dst, immI_1 src, rFlagsRegUCFE cr, cmpOpUCFE cop)
 9218 %{
        // rFlagsRegUCFE / cmpOpUCFE variant of cmovI_imm_01: a CMoveI between the
        // constants 1 (src, immI_1) and 0 (checked by the predicate) is
        // materialized with setb on the negated condition.
 9219   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
 9220   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 9221 
 9222   ins_cost(100); // XXX
 9223   format %{ "setbn$cop $dst\t# signed, unsigned, int" %}
 9224   ins_encode %{
 9225     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9226     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9227   %}
 9228   ins_pipe(ialu_reg);
 9229 %}
 9230 
 9231 instruct cmovI_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
        // Int conditional move on rFlagsRegUCF flags. Has no encoding of its own:
        // it expands to cmovI_regU with the same operands in the same order.
 9232   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 9233 
 9234   ins_cost(200);
 9235   expand %{
 9236     cmovI_regU(cop, cr, dst, src);
 9237   %}
 9238 %}
 9239 
 9240 instruct cmovI_regUCFE_ndd(rRegI dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegI src1, rRegI src2) %{
        // Int conditional move on rFlagsRegUCFE flags, three-operand form: emits
        // ecmovl with a destination separate from both sources. This rule has no
        // predicate of its own.
 9241   match(Set dst (CMoveI (Binary cop cr) (Binary src1 src2)));
 9242 
 9243   ins_cost(200);
 9244   format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, unsigned, int ndd" %}
 9245   ins_encode %{
 9246     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9247   %}
 9248   ins_pipe(pipe_cmov_reg);
 9249 %}
 9250 
 9251 instruct cmovI_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
        // Int conditional move for a cmpOpUCF2 "not equal" test (the predicate
        // checks the Bool's test is BoolTest::ne). Two cmovs are emitted, so src
        // is moved into dst if either the parity flag is set or the operands
        // compared not-equal.
        // NOTE(review): the parity case presumably corresponds to an unordered
        // floating-point compare - confirm against the cmpOpUCF2 definition.
 9252   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9253   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 9254 
 9255   ins_cost(200); // XXX
 9256   format %{ "cmovpl  $dst, $src\n\t"
 9257             "cmovnel $dst, $src" %}
 9258   ins_encode %{
 9259     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 9260     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 9261   %}
 9262   ins_pipe(pipe_cmov_reg);
 9263 %}
 9264 
 9265 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 9266 // inputs of the CMove
 9267 instruct cmovI_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
 9268   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9269   match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));
 9270   effect(TEMP dst);
 9271 
 9272   ins_cost(200); // XXX
 9273   format %{ "cmovpl  $dst, $src\n\t"
 9274             "cmovnel $dst, $src" %}
 9275   ins_encode %{
 9276     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 9277     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 9278   %}
 9279   ins_pipe(pipe_cmov_reg);
 9280 %}
 9281 
 9282 // Conditional move
 9283 instruct cmovI_mem(cmpOp cop, rFlagsReg cr, rRegI dst, memory src) %{
 9284   predicate(!UseAPX);
 9285   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 9286 
 9287   ins_cost(250); // XXX
 9288   format %{ "cmovl$cop $dst, $src\t# signed, int" %}
 9289   ins_encode %{
 9290     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 9291   %}
 9292   ins_pipe(pipe_cmov_mem);
 9293 %}
 9294 
 9295 // Conditional move
 9296 instruct cmovI_rReg_rReg_mem_ndd(rRegI dst, cmpOp cop, rFlagsReg cr, rRegI src1, memory src2)
 9297 %{
 9298   predicate(UseAPX);
 9299   match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
 9300 
 9301   ins_cost(250);
 9302   format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, int ndd" %}
 9303   ins_encode %{
 9304     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9305   %}
 9306   ins_pipe(pipe_cmov_mem);
 9307 %}
 9308 
 9309 // Conditional move
 9310 instruct cmovI_memU(cmpOpU cop, rFlagsRegU cr, rRegI dst, memory src)
 9311 %{
 9312   predicate(!UseAPX);
 9313   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 9314 
 9315   ins_cost(250); // XXX
 9316   format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
 9317   ins_encode %{
 9318     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 9319   %}
 9320   ins_pipe(pipe_cmov_mem);
 9321 %}
 9322 
// Int conditional move from memory (LoadI folded in) for cmpOpUCF/rFlagsRegUCF
// conditions. Has no encoding of its own: the expand rule rewrites it to
// cmovI_memU with the same four operands.
instruct cmovI_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, memory src) %{
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));

  ins_cost(250);
  expand %{
    cmovI_memU(cop, cr, dst, src);
  %}
%}
 9331 
 9332 instruct cmovI_rReg_rReg_memU_ndd(rRegI dst, cmpOpU cop, rFlagsRegU cr, rRegI src1, memory src2)
 9333 %{
 9334   predicate(UseAPX);
 9335   match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
 9336 
 9337   ins_cost(250);
 9338   format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, int ndd" %}
 9339   ins_encode %{
 9340     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9341   %}
 9342   ins_pipe(pipe_cmov_mem);
 9343 %}
 9344 
 9345 instruct cmovI_rReg_rReg_memUCFE_ndd(rRegI dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegI src1, memory src2)
 9346 %{
 9347   match(Set dst (CMoveI (Binary cop cr) (Binary src1 (LoadI src2))));
 9348 
 9349   ins_cost(250);
 9350   format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, unsigned, int ndd" %}
 9351   ins_encode %{
 9352     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9353   %}
 9354   ins_pipe(pipe_cmov_mem);
 9355 %}
 9356 
 9357 // Conditional move
 9358 instruct cmovN_reg(rRegN dst, rRegN src, rFlagsReg cr, cmpOp cop)
 9359 %{
 9360   predicate(!UseAPX);
 9361   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 9362 
 9363   ins_cost(200); // XXX
 9364   format %{ "cmovl$cop $dst, $src\t# signed, compressed ptr" %}
 9365   ins_encode %{
 9366     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9367   %}
 9368   ins_pipe(pipe_cmov_reg);
 9369 %}
 9370 
 9371 // Conditional move ndd
 9372 instruct cmovN_reg_ndd(rRegN dst, rRegN src1, rRegN src2, rFlagsReg cr, cmpOp cop)
 9373 %{
 9374   predicate(UseAPX);
 9375   match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
 9376 
 9377   ins_cost(200);
 9378   format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, compressed ptr ndd" %}
 9379   ins_encode %{
 9380     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9381   %}
 9382   ins_pipe(pipe_cmov_reg);
 9383 %}
 9384 
 9385 // Conditional move
 9386 instruct cmovN_regU(cmpOpU cop, rFlagsRegU cr, rRegN dst, rRegN src)
 9387 %{
 9388   predicate(!UseAPX);
 9389   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 9390 
 9391   ins_cost(200); // XXX
 9392   format %{ "cmovl$cop $dst, $src\t# unsigned, compressed ptr" %}
 9393   ins_encode %{
 9394     __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9395   %}
 9396   ins_pipe(pipe_cmov_reg);
 9397 %}
 9398 
// Compressed-pointer register conditional move for cmpOpUCF/rFlagsRegUCF
// conditions. Has no encoding of its own: the expand rule rewrites it to
// cmovN_regU with the same four operands.
instruct cmovN_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
  match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));

  ins_cost(200);
  expand %{
    cmovN_regU(cop, cr, dst, src);
  %}
%}
 9407 
 9408 // Conditional move ndd
 9409 instruct cmovN_regU_ndd(rRegN dst, cmpOpU cop, rFlagsRegU cr, rRegN src1, rRegN src2)
 9410 %{
 9411   predicate(UseAPX);
 9412   match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
 9413 
 9414   ins_cost(200);
 9415   format %{ "ecmovl$cop $dst, $src1, $src2\t# unsigned, compressed ptr ndd" %}
 9416   ins_encode %{
 9417     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9418   %}
 9419   ins_pipe(pipe_cmov_reg);
 9420 %}
 9421 
 9422 instruct cmovN_regUCFE_ndd(rRegN dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegN src1, rRegN src2) %{
 9423   match(Set dst (CMoveN (Binary cop cr) (Binary src1 src2)));
 9424 
 9425   ins_cost(200);
 9426   format %{ "ecmovl$cop $dst, $src1, $src2\t# signed, unsigned, compressed ptr ndd" %}
 9427   ins_encode %{
 9428     __ ecmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9429   %}
 9430   ins_pipe(pipe_cmov_reg);
 9431 %}
 9432 
 9433 instruct cmovN_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
 9434   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9435   match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
 9436 
 9437   ins_cost(200); // XXX
 9438   format %{ "cmovpl  $dst, $src\n\t"
 9439             "cmovnel $dst, $src" %}
 9440   ins_encode %{
 9441     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 9442     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 9443   %}
 9444   ins_pipe(pipe_cmov_reg);
 9445 %}
 9446 
 9447 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 9448 // inputs of the CMove
 9449 instruct cmovN_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
 9450   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9451   match(Set dst (CMoveN (Binary cop cr) (Binary src dst)));
 9452 
 9453   ins_cost(200); // XXX
 9454   format %{ "cmovpl  $dst, $src\n\t"
 9455             "cmovnel $dst, $src" %}
 9456   ins_encode %{
 9457     __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
 9458     __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
 9459   %}
 9460   ins_pipe(pipe_cmov_reg);
 9461 %}
 9462 
 9463 // Conditional move
 9464 instruct cmovP_reg(rRegP dst, rRegP src, rFlagsReg cr, cmpOp cop)
 9465 %{
 9466   predicate(!UseAPX);
 9467   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 9468 
 9469   ins_cost(200); // XXX
 9470   format %{ "cmovq$cop $dst, $src\t# signed, ptr" %}
 9471   ins_encode %{
 9472     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9473   %}
 9474   ins_pipe(pipe_cmov_reg);  // XXX
 9475 %}
 9476 
 9477 // Conditional move ndd
 9478 instruct cmovP_reg_ndd(rRegP dst, rRegP src1, rRegP src2, rFlagsReg cr, cmpOp cop)
 9479 %{
 9480   predicate(UseAPX);
 9481   match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
 9482 
 9483   ins_cost(200);
 9484   format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, ptr ndd" %}
 9485   ins_encode %{
 9486     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9487   %}
 9488   ins_pipe(pipe_cmov_reg);
 9489 %}
 9490 
 9491 // Conditional move
 9492 instruct cmovP_regU(cmpOpU cop, rFlagsRegU cr, rRegP dst, rRegP src)
 9493 %{
 9494   predicate(!UseAPX);
 9495   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 9496 
 9497   ins_cost(200); // XXX
 9498   format %{ "cmovq$cop $dst, $src\t# unsigned, ptr" %}
 9499   ins_encode %{
 9500     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9501   %}
 9502   ins_pipe(pipe_cmov_reg); // XXX
 9503 %}
 9504 
 9505 // Conditional move ndd
 9506 instruct cmovP_regU_ndd(rRegP dst, cmpOpU cop, rFlagsRegU cr, rRegP src1, rRegP src2)
 9507 %{
 9508   predicate(UseAPX);
 9509   match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
 9510 
 9511   ins_cost(200);
 9512   format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, ptr ndd" %}
 9513   ins_encode %{
 9514     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9515   %}
 9516   ins_pipe(pipe_cmov_reg);
 9517 %}
 9518 
// Pointer register conditional move for cmpOpUCF/rFlagsRegUCF conditions.
// Has no encoding of its own: the expand rule rewrites it to cmovP_regU with
// the same four operands.
instruct cmovP_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));

  ins_cost(200);
  expand %{
    cmovP_regU(cop, cr, dst, src);
  %}
%}
 9527 
 9528 instruct cmovP_regUCFE_ndd(rRegP dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegP src1, rRegP src2) %{
 9529   match(Set dst (CMoveP (Binary cop cr) (Binary src1 src2)));
 9530 
 9531   ins_cost(200);
 9532   format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, unsigned, ptr ndd" %}
 9533   ins_encode %{
 9534     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9535   %}
 9536   ins_pipe(pipe_cmov_reg);
 9537 %}
 9538 
 9539 instruct cmovP_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
 9540   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9541   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 9542 
 9543   ins_cost(200); // XXX
 9544   format %{ "cmovpq  $dst, $src\n\t"
 9545             "cmovneq $dst, $src" %}
 9546   ins_encode %{
 9547     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 9548     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 9549   %}
 9550   ins_pipe(pipe_cmov_reg);
 9551 %}
 9552 
 9553 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 9554 // inputs of the CMove
 9555 instruct cmovP_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
 9556   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9557   match(Set dst (CMoveP (Binary cop cr) (Binary src dst)));
 9558 
 9559   ins_cost(200); // XXX
 9560   format %{ "cmovpq  $dst, $src\n\t"
 9561             "cmovneq $dst, $src" %}
 9562   ins_encode %{
 9563     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 9564     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 9565   %}
 9566   ins_pipe(pipe_cmov_reg);
 9567 %}
 9568 
 9569 instruct cmovL_imm_01(rRegL dst, immL1 src, rFlagsReg cr, cmpOp cop)
 9570 %{
 9571   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
 9572   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 9573 
 9574   ins_cost(100); // XXX
 9575   format %{ "setbn$cop $dst\t# signed, long" %}
 9576   ins_encode %{
 9577     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9578     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9579   %}
 9580   ins_pipe(ialu_reg);
 9581 %}
 9582 
 9583 instruct cmovL_reg(cmpOp cop, rFlagsReg cr, rRegL dst, rRegL src)
 9584 %{
 9585   predicate(!UseAPX);
 9586   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 9587 
 9588   ins_cost(200); // XXX
 9589   format %{ "cmovq$cop $dst, $src\t# signed, long" %}
 9590   ins_encode %{
 9591     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9592   %}
 9593   ins_pipe(pipe_cmov_reg);  // XXX
 9594 %}
 9595 
 9596 instruct cmovL_reg_ndd(rRegL dst, cmpOp cop, rFlagsReg cr, rRegL src1, rRegL src2)
 9597 %{
 9598   predicate(UseAPX);
 9599   match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
 9600 
 9601   ins_cost(200);
 9602   format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, long ndd" %}
 9603   ins_encode %{
 9604     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9605   %}
 9606   ins_pipe(pipe_cmov_reg);
 9607 %}
 9608 
 9609 instruct cmovL_mem(cmpOp cop, rFlagsReg cr, rRegL dst, memory src)
 9610 %{
 9611   predicate(!UseAPX);
 9612   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
 9613 
 9614   ins_cost(200); // XXX
 9615   format %{ "cmovq$cop $dst, $src\t# signed, long" %}
 9616   ins_encode %{
 9617     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 9618   %}
 9619   ins_pipe(pipe_cmov_mem);  // XXX
 9620 %}
 9621 
 9622 instruct cmovL_rReg_rReg_mem_ndd(rRegL dst, cmpOp cop, rFlagsReg cr, rRegL src1, memory src2)
 9623 %{
 9624   predicate(UseAPX);
 9625   match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
 9626 
 9627   ins_cost(200);
 9628   format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, long ndd" %}
 9629   ins_encode %{
 9630     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9631   %}
 9632   ins_pipe(pipe_cmov_mem);
 9633 %}
 9634 
 9635 instruct cmovL_imm_01U(rRegL dst, immL1 src, rFlagsRegU cr, cmpOpU cop)
 9636 %{
 9637   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
 9638   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 9639 
 9640   ins_cost(100); // XXX
 9641   format %{ "setbn$cop $dst\t# unsigned, long" %}
 9642   ins_encode %{
 9643     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9644     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9645   %}
 9646   ins_pipe(ialu_reg);
 9647 %}
 9648 
 9649 instruct cmovL_regU(cmpOpU cop, rFlagsRegU cr, rRegL dst, rRegL src)
 9650 %{
 9651   predicate(!UseAPX);
 9652   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 9653 
 9654   ins_cost(200); // XXX
 9655   format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
 9656   ins_encode %{
 9657     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
 9658   %}
 9659   ins_pipe(pipe_cmov_reg); // XXX
 9660 %}
 9661 
 9662 instruct cmovL_regU_ndd(rRegL dst, cmpOpU cop, rFlagsRegU cr, rRegL src1, rRegL src2)
 9663 %{
 9664   predicate(UseAPX);
 9665   match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
 9666 
 9667   ins_cost(200);
 9668   format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
 9669   ins_encode %{
 9670     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9671   %}
 9672   ins_pipe(pipe_cmov_reg);
 9673 %}
 9674 
 9675 instruct cmovL_imm_01UCF(rRegL dst, immL1 src, rFlagsRegUCF cr, cmpOpUCF cop)
 9676 %{
 9677   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
 9678   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 9679 
 9680   ins_cost(100); // XXX
 9681   format %{ "setbn$cop $dst\t# unsigned, long" %}
 9682   ins_encode %{
 9683     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9684     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9685   %}
 9686   ins_pipe(ialu_reg);
 9687 %}
 9688 
 9689 instruct cmovL_imm_01UCFE(rRegL dst, immL1 src, rFlagsRegUCFE cr, cmpOpUCFE cop)
 9690 %{
 9691   predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
 9692   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 9693 
 9694   ins_cost(100); // XXX
 9695   format %{ "setbn$cop $dst\t# signed, unsigned, long" %}
 9696   ins_encode %{
 9697     Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
 9698     __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
 9699   %}
 9700   ins_pipe(ialu_reg);
 9701 %}
 9702 
// Long register conditional move for cmpOpUCF/rFlagsRegUCF conditions.
// Has no encoding of its own: the expand rule rewrites it to cmovL_regU with
// the same four operands.
instruct cmovL_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));

  ins_cost(200);
  expand %{
    cmovL_regU(cop, cr, dst, src);
  %}
%}
 9711 
 9712 instruct cmovL_regUCFE_ndd(rRegL dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegL src1, rRegL src2)
 9713 %{
 9714   match(Set dst (CMoveL (Binary cop cr) (Binary src1 src2)));
 9715 
 9716   ins_cost(200);
 9717   format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, unsigned, long ndd" %}
 9718   ins_encode %{
 9719     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Register);
 9720   %}
 9721   ins_pipe(pipe_cmov_reg);
 9722 %}
 9723 
 9724 instruct cmovL_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
 9725   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
 9726   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 9727 
 9728   ins_cost(200); // XXX
 9729   format %{ "cmovpq  $dst, $src\n\t"
 9730             "cmovneq $dst, $src" %}
 9731   ins_encode %{
 9732     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 9733     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 9734   %}
 9735   ins_pipe(pipe_cmov_reg);
 9736 %}
 9737 
 9738 // Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
 9739 // inputs of the CMove
 9740 instruct cmovL_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
 9741   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
 9742   match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));
 9743 
 9744   ins_cost(200); // XXX
 9745   format %{ "cmovpq  $dst, $src\n\t"
 9746             "cmovneq $dst, $src" %}
 9747   ins_encode %{
 9748     __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
 9749     __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
 9750   %}
 9751   ins_pipe(pipe_cmov_reg);
 9752 %}
 9753 
 9754 instruct cmovL_memU(cmpOpU cop, rFlagsRegU cr, rRegL dst, memory src)
 9755 %{
 9756   predicate(!UseAPX);
 9757   match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
 9758 
 9759   ins_cost(200); // XXX
 9760   format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
 9761   ins_encode %{
 9762     __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
 9763   %}
 9764   ins_pipe(pipe_cmov_mem); // XXX
 9765 %}
 9766 
// Long conditional move from memory (LoadL folded in) for
// cmpOpUCF/rFlagsRegUCF conditions. Has no encoding of its own: the expand
// rule rewrites it to cmovL_memU with the same four operands.
instruct cmovL_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, memory src) %{
  match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));

  ins_cost(200);
  expand %{
    cmovL_memU(cop, cr, dst, src);
  %}
%}
 9775 
 9776 instruct cmovL_rReg_rReg_memU_ndd(rRegL dst, cmpOpU cop, rFlagsRegU cr, rRegL src1, memory src2)
 9777 %{
 9778   predicate(UseAPX);
 9779   match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
 9780 
 9781   ins_cost(200);
 9782   format %{ "ecmovq$cop $dst, $src1, $src2\t# unsigned, long ndd" %}
 9783   ins_encode %{
 9784     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9785   %}
 9786   ins_pipe(pipe_cmov_mem);
 9787 %}
 9788 
 9789 instruct cmovL_rReg_rReg_memUCFE_ndd(rRegL dst, cmpOpUCFE cop, rFlagsRegUCFE cr, rRegL src1, memory src2)
 9790 %{
 9791   match(Set dst (CMoveL (Binary cop cr) (Binary src1 (LoadL src2))));
 9792 
 9793   ins_cost(200);
 9794   format %{ "ecmovq$cop $dst, $src1, $src2\t# signed, unsigned, long ndd" %}
 9795   ins_encode %{
 9796     __ ecmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src1$$Register, $src2$$Address);
 9797   %}
 9798   ins_pipe(pipe_cmov_mem);
 9799 %}
 9800 
 9801 instruct cmovF_reg(cmpOp cop, rFlagsReg cr, regF dst, regF src)
 9802 %{
 9803   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 9804 
 9805   ins_cost(200); // XXX
 9806   format %{ "jn$cop    skip\t# signed cmove float\n\t"
 9807             "movss     $dst, $src\n"
 9808     "skip:" %}
 9809   ins_encode %{
 9810     Label Lskip;
 9811     // Invert sense of branch from sense of CMOV
 9812     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9813     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 9814     __ bind(Lskip);
 9815   %}
 9816   ins_pipe(pipe_slow);
 9817 %}
 9818 
 9819 instruct cmovF_regU(cmpOpU cop, rFlagsRegU cr, regF dst, regF src)
 9820 %{
 9821   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 9822 
 9823   ins_cost(200); // XXX
 9824   format %{ "jn$cop    skip\t# unsigned cmove float\n\t"
 9825             "movss     $dst, $src\n"
 9826     "skip:" %}
 9827   ins_encode %{
 9828     Label Lskip;
 9829     // Invert sense of branch from sense of CMOV
 9830     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9831     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 9832     __ bind(Lskip);
 9833   %}
 9834   ins_pipe(pipe_slow);
 9835 %}
 9836 
// Float conditional move for cmpOpUCF/rFlagsRegUCF conditions.
// Has no encoding of its own: the expand rule rewrites it to cmovF_regU with
// the same four operands.
instruct cmovF_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regF dst, regF src) %{
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));

  ins_cost(200);
  expand %{
    cmovF_regU(cop, cr, dst, src);
  %}
%}
 9845 
 9846 instruct cmovF_regUCFE(cmpOpUCFE cop, rFlagsRegUCFE cr, regF dst, regF src)
 9847 %{
 9848   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 9849 
 9850   ins_cost(200); // XXX
 9851   format %{ "jn$cop    skip\t# signed, unsigned cmove float\n\t"
 9852             "movss     $dst, $src\n"
 9853     "skip:" %}
 9854   ins_encode %{
 9855     Label Lskip;
 9856     // Invert sense of branch from sense of CMOV
 9857     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9858     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 9859     __ bind(Lskip);
 9860   %}
 9861   ins_pipe(pipe_slow);
 9862 %}
 9863 
 9864 instruct cmovD_reg(cmpOp cop, rFlagsReg cr, regD dst, regD src)
 9865 %{
 9866   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 9867 
 9868   ins_cost(200); // XXX
 9869   format %{ "jn$cop    skip\t# signed cmove double\n\t"
 9870             "movsd     $dst, $src\n"
 9871     "skip:" %}
 9872   ins_encode %{
 9873     Label Lskip;
 9874     // Invert sense of branch from sense of CMOV
 9875     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9876     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 9877     __ bind(Lskip);
 9878   %}
 9879   ins_pipe(pipe_slow);
 9880 %}
 9881 
 9882 instruct cmovD_regU(cmpOpU cop, rFlagsRegU cr, regD dst, regD src)
 9883 %{
 9884   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 9885 
 9886   ins_cost(200); // XXX
 9887   format %{ "jn$cop    skip\t# unsigned cmove double\n\t"
 9888             "movsd     $dst, $src\n"
 9889     "skip:" %}
 9890   ins_encode %{
 9891     Label Lskip;
 9892     // Invert sense of branch from sense of CMOV
 9893     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9894     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 9895     __ bind(Lskip);
 9896   %}
 9897   ins_pipe(pipe_slow);
 9898 %}
 9899 
// Double conditional move for cmpOpUCF/rFlagsRegUCF conditions.
// Has no encoding of its own: the expand rule rewrites it to cmovD_regU with
// the same four operands.
instruct cmovD_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regD dst, regD src) %{
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));

  ins_cost(200);
  expand %{
    cmovD_regU(cop, cr, dst, src);
  %}
%}
 9908 
 9909 instruct cmovD_regUCFE(cmpOpUCFE cop, rFlagsRegUCFE cr, regD dst, regD src)
 9910 %{
 9911   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 9912 
 9913   ins_cost(200); // XXX
 9914   format %{ "jn$cop    skip\t# signed, unsigned cmove double\n\t"
 9915             "movsd     $dst, $src\n"
 9916     "skip:" %}
 9917   ins_encode %{
 9918     Label Lskip;
 9919     // Invert sense of branch from sense of CMOV
 9920     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 9921     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 9922     __ bind(Lskip);
 9923   %}
 9924   ins_pipe(pipe_slow);
 9925 %}
 9926 
 9927 //----------Arithmetic Instructions--------------------------------------------
 9928 //----------Addition Instructions----------------------------------------------
 9929 
 9930 instruct addI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
 9931 %{
 9932   predicate(!UseAPX);
 9933   match(Set dst (AddI dst src));
 9934   effect(KILL cr);
 9935   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
 9936   format %{ "addl    $dst, $src\t# int" %}
 9937   ins_encode %{
 9938     __ addl($dst$$Register, $src$$Register);
 9939   %}
 9940   ins_pipe(ialu_reg_reg);
 9941 %}
 9942 
 9943 instruct addI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
 9944 %{
 9945   predicate(UseAPX);
 9946   match(Set dst (AddI src1 src2));
 9947   effect(KILL cr);
 9948   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
 9949 
 9950   format %{ "eaddl    $dst, $src1, $src2\t# int ndd" %}
 9951   ins_encode %{
 9952     __ eaddl($dst$$Register, $src1$$Register, $src2$$Register, false);
 9953   %}
 9954   ins_pipe(ialu_reg_reg);
 9955 %}
 9956 
 9957 instruct addI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
 9958 %{
 9959   predicate(!UseAPX);
 9960   match(Set dst (AddI dst src));
 9961   effect(KILL cr);
 9962   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
 9963 
 9964   format %{ "addl    $dst, $src\t# int" %}
 9965   ins_encode %{
 9966     __ addl($dst$$Register, $src$$constant);
 9967   %}
 9968   ins_pipe( ialu_reg );
 9969 %}
 9970 
 9971 instruct addI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
 9972 %{
 9973   predicate(UseAPX);
 9974   match(Set dst (AddI src1 src2));
 9975   effect(KILL cr);
 9976   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
 9977 
 9978   format %{ "eaddl    $dst, $src1, $src2\t# int ndd" %}
 9979   ins_encode %{
 9980     __ eaddl($dst$$Register, $src1$$Register, $src2$$constant, false);
 9981   %}
 9982   ins_pipe( ialu_reg );
 9983 %}
 9984 
 9985 instruct addI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
 9986 %{
 9987   predicate(UseAPX);
 9988   match(Set dst (AddI (LoadI src1) src2));
 9989   effect(KILL cr);
 9990   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
 9991 
 9992   format %{ "eaddl    $dst, $src1, $src2\t# int ndd" %}
 9993   ins_encode %{
 9994     __ eaddl($dst$$Register, $src1$$Address, $src2$$constant, false);
 9995   %}
 9996   ins_pipe( ialu_reg );
 9997 %}
 9998 
 9999 instruct addI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
10000 %{
10001   predicate(!UseAPX);
10002   match(Set dst (AddI dst (LoadI src)));
10003   effect(KILL cr);
10004   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
10005 
10006   ins_cost(150); // XXX
10007   format %{ "addl    $dst, $src\t# int" %}
10008   ins_encode %{
10009     __ addl($dst$$Register, $src$$Address);
10010   %}
10011   ins_pipe(ialu_reg_mem);
10012 %}
10013 
// Int add, ndd form: dst = src1 + (int loaded from src2), with the LoadI
// folded into the instruction. Selected only when UseAPX is on; emits one
// eaddl. Kills the flags register and is tagged as setting the five arithmetic
// flags and as ndd-demotable on operand 1 and operand 2.
instruct addI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr) %{
  predicate(UseAPX);
  match(Set dst (AddI src1 (LoadI src2)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  ins_cost(150);
  format %{ "eaddl    $dst, $src1, $src2\t# int ndd" %}
  ins_encode %{
    Register lhs = $src1$$Register;
    Address rhs_mem = $src2$$Address;
    // NOTE(review): the trailing false is presumably the no_flags argument - confirm.
    __ eaddl($dst$$Register, lhs, rhs_mem, false);
  %}
  ins_pipe(ialu_reg_mem);
%}
10028 
// Int read-modify-write add on memory: matches a StoreI back to dst of
// (LoadI dst) + src and emits one addl with the address as destination and a
// register source. Kills the flags register and is tagged as setting the
// overflow, sign, zero, carry and parity flags.
instruct addI_mem_rReg(memory dst, rRegI src, rFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  ins_cost(150); // XXX
  format %{ "addl    $dst, $src\t# int" %}
  ins_encode %{
    Address dst_mem = $dst$$Address;
    Register src_reg = $src$$Register;
    __ addl(dst_mem, src_reg);
  %}
  ins_pipe(ialu_mem_reg);
%}
10042 
// Int read-modify-write add on memory: matches a StoreI back to dst of
// (LoadI dst) + immI constant and emits one addl with the address as
// destination and an immediate source. Kills the flags register and is tagged
// as setting the overflow, sign, zero, carry and parity flags.
instruct addI_mem_imm(memory dst, immI src, rFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  ins_cost(125); // XXX
  format %{ "addl    $dst, $src\t# int" %}
  ins_encode %{
    Address dst_mem = $dst$$Address;
    __ addl(dst_mem, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
10057 
// Int increment of a register: matches AddI of dst with an immI_1 operand and
// emits incrementl on dst. Selected only when UseAPX is off and UseIncDec is
// on. Kills the flags register.
instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr) %{
  predicate(!UseAPX && UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  format %{ "incl    $dst\t# int" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ incrementl(dst_reg);
  %}
  ins_pipe(ialu_reg);
%}
10070 
// Int increment, ndd form: matches AddI of src with an immI_1 operand and
// emits eincl with separate dst and src registers. Selected only when both
// UseAPX and UseIncDec are on. Kills the flags register and is tagged as
// ndd-demotable on operand 1.
instruct incI_rReg_ndd(rRegI dst, rRegI src, immI_1 val, rFlagsReg cr) %{
  predicate(UseAPX && UseIncDec);
  match(Set dst (AddI src val));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "eincl    $dst, $src\t# int ndd" %}
  ins_encode %{
    Register src_reg = $src$$Register;
    // NOTE(review): the trailing false is presumably the no_flags argument - confirm.
    __ eincl($dst$$Register, src_reg, false);
  %}
  ins_pipe(ialu_reg);
%}
10084 
// Int increment, ndd form, from memory: matches AddI of (LoadI src) with an
// immI_1 operand and emits eincl with a register destination and an address
// source. Selected only when both UseAPX and UseIncDec are on. Kills the flags
// register.
instruct incI_rReg_mem_ndd(rRegI dst, memory src, immI_1 val, rFlagsReg cr) %{
  predicate(UseAPX && UseIncDec);
  match(Set dst (AddI (LoadI src) val));
  effect(KILL cr);

  format %{ "eincl    $dst, $src\t# int ndd" %}
  ins_encode %{
    Address src_mem = $src$$Address;
    // NOTE(review): the trailing false is presumably the no_flags argument - confirm.
    __ eincl($dst$$Register, src_mem, false);
  %}
  ins_pipe(ialu_reg);
%}
10097 
// Int read-modify-write increment on memory: matches a StoreI back to dst of
// (LoadI dst) + immI_1 operand and emits incrementl on the address. Selected
// only when UseIncDec is on. Kills the flags register.
instruct incI_mem(memory dst, immI_1 src, rFlagsReg cr) %{
  predicate(UseIncDec);
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125); // XXX
  format %{ "incl    $dst\t# int" %}
  ins_encode %{
    Address dst_mem = $dst$$Address;
    __ incrementl(dst_mem);
  %}
  ins_pipe(ialu_mem_imm);
%}
10111 
// Int decrement of a register, emitted as decrementl on dst. The rule matches
// AddI (not SubI) with an immI_M1 operand - presumably the constant -1, so the
// decrement is expressed as an addition; the original author flagged this with
// an XXX. Selected only when UseAPX is off and UseIncDec is on. Kills the
// flags register.
instruct decI_rReg(rRegI dst, immI_M1 src, rFlagsReg cr) %{
  predicate(!UseAPX && UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  format %{ "decl    $dst\t# int" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ decrementl(dst_reg);
  %}
  ins_pipe(ialu_reg);
%}
10125 
// Int decrement, ndd form: matches AddI of src with an immI_M1 operand and
// emits edecl with separate dst and src registers. Selected only when both
// UseAPX and UseIncDec are on. Kills the flags register and is tagged as
// ndd-demotable on operand 1.
instruct decI_rReg_ndd(rRegI dst, rRegI src, immI_M1 val, rFlagsReg cr) %{
  predicate(UseAPX && UseIncDec);
  match(Set dst (AddI src val));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "edecl    $dst, $src\t# int ndd" %}
  ins_encode %{
    Register src_reg = $src$$Register;
    // NOTE(review): the trailing false is presumably the no_flags argument - confirm.
    __ edecl($dst$$Register, src_reg, false);
  %}
  ins_pipe(ialu_reg);
%}
10139 
// Int decrement, APX "ndd" form with a memory source: matches AddI of a
// LoadI from src with the immI_M1 immediate; the result goes to register dst.
// Only selected when both UseAPX and UseIncDec are set. Kills the flags.
instruct decI_rReg_mem_ndd(rRegI dst, memory src, immI_M1 val, rFlagsReg cr)
%{
  match(Set dst (AddI (LoadI src) val));
  predicate(UseAPX && UseIncDec);
  effect(KILL cr);

  format %{ "edecl    $dst, $src\t# int ndd" %}
  ins_encode %{
    const Register result = $dst$$Register;
    Address        source = $src$$Address;
    __ edecl(result, source, false);
  %}
  ins_pipe(ialu_reg);
%}
10152 
// XXX Decrement is matched as an AddI with the immI_M1 immediate rather than as a SubI; the reason is not recorded here.
// Int decrement performed directly on memory: matches a StoreI back to dst
// of (LoadI dst) + the immI_M1 immediate. Requires UseIncDec. Kills the flags.
instruct decI_mem(memory dst, immI_M1 src, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  predicate(UseIncDec);
  effect(KILL cr);

  ins_cost(125); // XXX
  format %{ "decl    $dst\t# int" %}
  ins_encode %{
    Address slot = $dst$$Address;
    __ decrementl(slot);
  %}
  ins_pipe(ialu_mem_imm);
%}
10167 
// Int LEA without a base register: matches (index << scale) + disp.
// Only selected when VM_Version::supports_fast_2op_lea() holds.
// No flags operand is declared for this rule.
instruct leaI_rReg_immI2_immI(rRegI dst, rRegI index, immI2 scale, immI disp)
%{
  match(Set dst (AddI (LShiftI index scale) disp));
  predicate(VM_Version::supports_fast_2op_lea());

  format %{ "leal $dst, [$index << $scale + $disp]\t# int" %}
  ins_encode %{
    // The shift constant is cast directly to the addressing-mode scale factor.
    const Address::ScaleFactor factor = static_cast<Address::ScaleFactor>($scale$$constant);
    Address effective(noreg, $index$$Register, factor, $disp$$constant);
    __ leal($dst$$Register, effective);
  %}
  ins_pipe(ialu_reg_reg);
%}
10180 
// Int LEA with base, unscaled index and displacement: matches
// (base + index) + disp. Only selected when
// VM_Version::supports_fast_3op_lea() holds.
instruct leaI_rReg_rReg_immI(rRegI dst, rRegI base, rRegI index, immI disp)
%{
  match(Set dst (AddI (AddI base index) disp));
  predicate(VM_Version::supports_fast_3op_lea());

  format %{ "leal $dst, [$base + $index + $disp]\t# int" %}
  ins_encode %{
    // Index is used unscaled (Address::times_1).
    Address effective($base$$Register, $index$$Register, Address::times_1, $disp$$constant);
    __ leal($dst$$Register, effective);
  %}
  ins_pipe(ialu_reg_reg);
%}
10192 
// Int LEA with base and scaled index, no displacement: matches
// base + (index << scale). The base operand uses the no_rbp_r13_RegI class.
// Only selected when VM_Version::supports_fast_2op_lea() holds.
instruct leaI_rReg_rReg_immI2(rRegI dst, no_rbp_r13_RegI base, rRegI index, immI2 scale)
%{
  match(Set dst (AddI base (LShiftI index scale)));
  predicate(VM_Version::supports_fast_2op_lea());

  format %{ "leal $dst, [$base + $index << $scale]\t# int" %}
  ins_encode %{
    // The shift constant is cast directly to the addressing-mode scale factor.
    const Address::ScaleFactor factor = static_cast<Address::ScaleFactor>($scale$$constant);
    Address effective($base$$Register, $index$$Register, factor);
    __ leal($dst$$Register, effective);
  %}
  ins_pipe(ialu_reg_reg);
%}
10205 
// Int LEA with base, scaled index and displacement: matches
// (base + (index << scale)) + disp. Only selected when
// VM_Version::supports_fast_3op_lea() holds.
instruct leaI_rReg_rReg_immI2_immI(rRegI dst, rRegI base, rRegI index, immI2 scale, immI disp)
%{
  match(Set dst (AddI (AddI base (LShiftI index scale)) disp));
  predicate(VM_Version::supports_fast_3op_lea());

  format %{ "leal $dst, [$base + $index << $scale + $disp]\t# int" %}
  ins_encode %{
    // The shift constant is cast directly to the addressing-mode scale factor.
    const Address::ScaleFactor factor = static_cast<Address::ScaleFactor>($scale$$constant);
    Address effective($base$$Register, $index$$Register, factor, $disp$$constant);
    __ leal($dst$$Register, effective);
  %}
  ins_pipe(ialu_reg_reg);
%}
10218 
// Long add, two-operand register form: dst = dst + src.
// Only selected when UseAPX is off. Kills the flags and is tagged as
// setting the overflow, sign, zero, carry and parity flags.
instruct addL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (AddL dst src));
  predicate(!UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "addq    $dst, $src\t# long" %}
  ins_encode %{
    const Register accum  = $dst$$Register;
    const Register addend = $src$$Register;
    __ addq(accum, addend);
  %}
  ins_pipe(ialu_reg_reg);
%}
10232 
// Long add, APX "ndd" three-register form: dst = src1 + src2.
// Only selected when UseAPX is on. Kills the flags; tagged with the five
// flag-setting markers and as demotable on either source operand.
instruct addL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
%{
  match(Set dst (AddL src1 src2));
  predicate(UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  format %{ "eaddq    $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register lhs    = $src1$$Register;
    const Register rhs    = $src2$$Register;
    __ eaddq(result, lhs, rhs, false);
  %}
  ins_pipe(ialu_reg_reg);
%}
10246 
// Long add of an immL32 immediate into a register: dst = dst + src.
// Only selected when UseAPX is off. Kills the flags and is tagged as
// setting the overflow, sign, zero, carry and parity flags.
instruct addL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
%{
  match(Set dst (AddL dst src));
  predicate(!UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "addq    $dst, $src\t# long" %}
  ins_encode %{
    const Register accum = $dst$$Register;
    __ addq(accum, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
10260 
// Long add, APX "ndd" register + immL32 form: dst = src1 + src2.
// Only selected when UseAPX is on. Kills the flags; tagged with the five
// flag-setting markers and as demotable on the first source operand.
instruct addL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
%{
  match(Set dst (AddL src1 src2));
  predicate(UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);

  format %{ "eaddq    $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register lhs    = $src1$$Register;
    __ eaddq(result, lhs, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
10274 
// Long add, APX "ndd" memory + immL32 form: dst = (LoadL src1) + src2.
// Only selected when UseAPX is on. Kills the flags and is tagged as
// setting the overflow, sign, zero, carry and parity flags.
instruct addL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
%{
  match(Set dst (AddL (LoadL src1) src2));
  predicate(UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "eaddq    $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    const Register result = $dst$$Register;
    Address        source = $src1$$Address;
    __ eaddq(result, source, $src2$$constant, false);
  %}
  ins_pipe(ialu_reg);
%}
10288 
// Long add of a memory operand into a register: dst = dst + (LoadL src).
// Only selected when UseAPX is off. Kills the flags and is tagged as
// setting the overflow, sign, zero, carry and parity flags.
instruct addL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
%{
  match(Set dst (AddL dst (LoadL src)));
  predicate(!UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  ins_cost(150); // XXX
  format %{ "addq    $dst, $src\t# long" %}
  ins_encode %{
    const Register accum  = $dst$$Register;
    Address        source = $src$$Address;
    __ addq(accum, source);
  %}
  ins_pipe(ialu_reg_mem);
%}
10303 
// Long add, APX "ndd" register + memory form: dst = src1 + (LoadL src2).
// Only selected when UseAPX is on. Kills the flags; tagged with the five
// flag-setting markers and both ndd-demotable operand markers.
instruct addL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LoadL src2)));
  predicate(UseAPX);
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);

  ins_cost(150);
  format %{ "eaddq    $dst, $src1, $src2\t# long ndd" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register lhs    = $src1$$Register;
    Address        rhs    = $src2$$Address;
    __ eaddq(result, lhs, rhs, false);
  %}
  ins_pipe(ialu_reg_mem);
%}
10318 
// Long add of a register into memory: matches a StoreL back to dst of
// (LoadL dst) + src. No predicate. Kills the flags and is tagged as
// setting the overflow, sign, zero, carry and parity flags.
instruct addL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (AddL (LoadL dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  ins_cost(150); // XXX
  format %{ "addq    $dst, $src\t# long" %}
  ins_encode %{
    Address        slot   = $dst$$Address;
    const Register addend = $src$$Register;
    __ addq(slot, addend);
  %}
  ins_pipe(ialu_mem_reg);
%}
10332 
// Long add of an immL32 immediate into memory: matches a StoreL back to
// dst of (LoadL dst) + src. No predicate. Kills the flags and is tagged as
// setting the overflow, sign, zero, carry and parity flags.
instruct addL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (AddL (LoadL dst) src)));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  ins_cost(125); // XXX
  format %{ "addq    $dst, $src\t# long" %}
  ins_encode %{
    Address slot = $dst$$Address;
    __ addq(slot, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}
10346 
// Long increment, two-operand register form: matches AddL of dst with the
// immL1 immediate and updates dst in place.
// Only selected when UseAPX is off and UseIncDec is on. Kills the flags.
instruct incL_rReg(rRegL dst, immL1 src, rFlagsReg cr)
%{
  match(Set dst (AddL dst src));
  predicate(!UseAPX && UseIncDec);
  effect(KILL cr);

  format %{ "incq    $dst\t# long" %}
  ins_encode %{
    const Register target = $dst$$Register;
    __ incrementq(target);
  %}
  ins_pipe(ialu_reg);
%}
10359 
// Long increment, APX "ndd" register form: matches AddL of src with the
// immL1 immediate and writes the result to a separate dst register.
// Only selected when both UseAPX and UseIncDec are set. Kills the flags.
//
// src is a long register operand (rRegL): it is an input of AddL, in line
// with the sibling long rules (e.g. decL_rReg_ndd, addL_rReg_ndd). It was
// previously declared rRegI, which does not fit a long input.
instruct incL_rReg_ndd(rRegL dst, rRegL src, immL1 val, rFlagsReg cr)
%{
  predicate(UseAPX && UseIncDec);
  match(Set dst (AddL src val));
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "eincq    $dst, $src\t# long ndd" %}
  ins_encode %{
    __ eincq($dst$$Register, $src$$Register, false);
  %}
  ins_pipe(ialu_reg);
%}
10373 
// Long increment, APX "ndd" form with a memory source: matches AddL of a
// LoadL from src with the immL1 immediate; the result goes to register dst.
// Only selected when both UseAPX and UseIncDec are set. Kills the flags.
instruct incL_rReg_mem_ndd(rRegL dst, memory src, immL1 val, rFlagsReg cr)
%{
  match(Set dst (AddL (LoadL src) val));
  predicate(UseAPX && UseIncDec);
  effect(KILL cr);

  format %{ "eincq    $dst, $src\t# long ndd" %}
  ins_encode %{
    const Register result = $dst$$Register;
    Address        source = $src$$Address;
    __ eincq(result, source, false);
  %}
  ins_pipe(ialu_reg);
%}
10386 
// Long increment performed directly on memory: matches a StoreL back to dst
// of (LoadL dst) + the immL1 immediate. Requires UseIncDec. Kills the flags.
instruct incL_mem(memory dst, immL1 src, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (AddL (LoadL dst) src)));
  predicate(UseIncDec);
  effect(KILL cr);

  ins_cost(125); // XXX
  format %{ "incq    $dst\t# long" %}
  ins_encode %{
    Address slot = $dst$$Address;
    __ incrementq(slot);
  %}
  ins_pipe(ialu_mem_imm);
%}
10400 
// XXX Decrement is matched as an AddL with the immL_M1 immediate rather than as a SubL; the reason is not recorded here.
// Long decrement, two-operand register form: matches AddL of dst with the
// immL_M1 immediate and updates dst in place.
// Only selected when UseAPX is off and UseIncDec is on. Kills the flags.
instruct decL_rReg(rRegL dst, immL_M1 src, rFlagsReg cr)
%{
  match(Set dst (AddL dst src));
  predicate(!UseAPX && UseIncDec);
  effect(KILL cr);

  format %{ "decq    $dst\t# long" %}
  ins_encode %{
    const Register target = $dst$$Register;
    __ decrementq(target);
  %}
  ins_pipe(ialu_reg);
%}
10414 
// Long decrement, APX "ndd" register form: matches AddL of src with the
// immL_M1 immediate and writes the result to a separate dst register.
// Only selected when both UseAPX and UseIncDec are set. Kills the flags.
instruct decL_rReg_ndd(rRegL dst, rRegL src, immL_M1 val, rFlagsReg cr)
%{
  match(Set dst (AddL src val));
  predicate(UseAPX && UseIncDec);
  effect(KILL cr);
  flag(PD::Flag_ndd_demotable_opr1);

  format %{ "edecq    $dst, $src\t# long ndd" %}
  ins_encode %{
    const Register result = $dst$$Register;
    const Register input  = $src$$Register;
    __ edecq(result, input, false);
  %}
  ins_pipe(ialu_reg);
%}
10428 
// Long decrement, APX "ndd" form with a memory source: matches AddL of a
// LoadL from src with the immL_M1 immediate; the result goes to register dst.
// Only selected when both UseAPX and UseIncDec are set. Kills the flags.
instruct decL_rReg_mem_ndd(rRegL dst, memory src, immL_M1 val, rFlagsReg cr)
%{
  match(Set dst (AddL (LoadL src) val));
  predicate(UseAPX && UseIncDec);
  effect(KILL cr);

  format %{ "edecq    $dst, $src\t# long ndd" %}
  ins_encode %{
    const Register result = $dst$$Register;
    Address        source = $src$$Address;
    __ edecq(result, source, false);
  %}
  ins_pipe(ialu_reg);
%}
10441 
// XXX Decrement is matched as an AddL with the immL_M1 immediate rather than as a SubL; the reason is not recorded here.
// Long decrement performed directly on memory: matches a StoreL back to dst
// of (LoadL dst) + the immL_M1 immediate. Requires UseIncDec. Kills the flags.
instruct decL_mem(memory dst, immL_M1 src, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (AddL (LoadL dst) src)));
  predicate(UseIncDec);
  effect(KILL cr);

  ins_cost(125); // XXX
  format %{ "decq    $dst\t# long" %}
  ins_encode %{
    Address slot = $dst$$Address;
    __ decrementq(slot);
  %}
  ins_pipe(ialu_mem_imm);
%}
10456 
// Long LEA without a base register: matches (index << scale) + disp.
// Only selected when VM_Version::supports_fast_2op_lea() holds.
// No flags operand is declared for this rule.
instruct leaL_rReg_immI2_immL32(rRegL dst, rRegL index, immI2 scale, immL32 disp)
%{
  match(Set dst (AddL (LShiftL index scale) disp));
  predicate(VM_Version::supports_fast_2op_lea());

  format %{ "leaq $dst, [$index << $scale + $disp]\t# long" %}
  ins_encode %{
    // The shift constant is cast directly to the addressing-mode scale factor.
    const Address::ScaleFactor factor = static_cast<Address::ScaleFactor>($scale$$constant);
    Address effective(noreg, $index$$Register, factor, $disp$$constant);
    __ leaq($dst$$Register, effective);
  %}
  ins_pipe(ialu_reg_reg);
%}
10469 
// Long LEA with base, unscaled index and displacement: matches
// (base + index) + disp. Only selected when
// VM_Version::supports_fast_3op_lea() holds.
instruct leaL_rReg_rReg_immL32(rRegL dst, rRegL base, rRegL index, immL32 disp)
%{
  match(Set dst (AddL (AddL base index) disp));
  predicate(VM_Version::supports_fast_3op_lea());

  format %{ "leaq $dst, [$base + $index + $disp]\t# long" %}
  ins_encode %{
    // Index is used unscaled (Address::times_1).
    Address effective($base$$Register, $index$$Register, Address::times_1, $disp$$constant);
    __ leaq($dst$$Register, effective);
  %}
  ins_pipe(ialu_reg_reg);
%}
10481 
// Long LEA with base and scaled index, no displacement: matches
// base + (index << scale). The base operand uses the no_rbp_r13_RegL class.
// Only selected when VM_Version::supports_fast_2op_lea() holds.
instruct leaL_rReg_rReg_immI2(rRegL dst, no_rbp_r13_RegL base, rRegL index, immI2 scale)
%{
  match(Set dst (AddL base (LShiftL index scale)));
  predicate(VM_Version::supports_fast_2op_lea());

  format %{ "leaq $dst, [$base + $index << $scale]\t# long" %}
  ins_encode %{
    // The shift constant is cast directly to the addressing-mode scale factor.
    const Address::ScaleFactor factor = static_cast<Address::ScaleFactor>($scale$$constant);
    Address effective($base$$Register, $index$$Register, factor);
    __ leaq($dst$$Register, effective);
  %}
  ins_pipe(ialu_reg_reg);
%}
10494 
// Long LEA with base, scaled index and displacement: matches
// (base + (index << scale)) + disp. Only selected when
// VM_Version::supports_fast_3op_lea() holds.
instruct leaL_rReg_rReg_immI2_immL32(rRegL dst, rRegL base, rRegL index, immI2 scale, immL32 disp)
%{
  match(Set dst (AddL (AddL base (LShiftL index scale)) disp));
  predicate(VM_Version::supports_fast_3op_lea());

  format %{ "leaq $dst, [$base + $index << $scale + $disp]\t# long" %}
  ins_encode %{
    // The shift constant is cast directly to the addressing-mode scale factor.
    const Address::ScaleFactor factor = static_cast<Address::ScaleFactor>($scale$$constant);
    Address effective($base$$Register, $index$$Register, factor, $disp$$constant);
    __ leaq($dst$$Register, effective);
  %}
  ins_pipe(ialu_reg_reg);
%}
10507 
// Pointer add of a long register offset: dst = dst + src (AddP).
// No predicate. Kills the flags and is tagged as setting the overflow,
// sign, zero, carry and parity flags.
instruct addP_rReg(rRegP dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (AddP dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "addq    $dst, $src\t# ptr" %}
  ins_encode %{
    const Register pointer = $dst$$Register;
    const Register offset  = $src$$Register;
    __ addq(pointer, offset);
  %}
  ins_pipe(ialu_reg_reg);
%}
10520 
// Pointer add of an immL32 immediate offset: dst = dst + src (AddP).
// No predicate. Kills the flags and is tagged as setting the overflow,
// sign, zero, carry and parity flags.
instruct addP_rReg_imm(rRegP dst, immL32 src, rFlagsReg cr)
%{
  match(Set dst (AddP dst src));
  effect(KILL cr);
  flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);

  format %{ "addq    $dst, $src\t# ptr" %}
  ins_encode %{
    const Register pointer = $dst$$Register;
    __ addq(pointer, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}
10533 
// XXX Open question: should there be memory-operand forms of addP?
10535 
// CheckCastPP on a pointer register: matched in place (dst -> dst) with a
// declared size of 0 and an empty encoding, so no code is emitted.
instruct checkCastPP(rRegP dst)
%{
  match(Set dst (CheckCastPP dst));

  format %{ "# checkcastPP of $dst" %}
  size(0);
  ins_encode(/* empty encoding */);
  ins_pipe(empty);
%}
10545 
// CastPP on a pointer register: matched in place (dst -> dst) with a
// declared size of 0 and an empty encoding, so no code is emitted.
instruct castPP(rRegP dst)
%{
  match(Set dst (CastPP dst));

  format %{ "# castPP of $dst" %}
  size(0);
  ins_encode(/* empty encoding */);
  ins_pipe(empty);
%}
10555 
// CastII on an int register, unchecked variant: used when
// VerifyConstraintCasts == 0. Zero size, zero cost, empty encoding.
instruct castII(rRegI dst)
%{
  match(Set dst (CastII dst));
  predicate(VerifyConstraintCasts == 0);

  ins_cost(0);
  format %{ "# castII of $dst" %}
  size(0);
  ins_encode(/* empty encoding */);
  ins_pipe(empty);
%}
10567 
// CastII on an int register, checked variant: used when
// VerifyConstraintCasts > 0. Emits verify_int_in_range for dst against this
// node's int bottom type. Kills the flags.
instruct castII_checked(rRegI dst, rFlagsReg cr)
%{
  match(Set dst (CastII dst));
  predicate(VerifyConstraintCasts > 0);

  effect(KILL cr);
  format %{ "# cast_checked_II $dst" %}
  ins_encode %{
    const TypeInt* range = bottom_type()->is_int();
    __ verify_int_in_range(_idx, range, $dst$$Register);
  %}
  ins_pipe(pipe_slow);
%}
10580 
// CastLL on a long register, unchecked variant: used when
// VerifyConstraintCasts == 0. Zero size, zero cost, empty encoding.
instruct castLL(rRegL dst)
%{
  match(Set dst (CastLL dst));
  predicate(VerifyConstraintCasts == 0);

  ins_cost(0);
  format %{ "# castLL of $dst" %}
  size(0);
  ins_encode(/* empty encoding */);
  ins_pipe(empty);
%}
10592 
// CastLL on a long register, checked variant for nodes where
// castLL_is_imm32(n) holds: used when VerifyConstraintCasts > 0.
// Emits verify_long_in_range with noreg in the temp-register position.
// Kills the flags.
instruct castLL_checked_L32(rRegL dst, rFlagsReg cr)
%{
  match(Set dst (CastLL dst));
  predicate(VerifyConstraintCasts > 0 && castLL_is_imm32(n));

  effect(KILL cr);
  format %{ "# cast_checked_LL $dst" %}
  ins_encode %{
    const TypeLong* range = bottom_type()->is_long();
    __ verify_long_in_range(_idx, range, $dst$$Register, noreg);
  %}
  ins_pipe(pipe_slow);
%}
10605 
// CastLL on a long register, checked variant for nodes where
// castLL_is_imm32(n) does not hold: used when VerifyConstraintCasts > 0.
// Emits verify_long_in_range and hands it tmp as a TEMP register.
// Kills the flags.
instruct castLL_checked(rRegL dst, rRegL tmp, rFlagsReg cr)
%{
  match(Set dst (CastLL dst));
  predicate(VerifyConstraintCasts > 0 && !castLL_is_imm32(n));

  effect(KILL cr, TEMP tmp);
  format %{ "# cast_checked_LL $dst\tusing $tmp as TEMP" %}
  ins_encode %{
    const TypeLong* range   = bottom_type()->is_long();
    const Register  scratch = $tmp$$Register;
    __ verify_long_in_range(_idx, range, $dst$$Register, scratch);
  %}
  ins_pipe(pipe_slow);
%}
10618 
// CastFF on a regF operand: matched in place (dst -> dst).
// Zero size, zero cost, empty encoding.
instruct castFF(regF dst)
%{
  match(Set dst (CastFF dst));

  ins_cost(0);
  format %{ "# castFF of $dst" %}
  size(0);
  ins_encode(/* empty encoding */);
  ins_pipe(empty);
%}
10629 
// CastHH on a regF operand: matched in place (dst -> dst).
// Zero size, zero cost, empty encoding.
instruct castHH(regF dst)
%{
  match(Set dst (CastHH dst));

  ins_cost(0);
  format %{ "# castHH of $dst" %}
  size(0);
  ins_encode(/* empty encoding */);
  ins_pipe(empty);
%}
10640 
// CastDD on a regD operand: matched in place (dst -> dst).
// Zero size, zero cost, empty encoding.
instruct castDD(regD dst)
%{
  match(Set dst (CastDD dst));

  ins_cost(0);
  format %{ "# castDD of $dst" %}
  size(0);
  ins_encode(/* empty encoding */);
  ins_pipe(empty);
%}
10651 
10652 // XXX No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
// Pointer compare-and-swap producing an int success result in res.
// Matches both CompareAndSwapP and WeakCompareAndSwapP, but only for nodes
// whose barrier_data() is 0. oldval uses the rax_RegP class and is killed,
// together with the flags. Emits lock; cmpxchgq; setcc(equal).
instruct compareAndSwapP(rRegI res,
                         memory mem_ptr,
                         rax_RegP oldval, rRegP newval,
                         rFlagsReg cr)
%{
  match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
  predicate(n->as_LoadStore()->barrier_data() == 0);
  effect(KILL cr, KILL oldval);

  format %{ "cmpxchgq $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
            "setcc $res \t# emits sete + movzbl or setzue for APX" %}
  ins_encode %{
    const Register replacement = $newval$$Register;
    Address        target      = $mem_ptr$$Address;
    __ lock();
    __ cmpxchgq(replacement, target);
    // Turn the "equal" condition left by cmpxchg into the result value.
    __ setcc(Assembler::equal, $res$$Register);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10673 
// Long compare-and-swap producing an int success result in res.
// Matches both CompareAndSwapL and WeakCompareAndSwapL. oldval uses the
// rax_RegL class and is killed, together with the flags.
// Emits lock; cmpxchgq; setcc(equal).
instruct compareAndSwapL(rRegI res,
                         memory mem_ptr,
                         rax_RegL oldval, rRegL newval,
                         rFlagsReg cr)
%{
  match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);

  format %{ "cmpxchgq $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
            "setcc $res \t# emits sete + movzbl or setzue for APX" %}
  ins_encode %{
    const Register replacement = $newval$$Register;
    Address        target      = $mem_ptr$$Address;
    __ lock();
    __ cmpxchgq(replacement, target);
    // Turn the "equal" condition left by cmpxchg into the result value.
    __ setcc(Assembler::equal, $res$$Register);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10693 
// Int compare-and-swap producing an int success result in res.
// Matches both CompareAndSwapI and WeakCompareAndSwapI. oldval uses the
// rax_RegI class and is killed, together with the flags.
// Emits lock; cmpxchgl; setcc(equal).
instruct compareAndSwapI(rRegI res,
                         memory mem_ptr,
                         rax_RegI oldval, rRegI newval,
                         rFlagsReg cr)
%{
  match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);

  format %{ "cmpxchgl $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
            "setcc $res \t# emits sete + movzbl or setzue for APX" %}
  ins_encode %{
    const Register replacement = $newval$$Register;
    Address        target      = $mem_ptr$$Address;
    __ lock();
    __ cmpxchgl(replacement, target);
    // Turn the "equal" condition left by cmpxchg into the result value.
    __ setcc(Assembler::equal, $res$$Register);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10713 
// Byte compare-and-swap producing an int success result in res.
// Matches both CompareAndSwapB and WeakCompareAndSwapB. oldval uses the
// rax_RegI class and is killed, together with the flags.
// Emits lock; cmpxchgb; setcc(equal).
instruct compareAndSwapB(rRegI res,
                         memory mem_ptr,
                         rax_RegI oldval, rRegI newval,
                         rFlagsReg cr)
%{
  match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);

  format %{ "cmpxchgb $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
            "setcc $res \t# emits sete + movzbl or setzue for APX" %}
  ins_encode %{
    const Register replacement = $newval$$Register;
    Address        target      = $mem_ptr$$Address;
    __ lock();
    __ cmpxchgb(replacement, target);
    // Turn the "equal" condition left by cmpxchg into the result value.
    __ setcc(Assembler::equal, $res$$Register);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10733 
// Short compare-and-swap producing an int success result in res.
// Matches both CompareAndSwapS and WeakCompareAndSwapS. oldval uses the
// rax_RegI class and is killed, together with the flags.
// Emits lock; cmpxchgw; setcc(equal).
instruct compareAndSwapS(rRegI res,
                         memory mem_ptr,
                         rax_RegI oldval, rRegI newval,
                         rFlagsReg cr)
%{
  match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);

  format %{ "cmpxchgw $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
            "setcc $res \t# emits sete + movzbl or setzue for APX" %}
  ins_encode %{
    const Register replacement = $newval$$Register;
    Address        target      = $mem_ptr$$Address;
    __ lock();
    __ cmpxchgw(replacement, target);
    // Turn the "equal" condition left by cmpxchg into the result value.
    __ setcc(Assembler::equal, $res$$Register);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10753 
// Narrow-oop (rRegN) compare-and-swap producing an int success result in res.
// Matches both CompareAndSwapN and WeakCompareAndSwapN, but only for nodes
// whose barrier_data() is 0. oldval uses the rax_RegN class and is killed,
// together with the flags. Emits lock; cmpxchgl; setcc(equal).
instruct compareAndSwapN(rRegI res,
                          memory mem_ptr,
                          rax_RegN oldval, rRegN newval,
                          rFlagsReg cr) %{
  match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapN mem_ptr (Binary oldval newval)));
  predicate(n->as_LoadStore()->barrier_data() == 0);
  effect(KILL cr, KILL oldval);

  format %{ "cmpxchgl $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
            "setcc $res \t# emits sete + movzbl or setzue for APX" %}
  ins_encode %{
    const Register replacement = $newval$$Register;
    Address        target      = $mem_ptr$$Address;
    __ lock();
    __ cmpxchgl(replacement, target);
    // Turn the "equal" condition left by cmpxchg into the result value.
    __ setcc(Assembler::equal, $res$$Register);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10773 
// Byte compare-and-exchange: the node's result is defined into oldval
// (rax_RegI class), which is also the compare input. Kills the flags.
// Emits lock; cmpxchgb.
instruct compareAndExchangeB(
                         memory mem_ptr,
                         rax_RegI oldval, rRegI newval,
                         rFlagsReg cr)
%{
  match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
  effect(KILL cr);

  format %{ "cmpxchgb $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
  ins_encode %{
    const Register replacement = $newval$$Register;
    Address        target      = $mem_ptr$$Address;
    __ lock();
    __ cmpxchgb(replacement, target);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10790 
// Short compare-and-exchange: the node's result is defined into oldval
// (rax_RegI class), which is also the compare input. Kills the flags.
// Emits lock; cmpxchgw.
instruct compareAndExchangeS(
                         memory mem_ptr,
                         rax_RegI oldval, rRegI newval,
                         rFlagsReg cr)
%{
  match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
  effect(KILL cr);

  format %{ "cmpxchgw $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
  ins_encode %{
    const Register replacement = $newval$$Register;
    Address        target      = $mem_ptr$$Address;
    __ lock();
    __ cmpxchgw(replacement, target);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10807 
// Int compare-and-exchange: the node's result is defined into oldval
// (rax_RegI class), which is also the compare input. Kills the flags.
// Emits lock; cmpxchgl.
instruct compareAndExchangeI(
                         memory mem_ptr,
                         rax_RegI oldval, rRegI newval,
                         rFlagsReg cr)
%{
  match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
  effect(KILL cr);

  format %{ "cmpxchgl $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
  ins_encode %{
    const Register replacement = $newval$$Register;
    Address        target      = $mem_ptr$$Address;
    __ lock();
    __ cmpxchgl(replacement, target);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10824 
// Long compare-and-exchange: the node's result is defined into oldval
// (rax_RegL class), which is also the compare input. Kills the flags.
// Emits lock; cmpxchgq.
instruct compareAndExchangeL(
                         memory mem_ptr,
                         rax_RegL oldval, rRegL newval,
                         rFlagsReg cr)
%{
  match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
  effect(KILL cr);

  format %{ "cmpxchgq $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
  ins_encode %{
    const Register replacement = $newval$$Register;
    Address        target      = $mem_ptr$$Address;
    __ lock();
    __ cmpxchgq(replacement, target);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10841 
// Narrow-oop (rRegN) compare-and-exchange, only for nodes whose
// barrier_data() is 0. The node's result is defined into oldval
// (rax_RegN class), which is also the compare input. Kills the flags.
// Emits lock; cmpxchgl.
instruct compareAndExchangeN(
                          memory mem_ptr,
                          rax_RegN oldval, rRegN newval,
                          rFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeN mem_ptr (Binary oldval newval)));
  predicate(n->as_LoadStore()->barrier_data() == 0);
  effect(KILL cr);

  format %{ "cmpxchgl $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
  ins_encode %{
    const Register replacement = $newval$$Register;
    Address        target      = $mem_ptr$$Address;
    __ lock();
    __ cmpxchgl(replacement, target);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10858 
// Pointer compare-and-exchange, only for nodes whose barrier_data() is 0.
// The node's result is defined into oldval (rax_RegP class), which is also
// the compare input. Kills the flags. Emits lock; cmpxchgq.
instruct compareAndExchangeP(
                         memory mem_ptr,
                         rax_RegP oldval, rRegP newval,
                         rFlagsReg cr)
%{
  match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
  predicate(n->as_LoadStore()->barrier_data() == 0);
  effect(KILL cr);

  format %{ "cmpxchgq $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
  ins_encode %{
    const Register replacement = $newval$$Register;
    Address        target      = $mem_ptr$$Address;
    __ lock();
    __ cmpxchgq(replacement, target);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10876 
// GetAndAddB with a register addend when the node's result is not used
// (result_not_used()): emits a locked addb to memory instead of an xadd.
// Kills the flags.
instruct xaddB_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
  match(Set dummy (GetAndAddB mem add));
  predicate(n->as_LoadStore()->result_not_used());
  effect(KILL cr);
  format %{ "addb_lock   $mem, $add" %}
  ins_encode %{
    Address        slot   = $mem$$Address;
    const Register addend = $add$$Register;
    __ lock();
    __ addb(slot, addend);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10888 
// GetAndAddB with an immI addend when the node's result is not used
// (result_not_used()): emits a locked addb of the constant to memory.
// Kills the flags.
instruct xaddB_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
  match(Set dummy (GetAndAddB mem add));
  predicate(n->as_LoadStore()->result_not_used());
  effect(KILL cr);
  format %{ "addb_lock   $mem, $add" %}
  ins_encode %{
    Address slot = $mem$$Address;
    __ lock();
    __ addb(slot, $add$$constant);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10900 
// GetAndAddB when the node's result is used: emits a locked xaddb with
// newval as both the addend and the result register, then calls
// narrow_subword_type(newval, T_BYTE) on the result. Kills the flags.
instruct xaddB(memory mem, rRegI newval, rFlagsReg cr) %{
  match(Set newval (GetAndAddB mem newval));
  predicate(!n->as_LoadStore()->result_not_used());
  effect(KILL cr);
  format %{ "xaddb_lock  $mem, $newval\t# $newval -> byte" %}
  ins_encode %{
    Address        slot  = $mem$$Address;
    const Register value = $newval$$Register;
    __ lock();
    __ xaddb(slot, value);
    __ narrow_subword_type(value, T_BYTE);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10913 
// GetAndAddS with a register addend when the node's result is not used
// (result_not_used()): emits a locked addw to memory instead of an xadd.
// Kills the flags.
instruct xaddS_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
  match(Set dummy (GetAndAddS mem add));
  predicate(n->as_LoadStore()->result_not_used());
  effect(KILL cr);
  format %{ "addw_lock   $mem, $add" %}
  ins_encode %{
    Address        slot   = $mem$$Address;
    const Register addend = $add$$Register;
    __ lock();
    __ addw(slot, addend);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10925 
// GetAndAddS with an immI addend when the node's result is not used
// (result_not_used()); additionally gated on UseStoreImmI16.
// Emits a locked addw of the constant to memory. Kills the flags.
instruct xaddS_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
  match(Set dummy (GetAndAddS mem add));
  predicate(UseStoreImmI16 && n->as_LoadStore()->result_not_used());
  effect(KILL cr);
  format %{ "addw_lock   $mem, $add" %}
  ins_encode %{
    Address slot = $mem$$Address;
    __ lock();
    __ addw(slot, $add$$constant);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10937 
// GetAndAddS when the node's result is used: emits a locked xaddw with
// newval as both the addend and the result register, then calls
// narrow_subword_type(newval, T_SHORT) on the result. Kills the flags.
instruct xaddS(memory mem, rRegI newval, rFlagsReg cr) %{
  match(Set newval (GetAndAddS mem newval));
  predicate(!n->as_LoadStore()->result_not_used());
  effect(KILL cr);
  format %{ "xaddw_lock  $mem, $newval\t# $newval -> short" %}
  ins_encode %{
    Address        slot  = $mem$$Address;
    const Register value = $newval$$Register;
    __ lock();
    __ xaddw(slot, value);
    __ narrow_subword_type(value, T_SHORT);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10950 
// GetAndAddI with a register addend when the node's result is not used
// (result_not_used()): emits a locked addl to memory instead of an xadd.
// Kills the flags.
instruct xaddI_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{
  match(Set dummy (GetAndAddI mem add));
  predicate(n->as_LoadStore()->result_not_used());
  effect(KILL cr);
  format %{ "addl_lock   $mem, $add" %}
  ins_encode %{
    Address        slot   = $mem$$Address;
    const Register addend = $add$$Register;
    __ lock();
    __ addl(slot, addend);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10962 
// GetAndAddI with an immI addend when the node's result is not used
// (result_not_used()): emits a locked addl of the constant to memory.
// Kills the flags.
instruct xaddI_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{
  match(Set dummy (GetAndAddI mem add));
  predicate(n->as_LoadStore()->result_not_used());
  effect(KILL cr);
  format %{ "addl_lock   $mem, $add" %}
  ins_encode %{
    Address slot = $mem$$Address;
    __ lock();
    __ addl(slot, $add$$constant);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10974 
// GetAndAddI when the node's result is used: emits a locked xaddl with
// newval as both the addend and the result register. Kills the flags.
instruct xaddI(memory mem, rRegI newval, rFlagsReg cr) %{
  match(Set newval (GetAndAddI mem newval));
  predicate(!n->as_LoadStore()->result_not_used());
  effect(KILL cr);
  format %{ "xaddl_lock  $mem, $newval" %}
  ins_encode %{
    Address        slot  = $mem$$Address;
    const Register value = $newval$$Register;
    __ lock();
    __ xaddl(slot, value);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10986 
// GetAndAddL with a register addend when the node's result is not used
// (result_not_used()): emits a locked addq to memory instead of an xadd.
// Kills the flags.
instruct xaddL_reg_no_res(memory mem, Universe dummy, rRegL add, rFlagsReg cr) %{
  match(Set dummy (GetAndAddL mem add));
  predicate(n->as_LoadStore()->result_not_used());
  effect(KILL cr);
  format %{ "addq_lock   $mem, $add" %}
  ins_encode %{
    Address        slot   = $mem$$Address;
    const Register addend = $add$$Register;
    __ lock();
    __ addq(slot, addend);
  %}
  ins_pipe(pipe_cmpxchg);
%}
10998 
// GetAndAddL with an immL32 addend when the node's result is not used
// (result_not_used()): emits a locked addq of the constant to memory.
// Kills the flags.
instruct xaddL_imm_no_res(memory mem, Universe dummy, immL32 add, rFlagsReg cr) %{
  match(Set dummy (GetAndAddL mem add));
  predicate(n->as_LoadStore()->result_not_used());
  effect(KILL cr);
  format %{ "addq_lock   $mem, $add" %}
  ins_encode %{
    Address slot = $mem$$Address;
    __ lock();
    __ addq(slot, $add$$constant);
  %}
  ins_pipe(pipe_cmpxchg);
%}
11010 
// GetAndAddL when the node's result is used: emits a locked xaddq with
// newval as both the addend and the result register. Kills the flags.
instruct xaddL(memory mem, rRegL newval, rFlagsReg cr) %{
  match(Set newval (GetAndAddL mem newval));
  predicate(!n->as_LoadStore()->result_not_used());
  effect(KILL cr);
  format %{ "xaddq_lock  $mem, $newval" %}
  ins_encode %{
    Address        slot  = $mem$$Address;
    const Register value = $newval$$Register;
    __ lock();
    __ xaddq(slot, value);
  %}
  ins_pipe(pipe_cmpxchg);
%}
11022 
// GetAndSetB: exchanges newval with the byte at mem via xchgb, then calls
// narrow_subword_type(newval, T_BYTE) on the result. No flags operand is
// declared and no explicit lock prefix is emitted here.
instruct xchgB( memory mem, rRegI newval) %{
  match(Set newval (GetAndSetB mem newval));
  format %{ "XCHGB  $newval,[$mem]\t# $newval -> byte" %}
  ins_encode %{
    const Register value = $newval$$Register;
    Address        slot  = $mem$$Address;
    __ xchgb(value, slot);
    __ narrow_subword_type(value, T_BYTE);
  %}
  ins_pipe(pipe_cmpxchg);
%}
11032 
// GetAndSetS: exchanges newval with the short at mem via xchgw, then calls
// narrow_subword_type(newval, T_SHORT) on the result. No flags operand is
// declared and no explicit lock prefix is emitted here.
instruct xchgS( memory mem, rRegI newval) %{
  match(Set newval (GetAndSetS mem newval));
  format %{ "XCHGW  $newval,[$mem]\t# $newval -> short" %}
  ins_encode %{
    const Register value = $newval$$Register;
    Address        slot  = $mem$$Address;
    __ xchgw(value, slot);
    __ narrow_subword_type(value, T_SHORT);
  %}
  ins_pipe(pipe_cmpxchg);
%}
11042 
// Atomic 32-bit exchange for GetAndSetI. `newval` supplies the value to store
// and receives the previous memory contents via a single xchgl (implicitly
// locked on x86 when one operand is memory; flags are not modified).
11043 instruct xchgI( memory mem, rRegI newval) %{
11044   match(Set newval (GetAndSetI mem newval));
11045   format %{ "XCHGL  $newval,[$mem]" %}
11046   ins_encode %{
11047     __ xchgl($newval$$Register, $mem$$Address);
11048   %}
11049   ins_pipe( pipe_cmpxchg );
11050 %}
11051 
// Atomic 64-bit exchange for GetAndSetL. `newval` supplies the value to store
// and receives the previous memory contents via a single xchgq (implicitly
// locked on x86 when one operand is memory; flags are not modified).
// The format text names XCHGQ so the printed assembly matches the emitted
// 64-bit instruction.
11052 instruct xchgL( memory mem, rRegL newval) %{
11053   match(Set newval (GetAndSetL mem newval));
11054   format %{ "XCHGQ  $newval,[$mem]" %}
11055   ins_encode %{
11056     __ xchgq($newval$$Register, $mem$$Address);
11057   %}
11058   ins_pipe( pipe_cmpxchg );
11059 %}
11060 
// Atomic pointer exchange for GetAndSetP, used only when the LoadStore node
// carries no GC barrier data (barrier_data() == 0). `newval` supplies the
// pointer to store and receives the previous one via xchgq.
11061 instruct xchgP( memory mem, rRegP newval) %{
11062   match(Set newval (GetAndSetP mem newval));
11063   predicate(n->as_LoadStore()->barrier_data() == 0);
11064   format %{ "XCHGQ  $newval,[$mem]" %}
11065   ins_encode %{
11066     __ xchgq($newval$$Register, $mem$$Address);
11067   %}
11068   ins_pipe( pipe_cmpxchg );
11069 %}
11070 
// Atomic narrow-oop exchange for GetAndSetN, used only when the LoadStore
// node carries no GC barrier data (barrier_data() == 0). `newval` supplies the
// 32-bit narrow pointer to store and receives the previous one via xchgl.
// The format text brackets the memory operand like the sibling xchg rules.
11071 instruct xchgN( memory mem, rRegN newval) %{
11072   predicate(n->as_LoadStore()->barrier_data() == 0);
11073   match(Set newval (GetAndSetN mem newval));
11074   format %{ "XCHGL  $newval,[$mem]" %}
11075   ins_encode %{
11076     __ xchgl($newval$$Register, $mem$$Address);
11077   %}
11078   ins_pipe( pipe_cmpxchg );
11079 %}
11080 
11081 //----------Abs Instructions-------------------------------------------
11082 
11083 // Integer Absolute Instructions
// Branch-free AbsI: dst = 0 - src, then conditionally overwrite dst with src
// when that subtraction compares "less" (i.e. src was positive).
// dst is TEMP because it is written before the final read of src, so the two
// must not share a register. Kills flags.
11084 instruct absI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
11085 %{
11086   match(Set dst (AbsI src));
11087   effect(TEMP dst, KILL cr);
11088   format %{ "xorl    $dst, $dst\t# abs int\n\t"
11089             "subl    $dst, $src\n\t"
11090             "cmovll  $dst, $src" %}
11091   ins_encode %{
11092     __ xorl($dst$$Register, $dst$$Register);
11093     __ subl($dst$$Register, $src$$Register);
11094     __ cmovl(Assembler::less, $dst$$Register, $src$$Register);
11095   %}
11096 
11097   ins_pipe(ialu_reg_reg);
11098 %}
11099 
11100 // Long Absolute Instructions
// Branch-free AbsL: dst = 0 - src, then conditionally overwrite dst with src
// when that subtraction compares "less" (i.e. src was positive).
// dst is TEMP because it is written before the final read of src, so the two
// must not share a register. Kills flags.
11101 instruct absL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11102 %{
11103   match(Set dst (AbsL src));
11104   effect(TEMP dst, KILL cr);
11105   format %{ "xorl    $dst, $dst\t# abs long\n\t"
11106             "subq    $dst, $src\n\t"
11107             "cmovlq  $dst, $src" %}
11108   ins_encode %{
11109     __ xorl($dst$$Register, $dst$$Register);  // 32-bit xor suffices: a 32-bit write zero-extends into the full 64-bit register
11110     __ subq($dst$$Register, $src$$Register);
11111     __ cmovq(Assembler::less, $dst$$Register, $src$$Register);
11112   %}
11113 
11114   ins_pipe(ialu_reg_reg);
11115 %}
11116 
11117 //----------Subtraction Instructions-------------------------------------------
11118 
11119 // Integer Subtraction Instructions
// Two-operand int subtract, dst -= src, used when APX is disabled (!UseAPX).
// Kills flags; the flag(...) list records which condition flags subl sets.
11120 instruct subI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
11121 %{
11122   predicate(!UseAPX);
11123   match(Set dst (SubI dst src));
11124   effect(KILL cr);
11125   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11126 
11127   format %{ "subl    $dst, $src\t# int" %}
11128   ins_encode %{
11129     __ subl($dst$$Register, $src$$Register);
11130   %}
11131   ins_pipe(ialu_reg_reg);
11132 %}
11133 
// APX new-data-destination (NDD) int subtract, dst = src1 - src2, selected
// when UseAPX is set. Kills flags.
// NOTE(review): Flag_ndd_demotable_opr1 presumably marks that the rule may be
// demoted to the two-operand form when dst and src1 coincide -- confirm.
11134 instruct subI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
11135 %{
11136   predicate(UseAPX);
11137   match(Set dst (SubI src1 src2));
11138   effect(KILL cr);
11139   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11140 
11141   format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
11142   ins_encode %{
11143     __ esubl($dst$$Register, $src1$$Register, $src2$$Register, false);
11144   %}
11145   ins_pipe(ialu_reg_reg);
11146 %}
11147 
// APX NDD int subtract of an immediate, dst = src1 - constant, selected when
// UseAPX is set. Kills flags.
11148 instruct subI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
11149 %{
11150   predicate(UseAPX);
11151   match(Set dst (SubI src1 src2));
11152   effect(KILL cr);
11153   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11154 
11155   format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
11156   ins_encode %{
11157     __ esubl($dst$$Register, $src1$$Register, $src2$$constant, false);
11158   %}
11159   ins_pipe(ialu_reg_reg);
11160 %}
11161 
// APX NDD int subtract with a folded load, dst = [src1] - constant, selected
// when UseAPX is set. The minuend is read directly from memory. Kills flags.
11162 instruct subI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
11163 %{
11164   predicate(UseAPX);
11165   match(Set dst (SubI (LoadI src1) src2));
11166   effect(KILL cr);
11167   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11168 
11169   format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
11170   ins_encode %{
11171     __ esubl($dst$$Register, $src1$$Address, $src2$$constant, false);
11172   %}
11173   ins_pipe(ialu_reg_reg);
11174 %}
11175 
// Two-operand int subtract with a folded load, dst -= [src], used when APX is
// disabled (!UseAPX). Costed at 150. Kills flags.
11176 instruct subI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
11177 %{
11178   predicate(!UseAPX);
11179   match(Set dst (SubI dst (LoadI src)));
11180   effect(KILL cr);
11181   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11182 
11183   ins_cost(150);
11184   format %{ "subl    $dst, $src\t# int" %}
11185   ins_encode %{
11186     __ subl($dst$$Register, $src$$Address);
11187   %}
11188   ins_pipe(ialu_reg_mem);
11189 %}
11190 
// APX NDD int subtract with the subtrahend loaded from memory,
// dst = src1 - [src2], selected when UseAPX is set. Costed at 150. Kills flags.
11191 instruct subI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
11192 %{
11193   predicate(UseAPX);
11194   match(Set dst (SubI src1 (LoadI src2)));
11195   effect(KILL cr);
11196   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11197 
11198   ins_cost(150);
11199   format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
11200   ins_encode %{
11201     __ esubl($dst$$Register, $src1$$Register, $src2$$Address, false);
11202   %}
11203   ins_pipe(ialu_reg_mem);
11204 %}
11205 
// APX NDD int subtract with the minuend loaded from memory,
// dst = [src1] - src2, selected when UseAPX is set. Costed at 150. Kills flags.
11206 instruct subI_rReg_mem_rReg_ndd(rRegI dst, memory src1, rRegI src2, rFlagsReg cr)
11207 %{
11208   predicate(UseAPX);
11209   match(Set dst (SubI (LoadI src1) src2));
11210   effect(KILL cr);
11211   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11212 
11213   ins_cost(150);
11214   format %{ "esubl    $dst, $src1, $src2\t# int ndd" %}
11215   ins_encode %{
11216     __ esubl($dst$$Register, $src1$$Address, $src2$$Register, false);
11217   %}
11218   ins_pipe(ialu_reg_mem);
11219 %}
11220 
// Read-modify-write int subtract: folds load, SubI and StoreI on the same
// address into one subl [dst], src. Not gated on UseAPX. Costed at 150.
// Kills flags.
11221 instruct subI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
11222 %{
11223   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
11224   effect(KILL cr);
11225   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11226 
11227   ins_cost(150);
11228   format %{ "subl    $dst, $src\t# int" %}
11229   ins_encode %{
11230     __ subl($dst$$Address, $src$$Register);
11231   %}
11232   ins_pipe(ialu_mem_reg);
11233 %}
11234 
// Two-operand long subtract, dst -= src, used when APX is disabled (!UseAPX).
// Kills flags; the flag(...) list records which condition flags subq sets.
11235 instruct subL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11236 %{
11237   predicate(!UseAPX);
11238   match(Set dst (SubL dst src));
11239   effect(KILL cr);
11240   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11241 
11242   format %{ "subq    $dst, $src\t# long" %}
11243   ins_encode %{
11244     __ subq($dst$$Register, $src$$Register);
11245   %}
11246   ins_pipe(ialu_reg_reg);
11247 %}
11248 
// APX new-data-destination (NDD) long subtract, dst = src1 - src2, selected
// when UseAPX is set. Kills flags.
11249 instruct subL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
11250 %{
11251   predicate(UseAPX);
11252   match(Set dst (SubL src1 src2));
11253   effect(KILL cr);
11254   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11255 
11256   format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
11257   ins_encode %{
11258     __ esubq($dst$$Register, $src1$$Register, $src2$$Register, false);
11259   %}
11260   ins_pipe(ialu_reg_reg);
11261 %}
11262 
// APX NDD long subtract of an immL32 constant, dst = src1 - constant,
// selected when UseAPX is set. Kills flags.
11263 instruct subL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
11264 %{
11265   predicate(UseAPX);
11266   match(Set dst (SubL src1 src2));
11267   effect(KILL cr);
11268   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11269 
11270   format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
11271   ins_encode %{
11272     __ esubq($dst$$Register, $src1$$Register, $src2$$constant, false);
11273   %}
11274   ins_pipe(ialu_reg_reg);
11275 %}
11276 
// APX NDD long subtract with a folded load, dst = [src1] - constant (immL32),
// selected when UseAPX is set. Kills flags.
11277 instruct subL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
11278 %{
11279   predicate(UseAPX);
11280   match(Set dst (SubL (LoadL src1) src2));
11281   effect(KILL cr);
11282   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11283 
11284   format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
11285   ins_encode %{
11286     __ esubq($dst$$Register, $src1$$Address, $src2$$constant, false);
11287   %}
11288   ins_pipe(ialu_reg_reg);
11289 %}
11290 
// Two-operand long subtract with a folded load, dst -= [src], used when APX
// is disabled (!UseAPX). Costed at 150. Kills flags.
11291 instruct subL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
11292 %{
11293   predicate(!UseAPX);
11294   match(Set dst (SubL dst (LoadL src)));
11295   effect(KILL cr);
11296   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11297 
11298   ins_cost(150);
11299   format %{ "subq    $dst, $src\t# long" %}
11300   ins_encode %{
11301     __ subq($dst$$Register, $src$$Address);
11302   %}
11303   ins_pipe(ialu_reg_mem);
11304 %}
11305 
// APX NDD long subtract with the subtrahend loaded from memory,
// dst = src1 - [src2], selected when UseAPX is set. Costed at 150. Kills flags.
11306 instruct subL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
11307 %{
11308   predicate(UseAPX);
11309   match(Set dst (SubL src1 (LoadL src2)));
11310   effect(KILL cr);
11311   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11312 
11313   ins_cost(150);
11314   format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
11315   ins_encode %{
11316     __ esubq($dst$$Register, $src1$$Register, $src2$$Address, false);
11317   %}
11318   ins_pipe(ialu_reg_mem);
11319 %}
11320 
// APX NDD long subtract with the minuend loaded from memory,
// dst = [src1] - src2, selected when UseAPX is set. Costed at 150. Kills flags.
11321 instruct subL_rReg_mem_rReg_ndd(rRegL dst, memory src1, rRegL src2, rFlagsReg cr)
11322 %{
11323   predicate(UseAPX);
11324   match(Set dst (SubL (LoadL src1) src2));
11325   effect(KILL cr);
11326   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11327 
11328   ins_cost(150);
11329   format %{ "esubq    $dst, $src1, $src2\t# long ndd" %}
11330   ins_encode %{
11331     __ esubq($dst$$Register, $src1$$Address, $src2$$Register, false);
11332   %}
11333   ins_pipe(ialu_reg_mem);
11334 %}
11335 
// Read-modify-write long subtract: folds load, SubL and StoreL on the same
// address into one subq [dst], src. Not gated on UseAPX. Costed at 150.
// Kills flags.
11336 instruct subL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
11337 %{
11338   match(Set dst (StoreL dst (SubL (LoadL dst) src)));
11339   effect(KILL cr);
11340   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_carry_flag, PD::Flag_sets_parity_flag);
11341 
11342   ins_cost(150);
11343   format %{ "subq    $dst, $src\t# long" %}
11344   ins_encode %{
11345     __ subq($dst$$Address, $src$$Register);
11346   %}
11347   ins_pipe(ialu_mem_reg);
11348 %}
11349 
11350 // Subtract from a pointer
11351 // XXX hmpf???
// Matches a pointer plus a negated int, AddP dst (0 - src), and emits a single
// 64-bit subq dst, src instead of negate-then-add. Kills flags.
// NOTE(review): src is an rRegI used as a 64-bit subq operand; whether its
// upper 32 bits are guaranteed meaningful here is not visible in this rule --
// confirm before relying on it.
11352 instruct subP_rReg(rRegP dst, rRegI src, immI_0 zero, rFlagsReg cr)
11353 %{
11354   match(Set dst (AddP dst (SubI zero src)));
11355   effect(KILL cr);
11356 
11357   format %{ "subq    $dst, $src\t# ptr - int" %}
11358   ins_encode %{
11359     __ subq($dst$$Register, $src$$Register);
11360   %}
11361   ins_pipe(ialu_reg_reg);
11362 %}
11363 
// In-place int negate for the ideal shape (0 - dst), used when APX is
// disabled (!UseAPX). Emits negl dst. Kills flags; the flag(...) list omits
// the carry flag, unlike the sub rules.
11364 instruct negI_rReg(rRegI dst, immI_0 zero, rFlagsReg cr)
11365 %{
11366   predicate(!UseAPX);
11367   match(Set dst (SubI zero dst));
11368   effect(KILL cr);
11369   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11370 
11371   format %{ "negl    $dst\t# int" %}
11372   ins_encode %{
11373     __ negl($dst$$Register);
11374   %}
11375   ins_pipe(ialu_reg);
11376 %}
11377 
// APX NDD int negate for the ideal shape (0 - src), dst = -src, selected when
// UseAPX is set. src is the second input of the SubI, hence
// Flag_ndd_demotable_opr2 rather than opr1. Kills flags.
11378 instruct negI_rReg_ndd(rRegI dst, rRegI src, immI_0 zero, rFlagsReg cr)
11379 %{
11380   predicate(UseAPX);
11381   match(Set dst (SubI zero src));
11382   effect(KILL cr);
11383   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr2);
11384 
11385   format %{ "enegl    $dst, $src\t# int ndd" %}
11386   ins_encode %{
11387     __ enegl($dst$$Register, $src$$Register, false);
11388   %}
11389   ins_pipe(ialu_reg);
11390 %}
11391 
// In-place int negate for an explicit NegI node, used when APX is disabled
// (!UseAPX). Same negl encoding as the (0 - dst) rule. Kills flags.
11392 instruct negI_rReg_2(rRegI dst, rFlagsReg cr)
11393 %{
11394   predicate(!UseAPX);
11395   match(Set dst (NegI dst));
11396   effect(KILL cr);
11397   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11398 
11399   format %{ "negl    $dst\t# int" %}
11400   ins_encode %{
11401     __ negl($dst$$Register);
11402   %}
11403   ins_pipe(ialu_reg);
11404 %}
11405 
// APX NDD int negate for an explicit NegI node, dst = -src, selected when
// UseAPX is set. src is the only input, hence Flag_ndd_demotable_opr1.
// Kills flags.
11406 instruct negI_rReg_2_ndd(rRegI dst, rRegI src, rFlagsReg cr)
11407 %{
11408   predicate(UseAPX);
11409   match(Set dst (NegI src));
11410   effect(KILL cr);
11411   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11412 
11413   format %{ "enegl    $dst, $src\t# int ndd" %}
11414   ins_encode %{
11415     __ enegl($dst$$Register, $src$$Register, false);
11416   %}
11417   ins_pipe(ialu_reg);
11418 %}
11419 
// Read-modify-write int negate: folds load, (0 - value) and StoreI on the
// same address into one negl [dst]. Not gated on UseAPX. Kills flags.
11420 instruct negI_mem(memory dst, immI_0 zero, rFlagsReg cr)
11421 %{
11422   match(Set dst (StoreI dst (SubI zero (LoadI dst))));
11423   effect(KILL cr);
11424   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11425 
11426   format %{ "negl    $dst\t# int" %}
11427   ins_encode %{
11428     __ negl($dst$$Address);
11429   %}
11430   ins_pipe(ialu_reg);
11431 %}
11432 
// In-place long negate for the ideal shape (0L - dst), used when APX is
// disabled (!UseAPX). Emits negq dst. Kills flags.
11433 instruct negL_rReg(rRegL dst, immL0 zero, rFlagsReg cr)
11434 %{
11435   predicate(!UseAPX);
11436   match(Set dst (SubL zero dst));
11437   effect(KILL cr);
11438   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11439 
11440   format %{ "negq    $dst\t# long" %}
11441   ins_encode %{
11442     __ negq($dst$$Register);
11443   %}
11444   ins_pipe(ialu_reg);
11445 %}
11446 
// APX NDD long negate for the ideal shape (0L - src), dst = -src, selected
// when UseAPX is set. src is the second input of the SubL, hence
// Flag_ndd_demotable_opr2 rather than opr1. Kills flags.
11447 instruct negL_rReg_ndd(rRegL dst, rRegL src, immL0 zero, rFlagsReg cr)
11448 %{
11449   predicate(UseAPX);
11450   match(Set dst (SubL zero src));
11451   effect(KILL cr);
11452   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr2);
11453 
11454   format %{ "enegq    $dst, $src\t# long ndd" %}
11455   ins_encode %{
11456     __ enegq($dst$$Register, $src$$Register, false);
11457   %}
11458   ins_pipe(ialu_reg);
11459 %}
11460 
// In-place long negate for an explicit NegL node, used when APX is disabled
// (!UseAPX). Same negq encoding as the (0L - dst) rule. Kills flags.
// The format annotation says "long" to match the 64-bit negq that is emitted.
11461 instruct negL_rReg_2(rRegL dst, rFlagsReg cr)
11462 %{
11463   predicate(!UseAPX);
11464   match(Set dst (NegL dst));
11465   effect(KILL cr);
11466   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11467 
11468   format %{ "negq    $dst\t# long" %}
11469   ins_encode %{
11470     __ negq($dst$$Register);
11471   %}
11472   ins_pipe(ialu_reg);
11473 %}
11474 
// APX NDD long negate for an explicit NegL node, dst = -src, selected when
// UseAPX is set. src is the only input, hence Flag_ndd_demotable_opr1.
// Kills flags.
11475 instruct negL_rReg_2_ndd(rRegL dst, rRegL src, rFlagsReg cr)
11476 %{
11477   predicate(UseAPX);
11478   match(Set dst (NegL src));
11479   effect(KILL cr);
11480   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_ndd_demotable_opr1);
11481 
11482   format %{ "enegq    $dst, $src\t# long ndd" %}
11483   ins_encode %{
11484     __ enegq($dst$$Register, $src$$Register, false);
11485   %}
11486   ins_pipe(ialu_reg);
11487 %}
11488 
// Read-modify-write long negate: folds load, (0L - value) and StoreL on the
// same address into one negq [dst]. Not gated on UseAPX. Kills flags.
11489 instruct negL_mem(memory dst, immL0 zero, rFlagsReg cr)
11490 %{
11491   match(Set dst (StoreL dst (SubL zero (LoadL dst))));
11492   effect(KILL cr);
11493   flag(PD::Flag_sets_overflow_flag, PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag);
11494 
11495   format %{ "negq    $dst\t# long" %}
11496   ins_encode %{
11497     __ negq($dst$$Address);
11498   %}
11499   ins_pipe(ialu_reg);
11500 %}
11501 
11502 //----------Multiplication/Division Instructions-------------------------------
11503 // Integer Multiplication Instructions
11504 // Multiply Register
11505 
// Two-operand int multiply, dst *= src, used when APX is disabled (!UseAPX).
// Costed at 300. Kills flags.
11506 instruct mulI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
11507 %{
11508   predicate(!UseAPX);
11509   match(Set dst (MulI dst src));
11510   effect(KILL cr);
11511 
11512   ins_cost(300);
11513   format %{ "imull   $dst, $src\t# int" %}
11514   ins_encode %{
11515     __ imull($dst$$Register, $src$$Register);
11516   %}
11517   ins_pipe(ialu_reg_reg_alu0);
11518 %}
11519 
// APX NDD int multiply, dst = src1 * src2, selected when UseAPX is set.
// Both inputs are flagged demotable (opr1 and opr2). Costed at 300.
// Kills flags.
11520 instruct mulI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
11521 %{
11522   predicate(UseAPX);
11523   match(Set dst (MulI src1 src2));
11524   effect(KILL cr);
11525   flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11526 
11527   ins_cost(300);
11528   format %{ "eimull   $dst, $src1, $src2\t# int ndd" %}
11529   ins_encode %{
11530     __ eimull($dst$$Register, $src1$$Register, $src2$$Register, false);
11531   %}
11532   ins_pipe(ialu_reg_reg_alu0);
11533 %}
11534 
// Three-operand int multiply by an immediate, dst = src * imm. It has no
// predicate, so it applies with or without APX. Costed at 300. Kills flags.
11535 instruct mulI_rReg_imm(rRegI dst, rRegI src, immI imm, rFlagsReg cr)
11536 %{
11537   match(Set dst (MulI src imm));
11538   effect(KILL cr);
11539 
11540   ins_cost(300);
11541   format %{ "imull   $dst, $src, $imm\t# int" %}
11542   ins_encode %{
11543     __ imull($dst$$Register, $src$$Register, $imm$$constant);
11544   %}
11545   ins_pipe(ialu_reg_reg_alu0);
11546 %}
11547 
// Two-operand int multiply with a folded load, dst *= [src], used when APX is
// disabled (!UseAPX). Costed at 350. Kills flags.
11548 instruct mulI_mem(rRegI dst, memory src, rFlagsReg cr)
11549 %{
11550   predicate(!UseAPX);
11551   match(Set dst (MulI dst (LoadI src)));
11552   effect(KILL cr);
11553 
11554   ins_cost(350);
11555   format %{ "imull   $dst, $src\t# int" %}
11556   ins_encode %{
11557     __ imull($dst$$Register, $src$$Address);
11558   %}
11559   ins_pipe(ialu_reg_mem_alu0);
11560 %}
11561 
// APX NDD int multiply with the second factor loaded from memory,
// dst = src1 * [src2], selected when UseAPX is set. Costed at 350.
// Kills flags.
11562 instruct mulI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
11563 %{
11564   predicate(UseAPX);
11565   match(Set dst (MulI src1 (LoadI src2)));
11566   effect(KILL cr);
11567   flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11568 
11569   ins_cost(350);
11570   format %{ "eimull   $dst, $src1, $src2\t# int ndd" %}
11571   ins_encode %{
11572     __ eimull($dst$$Register, $src1$$Register, $src2$$Address, false);
11573   %}
11574   ins_pipe(ialu_reg_mem_alu0);
11575 %}
11576 
// Three-operand int multiply of a loaded value by an immediate,
// dst = [src] * imm. No predicate. Costed at 300. Kills flags.
11577 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, rFlagsReg cr)
11578 %{
11579   match(Set dst (MulI (LoadI src) imm));
11580   effect(KILL cr);
11581 
11582   ins_cost(300);
11583   format %{ "imull   $dst, $src, $imm\t# int" %}
11584   ins_encode %{
11585     __ imull($dst$$Register, $src$$Address, $imm$$constant);
11586   %}
11587   ins_pipe(ialu_reg_mem_alu0);
11588 %}
11589 
// MulAddS2I computed as dst = dst * src1 + src2 * src3. It has no encoding of
// its own: it expands into two mulI_rReg rules and one addI_rReg rule.
// src2 is declared KILL because the second multiply overwrites it with the
// partial product src2 * src3.
11590 instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, rFlagsReg cr)
11591 %{
11592   match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
11593   effect(KILL cr, KILL src2);
11594 
11595   expand %{ mulI_rReg(dst, src1, cr);
11596            mulI_rReg(src2, src3, cr);
11597            addI_rReg(dst, src2, cr); %}
11598 %}
11599 
// Two-operand long multiply, dst *= src, used when APX is disabled (!UseAPX).
// Costed at 300. Kills flags.
11600 instruct mulL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
11601 %{
11602   predicate(!UseAPX);
11603   match(Set dst (MulL dst src));
11604   effect(KILL cr);
11605 
11606   ins_cost(300);
11607   format %{ "imulq   $dst, $src\t# long" %}
11608   ins_encode %{
11609     __ imulq($dst$$Register, $src$$Register);
11610   %}
11611   ins_pipe(ialu_reg_reg_alu0);
11612 %}
11613 
// APX NDD long multiply, dst = src1 * src2, selected when UseAPX is set.
// Both inputs are flagged demotable (opr1 and opr2). Costed at 300.
// Kills flags.
11614 instruct mulL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
11615 %{
11616   predicate(UseAPX);
11617   match(Set dst (MulL src1 src2));
11618   effect(KILL cr);
11619   flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11620 
11621   ins_cost(300);
11622   format %{ "eimulq   $dst, $src1, $src2\t# long ndd" %}
11623   ins_encode %{
11624     __ eimulq($dst$$Register, $src1$$Register, $src2$$Register, false);
11625   %}
11626   ins_pipe(ialu_reg_reg_alu0);
11627 %}
11628 
// Three-operand long multiply by an immL32 constant, dst = src * imm. It has
// no predicate, so it applies with or without APX. Costed at 300. Kills flags.
11629 instruct mulL_rReg_imm(rRegL dst, rRegL src, immL32 imm, rFlagsReg cr)
11630 %{
11631   match(Set dst (MulL src imm));
11632   effect(KILL cr);
11633 
11634   ins_cost(300);
11635   format %{ "imulq   $dst, $src, $imm\t# long" %}
11636   ins_encode %{
11637     __ imulq($dst$$Register, $src$$Register, $imm$$constant);
11638   %}
11639   ins_pipe(ialu_reg_reg_alu0);
11640 %}
11641 
// Two-operand long multiply with a folded load, dst *= [src], used when APX
// is disabled (!UseAPX). Costed at 350. Kills flags.
11642 instruct mulL_mem(rRegL dst, memory src, rFlagsReg cr)
11643 %{
11644   predicate(!UseAPX);
11645   match(Set dst (MulL dst (LoadL src)));
11646   effect(KILL cr);
11647 
11648   ins_cost(350);
11649   format %{ "imulq   $dst, $src\t# long" %}
11650   ins_encode %{
11651     __ imulq($dst$$Register, $src$$Address);
11652   %}
11653   ins_pipe(ialu_reg_mem_alu0);
11654 %}
11655 
// APX NDD long multiply with the second factor loaded from memory,
// dst = src1 * [src2], selected when UseAPX is set. Costed at 350.
// Kills flags.
// The format annotation follows the "\t# long ndd" convention used by the
// other NDD rules (no stray space before the tab).
11656 instruct mulL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
11657 %{
11658   predicate(UseAPX);
11659   match(Set dst (MulL src1 (LoadL src2)));
11660   effect(KILL cr);
11661   flag(PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
11662 
11663   ins_cost(350);
11664   format %{ "eimulq   $dst, $src1, $src2\t# long ndd" %}
11665   ins_encode %{
11666     __ eimulq($dst$$Register, $src1$$Register, $src2$$Address, false);
11667   %}
11668   ins_pipe(ialu_reg_mem_alu0);
11669 %}
11670 
// Three-operand long multiply of a loaded value by an immL32 constant,
// dst = [src] * imm. No predicate. Costed at 300. Kills flags.
11671 instruct mulL_mem_imm(rRegL dst, memory src, immL32 imm, rFlagsReg cr)
11672 %{
11673   match(Set dst (MulL (LoadL src) imm));
11674   effect(KILL cr);
11675 
11676   ins_cost(300);
11677   format %{ "imulq   $dst, $src, $imm\t# long" %}
11678   ins_encode %{
11679     __ imulq($dst$$Register, $src$$Address, $imm$$constant);
11680   %}
11681   ins_pipe(ialu_reg_mem_alu0);
11682 %}
11683 
// Signed high 64 bits of a 64x64 multiply (MulHiL). Uses the one-operand
// imulq, which multiplies RAX by src and leaves the 128-bit product in
// RDX:RAX; the result is taken from RDX. RAX is an input that is overwritten
// (USE_KILL). Costed at 300. Kills flags.
11684 instruct mulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
11685 %{
11686   match(Set dst (MulHiL src rax));
11687   effect(USE_KILL rax, KILL cr);
11688 
11689   ins_cost(300);
11690   format %{ "imulq   RDX:RAX, RAX, $src\t# mulhi" %}
11691   ins_encode %{
11692     __ imulq($src$$Register);
11693   %}
11694   ins_pipe(ialu_reg_reg_alu0);
11695 %}
11696 
// Unsigned high 64 bits of a 64x64 multiply (UMulHiL). Uses the one-operand
// mulq, which multiplies RAX by src and leaves the 128-bit product in
// RDX:RAX; the result is taken from RDX. RAX is an input that is overwritten
// (USE_KILL). Costed at 300. Kills flags.
11697 instruct umulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
11698 %{
11699   match(Set dst (UMulHiL src rax));
11700   effect(USE_KILL rax, KILL cr);
11701 
11702   ins_cost(300);
11703   format %{ "mulq   RDX:RAX, RAX, $src\t# umulhi" %}
11704   ins_encode %{
11705     __ mulq($src$$Register);
11706   %}
11707   ins_pipe(ialu_reg_reg_alu0);
11708 %}
11709 
// Signed int division, rax = rax / div. The dividend and quotient live in
// RAX; RDX is clobbered (KILL rdx) and the divisor may be neither RAX nor RDX.
// Encoding is delegated to cdql_enc (defined elsewhere in this file). Per the
// format text, the sequence special-cases rax == 0x80000000 with div == -1 so
// that idivl is skipped for that operand pair. Kills flags.
11710 instruct divI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
11711                    rFlagsReg cr)
11712 %{
11713   match(Set rax (DivI rax div));
11714   effect(KILL rdx, KILL cr);
11715 
11716   ins_cost(30*100+10*100); // XXX
11717   format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
11718             "jne,s   normal\n\t"
11719             "xorl    rdx, rdx\n\t"
11720             "cmpl    $div, -1\n\t"
11721             "je,s    done\n"
11722     "normal: cdql\n\t"
11723             "idivl   $div\n"
11724     "done:"        %}
11725   ins_encode(cdql_enc(div));
11726   ins_pipe(ialu_reg_reg_alu0);
11727 %}
11728 
// Signed long division, rax = rax / div. The dividend and quotient live in
// RAX; RDX is clobbered (KILL rdx) and the divisor may be neither RAX nor RDX.
// Encoding is delegated to cdqq_enc (defined elsewhere in this file). Per the
// format text, the sequence special-cases rax == 0x8000000000000000 with
// div == -1 so that idivq is skipped for that operand pair. Kills flags.
11729 instruct divL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
11730                    rFlagsReg cr)
11731 %{
11732   match(Set rax (DivL rax div));
11733   effect(KILL rdx, KILL cr);
11734 
11735   ins_cost(30*100+10*100); // XXX
11736   format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
11737             "cmpq    rax, rdx\n\t"
11738             "jne,s   normal\n\t"
11739             "xorl    rdx, rdx\n\t"
11740             "cmpq    $div, -1\n\t"
11741             "je,s    done\n"
11742     "normal: cdqq\n\t"
11743             "idivq   $div\n"
11744     "done:"        %}
11745   ins_encode(cdqq_enc(div));
11746   ins_pipe(ialu_reg_reg_alu0);
11747 %}
11748 
// Unsigned int division, rax = rax / div (UDivI). Delegates to the
// macro-assembler helper udivI(rax, div, rdx); RDX is clobbered (KILL rdx).
// Costed at 300. Kills flags.
11749 instruct udivI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div, rFlagsReg cr)
11750 %{
11751   match(Set rax (UDivI rax div));
11752   effect(KILL rdx, KILL cr);
11753 
11754   ins_cost(300);
11755   format %{ "udivl $rax,$rax,$div\t# UDivI\n" %}
11756   ins_encode %{
11757     __ udivI($rax$$Register, $div$$Register, $rdx$$Register);
11758   %}
11759   ins_pipe(ialu_reg_reg_alu0);
11760 %}
11761 
// Unsigned long division, rax = rax / div (UDivL). Delegates to the
// macro-assembler helper udivL(rax, div, rdx); RDX is clobbered (KILL rdx).
// Costed at 300. Kills flags.
11762 instruct udivL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div, rFlagsReg cr)
11763 %{
11764   match(Set rax (UDivL rax div));
11765   effect(KILL rdx, KILL cr);
11766 
11767   ins_cost(300);
11768   format %{ "udivq $rax,$rax,$div\t# UDivL\n" %}
11769   ins_encode %{
11770      __ udivL($rax$$Register, $div$$Register, $rdx$$Register);
11771   %}
11772   ins_pipe(ialu_reg_reg_alu0);
11773 %}
11774 
11775 // Integer DIVMOD with Register, both quotient and mod results
// Combined signed int divide/remainder (DivModI). Uses the same cdql_enc
// sequence as the plain divide rule, but RDX is not declared killed here: the
// match rule has no Set, and both RAX and RDX are operands of the node.
// NOTE(review): presumably quotient and remainder are taken from RAX and RDX
// via projections -- confirm against the matcher.
11776 instruct divModI_rReg_divmod(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
11777                              rFlagsReg cr)
11778 %{
11779   match(DivModI rax div);
11780   effect(KILL cr);
11781 
11782   ins_cost(30*100+10*100); // XXX
11783   format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
11784             "jne,s   normal\n\t"
11785             "xorl    rdx, rdx\n\t"
11786             "cmpl    $div, -1\n\t"
11787             "je,s    done\n"
11788     "normal: cdql\n\t"
11789             "idivl   $div\n"
11790     "done:"        %}
11791   ins_encode(cdql_enc(div));
11792   ins_pipe(pipe_slow);
11793 %}
11794 
11795 // Long DIVMOD with Register, both quotient and mod results
// Combined signed long divide/remainder (DivModL). Uses the same cdqq_enc
// sequence as the plain divide rule, but RDX is not declared killed here: the
// match rule has no Set, and both RAX and RDX are operands of the node.
// NOTE(review): presumably quotient and remainder are taken from RAX and RDX
// via projections -- confirm against the matcher.
11796 instruct divModL_rReg_divmod(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
11797                              rFlagsReg cr)
11798 %{
11799   match(DivModL rax div);
11800   effect(KILL cr);
11801 
11802   ins_cost(30*100+10*100); // XXX
11803   format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
11804             "cmpq    rax, rdx\n\t"
11805             "jne,s   normal\n\t"
11806             "xorl    rdx, rdx\n\t"
11807             "cmpq    $div, -1\n\t"
11808             "je,s    done\n"
11809     "normal: cdqq\n\t"
11810             "idivq   $div\n"
11811     "done:"        %}
11812   ins_encode(cdqq_enc(div));
11813   ins_pipe(pipe_slow);
11814 %}
11815 
11816 // Unsigned integer DIVMOD with Register, both quotient and mod results
// Combined unsigned int divide/remainder (UDivModI). Delegates to the
// macro-assembler helper udivmodI(rax, div, rdx, tmp); tmp is a scratch
// register (TEMP) that may be neither RAX nor RDX. Costed at 300. Kills flags.
11817 instruct udivModI_rReg_divmod(rax_RegI rax, no_rax_rdx_RegI tmp, rdx_RegI rdx,
11818                               no_rax_rdx_RegI div, rFlagsReg cr)
11819 %{
11820   match(UDivModI rax div);
11821   effect(TEMP tmp, KILL cr);
11822 
11823   ins_cost(300);
11824   format %{ "udivl $rax,$rax,$div\t# begin UDivModI\n\t"
11825             "umodl $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModI\n"
11826           %}
11827   ins_encode %{
11828     __ udivmodI($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
11829   %}
11830   ins_pipe(pipe_slow);
11831 %}
11832 
11833 // Unsigned long DIVMOD with Register, both quotient and mod results
// Combined unsigned long divide/remainder (UDivModL). Delegates to the
// macro-assembler helper udivmodL(rax, div, rdx, tmp); tmp is a scratch
// register (TEMP) that may be neither RAX nor RDX. Costed at 300. Kills flags.
11834 instruct udivModL_rReg_divmod(rax_RegL rax, no_rax_rdx_RegL tmp, rdx_RegL rdx,
11835                               no_rax_rdx_RegL div, rFlagsReg cr)
11836 %{
11837   match(UDivModL rax div);
11838   effect(TEMP tmp, KILL cr);
11839 
11840   ins_cost(300);
11841   format %{ "udivq $rax,$rax,$div\t# begin UDivModL\n\t"
11842             "umodq $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModL\n"
11843           %}
11844   ins_encode %{
11845     __ udivmodL($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
11846   %}
11847   ins_pipe(pipe_slow);
11848 %}
11849 
// Signed int remainder, rdx = rax % div. Shares the cdql_enc sequence with
// the divide rule; the result is taken from RDX and RAX is clobbered
// (KILL rax). Per the format text, rax == 0x80000000 with div == -1 bypasses
// idivl with RDX already zeroed. Kills flags.
11850 instruct modI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div,
11851                    rFlagsReg cr)
11852 %{
11853   match(Set rdx (ModI rax div));
11854   effect(KILL rax, KILL cr);
11855 
11856   ins_cost(300); // XXX
11857   format %{ "cmpl    rax, 0x80000000\t# irem\n\t"
11858             "jne,s   normal\n\t"
11859             "xorl    rdx, rdx\n\t"
11860             "cmpl    $div, -1\n\t"
11861             "je,s    done\n"
11862     "normal: cdql\n\t"
11863             "idivl   $div\n"
11864     "done:"        %}
11865   ins_encode(cdql_enc(div));
11866   ins_pipe(ialu_reg_reg_alu0);
11867 %}
11868 
// Signed long remainder, rdx = rax % div. Shares the cdqq_enc sequence with
// the divide rule; the result is taken from RDX and RAX is clobbered
// (KILL rax). Per the format text, rax == 0x8000000000000000 with div == -1
// bypasses idivq with RDX already zeroed. Kills flags.
11869 instruct modL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div,
11870                    rFlagsReg cr)
11871 %{
11872   match(Set rdx (ModL rax div));
11873   effect(KILL rax, KILL cr);
11874 
11875   ins_cost(300); // XXX
11876   format %{ "movq    rdx, 0x8000000000000000\t# lrem\n\t"
11877             "cmpq    rax, rdx\n\t"
11878             "jne,s   normal\n\t"
11879             "xorl    rdx, rdx\n\t"
11880             "cmpq    $div, -1\n\t"
11881             "je,s    done\n"
11882     "normal: cdqq\n\t"
11883             "idivq   $div\n"
11884     "done:"        %}
11885   ins_encode(cdqq_enc(div));
11886   ins_pipe(ialu_reg_reg_alu0);
11887 %}
11888 
// Unsigned int remainder, rdx = rax % div (UModI). Delegates to the
// macro-assembler helper umodI(rax, div, rdx); RAX is clobbered (KILL rax).
// Costed at 300. Kills flags.
11889 instruct umodI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div, rFlagsReg cr)
11890 %{
11891   match(Set rdx (UModI rax div));
11892   effect(KILL rax, KILL cr);
11893 
11894   ins_cost(300);
11895   format %{ "umodl $rdx,$rax,$div\t# UModI\n" %}
11896   ins_encode %{
11897     __ umodI($rax$$Register, $div$$Register, $rdx$$Register);
11898   %}
11899   ins_pipe(ialu_reg_reg_alu0);
11900 %}
11901 
// Unsigned long remainder, rdx = rax % div (UModL). Delegates to the
// macro-assembler helper umodL(rax, div, rdx); RAX is clobbered (KILL rax).
// Costed at 300. Kills flags.
11902 instruct umodL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div, rFlagsReg cr)
11903 %{
11904   match(Set rdx (UModL rax div));
11905   effect(KILL rax, KILL cr);
11906 
11907   ins_cost(300);
11908   format %{ "umodq $rdx,$rax,$div\t# UModL\n" %}
11909   ins_encode %{
11910     __ umodL($rax$$Register, $div$$Register, $rdx$$Register);
11911   %}
11912   ins_pipe(ialu_reg_reg_alu0);
11913 %}
11914 
11915 // Integer Shift Instructions
11916 // Shift Left by one, two, three
// In-place int left shift by an immI2 constant, dst <<= shift, used when APX
// is disabled (!UseAPX). Kills flags.
11917 instruct salI_rReg_immI2(rRegI dst, immI2 shift, rFlagsReg cr)
11918 %{
11919   predicate(!UseAPX);
11920   match(Set dst (LShiftI dst shift));
11921   effect(KILL cr);
11922 
11923   format %{ "sall    $dst, $shift" %}
11924   ins_encode %{
11925     __ sall($dst$$Register, $shift$$constant);
11926   %}
11927   ins_pipe(ialu_reg);
11928 %}
11929 
11930 // Shift Left by one, two, three
// APX NDD int left shift by an immI2 constant, dst = src << shift, selected
// when UseAPX is set. Kills flags.
11931 instruct salI_rReg_immI2_ndd(rRegI dst, rRegI src, immI2 shift, rFlagsReg cr)
11932 %{
11933   predicate(UseAPX);
11934   match(Set dst (LShiftI src shift));
11935   effect(KILL cr);
11936   flag(PD::Flag_ndd_demotable_opr1);
11937 
11938   format %{ "esall    $dst, $src, $shift\t# int(ndd)" %}
11939   ins_encode %{
11940     __ esall($dst$$Register, $src$$Register, $shift$$constant, false);
11941   %}
11942   ins_pipe(ialu_reg);
11943 %}
11944 
11945 // Shift Left by 8-bit immediate
// In-place int left shift by an immI8 constant, dst <<= shift, used when APX
// is disabled (!UseAPX). Kills flags.
11946 instruct salI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
11947 %{
11948   predicate(!UseAPX);
11949   match(Set dst (LShiftI dst shift));
11950   effect(KILL cr);
11951 
11952   format %{ "sall    $dst, $shift" %}
11953   ins_encode %{
11954     __ sall($dst$$Register, $shift$$constant);
11955   %}
11956   ins_pipe(ialu_reg);
11957 %}
11958 
11959 // Shift Left by 8-bit immediate
// APX NDD int left shift by an immI8 constant, dst = src << shift, selected
// when UseAPX is set. Kills flags.
11960 instruct salI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
11961 %{
11962   predicate(UseAPX);
11963   match(Set dst (LShiftI src shift));
11964   effect(KILL cr);
11965   flag(PD::Flag_ndd_demotable_opr1);
11966 
11967   format %{ "esall    $dst, $src, $shift\t# int (ndd)" %}
11968   ins_encode %{
11969     __ esall($dst$$Register, $src$$Register, $shift$$constant, false);
11970   %}
11971   ins_pipe(ialu_reg);
11972 %}
11973 
// Shift Left by 8-bit immediate, source loaded from memory (ndd form).
// Used when UseAPX is on: folds the LoadI of src into the shift and writes the
// result to the dst register. The flags register is declared killed.
// NOTE(review): the trailing `false` encoder argument is defined by the
// assembler - confirm its meaning before changing.
11974 instruct salI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
11975 %{
11976   predicate(UseAPX);
11977   match(Set dst (LShiftI (LoadI src) shift));
11978   effect(KILL cr);
11979
11980   format %{ "esall    $dst, $src, $shift\t# int (ndd)" %}
11981   ins_encode %{
11982     __ esall($dst$$Register, $src$$Address, $shift$$constant, false);
11983   %}
11984   ins_pipe(ialu_reg);
11985 %}
11986 
11987 // Shift Left by 8-bit immediate
// Int read-modify-write on memory: matches a StoreI to dst of the LoadI from
// the same dst shifted left by an immI8 count. There is no predicate, so the
// rule does not depend on UseAPX. The flags register is declared killed.
11988 instruct salI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
11989 %{
11990   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
11991   effect(KILL cr);
11992
11993   format %{ "sall    $dst, $shift" %}
11994   ins_encode %{
11995     __ sall($dst$$Address, $shift$$constant);
11996   %}
11997   ins_pipe(ialu_mem_imm);
11998 %}
11999 
12000 // Shift Left by variable
// Int, destructive form used when BMI2 is not available. The count operand is
// bound to the rcx register class (rcx_RegI). The flags register is declared
// killed.
12001 instruct salI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12002 %{
12003   predicate(!VM_Version::supports_bmi2());
12004   match(Set dst (LShiftI dst shift));
12005   effect(KILL cr);
12006
12007   format %{ "sall    $dst, $shift" %}
12008   ins_encode %{
          // Only $dst is passed; the count is not an explicit encoder argument.
12009     __ sall($dst$$Register);
12010   %}
12011   ins_pipe(ialu_reg_reg);
12012 %}
12013 
12014 // Shift Left by variable
// Int read-modify-write on memory, used when BMI2 is not available: matches a
// StoreI to dst of the LoadI from dst shifted left by the rcx-bound count.
// The flags register is declared killed.
12015 instruct salI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12016 %{
12017   predicate(!VM_Version::supports_bmi2());
12018   match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
12019   effect(KILL cr);
12020
12021   format %{ "sall    $dst, $shift" %}
12022   ins_encode %{
          // Only the address is passed; the count is not an explicit encoder argument.
12023     __ sall($dst$$Address);
12024   %}
12025   ins_pipe(ialu_mem_reg);
12026 %}
12027 
// Shift Left by variable, BMI2 form (int).
// Three-operand shlxl: dst, src and the count are independent rRegI operands.
// No flags operand and no KILL effect are declared for this rule.
12028 instruct salI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
12029 %{
12030   predicate(VM_Version::supports_bmi2());
12031   match(Set dst (LShiftI src shift));
12032
12033   format %{ "shlxl   $dst, $src, $shift" %}
12034   ins_encode %{
12035     __ shlxl($dst$$Register, $src$$Register, $shift$$Register);
12036   %}
12037   ins_pipe(ialu_reg_reg);
12038 %}
12039 
// Shift Left by variable, BMI2 form with the source loaded from memory (int).
// Folds the LoadI of src into shlxl; the result goes to the dst register.
// No flags operand is declared. An explicit ins_cost of 175 is assigned.
12040 instruct salI_mem_rReg(rRegI dst, memory src, rRegI shift)
12041 %{
12042   predicate(VM_Version::supports_bmi2());
12043   match(Set dst (LShiftI (LoadI src) shift));
12044   ins_cost(175);
12045   format %{ "shlxl   $dst, $src, $shift" %}
12046   ins_encode %{
12047     __ shlxl($dst$$Register, $src$$Address, $shift$$Register);
12048   %}
12049   ins_pipe(ialu_reg_mem);
12050 %}
12051 
12052 // Arithmetic Shift Right by 8-bit immediate
// Int, destructive form (dst is input and result) with an immI8 count.
// Selected only when UseAPX is off. The flags register is declared killed.
// NOTE(review): the pipe class is ialu_mem_imm although the operand is a
// register (the sal/shr register-immediate rules use ialu_reg) - confirm intended.
12053 instruct sarI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
12054 %{
12055   predicate(!UseAPX);
12056   match(Set dst (RShiftI dst shift));
12057   effect(KILL cr);
12058
12059   format %{ "sarl    $dst, $shift" %}
12060   ins_encode %{
12061     __ sarl($dst$$Register, $shift$$constant);
12062   %}
12063   ins_pipe(ialu_mem_imm);
12064 %}
12065 
12066 // Arithmetic Shift Right by 8-bit immediate
// Int, three-operand (ndd) form used when UseAPX is on: dst receives src
// arithmetically shifted right by the immI8 count.
// NOTE(review): the meaning of Flag_ndd_demotable_opr1 and of the trailing
// `false` encoder argument is defined elsewhere - confirm before changing.
// NOTE(review): the pipe class is ialu_mem_imm although both operands are
// registers - confirm intended.
12067 instruct sarI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
12068 %{
12069   predicate(UseAPX);
12070   match(Set dst (RShiftI src shift));
12071   effect(KILL cr);
12072   flag(PD::Flag_ndd_demotable_opr1);
12073
12074   format %{ "esarl    $dst, $src, $shift\t# int (ndd)" %}
12075   ins_encode %{
12076     __ esarl($dst$$Register, $src$$Register, $shift$$constant, false);
12077   %}
12078   ins_pipe(ialu_mem_imm);
12079 %}
12080 
// Arithmetic Shift Right by 8-bit immediate, source loaded from memory (ndd form).
// Used when UseAPX is on: folds the LoadI of src into the shift and writes the
// result to the dst register. The flags register is declared killed.
// NOTE(review): the trailing `false` encoder argument is defined by the
// assembler - confirm its meaning before changing.
12081 instruct sarI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
12082 %{
12083   predicate(UseAPX);
12084   match(Set dst (RShiftI (LoadI src) shift));
12085   effect(KILL cr);
12086
12087   format %{ "esarl    $dst, $src, $shift\t# int (ndd)" %}
12088   ins_encode %{
12089     __ esarl($dst$$Register, $src$$Address, $shift$$constant, false);
12090   %}
12091   ins_pipe(ialu_mem_imm);
12092 %}
12093 
12094 // Arithmetic Shift Right by 8-bit immediate
// Int read-modify-write on memory: matches a StoreI to dst of the LoadI from
// the same dst arithmetically shifted right by an immI8 count. There is no
// predicate, so the rule does not depend on UseAPX. Flags are declared killed.
12095 instruct sarI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12096 %{
12097   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
12098   effect(KILL cr);
12099
12100   format %{ "sarl    $dst, $shift" %}
12101   ins_encode %{
12102     __ sarl($dst$$Address, $shift$$constant);
12103   %}
12104   ins_pipe(ialu_mem_imm);
12105 %}
12106 
12107 // Arithmetic Shift Right by variable
// Int, destructive form used when BMI2 is not available. The count operand is
// bound to the rcx register class (rcx_RegI). The flags register is declared
// killed.
12108 instruct sarI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12109 %{
12110   predicate(!VM_Version::supports_bmi2());
12111   match(Set dst (RShiftI dst shift));
12112   effect(KILL cr);
12113
12114   format %{ "sarl    $dst, $shift" %}
12115   ins_encode %{
          // Only $dst is passed; the count is not an explicit encoder argument.
12116     __ sarl($dst$$Register);
12117   %}
12118   ins_pipe(ialu_reg_reg);
12119 %}
12120 
12121 // Arithmetic Shift Right by variable
// Int read-modify-write on memory, used when BMI2 is not available: matches a
// StoreI to dst of the LoadI from dst arithmetically shifted right by the
// rcx-bound count. The flags register is declared killed.
12122 instruct sarI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12123 %{
12124   predicate(!VM_Version::supports_bmi2());
12125   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
12126   effect(KILL cr);
12127
12128   format %{ "sarl    $dst, $shift" %}
12129   ins_encode %{
          // Only the address is passed; the count is not an explicit encoder argument.
12130     __ sarl($dst$$Address);
12131   %}
12132   ins_pipe(ialu_mem_reg);
12133 %}
12134 
// Arithmetic Shift Right by variable, BMI2 form (int).
// Three-operand sarxl: dst, src and the count are independent rRegI operands.
// No flags operand and no KILL effect are declared for this rule.
12135 instruct sarI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
12136 %{
12137   predicate(VM_Version::supports_bmi2());
12138   match(Set dst (RShiftI src shift));
12139
12140   format %{ "sarxl   $dst, $src, $shift" %}
12141   ins_encode %{
12142     __ sarxl($dst$$Register, $src$$Register, $shift$$Register);
12143   %}
12144   ins_pipe(ialu_reg_reg);
12145 %}
12146 
// Arithmetic Shift Right by variable, BMI2 form with the source loaded from
// memory (int). Folds the LoadI of src into sarxl; the result goes to the dst
// register. No flags operand is declared. An explicit ins_cost of 175 is assigned.
12147 instruct sarI_mem_rReg(rRegI dst, memory src, rRegI shift)
12148 %{
12149   predicate(VM_Version::supports_bmi2());
12150   match(Set dst (RShiftI (LoadI src) shift));
12151   ins_cost(175);
12152   format %{ "sarxl   $dst, $src, $shift" %}
12153   ins_encode %{
12154     __ sarxl($dst$$Register, $src$$Address, $shift$$Register);
12155   %}
12156   ins_pipe(ialu_reg_mem);
12157 %}
12158 
12159 // Logical Shift Right by 8-bit immediate
// Int, destructive form (dst is input and result) with an immI8 count.
// Selected only when UseAPX is off. The flags register is declared killed.
12160 instruct shrI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
12161 %{
12162   predicate(!UseAPX);
12163   match(Set dst (URShiftI dst shift));
12164   effect(KILL cr);
12165
12166   format %{ "shrl    $dst, $shift" %}
12167   ins_encode %{
12168     __ shrl($dst$$Register, $shift$$constant);
12169   %}
12170   ins_pipe(ialu_reg);
12171 %}
12172 
12173 // Logical Shift Right by 8-bit immediate
// Int, three-operand (ndd) form used when UseAPX is on: dst receives src
// logically shifted right by the immI8 count.
// NOTE(review): the meaning of Flag_ndd_demotable_opr1 and of the trailing
// `false` encoder argument is defined elsewhere - confirm before changing.
12174 instruct shrI_rReg_imm_ndd(rRegI dst, rRegI src, immI8 shift, rFlagsReg cr)
12175 %{
12176   predicate(UseAPX);
12177   match(Set dst (URShiftI src shift));
12178   effect(KILL cr);
12179   flag(PD::Flag_ndd_demotable_opr1);
12180
12181   format %{ "eshrl    $dst, $src, $shift\t # int (ndd)" %}
12182   ins_encode %{
12183     __ eshrl($dst$$Register, $src$$Register, $shift$$constant, false);
12184   %}
12185   ins_pipe(ialu_reg);
12186 %}
12187 
// Logical Shift Right by 8-bit immediate, source loaded from memory (ndd form).
// Used when UseAPX is on: folds the LoadI of src into the shift and writes the
// result to the dst register. The flags register is declared killed.
// NOTE(review): the trailing `false` encoder argument is defined by the
// assembler - confirm its meaning before changing.
12188 instruct shrI_rReg_mem_imm_ndd(rRegI dst, memory src, immI8 shift, rFlagsReg cr)
12189 %{
12190   predicate(UseAPX);
12191   match(Set dst (URShiftI (LoadI src) shift));
12192   effect(KILL cr);
12193
12194   format %{ "eshrl    $dst, $src, $shift\t # int (ndd)" %}
12195   ins_encode %{
12196     __ eshrl($dst$$Register, $src$$Address, $shift$$constant, false);
12197   %}
12198   ins_pipe(ialu_reg);
12199 %}
12200 
12201 // Logical Shift Right by 8-bit immediate
// Int read-modify-write on memory: matches a StoreI to dst of the LoadI from
// the same dst logically shifted right by an immI8 count. There is no
// predicate, so the rule does not depend on UseAPX. Flags are declared killed.
12202 instruct shrI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12203 %{
12204   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
12205   effect(KILL cr);
12206
12207   format %{ "shrl    $dst, $shift" %}
12208   ins_encode %{
12209     __ shrl($dst$$Address, $shift$$constant);
12210   %}
12211   ins_pipe(ialu_mem_imm);
12212 %}
12213 
12214 // Logical Shift Right by variable
// Int, destructive form used when BMI2 is not available. The count operand is
// bound to the rcx register class (rcx_RegI). The flags register is declared
// killed.
12215 instruct shrI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12216 %{
12217   predicate(!VM_Version::supports_bmi2());
12218   match(Set dst (URShiftI dst shift));
12219   effect(KILL cr);
12220
12221   format %{ "shrl    $dst, $shift" %}
12222   ins_encode %{
          // Only $dst is passed; the count is not an explicit encoder argument.
12223     __ shrl($dst$$Register);
12224   %}
12225   ins_pipe(ialu_reg_reg);
12226 %}
12227 
12228 // Logical Shift Right by variable
// Int read-modify-write on memory, used when BMI2 is not available: matches a
// StoreI to dst of the LoadI from dst logically shifted right by the
// rcx-bound count. The flags register is declared killed.
12229 instruct shrI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12230 %{
12231   predicate(!VM_Version::supports_bmi2());
12232   match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
12233   effect(KILL cr);
12234
12235   format %{ "shrl    $dst, $shift" %}
12236   ins_encode %{
          // Only the address is passed; the count is not an explicit encoder argument.
12237     __ shrl($dst$$Address);
12238   %}
12239   ins_pipe(ialu_mem_reg);
12240 %}
12241 
// Logical Shift Right by variable, BMI2 form (int).
// Three-operand shrxl: dst, src and the count are independent rRegI operands.
// No flags operand and no KILL effect are declared for this rule.
12242 instruct shrI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
12243 %{
12244   predicate(VM_Version::supports_bmi2());
12245   match(Set dst (URShiftI src shift));
12246
12247   format %{ "shrxl   $dst, $src, $shift" %}
12248   ins_encode %{
12249     __ shrxl($dst$$Register, $src$$Register, $shift$$Register);
12250   %}
12251   ins_pipe(ialu_reg_reg);
12252 %}
12253 
// Logical Shift Right by variable, BMI2 form with the source loaded from
// memory (int). Folds the LoadI of src into shrxl; the result goes to the dst
// register. No flags operand is declared. An explicit ins_cost of 175 is assigned.
12254 instruct shrI_mem_rReg(rRegI dst, memory src, rRegI shift)
12255 %{
12256   predicate(VM_Version::supports_bmi2());
12257   match(Set dst (URShiftI (LoadI src) shift));
12258   ins_cost(175);
12259   format %{ "shrxl   $dst, $src, $shift" %}
12260   ins_encode %{
12261     __ shrxl($dst$$Register, $src$$Address, $shift$$Register);
12262   %}
12263   ins_pipe(ialu_reg_mem);
12264 %}
12265 
12266 // Long Shift Instructions
12267 // Shift Left by one, two, three
// Long, destructive form: dst is both the shifted input and the result, and
// the count is an immI2 constant. Selected only when UseAPX is off. The flags
// register is declared killed.
12268 instruct salL_rReg_immI2(rRegL dst, immI2 shift, rFlagsReg cr)
12269 %{
12270   predicate(!UseAPX);
12271   match(Set dst (LShiftL dst shift));
12272   effect(KILL cr);
12273
12274   format %{ "salq    $dst, $shift" %}
12275   ins_encode %{
12276     __ salq($dst$$Register, $shift$$constant);
12277   %}
12278   ins_pipe(ialu_reg);
12279 %}
12280 
12281 // Shift Left by one, two, three
// Long, three-operand (ndd) form used when UseAPX is on: the result goes to
// dst while the shifted value comes from a separate src register.
// NOTE(review): the meaning of Flag_ndd_demotable_opr1 and of the trailing
// `false` encoder argument is defined elsewhere - confirm before changing.
12282 instruct salL_rReg_immI2_ndd(rRegL dst, rRegL src, immI2 shift, rFlagsReg cr)
12283 %{
12284   predicate(UseAPX);
12285   match(Set dst (LShiftL src shift));
12286   effect(KILL cr);
12287   flag(PD::Flag_ndd_demotable_opr1);
12288
12289   format %{ "esalq    $dst, $src, $shift\t# long (ndd)" %}
12290   ins_encode %{
12291     __ esalq($dst$$Register, $src$$Register, $shift$$constant, false);
12292   %}
12293   ins_pipe(ialu_reg);
12294 %}
12295 
12296 // Shift Left by 8-bit immediate
// Long, destructive form (dst is input and result) with an immI8 count.
// Selected only when UseAPX is off. The flags register is declared killed.
12297 instruct salL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
12298 %{
12299   predicate(!UseAPX);
12300   match(Set dst (LShiftL dst shift));
12301   effect(KILL cr);
12302
12303   format %{ "salq    $dst, $shift" %}
12304   ins_encode %{
12305     __ salq($dst$$Register, $shift$$constant);
12306   %}
12307   ins_pipe(ialu_reg);
12308 %}
12309 
12310 // Shift Left by 8-bit immediate
// Long, three-operand (ndd) form used when UseAPX is on: dst receives
// src shifted left by the immI8 count; src is a separate register operand.
// NOTE(review): the meaning of Flag_ndd_demotable_opr1 and of the trailing
// `false` encoder argument is defined elsewhere - confirm before changing.
12311 instruct salL_rReg_imm_ndd(rRegL dst, rRegL src, immI8 shift, rFlagsReg cr)
12312 %{
12313   predicate(UseAPX);
12314   match(Set dst (LShiftL src shift));
12315   effect(KILL cr);
12316   flag(PD::Flag_ndd_demotable_opr1);
12317
12318   format %{ "esalq    $dst, $src, $shift\t# long (ndd)" %}
12319   ins_encode %{
12320     __ esalq($dst$$Register, $src$$Register, $shift$$constant, false);
12321   %}
12322   ins_pipe(ialu_reg);
12323 %}
12324 
// Shift Left by 8-bit immediate, source loaded from memory (long, ndd form).
// Used when UseAPX is on: folds the LoadL of src into the shift and writes the
// result to the dst register. The flags register is declared killed.
// NOTE(review): the trailing `false` encoder argument is defined by the
// assembler - confirm its meaning before changing.
12325 instruct salL_rReg_mem_imm_ndd(rRegL dst, memory src, immI8 shift, rFlagsReg cr)
12326 %{
12327   predicate(UseAPX);
12328   match(Set dst (LShiftL (LoadL src) shift));
12329   effect(KILL cr);
12330
12331   format %{ "esalq    $dst, $src, $shift\t# long (ndd)" %}
12332   ins_encode %{
12333     __ esalq($dst$$Register, $src$$Address, $shift$$constant, false);
12334   %}
12335   ins_pipe(ialu_reg);
12336 %}
12337 
12338 // Shift Left by 8-bit immediate
// Long read-modify-write on memory: matches a StoreL to dst of the LoadL from
// the same dst shifted left by an immI8 count. There is no predicate, so the
// rule does not depend on UseAPX. The flags register is declared killed.
12339 instruct salL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12340 %{
12341   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
12342   effect(KILL cr);
12343
12344   format %{ "salq    $dst, $shift" %}
12345   ins_encode %{
12346     __ salq($dst$$Address, $shift$$constant);
12347   %}
12348   ins_pipe(ialu_mem_imm);
12349 %}
12350 
12351 // Shift Left by variable
// Long, destructive form used when BMI2 is not available. The count is an int
// operand bound to the rcx register class (rcx_RegI). The flags register is
// declared killed.
12352 instruct salL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12353 %{
12354   predicate(!VM_Version::supports_bmi2());
12355   match(Set dst (LShiftL dst shift));
12356   effect(KILL cr);
12357
12358   format %{ "salq    $dst, $shift" %}
12359   ins_encode %{
          // Only $dst is passed; the count is not an explicit encoder argument.
12360     __ salq($dst$$Register);
12361   %}
12362   ins_pipe(ialu_reg_reg);
12363 %}
12364 
12365 // Shift Left by variable
// Long read-modify-write on memory, used when BMI2 is not available: matches a
// StoreL to dst of the LoadL from dst shifted left by the rcx-bound count.
// The flags register is declared killed.
12366 instruct salL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12367 %{
12368   predicate(!VM_Version::supports_bmi2());
12369   match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
12370   effect(KILL cr);
12371
12372   format %{ "salq    $dst, $shift" %}
12373   ins_encode %{
          // Only the address is passed; the count is not an explicit encoder argument.
12374     __ salq($dst$$Address);
12375   %}
12376   ins_pipe(ialu_mem_reg);
12377 %}
12378 
// Shift Left by variable, BMI2 form (long).
// Three-operand shlxq: dst and src are rRegL, the count is an rRegI operand.
// No flags operand and no KILL effect are declared for this rule.
12379 instruct salL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12380 %{
12381   predicate(VM_Version::supports_bmi2());
12382   match(Set dst (LShiftL src shift));
12383
12384   format %{ "shlxq   $dst, $src, $shift" %}
12385   ins_encode %{
12386     __ shlxq($dst$$Register, $src$$Register, $shift$$Register);
12387   %}
12388   ins_pipe(ialu_reg_reg);
12389 %}
12390 
// Shift Left by variable, BMI2 form with the source loaded from memory (long).
// Folds the LoadL of src into shlxq; the result goes to the dst register.
// No flags operand is declared. An explicit ins_cost of 175 is assigned.
12391 instruct salL_mem_rReg(rRegL dst, memory src, rRegI shift)
12392 %{
12393   predicate(VM_Version::supports_bmi2());
12394   match(Set dst (LShiftL (LoadL src) shift));
12395   ins_cost(175);
12396   format %{ "shlxq   $dst, $src, $shift" %}
12397   ins_encode %{
12398     __ shlxq($dst$$Register, $src$$Address, $shift$$Register);
12399   %}
12400   ins_pipe(ialu_reg_mem);
12401 %}
12402 
12403 // Arithmetic Shift Right by immediate
// Long, destructive form (dst is input and result). Unlike the other
// immediate shift rules, the count operand is a full immI; the encoder masks
// it to its low 6 bits before emitting. Selected only when UseAPX is off.
// The flags register is declared killed.
// NOTE(review): the pipe class is ialu_mem_imm although the operand is a
// register - confirm intended.
12404 instruct sarL_rReg_imm(rRegL dst, immI shift, rFlagsReg cr)
12405 %{
12406   predicate(!UseAPX);
12407   match(Set dst (RShiftL dst shift));
12408   effect(KILL cr);
12409
12410   format %{ "sarq    $dst, $shift" %}
12411   ins_encode %{
          // Reduce the constant to the range 0..63 and narrow it to one byte.
12412     __ sarq($dst$$Register, (unsigned char)($shift$$constant & 0x3F));
12413   %}
12414   ins_pipe(ialu_mem_imm);
12415 %}
12416 
12417 // Arithmetic Shift Right by immediate
// Long, three-operand (ndd) form used when UseAPX is on: dst receives src
// arithmetically shifted right. The count operand is a full immI; the encoder
// masks it to its low 6 bits before emitting.
// NOTE(review): the meaning of Flag_ndd_demotable_opr1 and of the trailing
// `false` encoder argument is defined elsewhere - confirm before changing.
// NOTE(review): the pipe class is ialu_mem_imm although both operands are
// registers - confirm intended.
12418 instruct sarL_rReg_imm_ndd(rRegL dst, rRegL src, immI shift, rFlagsReg cr)
12419 %{
12420   predicate(UseAPX);
12421   match(Set dst (RShiftL src shift));
12422   effect(KILL cr);
12423   flag(PD::Flag_ndd_demotable_opr1);
12424
12425   format %{ "esarq    $dst, $src, $shift\t# long (ndd)" %}
12426   ins_encode %{
          // Reduce the constant to the range 0..63 and narrow it to one byte.
12427     __ esarq($dst$$Register, $src$$Register, (unsigned char)($shift$$constant & 0x3F), false);
12428   %}
12429   ins_pipe(ialu_mem_imm);
12430 %}
12431 
// Arithmetic Shift Right by immediate, source loaded from memory (long, ndd form).
// Used when UseAPX is on: folds the LoadL of src into the shift and writes the
// result to the dst register. The count operand is a full immI that the
// encoder masks to its low 6 bits. The flags register is declared killed.
// NOTE(review): the trailing `false` encoder argument is defined by the
// assembler - confirm its meaning before changing.
12432 instruct sarL_rReg_mem_imm_ndd(rRegL dst, memory src, immI shift, rFlagsReg cr)
12433 %{
12434   predicate(UseAPX);
12435   match(Set dst (RShiftL (LoadL src) shift));
12436   effect(KILL cr);
12437
12438   format %{ "esarq    $dst, $src, $shift\t# long (ndd)" %}
12439   ins_encode %{
          // Reduce the constant to the range 0..63 and narrow it to one byte.
12440     __ esarq($dst$$Register, $src$$Address, (unsigned char)($shift$$constant & 0x3F), false);
12441   %}
12442   ins_pipe(ialu_mem_imm);
12443 %}
12444 
12445 // Arithmetic Shift Right by immediate
// Long read-modify-write on memory: matches a StoreL to dst of the LoadL from
// the same dst arithmetically shifted right. The count operand is a full immI
// that the encoder masks to its low 6 bits. There is no predicate, so the rule
// does not depend on UseAPX. The flags register is declared killed.
12446 instruct sarL_mem_imm(memory dst, immI shift, rFlagsReg cr)
12447 %{
12448   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
12449   effect(KILL cr);
12450
12451   format %{ "sarq    $dst, $shift" %}
12452   ins_encode %{
          // Reduce the constant to the range 0..63 and narrow it to one byte.
12453     __ sarq($dst$$Address, (unsigned char)($shift$$constant & 0x3F));
12454   %}
12455   ins_pipe(ialu_mem_imm);
12456 %}
12457 
12458 // Arithmetic Shift Right by variable
// Long, destructive form used when BMI2 is not available. The count is an int
// operand bound to the rcx register class (rcx_RegI). The flags register is
// declared killed.
12459 instruct sarL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12460 %{
12461   predicate(!VM_Version::supports_bmi2());
12462   match(Set dst (RShiftL dst shift));
12463   effect(KILL cr);
12464
12465   format %{ "sarq    $dst, $shift" %}
12466   ins_encode %{
          // Only $dst is passed; the count is not an explicit encoder argument.
12467     __ sarq($dst$$Register);
12468   %}
12469   ins_pipe(ialu_reg_reg);
12470 %}
12471 
12472 // Arithmetic Shift Right by variable
// Long read-modify-write on memory, used when BMI2 is not available: matches a
// StoreL to dst of the LoadL from dst arithmetically shifted right by the
// rcx-bound count. The flags register is declared killed.
12473 instruct sarL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12474 %{
12475   predicate(!VM_Version::supports_bmi2());
12476   match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
12477   effect(KILL cr);
12478
12479   format %{ "sarq    $dst, $shift" %}
12480   ins_encode %{
          // Only the address is passed; the count is not an explicit encoder argument.
12481     __ sarq($dst$$Address);
12482   %}
12483   ins_pipe(ialu_mem_reg);
12484 %}
12485 
// Arithmetic Shift Right by variable, BMI2 form (long).
// Three-operand sarxq: dst and src are rRegL, the count is an rRegI operand.
// No flags operand and no KILL effect are declared for this rule.
12486 instruct sarL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12487 %{
12488   predicate(VM_Version::supports_bmi2());
12489   match(Set dst (RShiftL src shift));
12490
12491   format %{ "sarxq   $dst, $src, $shift" %}
12492   ins_encode %{
12493     __ sarxq($dst$$Register, $src$$Register, $shift$$Register);
12494   %}
12495   ins_pipe(ialu_reg_reg);
12496 %}
12497 
// Arithmetic Shift Right by variable, BMI2 form with the source loaded from
// memory (long). Folds the LoadL of src into sarxq; the result goes to the dst
// register. No flags operand is declared. An explicit ins_cost of 175 is assigned.
12498 instruct sarL_mem_rReg(rRegL dst, memory src, rRegI shift)
12499 %{
12500   predicate(VM_Version::supports_bmi2());
12501   match(Set dst (RShiftL (LoadL src) shift));
12502   ins_cost(175);
12503   format %{ "sarxq   $dst, $src, $shift" %}
12504   ins_encode %{
12505     __ sarxq($dst$$Register, $src$$Address, $shift$$Register);
12506   %}
12507   ins_pipe(ialu_reg_mem);
12508 %}
12509 
12510 // Logical Shift Right by 8-bit immediate
// Long, destructive form (dst is input and result) with an immI8 count.
// Selected only when UseAPX is off. The flags register is declared killed.
12511 instruct shrL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
12512 %{
12513   predicate(!UseAPX);
12514   match(Set dst (URShiftL dst shift));
12515   effect(KILL cr);
12516
12517   format %{ "shrq    $dst, $shift" %}
12518   ins_encode %{
12519     __ shrq($dst$$Register, $shift$$constant);
12520   %}
12521   ins_pipe(ialu_reg);
12522 %}
12523 
12524 // Logical Shift Right by 8-bit immediate
// Long, three-operand (ndd) form used when UseAPX is on: dst receives src
// logically shifted right by the immI8 count.
// NOTE(review): the meaning of Flag_ndd_demotable_opr1 and of the trailing
// `false` encoder argument is defined elsewhere - confirm before changing.
12525 instruct shrL_rReg_imm_ndd(rRegL dst, rRegL src, immI8 shift, rFlagsReg cr)
12526 %{
12527   predicate(UseAPX);
12528   match(Set dst (URShiftL src shift));
12529   effect(KILL cr);
12530   flag(PD::Flag_ndd_demotable_opr1);
12531
12532   format %{ "eshrq    $dst, $src, $shift\t# long (ndd)" %}
12533   ins_encode %{
12534     __ eshrq($dst$$Register, $src$$Register, $shift$$constant, false);
12535   %}
12536   ins_pipe(ialu_reg);
12537 %}
12538 
// Logical Shift Right by 8-bit immediate, source loaded from memory (long, ndd form).
// Used when UseAPX is on: folds the LoadL of src into the shift and writes the
// result to the dst register. The flags register is declared killed.
// NOTE(review): the trailing `false` encoder argument is defined by the
// assembler - confirm its meaning before changing.
12539 instruct shrL_rReg_mem_imm_ndd(rRegL dst, memory src, immI8 shift, rFlagsReg cr)
12540 %{
12541   predicate(UseAPX);
12542   match(Set dst (URShiftL (LoadL src) shift));
12543   effect(KILL cr);
12544
12545   format %{ "eshrq    $dst, $src, $shift\t# long (ndd)" %}
12546   ins_encode %{
12547     __ eshrq($dst$$Register, $src$$Address, $shift$$constant, false);
12548   %}
12549   ins_pipe(ialu_reg);
12550 %}
12551 
12552 // Logical Shift Right by 8-bit immediate
// Long read-modify-write on memory: matches a StoreL to dst of the LoadL from
// the same dst logically shifted right by an immI8 count. There is no
// predicate, so the rule does not depend on UseAPX. Flags are declared killed.
12553 instruct shrL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
12554 %{
12555   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
12556   effect(KILL cr);
12557
12558   format %{ "shrq    $dst, $shift" %}
12559   ins_encode %{
12560     __ shrq($dst$$Address, $shift$$constant);
12561   %}
12562   ins_pipe(ialu_mem_imm);
12563 %}
12564 
12565 // Logical Shift Right by variable
// Long, destructive form used when BMI2 is not available. The count is an int
// operand bound to the rcx register class (rcx_RegI). The flags register is
// declared killed.
12566 instruct shrL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12567 %{
12568   predicate(!VM_Version::supports_bmi2());
12569   match(Set dst (URShiftL dst shift));
12570   effect(KILL cr);
12571
12572   format %{ "shrq    $dst, $shift" %}
12573   ins_encode %{
          // Only $dst is passed; the count is not an explicit encoder argument.
12574     __ shrq($dst$$Register);
12575   %}
12576   ins_pipe(ialu_reg_reg);
12577 %}
12578 
12579 // Logical Shift Right by variable
// Long read-modify-write on memory, used when BMI2 is not available: matches a
// StoreL to dst of the LoadL from dst logically shifted right by the
// rcx-bound count. The flags register is declared killed.
12580 instruct shrL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
12581 %{
12582   predicate(!VM_Version::supports_bmi2());
12583   match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
12584   effect(KILL cr);
12585
12586   format %{ "shrq    $dst, $shift" %}
12587   ins_encode %{
          // Only the address is passed; the count is not an explicit encoder argument.
12588     __ shrq($dst$$Address);
12589   %}
12590   ins_pipe(ialu_mem_reg);
12591 %}
12592 
// Logical Shift Right by variable, BMI2 form (long).
// Three-operand shrxq: dst and src are rRegL, the count is an rRegI operand.
// No flags operand and no KILL effect are declared for this rule.
12593 instruct shrL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
12594 %{
12595   predicate(VM_Version::supports_bmi2());
12596   match(Set dst (URShiftL src shift));
12597
12598   format %{ "shrxq   $dst, $src, $shift" %}
12599   ins_encode %{
12600     __ shrxq($dst$$Register, $src$$Register, $shift$$Register);
12601   %}
12602   ins_pipe(ialu_reg_reg);
12603 %}
12604 
// Logical Shift Right by variable, BMI2 form with the source loaded from
// memory (long). Folds the LoadL of src into shrxq; the result goes to the dst
// register. No flags operand is declared. An explicit ins_cost of 175 is assigned.
12605 instruct shrL_mem_rReg(rRegL dst, memory src, rRegI shift)
12606 %{
12607   predicate(VM_Version::supports_bmi2());
12608   match(Set dst (URShiftL (LoadL src) shift));
12609   ins_cost(175);
12610   format %{ "shrxq   $dst, $src, $shift" %}
12611   ins_encode %{
12612     __ shrxq($dst$$Register, $src$$Address, $shift$$Register);
12613   %}
12614   ins_pipe(ialu_reg_mem);
12615 %}
12616 
12617 // Shift Left by 24, followed by Arithmetic Shift Right by 24.
12618 // This idiom is used by the compiler for the i2b bytecode.
// The whole (RShiftI (LShiftI src 24) 24) tree is matched and replaced by a
// single movsbl from src to dst. No flags operand is declared.
12619 instruct i2b(rRegI dst, rRegI src, immI_24 twentyfour)
12620 %{
        // The same immI_24 operand must appear as the count of both shifts.
12621   match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
12622
12623   format %{ "movsbl  $dst, $src\t# i2b" %}
12624   ins_encode %{
12625     __ movsbl($dst$$Register, $src$$Register);
12626   %}
12627   ins_pipe(ialu_reg_reg);
12628 %}
12629 
12630 // Shift Left by 16, followed by Arithmetic Shift Right by 16.
12631 // This idiom is used by the compiler for the i2s bytecode.
// The whole (RShiftI (LShiftI src 16) 16) tree is matched and replaced by a
// single movswl from src to dst. No flags operand is declared.
12632 instruct i2s(rRegI dst, rRegI src, immI_16 sixteen)
12633 %{
        // The same immI_16 operand must appear as the count of both shifts.
12634   match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
12635
12636   format %{ "movswl  $dst, $src\t# i2s" %}
12637   ins_encode %{
12638     __ movswl($dst$$Register, $src$$Register);
12639   %}
12640   ins_pipe(ialu_reg_reg);
12641 %}
12642 
12643 // ROL/ROR instructions
12644 
12645 // Rotate left by constant.
// Int, destructive form used when BMI2 is not available. RotateLeft is shared
// by int and long rules, so the predicate also requires the node's type to be
// T_INT. The flags register is declared killed.
12646 instruct rolI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
12647 %{
12648   predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12649   match(Set dst (RotateLeft dst shift));
12650   effect(KILL cr);
12651   format %{ "roll    $dst, $shift" %}
12652   ins_encode %{
12653     __ roll($dst$$Register, $shift$$constant);
12654   %}
12655   ins_pipe(ialu_reg);
12656 %}
12657 
// Rotate left by constant, BMI2 form (int), selected when UseAPX is off.
// There is no rotate-left encoder call here: the rule emits rorxl (rotate
// right) with the complementary count, writing dst from a separate src.
// No flags operand is declared.
12658 instruct rolI_immI8(rRegI dst, rRegI src, immI8 shift)
12659 %{
12660   predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12661   match(Set dst (RotateLeft src shift));
12662   format %{ "rolxl   $dst, $src, $shift" %}
12663   ins_encode %{
          // Left rotate by k expressed as right rotate by 32 - (k mod 32).
          // NOTE(review): this yields 32 when (k mod 32) is 0; presumably the
          // hardware reduces the count modulo 32 - confirm.
12664     int shift = 32 - ($shift$$constant & 31);
12665     __ rorxl($dst$$Register, $src$$Register, shift);
12666   %}
12667   ins_pipe(ialu_reg_reg);
12668 %}
12669 
// Rotate left by constant, BMI2 form with the source loaded from memory (int).
// Folds the LoadI of src and emits rorxl (rotate right) with the complementary
// count. The predicate checks BMI2 and T_INT only (no UseAPX term).
// No flags operand is declared. An explicit ins_cost of 175 is assigned.
12670 instruct rolI_mem_immI8(rRegI dst, memory src, immI8 shift)
12671 %{
12672   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12673   match(Set dst (RotateLeft (LoadI src) shift));
12674   ins_cost(175);
12675   format %{ "rolxl   $dst, $src, $shift" %}
12676   ins_encode %{
          // Left rotate by k expressed as right rotate by 32 - (k mod 32).
          // NOTE(review): this yields 32 when (k mod 32) is 0; presumably the
          // hardware reduces the count modulo 32 - confirm.
12677     int shift = 32 - ($shift$$constant & 31);
12678     __ rorxl($dst$$Register, $src$$Address, shift);
12679   %}
12680   ins_pipe(ialu_reg_mem);
12681 %}
12682 
12683 // Rotate Left by variable
// Int, destructive form selected when UseAPX is off and the node type is
// T_INT. The count operand is bound to the rcx register class (rcx_RegI).
// The flags register is declared killed.
12684 instruct rolI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12685 %{
12686   predicate(!UseAPX && n->bottom_type()->basic_type() == T_INT);
12687   match(Set dst (RotateLeft dst shift));
12688   effect(KILL cr);
12689   format %{ "roll    $dst, $shift" %}
12690   ins_encode %{
          // Only $dst is passed; the count is not an explicit encoder argument.
12691     __ roll($dst$$Register);
12692   %}
12693   ins_pipe(ialu_reg_reg);
12694 %}
12695 
12696 // Rotate Left by variable
// Int, three-operand (ndd) form selected when UseAPX is on and the node type
// is T_INT: dst receives src rotated left by the rcx-bound count.
// NOTE(review): the meaning of Flag_ndd_demotable_opr1 and of the trailing
// `false` encoder argument is defined elsewhere - confirm before changing.
12697 instruct rolI_rReg_Var_ndd(rRegI dst, rRegI src, rcx_RegI shift, rFlagsReg cr)
12698 %{
12699   predicate(UseAPX && n->bottom_type()->basic_type() == T_INT);
12700   match(Set dst (RotateLeft src shift));
12701   effect(KILL cr);
12702   flag(PD::Flag_ndd_demotable_opr1);
12703
12704   format %{ "eroll    $dst, $src, $shift\t# rotate left (int ndd)" %}
12705   ins_encode %{
          // The count register is not passed to the encoder.
12706     __ eroll($dst$$Register, $src$$Register, false);
12707   %}
12708   ins_pipe(ialu_reg_reg);
12709 %}
12710 
12711 // Rotate Right by constant.
// Int, destructive form used when BMI2 is not available. RotateRight is shared
// by int and long rules, so the predicate also requires the node's type to be
// T_INT. The flags register is declared killed.
12712 instruct rorI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
12713 %{
12714   predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12715   match(Set dst (RotateRight dst shift));
12716   effect(KILL cr);
12717   format %{ "rorl    $dst, $shift" %}
12718   ins_encode %{
12719     __ rorl($dst$$Register, $shift$$constant);
12720   %}
12721   ins_pipe(ialu_reg);
12722 %}
12723 
12724 // Rotate Right by constant.
// Int, BMI2 form selected when UseAPX is off: rorxl writes dst from a separate
// src register using the immI8 count directly. No flags operand is declared.
12725 instruct rorI_immI8(rRegI dst, rRegI src, immI8 shift)
12726 %{
12727   predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12728   match(Set dst (RotateRight src shift));
12729   format %{ "rorxl   $dst, $src, $shift" %}
12730   ins_encode %{
12731     __ rorxl($dst$$Register, $src$$Register, $shift$$constant);
12732   %}
12733   ins_pipe(ialu_reg_reg);
12734 %}
12735 
// Rotate Right by constant, BMI2 form with the source loaded from memory (int).
// Folds the LoadI of src into rorxl. The predicate checks BMI2 and T_INT only
// (no UseAPX term). No flags operand is declared. An explicit ins_cost of 175
// is assigned.
12736 instruct rorI_mem_immI8(rRegI dst, memory src, immI8 shift)
12737 %{
12738   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
12739   match(Set dst (RotateRight (LoadI src) shift));
12740   ins_cost(175);
12741   format %{ "rorxl   $dst, $src, $shift" %}
12742   ins_encode %{
12743     __ rorxl($dst$$Register, $src$$Address, $shift$$constant);
12744   %}
12745   ins_pipe(ialu_reg_mem);
12746 %}
12747 
12748 // Rotate Right by variable
// Int, destructive form selected when UseAPX is off and the node type is
// T_INT. The count operand is bound to the rcx register class (rcx_RegI).
// The flags register is declared killed.
12749 instruct rorI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
12750 %{
12751   predicate(!UseAPX && n->bottom_type()->basic_type() == T_INT);
12752   match(Set dst (RotateRight dst shift));
12753   effect(KILL cr);
12754   format %{ "rorl    $dst, $shift" %}
12755   ins_encode %{
          // Only $dst is passed; the count is not an explicit encoder argument.
12756     __ rorl($dst$$Register);
12757   %}
12758   ins_pipe(ialu_reg_reg);
12759 %}
12760 
12761 // Rotate Right by variable
// Int, three-operand (ndd) form selected when UseAPX is on and the node type
// is T_INT: dst receives src rotated right by the rcx-bound count.
// NOTE(review): the meaning of Flag_ndd_demotable_opr1 and of the trailing
// `false` encoder argument is defined elsewhere - confirm before changing.
12762 instruct rorI_rReg_Var_ndd(rRegI dst, rRegI src, rcx_RegI shift, rFlagsReg cr)
12763 %{
12764   predicate(UseAPX && n->bottom_type()->basic_type() == T_INT);
12765   match(Set dst (RotateRight src shift));
12766   effect(KILL cr);
12767   flag(PD::Flag_ndd_demotable_opr1);
12768
12769   format %{ "erorl    $dst, $src, $shift\t# rotate right(int ndd)" %}
12770   ins_encode %{
          // The count register is not passed to the encoder.
12771     __ erorl($dst$$Register, $src$$Register, false);
12772   %}
12773   ins_pipe(ialu_reg_reg);
12774 %}
12775 
12776 // Rotate Left by constant.
// Long, destructive form used when BMI2 is not available. RotateLeft is shared
// by int and long rules, so the predicate also requires the node's type to be
// T_LONG. The flags register is declared killed.
12777 instruct rolL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
12778 %{
12779   predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12780   match(Set dst (RotateLeft dst shift));
12781   effect(KILL cr);
12782   format %{ "rolq    $dst, $shift" %}
12783   ins_encode %{
12784     __ rolq($dst$$Register, $shift$$constant);
12785   %}
12786   ins_pipe(ialu_reg);
12787 %}
12788 
// Rotate left by constant, BMI2 form (long), selected when UseAPX is off.
// There is no rotate-left encoder call here: the rule emits rorxq (rotate
// right) with the complementary count, writing dst from a separate src.
// No flags operand is declared.
12789 instruct rolL_immI8(rRegL dst, rRegL src, immI8 shift)
12790 %{
12791   predicate(!UseAPX && VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12792   match(Set dst (RotateLeft src shift));
12793   format %{ "rolxq   $dst, $src, $shift" %}
12794   ins_encode %{
          // Left rotate by k expressed as right rotate by 64 - (k mod 64).
          // NOTE(review): this yields 64 when (k mod 64) is 0; presumably the
          // hardware reduces the count modulo 64 - confirm.
12795     int shift = 64 - ($shift$$constant & 63);
12796     __ rorxq($dst$$Register, $src$$Register, shift);
12797   %}
12798   ins_pipe(ialu_reg_reg);
12799 %}
12800 
// Rotate left by constant, BMI2 form with the source loaded from memory (long).
// Folds the LoadL of src and emits rorxq (rotate right) with the complementary
// count. The predicate checks BMI2 and T_LONG only (no UseAPX term).
// No flags operand is declared. An explicit ins_cost of 175 is assigned.
12801 instruct rolL_mem_immI8(rRegL dst, memory src, immI8 shift)
12802 %{
12803   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12804   match(Set dst (RotateLeft (LoadL src) shift));
12805   ins_cost(175);
12806   format %{ "rolxq   $dst, $src, $shift" %}
12807   ins_encode %{
          // Left rotate by k expressed as right rotate by 64 - (k mod 64).
          // NOTE(review): this yields 64 when (k mod 64) is 0; presumably the
          // hardware reduces the count modulo 64 - confirm.
12808     int shift = 64 - ($shift$$constant & 63);
12809     __ rorxq($dst$$Register, $src$$Address, shift);
12810   %}
12811   ins_pipe(ialu_reg_mem);
12812 %}
12813 
12814 // Rotate Left by variable
// Long, destructive form selected when UseAPX is off and the node type is
// T_LONG. The count is an int operand bound to the rcx register class
// (rcx_RegI). The flags register is declared killed.
12815 instruct rolL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12816 %{
12817   predicate(!UseAPX && n->bottom_type()->basic_type() == T_LONG);
12818   match(Set dst (RotateLeft dst shift));
12819   effect(KILL cr);
12820
12821   format %{ "rolq    $dst, $shift" %}
12822   ins_encode %{
          // Only $dst is passed; the count is not an explicit encoder argument.
12823     __ rolq($dst$$Register);
12824   %}
12825   ins_pipe(ialu_reg_reg);
12826 %}
12827 
12828 // Rotate Left by variable
// Long, three-operand (ndd) form selected when UseAPX is on and the node type
// is T_LONG: dst receives src rotated left by the rcx-bound count.
// NOTE(review): the meaning of Flag_ndd_demotable_opr1 and of the trailing
// `false` encoder argument is defined elsewhere - confirm before changing.
12829 instruct rolL_rReg_Var_ndd(rRegL dst, rRegL src, rcx_RegI shift, rFlagsReg cr)
12830 %{
12831   predicate(UseAPX && n->bottom_type()->basic_type() == T_LONG);
12832   match(Set dst (RotateLeft src shift));
12833   effect(KILL cr);
12834   flag(PD::Flag_ndd_demotable_opr1);
12835
12836   format %{ "erolq    $dst, $src, $shift\t# rotate left(long ndd)" %}
12837   ins_encode %{
          // The count register is not passed to the encoder.
12838     __ erolq($dst$$Register, $src$$Register, false);
12839   %}
12840   ins_pipe(ialu_reg_reg);
12841 %}
12842 
12843 // Rotate Right by constant.
// Long, destructive form used when BMI2 is not available. RotateRight is
// shared by int and long rules, so the predicate also requires the node's type
// to be T_LONG. The flags register is declared killed.
12844 instruct rorL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
12845 %{
12846   predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12847   match(Set dst (RotateRight dst shift));
12848   effect(KILL cr);
12849   format %{ "rorq    $dst, $shift" %}
12850   ins_encode %{
12851     __ rorq($dst$$Register, $shift$$constant);
12852   %}
12853   ins_pipe(ialu_reg);
12854 %}
12855 
12856 // Rotate Right by constant
// Long, BMI2 form: rorxq writes dst from a separate src register using the
// immI8 count directly. No flags operand is declared.
// NOTE(review): unlike rolI_immI8, rorI_immI8 and rolL_immI8, this predicate
// has no !UseAPX term - confirm whether the difference is intended.
12857 instruct rorL_immI8(rRegL dst, rRegL src, immI8 shift)
12858 %{
12859   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12860   match(Set dst (RotateRight src shift));
12861   format %{ "rorxq   $dst, $src, $shift" %}
12862   ins_encode %{
12863     __ rorxq($dst$$Register, $src$$Register, $shift$$constant);
12864   %}
12865   ins_pipe(ialu_reg_reg);
12866 %}
12867 
// Rotate Right by constant, BMI2 form with the source loaded from memory (long).
// Folds the LoadL of src into rorxq. The predicate checks BMI2 and T_LONG only
// (no UseAPX term). No flags operand is declared. An explicit ins_cost of 175
// is assigned.
12868 instruct rorL_mem_immI8(rRegL dst, memory src, immI8 shift)
12869 %{
12870   predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
12871   match(Set dst (RotateRight (LoadL src) shift));
12872   ins_cost(175);
12873   format %{ "rorxq   $dst, $src, $shift" %}
12874   ins_encode %{
12875     __ rorxq($dst$$Register, $src$$Address, $shift$$constant);
12876   %}
12877   ins_pipe(ialu_reg_mem);
12878 %}
12879 
12880 // Rotate Right by variable
// Long, destructive form selected when UseAPX is off and the node type is
// T_LONG. The count is an int operand bound to the rcx register class
// (rcx_RegI). The flags register is declared killed.
12881 instruct rorL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
12882 %{
12883   predicate(!UseAPX && n->bottom_type()->basic_type() == T_LONG);
12884   match(Set dst (RotateRight dst shift));
12885   effect(KILL cr);
12886   format %{ "rorq    $dst, $shift" %}
12887   ins_encode %{
          // Only $dst is passed; the count is not an explicit encoder argument.
12888     __ rorq($dst$$Register);
12889   %}
12890   ins_pipe(ialu_reg_reg);
12891 %}
12892 
12893 // Rotate Right by variable
// Long, three-operand (ndd) form selected when UseAPX is on and the node type
// is T_LONG: dst receives src rotated right by the rcx-bound count.
// NOTE(review): the meaning of Flag_ndd_demotable_opr1 and of the trailing
// `false` encoder argument is defined elsewhere - confirm before changing.
12894 instruct rorL_rReg_Var_ndd(rRegL dst, rRegL src, rcx_RegI shift, rFlagsReg cr)
12895 %{
12896   predicate(UseAPX && n->bottom_type()->basic_type() == T_LONG);
12897   match(Set dst (RotateRight src shift));
12898   effect(KILL cr);
12899   flag(PD::Flag_ndd_demotable_opr1);
12900
12901   format %{ "erorq    $dst, $src, $shift\t# rotate right(long ndd)" %}
12902   ins_encode %{
          // The count register is not passed to the encoder.
12903     __ erorq($dst$$Register, $src$$Register, false);
12904   %}
12905   ins_pipe(ialu_reg_reg);
12906 %}
12907 
12908 //----------------------------- CompressBits/ExpandBits ------------------------
12909 
// Long CompressBits with both inputs in registers, emitted as pextq.
// The predicate restricts the rule to nodes whose type is long. No flags
// operand is declared.
// NOTE(review): no CPU-feature check appears in this predicate; presumably the
// node is only created when the instruction is available - confirm.
12910 instruct compressBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
12911   predicate(n->bottom_type()->isa_long());
12912   match(Set dst (CompressBits src mask));
12913   format %{ "pextq  $dst, $src, $mask\t! parallel bit extract" %}
12914   ins_encode %{
12915     __ pextq($dst$$Register, $src$$Register, $mask$$Register);
12916   %}
12917   ins_pipe( pipe_slow );
12918 %}
12919 
// Long ExpandBits with both inputs in registers, emitted as pdepq.
// The predicate restricts the rule to nodes whose type is long. No flags
// operand is declared.
// NOTE(review): no CPU-feature check appears in this predicate; presumably the
// node is only created when the instruction is available - confirm.
12920 instruct expandBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
12921   predicate(n->bottom_type()->isa_long());
12922   match(Set dst (ExpandBits src mask));
12923   format %{ "pdepq  $dst, $src, $mask\t! parallel bit deposit" %}
12924   ins_encode %{
12925     __ pdepq($dst$$Register, $src$$Register, $mask$$Register);
12926   %}
12927   ins_pipe( pipe_slow );
12928 %}
12929 
// Long CompressBits with the mask loaded from memory, emitted as pextq.
// Folds the LoadL of mask into the instruction; src stays in a register.
// The predicate restricts the rule to nodes whose type is long.
// NOTE(review): no CPU-feature check appears in this predicate; presumably the
// node is only created when the instruction is available - confirm.
12930 instruct compressBitsL_mem(rRegL dst, rRegL src, memory mask) %{
12931   predicate(n->bottom_type()->isa_long());
12932   match(Set dst (CompressBits src (LoadL mask)));
12933   format %{ "pextq  $dst, $src, $mask\t! parallel bit extract" %}
12934   ins_encode %{
12935     __ pextq($dst$$Register, $src$$Register, $mask$$Address);
12936   %}
12937   ins_pipe( pipe_slow );
12938 %}
12939 
// Long ExpandBits with the mask loaded from memory, emitted as pdepq.
// Folds the LoadL of mask into the instruction; src stays in a register.
// The predicate restricts the rule to nodes whose type is long.
// NOTE(review): no CPU-feature check appears in this predicate; presumably the
// node is only created when the instruction is available - confirm.
12940 instruct expandBitsL_mem(rRegL dst, rRegL src, memory mask) %{
12941   predicate(n->bottom_type()->isa_long());
12942   match(Set dst (ExpandBits src (LoadL mask)));
12943   format %{ "pdepq  $dst, $src, $mask\t! parallel bit deposit" %}
12944   ins_encode %{
12945     __ pdepq($dst$$Register, $src$$Register, $mask$$Address);
12946   %}
12947   ins_pipe( pipe_slow );
12948 %}
12949 
12950 
12951 // Logical Instructions
12952 
12953 // Integer Logical Instructions
12954 
12955 // And Instructions
12956 // And Register with Register
12957 instruct andI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
12958 %{
        // Two-operand int AND (dst = dst & src), used only when UseAPX is off.
        // Emits andl and declares the flags register as killed.
12959   predicate(!UseAPX);
12960   match(Set dst (AndI dst src));
12961   effect(KILL cr);
12962   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
12963
12964   format %{ "andl    $dst, $src\t# int" %}
12965   ins_encode %{
12966     __ andl($dst$$Register, $src$$Register);
12967   %}
12968   ins_pipe(ialu_reg_reg);
12969 %}
12970 
12971 // And Register with Register using New Data Destination (NDD)
12972 instruct andI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
12973 %{
        // Three-operand int AND (dst = src1 & src2), used only when UseAPX is on.
        // Besides the condition-flag attributes, the rule is tagged
        // ndd_demotable for both source operands (opr1 and opr2).
12974   predicate(UseAPX);
12975   match(Set dst (AndI src1 src2));
12976   effect(KILL cr);
12977   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
12978
12979   format %{ "eandl     $dst, $src1, $src2\t# int ndd" %}
12980   ins_encode %{
          // NOTE(review): the trailing `false` is presumably the no-flags
          // selector of eandl -- confirm against the assembler.
12981     __ eandl($dst$$Register, $src1$$Register, $src2$$Register, false);
12982
12983   %}
12984   ins_pipe(ialu_reg_reg);
12985 %}
12986 
12987 // And Register with Immediate 255
12988 instruct andI_rReg_imm255(rRegI dst, rRegI src, immI_255 mask)
12989 %{
        // src & 0xFF implemented as a zero-extending byte move (movzbl).
        // The rule has no predicate and declares no flags-register effect.
12990   match(Set dst (AndI src mask));
12991
12992   format %{ "movzbl  $dst, $src\t# int & 0xFF" %}
12993   ins_encode %{
12994     __ movzbl($dst$$Register, $src$$Register);
12995   %}
12996   ins_pipe(ialu_reg);
12997 %}
12998 
12999 // And Register with Immediate 255 and promote to long
13000 instruct andI2L_rReg_imm255(rRegL dst, rRegI src, immI_255 mask)
13001 %{
        // Folds the ConvI2L into the (src & 0xFF) mask: a single movzbl
        // produces the long-typed dst from the int src.
13002   match(Set dst (ConvI2L (AndI src mask)));
13003
13004   format %{ "movzbl  $dst, $src\t# int & 0xFF -> long" %}
13005   ins_encode %{
13006     __ movzbl($dst$$Register, $src$$Register);
13007   %}
13008   ins_pipe(ialu_reg);
13009 %}
13010 
13011 // And Register with Immediate 65535
13012 instruct andI_rReg_imm65535(rRegI dst, rRegI src, immI_65535 mask)
13013 %{
        // src & 0xFFFF implemented as a zero-extending word move (movzwl).
        // The rule has no predicate and declares no flags-register effect.
13014   match(Set dst (AndI src mask));
13015
13016   format %{ "movzwl  $dst, $src\t# int & 0xFFFF" %}
13017   ins_encode %{
13018     __ movzwl($dst$$Register, $src$$Register);
13019   %}
13020   ins_pipe(ialu_reg);
13021 %}
13022 
13023 // And Register with Immediate 65535 and promote to long
13024 instruct andI2L_rReg_imm65535(rRegL dst, rRegI src, immI_65535 mask)
13025 %{
        // Folds the ConvI2L into the (src & 0xFFFF) mask: a single movzwl
        // produces the long-typed dst from the int src.
13026   match(Set dst (ConvI2L (AndI src mask)));
13027
13028   format %{ "movzwl  $dst, $src\t# int & 0xFFFF -> long" %}
13029   ins_encode %{
13030     __ movzwl($dst$$Register, $src$$Register);
13031   %}
13032   ins_pipe(ialu_reg);
13033 %}
13034 
13035 // Can skip int2long conversions after AND with small bitmask
13036 instruct convI2LAndI_reg_immIbitmask(rRegL dst, rRegI src,  immI_Pow2M1 mask, rRegI tmp, rFlagsReg cr)
13037 %{
        // ConvI2L of (src & mask) where mask is an immI_Pow2M1 constant; only
        // available when the CPU reports BMI2. Needs a scratch int register
        // (tmp) and kills the flags register. Cost is set to 125.
13038   predicate(VM_Version::supports_bmi2());
13039   ins_cost(125);
13040   effect(TEMP tmp, KILL cr);
13041   match(Set dst (ConvI2L (AndI src mask)));
13042   format %{ "bzhiq $dst, $src, $mask \t# using $tmp as TEMP, int &  immI_Pow2M1 -> long" %}
13043   ins_encode %{
          // tmp receives log2(mask + 1); bzhiq then takes it as its third
          // (register) operand in place of the immediate mask.
13044     __ movl($tmp$$Register, exact_log2($mask$$constant + 1));
13045     __ bzhiq($dst$$Register, $src$$Register, $tmp$$Register);
13046   %}
13047   ins_pipe(ialu_reg_reg);
13048 %}
13049 
13050 // And Register with Immediate
13051 instruct andI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
13052 %{
        // Two-operand int AND with an immediate (dst = dst & imm), used only
        // when UseAPX is off. Emits andl and kills the flags register.
13053   predicate(!UseAPX);
13054   match(Set dst (AndI dst src));
13055   effect(KILL cr);
13056   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13057
13058   format %{ "andl    $dst, $src\t# int" %}
13059   ins_encode %{
13060     __ andl($dst$$Register, $src$$constant);
13061   %}
13062   ins_pipe(ialu_reg);
13063 %}
13064 
13065 instruct andI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
13066 %{
        // Three-operand int AND with an immediate (dst = src1 & imm), used only
        // when UseAPX is on. Tagged ndd_demotable for operand 1, the register
        // source.
13067   predicate(UseAPX);
13068   match(Set dst (AndI src1 src2));
13069   effect(KILL cr);
13070   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13071
13072   format %{ "eandl    $dst, $src1, $src2\t# int ndd" %}
13073   ins_encode %{
13074     __ eandl($dst$$Register, $src1$$Register, $src2$$constant, false);
13075   %}
13076   ins_pipe(ialu_reg);
13077 %}
13078 
13079 instruct andI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
13080 %{
        // Three-operand int AND of a loaded value with an immediate
        // (dst = [src1] & imm), used only when UseAPX is on. The LoadI is folded
        // into eandl as an Address operand.
        // NOTE(review): no ins_cost is given and the pipe class is ialu_reg even
        // though a memory source is read; xorI_rReg_mem_imm_ndd sets
        // ins_cost(150) -- confirm whether the difference is intended.
13081   predicate(UseAPX);
13082   match(Set dst (AndI (LoadI src1) src2));
13083   effect(KILL cr);
13084   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13085
13086   format %{ "eandl    $dst, $src1, $src2\t# int ndd" %}
13087   ins_encode %{
13088     __ eandl($dst$$Register, $src1$$Address, $src2$$constant, false);
13089   %}
13090   ins_pipe(ialu_reg);
13091 %}
13092 
13093 // And Register with Memory
13094 instruct andI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
13095 %{
        // Two-operand int AND with a memory source (dst = dst & [src]), used only
        // when UseAPX is off. The LoadI is folded into andl; cost is 150.
13096   predicate(!UseAPX);
13097   match(Set dst (AndI dst (LoadI src)));
13098   effect(KILL cr);
13099   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13100
13101   ins_cost(150);
13102   format %{ "andl    $dst, $src\t# int" %}
13103   ins_encode %{
13104     __ andl($dst$$Register, $src$$Address);
13105   %}
13106   ins_pipe(ialu_reg_mem);
13107 %}
13108 
13109 instruct andI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
13110 %{
        // Three-operand int AND with a memory source (dst = src1 & [src2]), used
        // only when UseAPX is on. The LoadI is folded into eandl; cost is 150.
        // NOTE(review): Flag_ndd_demotable_opr2 is set although operand 2 is a
        // memory operand here -- confirm that this is intended.
13111   predicate(UseAPX);
13112   match(Set dst (AndI src1 (LoadI src2)));
13113   effect(KILL cr);
13114   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13115
13116   ins_cost(150);
13117   format %{ "eandl    $dst, $src1, $src2\t# int ndd" %}
13118   ins_encode %{
13119     __ eandl($dst$$Register, $src1$$Register, $src2$$Address, false);
13120   %}
13121   ins_pipe(ialu_reg_mem);
13122 %}
13123 
13124 // And Memory with Register
13125 instruct andB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13126 %{
        // Byte read-modify-write: matches StoreB of (LoadB dst & src) back to the
        // same memory operand and emits one andb on that address. Cost is 150.
13127   match(Set dst (StoreB dst (AndI (LoadB dst) src)));
13128   effect(KILL cr);
13129   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13130
13131   ins_cost(150);
13132   format %{ "andb    $dst, $src\t# byte" %}
13133   ins_encode %{
13134     __ andb($dst$$Address, $src$$Register);
13135   %}
13136   ins_pipe(ialu_mem_reg);
13137 %}
13138 
13139 instruct andI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13140 %{
        // Int read-modify-write: matches StoreI of (LoadI dst & src) back to the
        // same memory operand and emits one andl on that address. Cost is 150.
13141   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
13142   effect(KILL cr);
13143   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13144
13145   ins_cost(150);
13146   format %{ "andl    $dst, $src\t# int" %}
13147   ins_encode %{
13148     __ andl($dst$$Address, $src$$Register);
13149   %}
13150   ins_pipe(ialu_mem_reg);
13151 %}
13152 
13153 // And Memory with Immediate
13154 instruct andI_mem_imm(memory dst, immI src, rFlagsReg cr)
13155 %{
        // Int read-modify-write with an immediate: matches StoreI of
        // (LoadI dst & imm) back to the same memory operand and emits one andl
        // with the constant. Cost is 125.
13156   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
13157   effect(KILL cr);
13158   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13159
13160   ins_cost(125);
13161   format %{ "andl    $dst, $src\t# int" %}
13162   ins_encode %{
13163     __ andl($dst$$Address, $src$$constant);
13164   %}
13165   ins_pipe(ialu_mem_imm);
13166 %}
13167 
13168 // BMI1 instructions
13169 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, rFlagsReg cr) %{
        // Int and-not with a memory source: matches (src1 ^ -1) & [src2] and
        // emits one andnl. Requires both BMI1 and AVX support. The minus_1
        // operand only anchors the match tree; it is not passed to the encoding.
13170   match(Set dst (AndI (XorI src1 minus_1) (LoadI src2)));
13171   predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
13172   effect(KILL cr);
13173   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13174
13175   ins_cost(125);
13176   format %{ "andnl  $dst, $src1, $src2" %}
13177
13178   ins_encode %{
13179     __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
13180   %}
13181   ins_pipe(ialu_reg_mem);
13182 %}
13183 
13184 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, rFlagsReg cr) %{
        // Int and-not on registers: matches (src1 ^ -1) & src2 and emits one
        // andnl. Requires both BMI1 and AVX support. The minus_1 operand only
        // anchors the match tree; it is not passed to the encoding.
13185   match(Set dst (AndI (XorI src1 minus_1) src2));
13186   predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
13187   effect(KILL cr);
13188   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13189
13190   format %{ "andnl  $dst, $src1, $src2" %}
13191
13192   ins_encode %{
13193     __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
13194   %}
13195   ins_pipe(ialu_reg);
13196 %}
13197 
13198 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, rFlagsReg cr) %{
        // Matches (0 - src) & src on int registers and emits one blsil.
        // Requires both BMI1 and AVX support. imm_zero only anchors the match
        // tree; it is not passed to the encoding.
13199   match(Set dst (AndI (SubI imm_zero src) src));
13200   predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
13201   effect(KILL cr);
13202   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13203
13204   format %{ "blsil  $dst, $src" %}
13205
13206   ins_encode %{
13207     __ blsil($dst$$Register, $src$$Register);
13208   %}
13209   ins_pipe(ialu_reg);
13210 %}
13211 
13212 instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, rFlagsReg cr) %{
        // Memory-source variant of blsil: matches (0 - [src]) & [src], where both
        // LoadI nodes name the same memory operand, and emits one blsil on that
        // address. Requires both BMI1 and AVX support. Cost is 125.
13213   match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
13214   predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
13215   effect(KILL cr);
13216   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13217
13218   ins_cost(125);
13219   format %{ "blsil  $dst, $src" %}
13220
13221   ins_encode %{
13222     __ blsil($dst$$Register, $src$$Address);
13223   %}
13224   ins_pipe(ialu_reg_mem);
13225 %}
13226 
13227 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
13228 %{
        // Memory-source variant of blsmskl: matches ([src] + -1) ^ [src], where
        // both LoadI nodes name the same memory operand, and emits one blsmskl on
        // that address. Requires both BMI1 and AVX support. Cost is 125.
        // Unlike the blsi/blsr rules, the flag list uses Flag_clears_zero_flag.
13229   match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ) );
13230   predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
13231   effect(KILL cr);
13232   flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13233
13234   ins_cost(125);
13235   format %{ "blsmskl $dst, $src" %}
13236
13237   ins_encode %{
13238     __ blsmskl($dst$$Register, $src$$Address);
13239   %}
13240   ins_pipe(ialu_reg_mem);
13241 %}
13242 
13243 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
13244 %{
        // Matches (src + -1) ^ src on int registers and emits one blsmskl.
        // Requires both BMI1 and AVX support. minus_1 only anchors the match
        // tree; it is not passed to the encoding.
        // Unlike the blsi/blsr rules, the flag list uses Flag_clears_zero_flag.
13245   match(Set dst (XorI (AddI src minus_1) src));
13246   predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
13247   effect(KILL cr);
13248   flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13249
13250   format %{ "blsmskl $dst, $src" %}
13251
13252   ins_encode %{
13253     __ blsmskl($dst$$Register, $src$$Register);
13254   %}
13255
13256   ins_pipe(ialu_reg);
13257 %}
13258 
13259 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
13260 %{
        // Matches (src + -1) & src on int registers and emits one blsrl.
        // Requires both BMI1 and AVX support. minus_1 only anchors the match
        // tree; it is not passed to the encoding.
13261   match(Set dst (AndI (AddI src minus_1) src) );
13262   predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
13263   effect(KILL cr);
13264   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13265
13266   format %{ "blsrl  $dst, $src" %}
13267
13268   ins_encode %{
13269     __ blsrl($dst$$Register, $src$$Register);
13270   %}
13271
        // Register-only form: use the register pipeline class, as the
        // register-only blsil/blsmskl/andnl rules do (no memory operand is read).
13272   ins_pipe(ialu_reg);
13273 %}
13274 
13275 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
13276 %{
        // Memory-source variant of blsrl: matches ([src] + -1) & [src], where
        // both LoadI nodes name the same memory operand, and emits one blsrl on
        // that address. Requires both BMI1 and AVX support. Cost is 125.
13277   match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ) );
13278   predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
13279   effect(KILL cr);
13280   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13281
13282   ins_cost(125);
13283   format %{ "blsrl  $dst, $src" %}
13284
13285   ins_encode %{
13286     __ blsrl($dst$$Register, $src$$Address);
13287   %}
13288
        // Memory-source form: use the register+memory pipeline class, as the
        // memory-source blsil/blsmskl/andnl rules do.
13289   ins_pipe(ialu_reg_mem);
13290 %}
13291 
13292 // Or Instructions
13293 // Or Register with Register
13294 instruct orI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
13295 %{
        // Two-operand int OR (dst = dst | src), used only when UseAPX is off.
        // Emits orl and declares the flags register as killed.
13296   predicate(!UseAPX);
13297   match(Set dst (OrI dst src));
13298   effect(KILL cr);
13299   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13300
13301   format %{ "orl     $dst, $src\t# int" %}
13302   ins_encode %{
13303     __ orl($dst$$Register, $src$$Register);
13304   %}
13305   ins_pipe(ialu_reg_reg);
13306 %}
13307 
13308 // Or Register with Register using New Data Destination (NDD)
13309 instruct orI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
13310 %{
        // Three-operand int OR (dst = src1 | src2), used only when UseAPX is on.
        // Besides the condition-flag attributes, the rule is tagged
        // ndd_demotable for both source operands (opr1 and opr2).
13311   predicate(UseAPX);
13312   match(Set dst (OrI src1 src2));
13313   effect(KILL cr);
13314   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13315
13316   format %{ "eorl     $dst, $src1, $src2\t# int ndd" %}
13317   ins_encode %{
13318     __ eorl($dst$$Register, $src1$$Register, $src2$$Register, false);
13319   %}
13320   ins_pipe(ialu_reg_reg);
13321 %}
13322 
13323 // Or Register with Immediate
13324 instruct orI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
13325 %{
        // Two-operand int OR with an immediate (dst = dst | imm), used only when
        // UseAPX is off. Emits orl and kills the flags register.
13326   predicate(!UseAPX);
13327   match(Set dst (OrI dst src));
13328   effect(KILL cr);
13329   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13330
13331   format %{ "orl     $dst, $src\t# int" %}
13332   ins_encode %{
13333     __ orl($dst$$Register, $src$$constant);
13334   %}
13335   ins_pipe(ialu_reg);
13336 %}
13337 
13338 instruct orI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
13339 %{
        // Three-operand int OR with an immediate on the right (dst = src1 | imm),
        // used only when UseAPX is on. Tagged ndd_demotable for operand 1, the
        // register source.
13340   predicate(UseAPX);
13341   match(Set dst (OrI src1 src2));
13342   effect(KILL cr);
13343   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13344
13345   format %{ "eorl     $dst, $src1, $src2\t# int ndd" %}
13346   ins_encode %{
13347     __ eorl($dst$$Register, $src1$$Register, $src2$$constant, false);
13348   %}
13349   ins_pipe(ialu_reg);
13350 %}
13351 
13352 instruct orI_rReg_imm_rReg_ndd(rRegI dst, immI src1, rRegI src2, rFlagsReg cr)
13353 %{
        // Three-operand int OR with the immediate on the left (dst = imm | src2),
        // used only when UseAPX is on. The encoding swaps the operands so the
        // register (src2) is the first source of eorl and the constant (src1)
        // the second.
13354   predicate(UseAPX);
13355   match(Set dst (OrI src1 src2));
13356   effect(KILL cr);
        // The only register source is operand 2 (src2); operand 1 is an
        // immediate, so the demotable-operand tag names opr2.
13357   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr2);
13358
13359   format %{ "eorl     $dst, $src2, $src1\t# int ndd" %}
13360   ins_encode %{
13361     __ eorl($dst$$Register, $src2$$Register, $src1$$constant, false);
13362   %}
13363   ins_pipe(ialu_reg);
13364 %}
13365 
13366 instruct orI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
13367 %{
        // Three-operand int OR of a loaded value with an immediate
        // (dst = [src1] | imm), used only when UseAPX is on. The LoadI is folded
        // into eorl as an Address operand.
        // NOTE(review): no ins_cost is given and the pipe class is ialu_reg even
        // though a memory source is read; xorI_rReg_mem_imm_ndd sets
        // ins_cost(150) -- confirm whether the difference is intended.
13368   predicate(UseAPX);
13369   match(Set dst (OrI (LoadI src1) src2));
13370   effect(KILL cr);
13371   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13372
13373   format %{ "eorl     $dst, $src1, $src2\t# int ndd" %}
13374   ins_encode %{
13375     __ eorl($dst$$Register, $src1$$Address, $src2$$constant, false);
13376   %}
13377   ins_pipe(ialu_reg);
13378 %}
13379 
13380 // Or Register with Memory
13381 instruct orI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
13382 %{
        // Two-operand int OR with a memory source (dst = dst | [src]), used only
        // when UseAPX is off. The LoadI is folded into orl; cost is 150.
13383   predicate(!UseAPX);
13384   match(Set dst (OrI dst (LoadI src)));
13385   effect(KILL cr);
13386   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13387
13388   ins_cost(150);
13389   format %{ "orl     $dst, $src\t# int" %}
13390   ins_encode %{
13391     __ orl($dst$$Register, $src$$Address);
13392   %}
13393   ins_pipe(ialu_reg_mem);
13394 %}
13395 
13396 instruct orI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
13397 %{
        // Three-operand int OR with a memory source (dst = src1 | [src2]), used
        // only when UseAPX is on. The LoadI is folded into eorl; cost is 150.
        // NOTE(review): Flag_ndd_demotable_opr2 is set although operand 2 is a
        // memory operand here -- confirm that this is intended.
13398   predicate(UseAPX);
13399   match(Set dst (OrI src1 (LoadI src2)));
13400   effect(KILL cr);
13401   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13402
13403   ins_cost(150);
13404   format %{ "eorl     $dst, $src1, $src2\t# int ndd" %}
13405   ins_encode %{
13406     __ eorl($dst$$Register, $src1$$Register, $src2$$Address, false);
13407   %}
13408   ins_pipe(ialu_reg_mem);
13409 %}
13410 
13411 // Or Memory with Register
13412 instruct orB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13413 %{
        // Byte read-modify-write: matches StoreB of (LoadB dst | src) back to the
        // same memory operand and emits one orb on that address. Cost is 150.
13414   match(Set dst (StoreB dst (OrI (LoadB dst) src)));
13415   effect(KILL cr);
13416   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13417
13418   ins_cost(150);
13419   format %{ "orb    $dst, $src\t# byte" %}
13420   ins_encode %{
13421     __ orb($dst$$Address, $src$$Register);
13422   %}
13423   ins_pipe(ialu_mem_reg);
13424 %}
13425 
13426 instruct orI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13427 %{
        // Int read-modify-write: matches StoreI of (LoadI dst | src) back to the
        // same memory operand and emits one orl on that address. Cost is 150.
13428   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
13429   effect(KILL cr);
13430   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13431
13432   ins_cost(150);
13433   format %{ "orl     $dst, $src\t# int" %}
13434   ins_encode %{
13435     __ orl($dst$$Address, $src$$Register);
13436   %}
13437   ins_pipe(ialu_mem_reg);
13438 %}
13439 
13440 // Or Memory with Immediate
13441 instruct orI_mem_imm(memory dst, immI src, rFlagsReg cr)
13442 %{
        // Int read-modify-write with an immediate: matches StoreI of
        // (LoadI dst | imm) back to the same memory operand and emits one orl
        // with the constant. Cost is 125.
13443   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
13444   effect(KILL cr);
13445   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13446
13447   ins_cost(125);
13448   format %{ "orl     $dst, $src\t# int" %}
13449   ins_encode %{
13450     __ orl($dst$$Address, $src$$constant);
13451   %}
13452   ins_pipe(ialu_mem_imm);
13453 %}
13454 
13455 // Xor Instructions
13456 // Xor Register with Register
13457 instruct xorI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
13458 %{
        // Two-operand int XOR (dst = dst ^ src), used only when UseAPX is off.
        // Emits xorl and declares the flags register as killed.
13459   predicate(!UseAPX);
13460   match(Set dst (XorI dst src));
13461   effect(KILL cr);
13462   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13463
13464   format %{ "xorl    $dst, $src\t# int" %}
13465   ins_encode %{
13466     __ xorl($dst$$Register, $src$$Register);
13467   %}
13468   ins_pipe(ialu_reg_reg);
13469 %}
13470 
13471 // Xor Register with Register using New Data Destination (NDD)
13472 instruct xorI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
13473 %{
        // Three-operand int XOR (dst = src1 ^ src2), used only when UseAPX is on.
        // Besides the condition-flag attributes, the rule is tagged
        // ndd_demotable for both source operands (opr1 and opr2).
13474   predicate(UseAPX);
13475   match(Set dst (XorI src1 src2));
13476   effect(KILL cr);
13477   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13478
13479   format %{ "exorl    $dst, $src1, $src2\t# int ndd" %}
13480   ins_encode %{
13481     __ exorl($dst$$Register, $src1$$Register, $src2$$Register, false);
13482   %}
13483   ins_pipe(ialu_reg_reg);
13484 %}
13485 
13486 // Xor Register with Immediate -1
13487 instruct xorI_rReg_im1(rRegI dst, immI_M1 imm)
13488 %{
        // dst ^ -1 implemented as an in-place notl, used only when UseAPX is
        // off. No flags-register operand or effect is declared for this rule.
13489   predicate(!UseAPX);
13490   match(Set dst (XorI dst imm));
13491
13492   format %{ "notl    $dst" %}
13493   ins_encode %{
13494      __ notl($dst$$Register);
13495   %}
13496   ins_pipe(ialu_reg);
13497 %}
13498 
13499 instruct xorI_rReg_im1_ndd(rRegI dst, rRegI src, immI_M1 imm)
13500 %{
        // src ^ -1 implemented as a two-register enotl (dst = ~src), used only
        // when UseAPX is on. No flags-register operand or effect is declared;
        // the rule is tagged ndd_demotable for operand 1 (src).
13501   match(Set dst (XorI src imm));
13502   predicate(UseAPX);
13503   flag(PD::Flag_ndd_demotable_opr1);
13504
13505   format %{ "enotl    $dst, $src" %}
13506   ins_encode %{
13507      __ enotl($dst$$Register, $src$$Register);
13508   %}
13509   ins_pipe(ialu_reg);
13510 %}
13511 
13512 // Xor Register with Immediate
13513 instruct xorI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
13514 %{
        // Two-operand int XOR with an immediate (dst = dst ^ imm), used only
        // when UseAPX is off and the constant on input 2 of the node is not -1.
13515   // Strict predicate check to make selection of xorI_rReg_im1 cost agnostic if immI src is -1.
13516   predicate(!UseAPX && n->in(2)->bottom_type()->is_int()->get_con() != -1);
13517   match(Set dst (XorI dst src));
13518   effect(KILL cr);
13519   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13520
13521   format %{ "xorl    $dst, $src\t# int" %}
13522   ins_encode %{
13523     __ xorl($dst$$Register, $src$$constant);
13524   %}
13525   ins_pipe(ialu_reg);
13526 %}
13527 
13528 instruct xorI_rReg_rReg_imm_ndd(rRegI dst, rRegI src1, immI src2, rFlagsReg cr)
13529 %{
        // Three-operand int XOR with an immediate (dst = src1 ^ imm), used only
        // when UseAPX is on and the constant on input 2 of the node is not -1.
        // Tagged ndd_demotable for operand 1, the register source.
13530   // Strict predicate check to make selection of xorI_rReg_im1_ndd cost agnostic if immI src2 is -1.
13531   predicate(UseAPX && n->in(2)->bottom_type()->is_int()->get_con() != -1);
13532   match(Set dst (XorI src1 src2));
13533   effect(KILL cr);
13534   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13535
13536   format %{ "exorl    $dst, $src1, $src2\t# int ndd" %}
13537   ins_encode %{
13538     __ exorl($dst$$Register, $src1$$Register, $src2$$constant, false);
13539   %}
13540   ins_pipe(ialu_reg);
13541 %}
13542 
13543 // Xor Memory operand with Immediate into Register using New Data Destination (NDD)
13544 instruct xorI_rReg_mem_imm_ndd(rRegI dst, memory src1, immI src2, rFlagsReg cr)
13545 %{
        // Three-operand int XOR of a loaded value with an immediate
        // (dst = [src1] ^ imm), used only when UseAPX is on. The result goes to
        // a register; memory is only read. The LoadI is folded into exorl as an
        // Address operand. Cost is 150.
13546   predicate(UseAPX);
13547   match(Set dst (XorI (LoadI src1) src2));
13548   effect(KILL cr);
13549   ins_cost(150);
13550   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13551
13552   format %{ "exorl    $dst, $src1, $src2\t# int ndd" %}
13553   ins_encode %{
13554     __ exorl($dst$$Register, $src1$$Address, $src2$$constant, false);
13555   %}
13556   ins_pipe(ialu_reg);
13557 %}
13558 
13559 // Xor Register with Memory
13560 instruct xorI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
13561 %{
        // Two-operand int XOR with a memory source (dst = dst ^ [src]), used only
        // when UseAPX is off. The LoadI is folded into xorl; cost is 150.
13562   predicate(!UseAPX);
13563   match(Set dst (XorI dst (LoadI src)));
13564   effect(KILL cr);
13565   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13566
13567   ins_cost(150);
13568   format %{ "xorl    $dst, $src\t# int" %}
13569   ins_encode %{
13570     __ xorl($dst$$Register, $src$$Address);
13571   %}
13572   ins_pipe(ialu_reg_mem);
13573 %}
13574 
13575 instruct xorI_rReg_rReg_mem_ndd(rRegI dst, rRegI src1, memory src2, rFlagsReg cr)
13576 %{
        // Three-operand int XOR with a memory source (dst = src1 ^ [src2]), used
        // only when UseAPX is on. The LoadI is folded into exorl; cost is 150.
        // NOTE(review): Flag_ndd_demotable_opr2 is set although operand 2 is a
        // memory operand here -- confirm that this is intended.
13577   predicate(UseAPX);
13578   match(Set dst (XorI src1 (LoadI src2)));
13579   effect(KILL cr);
13580   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13581
13582   ins_cost(150);
13583   format %{ "exorl    $dst, $src1, $src2\t# int ndd" %}
13584   ins_encode %{
13585     __ exorl($dst$$Register, $src1$$Register, $src2$$Address, false);
13586   %}
13587   ins_pipe(ialu_reg_mem);
13588 %}
13589 
13590 // Xor Memory with Register
13591 instruct xorB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13592 %{
        // Byte read-modify-write: matches StoreB of (LoadB dst ^ src) back to the
        // same memory operand and emits one xorb on that address. Cost is 150.
13593   match(Set dst (StoreB dst (XorI (LoadB dst) src)));
13594   effect(KILL cr);
13595   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13596
13597   ins_cost(150);
13598   format %{ "xorb    $dst, $src\t# byte" %}
13599   ins_encode %{
13600     __ xorb($dst$$Address, $src$$Register);
13601   %}
13602   ins_pipe(ialu_mem_reg);
13603 %}
13604 
13605 instruct xorI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
13606 %{
        // Int read-modify-write: matches StoreI of (LoadI dst ^ src) back to the
        // same memory operand and emits one xorl on that address. Cost is 150.
13607   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
13608   effect(KILL cr);
13609   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13610
13611   ins_cost(150);
13612   format %{ "xorl    $dst, $src\t# int" %}
13613   ins_encode %{
13614     __ xorl($dst$$Address, $src$$Register);
13615   %}
13616   ins_pipe(ialu_mem_reg);
13617 %}
13618 
13619 // Xor Memory with Immediate
13620 instruct xorI_mem_imm(memory dst, immI src, rFlagsReg cr)
13621 %{
        // Int read-modify-write with an immediate: matches StoreI of
        // (LoadI dst ^ imm) back to the same memory operand and emits one xorl
        // with the constant. Cost is 125.
13622   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
13623   effect(KILL cr);
13624   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13625
13626   ins_cost(125);
13627   format %{ "xorl    $dst, $src\t# int" %}
13628   ins_encode %{
13629     __ xorl($dst$$Address, $src$$constant);
13630   %}
13631   ins_pipe(ialu_mem_imm);
13632 %}
13633 
13634 
13635 // Long Logical Instructions
13636 
13637 // And Instructions
13638 // And Register with Register
13639 instruct andL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
13640 %{
        // Two-operand long AND (dst = dst & src), used only when UseAPX is off.
        // Emits andq and declares the flags register as killed.
13641   predicate(!UseAPX);
13642   match(Set dst (AndL dst src));
13643   effect(KILL cr);
13644   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13645
13646   format %{ "andq    $dst, $src\t# long" %}
13647   ins_encode %{
13648     __ andq($dst$$Register, $src$$Register);
13649   %}
13650   ins_pipe(ialu_reg_reg);
13651 %}
13652 
13653 // And Register with Register using New Data Destination (NDD)
13654 instruct andL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
13655 %{
        // Three-operand long AND (dst = src1 & src2), used only when UseAPX is
        // on. Besides the condition-flag attributes, the rule is tagged
        // ndd_demotable for both source operands (opr1 and opr2).
13656   predicate(UseAPX);
13657   match(Set dst (AndL src1 src2));
13658   effect(KILL cr);
13659   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13660
13661   format %{ "eandq     $dst, $src1, $src2\t# long ndd" %}
13662   ins_encode %{
13663     __ eandq($dst$$Register, $src1$$Register, $src2$$Register, false);
13664
13665   %}
13666   ins_pipe(ialu_reg_reg);
13667 %}
13668 
13669 // And Register with Immediate 255
13670 instruct andL_rReg_imm255(rRegL dst, rRegL src, immL_255 mask)
13671 %{
        // Long src & 0xFF implemented as a zero-extending byte move. The rule
        // has no predicate and declares no flags-register effect.
13672   match(Set dst (AndL src mask));
13673
13674   format %{ "movzbl  $dst, $src\t# long & 0xFF" %}
13675   ins_encode %{
13676     // movzbl zeroes out the upper 32-bit and does not need REX.W
13677     __ movzbl($dst$$Register, $src$$Register);
13678   %}
13679   ins_pipe(ialu_reg);
13680 %}
13681 
13682 // And Register with Immediate 65535
13683 instruct andL_rReg_imm65535(rRegL dst, rRegL src, immL_65535 mask)
13684 %{
        // Long src & 0xFFFF implemented as a zero-extending word move. The rule
        // has no predicate and declares no flags-register effect.
13685   match(Set dst (AndL src mask));
13686
13687   format %{ "movzwl  $dst, $src\t# long & 0xFFFF" %}
13688   ins_encode %{
13689     // movzwl zeroes out the upper 32-bit and does not need REX.W
13690     __ movzwl($dst$$Register, $src$$Register);
13691   %}
13692   ins_pipe(ialu_reg);
13693 %}
13694 
13695 // And Register with Immediate
13696 instruct andL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
13697 %{
        // Two-operand long AND with an immL32 immediate (dst = dst & imm), used
        // only when UseAPX is off. Emits andq and kills the flags register.
13698   predicate(!UseAPX);
13699   match(Set dst (AndL dst src));
13700   effect(KILL cr);
13701   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13702
13703   format %{ "andq    $dst, $src\t# long" %}
13704   ins_encode %{
13705     __ andq($dst$$Register, $src$$constant);
13706   %}
13707   ins_pipe(ialu_reg);
13708 %}
13709 
13710 instruct andL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
13711 %{
        // Three-operand long AND with an immL32 immediate (dst = src1 & imm),
        // used only when UseAPX is on. Tagged ndd_demotable for operand 1, the
        // register source.
13712   predicate(UseAPX);
13713   match(Set dst (AndL src1 src2));
13714   effect(KILL cr);
13715   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
13716
13717   format %{ "eandq    $dst, $src1, $src2\t# long ndd" %}
13718   ins_encode %{
13719     __ eandq($dst$$Register, $src1$$Register, $src2$$constant, false);
13720   %}
13721   ins_pipe(ialu_reg);
13722 %}
13723 
13724 instruct andL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
13725 %{
        // Three-operand long AND of a loaded value with an immL32 immediate
        // (dst = [src1] & imm), used only when UseAPX is on. The LoadL is folded
        // into eandq as an Address operand.
        // NOTE(review): no ins_cost is given and the pipe class is ialu_reg even
        // though a memory source is read; xorI_rReg_mem_imm_ndd sets
        // ins_cost(150) -- confirm whether the difference is intended.
13726   predicate(UseAPX);
13727   match(Set dst (AndL (LoadL src1) src2));
13728   effect(KILL cr);
13729   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13730
13731   format %{ "eandq    $dst, $src1, $src2\t# long ndd" %}
13732   ins_encode %{
13733     __ eandq($dst$$Register, $src1$$Address, $src2$$constant, false);
13734   %}
13735   ins_pipe(ialu_reg);
13736 %}
13737 
13738 // And Register with Memory
13739 instruct andL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
13740 %{
        // Two-operand long AND with a memory source (dst = dst & [src]), used
        // only when UseAPX is off. The LoadL is folded into andq; cost is 150.
13741   predicate(!UseAPX);
13742   match(Set dst (AndL dst (LoadL src)));
13743   effect(KILL cr);
13744   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13745
13746   ins_cost(150);
13747   format %{ "andq    $dst, $src\t# long" %}
13748   ins_encode %{
13749     __ andq($dst$$Register, $src$$Address);
13750   %}
13751   ins_pipe(ialu_reg_mem);
13752 %}
13753 
13754 instruct andL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
13755 %{
        // Three-operand long AND with a memory source (dst = src1 & [src2]), used
        // only when UseAPX is on. The LoadL is folded into eandq; cost is 150.
        // NOTE(review): Flag_ndd_demotable_opr2 is set although operand 2 is a
        // memory operand here -- confirm that this is intended.
13756   predicate(UseAPX);
13757   match(Set dst (AndL src1 (LoadL src2)));
13758   effect(KILL cr);
13759   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13760
13761   ins_cost(150);
13762   format %{ "eandq    $dst, $src1, $src2\t# long ndd" %}
13763   ins_encode %{
13764     __ eandq($dst$$Register, $src1$$Register, $src2$$Address, false);
13765   %}
13766   ins_pipe(ialu_reg_mem);
13767 %}
13768 
13769 // And Memory with Register
13770 instruct andL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
13771 %{
        // Long read-modify-write: matches StoreL of (LoadL dst & src) back to the
        // same memory operand and emits one andq on that address. Cost is 150.
13772   match(Set dst (StoreL dst (AndL (LoadL dst) src)));
13773   effect(KILL cr);
13774   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13775
13776   ins_cost(150);
13777   format %{ "andq    $dst, $src\t# long" %}
13778   ins_encode %{
13779     __ andq($dst$$Address, $src$$Register);
13780   %}
13781   ins_pipe(ialu_mem_reg);
13782 %}
13783 
13784 // And Memory with Immediate
13785 instruct andL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
13786 %{
        // Long read-modify-write with an immL32 immediate: matches StoreL of
        // (LoadL dst & imm) back to the same memory operand and emits one andq
        // with the constant. Cost is 125.
13787   match(Set dst (StoreL dst (AndL (LoadL dst) src)));
13788   effect(KILL cr);
13789   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13790
13791   ins_cost(125);
13792   format %{ "andq    $dst, $src\t# long" %}
13793   ins_encode %{
13794     __ andq($dst$$Address, $src$$constant);
13795   %}
13796   ins_pipe(ialu_mem_imm);
13797 %}
13798 
13799 instruct btrL_mem_imm(memory dst, immL_NotPow2 con, rFlagsReg cr)
13800 %{
        // Clears a single bit of a long in memory: matches a StoreL to dst of
        // ((LoadL dst) & con) and emits btrq with bit index log2(~con).
13801   // con should be a pure 64-bit immediate given that not(con) is a power of 2
13802   // because AND/OR works well enough for 8/32-bit values.
        // The predicate therefore only accepts constants whose cleared bit index
        // is greater than 30. NOTE(review): n->in(3)->in(2) is presumably the
        // constant input of the AndL feeding the store -- confirm.
13803   predicate(log2i_graceful(~n->in(3)->in(2)->get_long()) > 30);
13804 
13805   match(Set dst (StoreL dst (AndL (LoadL dst) con)));
13806   effect(KILL cr);
13807 
13808   ins_cost(125);
13809   format %{ "btrq    $dst, log2(not($con))\t# long" %}
13810   ins_encode %{
13811     __ btrq($dst$$Address, log2i_exact((julong)~$con$$constant));
13812   %}
13813   ins_pipe(ialu_mem_imm);
13814 %}
13815 
13816 // BMI1 instructions
13817 instruct andnL_rReg_rReg_mem(rRegL dst, rRegL src1, memory src2, immL_M1 minus_1, rFlagsReg cr) %{
        // dst = (src1 ^ -1) & (long loaded from src2), i.e. ~src1 & [src2],
        // emitted as a single andnq. Only selected when the CPU-feature checks
        // in the predicate (BMI1 and AVX) both hold. cr is declared killed.
13818   match(Set dst (AndL (XorL src1 minus_1) (LoadL src2)));
13819   predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
13820   effect(KILL cr);
13821   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13822 
13823   ins_cost(125);
13824   format %{ "andnq  $dst, $src1, $src2" %}
13825 
13826   ins_encode %{
13827     __ andnq($dst$$Register, $src1$$Register, $src2$$Address);
13828   %}
13829   ins_pipe(ialu_reg_mem);
13830 %}
13831 
13832 instruct andnL_rReg_rReg_rReg(rRegL dst, rRegL src1, rRegL src2, immL_M1 minus_1, rFlagsReg cr) %{
        // dst = (src1 ^ -1) & src2, i.e. ~src1 & src2, emitted as a single
        // register-only andnq. Only selected when the CPU-feature checks in the
        // predicate (BMI1 and AVX) both hold. cr is declared killed.
13833   match(Set dst (AndL (XorL src1 minus_1) src2));
13834   predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
13835   effect(KILL cr);
13836   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13837 
13838   format %{ "andnq  $dst, $src1, $src2" %}
13839 
13840   ins_encode %{
13841     __ andnq($dst$$Register, $src1$$Register, $src2$$Register);
13842   %}
        // All operands are registers, so use the register-register pipeline
        // class rather than the register-memory one.
13843   ins_pipe(ialu_reg_reg);
13844 %}
13845 
13846 instruct blsiL_rReg_rReg(rRegL dst, rRegL src, immL0 imm_zero, rFlagsReg cr) %{
        // dst = (0 - src) & src, emitted as a single blsiq on registers.
        // Only selected when the BMI1 and AVX feature checks hold; cr is killed.
13847   match(Set dst (AndL (SubL imm_zero src) src));
13848   predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
13849   effect(KILL cr);
13850   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13851 
13852   format %{ "blsiq  $dst, $src" %}
13853 
13854   ins_encode %{
13855     __ blsiq($dst$$Register, $src$$Register);
13856   %}
13857   ins_pipe(ialu_reg);
13858 %}
13859 
13860 instruct blsiL_rReg_mem(rRegL dst, memory src, immL0 imm_zero, rFlagsReg cr) %{
        // Memory-source form of blsiq: the match rule names the same memory
        // operand in both LoadL positions, dst = (0 - [src]) & [src].
        // Only selected when the BMI1 and AVX feature checks hold; cr is killed.
13861   match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
13862   predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
13863   effect(KILL cr);
13864   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13865 
13866   ins_cost(125);
13867   format %{ "blsiq  $dst, $src" %}
13868 
13869   ins_encode %{
13870     __ blsiq($dst$$Register, $src$$Address);
13871   %}
13872   ins_pipe(ialu_reg_mem);
13873 %}
13874 
13875 instruct blsmskL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
13876 %{
        // Memory-source form of blsmskq: dst = ([src] + -1) ^ [src], with the
        // same memory operand named in both LoadL positions.
        // Only selected when the BMI1 and AVX feature checks hold; cr is killed.
13877   match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ) );
13878   predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
13879   effect(KILL cr);
13880   flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13881 
13882   ins_cost(125);
13883   format %{ "blsmskq $dst, $src" %}
13884 
13885   ins_encode %{
13886     __ blsmskq($dst$$Register, $src$$Address);
13887   %}
13888   ins_pipe(ialu_reg_mem);
13889 %}
13890 
13891 instruct blsmskL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
13892 %{
        // dst = (src + -1) ^ src, emitted as a single blsmskq on registers.
        // Only selected when the BMI1 and AVX feature checks hold; cr is killed.
13893   match(Set dst (XorL (AddL src minus_1) src));
13894   predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
13895   effect(KILL cr);
13896   flag(PD::Flag_sets_sign_flag, PD::Flag_clears_zero_flag, PD::Flag_clears_overflow_flag);
13897 
13898   format %{ "blsmskq $dst, $src" %}
13899 
13900   ins_encode %{
13901     __ blsmskq($dst$$Register, $src$$Register);
13902   %}
13903 
13904   ins_pipe(ialu_reg);
13905 %}
13906 
13907 instruct blsrL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
13908 %{
        // dst = (src + -1) & src, emitted as a single blsrq on registers.
        // Only selected when the BMI1 and AVX feature checks hold; cr is killed.
13909   match(Set dst (AndL (AddL src minus_1) src) );
13910   predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
13911   effect(KILL cr);
13912   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13913 
13914   format %{ "blsrq  $dst, $src" %}
13915 
13916   ins_encode %{
13917     __ blsrq($dst$$Register, $src$$Register);
13918   %}
13919 
13920   ins_pipe(ialu_reg);
13921 %}
13922 
13923 instruct blsrL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
13924 %{
        // Memory-source form of blsrq: dst = ([src] + -1) & [src], with the same
        // memory operand named in both LoadL positions.
        // Only selected when the BMI1 and AVX feature checks hold; cr is killed.
13925   match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src)) );
13926   predicate(VM_Version::supports_bmi1() && VM_Version::supports_avx());
13927   effect(KILL cr);
13928   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_clears_overflow_flag);
13929 
13930   ins_cost(125);
13931   format %{ "blsrq  $dst, $src" %}
13932 
13933   ins_encode %{
13934     __ blsrq($dst$$Register, $src$$Address);
13935   %}
13936 
        // The source is a memory operand, so use the register-memory pipeline
        // class, as the other BMI1 memory-source rules do.
13937   ins_pipe(ialu_reg_mem);
13938 %}
13939 
13940 // Or Instructions
13941 // Or Register with Register
      // Two-operand form used when UseAPX is off: dst = dst | src via orq.
      // cr is declared killed.
13942 instruct orL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
13943 %{
13944   predicate(!UseAPX);
13945   match(Set dst (OrL dst src));
13946   effect(KILL cr);
13947   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13948 
13949   format %{ "orq     $dst, $src\t# long" %}
13950   ins_encode %{
13951     __ orq($dst$$Register, $src$$Register);
13952   %}
13953   ins_pipe(ialu_reg_reg);
13954 %}
13955 
13956 // Or Register with Register using New Data Destination (NDD)
      // Three-operand form used when UseAPX is on: dst = src1 | src2 via eorq,
      // with dst independent of both sources. cr is declared killed.
13957 instruct orL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
13958 %{
13959   predicate(UseAPX);
13960   match(Set dst (OrL src1 src2));
13961   effect(KILL cr);
13962   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
13963 
13964   format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
13965   ins_encode %{
13966     __ eorq($dst$$Register, $src1$$Register, $src2$$Register, false);
13967 
13968   %}
13969   ins_pipe(ialu_reg_reg);
13970 %}
13971 
13972 // Use any_RegP to match R15 (TLS register) without spilling.
      // Folds the CastP2X into the or: dst = dst | (pointer src viewed as a
      // long), emitted as orq directly on the pointer register.
      // No UseAPX predicate is attached to this rule. cr is declared killed.
13973 instruct orL_rReg_castP2X(rRegL dst, any_RegP src, rFlagsReg cr) %{
13974   match(Set dst (OrL dst (CastP2X src)));
13975   effect(KILL cr);
13976   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13977 
13978   format %{ "orq     $dst, $src\t# long" %}
13979   ins_encode %{
13980     __ orq($dst$$Register, $src$$Register);
13981   %}
13982   ins_pipe(ialu_reg_reg);
13983 %}
13984 
13985 instruct orL_rReg_castP2X_ndd(rRegL dst, any_RegP src1, any_RegP src2, rFlagsReg cr) %{
        // NDD form of the CastP2X-folding or: dst = src1 | (pointer src2 viewed
        // as a long), emitted as a three-operand eorq. cr is declared killed.
        // The three-operand encoding is only valid with APX, so the rule is
        // guarded by UseAPX like every other *_ndd rule in this file.
        // NOTE(review): src1 is declared any_RegP although it is the long input
        // of the OrL (the non-NDD rule uses rRegL there) -- confirm the intended
        // operand class.
        predicate(UseAPX);
13986   match(Set dst (OrL src1 (CastP2X src2)));
13987   effect(KILL cr);
13988   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
13989 
13990   format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
13991   ins_encode %{
13992     __ eorq($dst$$Register, $src1$$Register, $src2$$Register, false);
13993   %}
13994   ins_pipe(ialu_reg_reg);
13995 %}
13996 
13997 // Or Register with Immediate
      // Two-operand form used when UseAPX is off: dst = dst | src, where src is
      // an immL32 constant. cr is declared killed.
13998 instruct orL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
13999 %{
14000   predicate(!UseAPX);
14001   match(Set dst (OrL dst src));
14002   effect(KILL cr);
14003   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14004 
14005   format %{ "orq     $dst, $src\t# long" %}
14006   ins_encode %{
14007     __ orq($dst$$Register, $src$$constant);
14008   %}
14009   ins_pipe(ialu_reg);
14010 %}
14011 
14012 instruct orL_rReg_rReg_imm_ndd(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
14013 %{
        // APX-only rule (UseAPX): dst = src1 | src2 with the immL32 constant as
        // the second OrL input, emitted as a three-operand eorq.
        // cr is declared killed.
14014   predicate(UseAPX);
14015   match(Set dst (OrL src1 src2));
14016   effect(KILL cr);
14017   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
14018 
14019   format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
14020   ins_encode %{
14021     __ eorq($dst$$Register, $src1$$Register, $src2$$constant, false);
14022   %}
14023   ins_pipe(ialu_reg);
14024 %}
14025 
14026 instruct orL_rReg_imm_rReg_ndd(rRegL dst, immL32 src1, rRegL src2, rFlagsReg cr)
14027 %{
        // APX-only rule (UseAPX) for the mirrored operand order: the immL32
        // constant is the FIRST OrL input and the register the second. The
        // encoding swaps them back, so eorq still receives (dst, register, imm).
        // cr is declared killed.
14028   predicate(UseAPX);
14029   match(Set dst (OrL src1 src2));
14030   effect(KILL cr);
14031   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
14032 
14033   format %{ "eorq     $dst, $src2, $src1\t# long ndd" %}
14034   ins_encode %{
14035     __ eorq($dst$$Register, $src2$$Register, $src1$$constant, false);
14036   %}
14037   ins_pipe(ialu_reg);
14038 %}
14039 
14040 // Or Memory with Immediate
      // APX-only rule (UseAPX): dst = (long loaded from src1) | src2, where src2
      // is an immL32 constant. The result goes to the register dst; memory is
      // only read. cr is declared killed.
14041 instruct orL_rReg_mem_imm_ndd(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
14042 %{
14043   predicate(UseAPX);
14044   match(Set dst (OrL (LoadL src1) src2));
14045   effect(KILL cr);
14046   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14047 
14048   format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
14049   ins_encode %{
14050     __ eorq($dst$$Register, $src1$$Address, $src2$$constant, false);
14051   %}
14052   ins_pipe(ialu_reg);
14053 %}
14054 
14055 // Or Register with Memory
      // Two-operand form used when UseAPX is off: dst = dst | (long loaded from
      // src), emitted as orq with a memory source. cr is declared killed.
14056 instruct orL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
14057 %{
14058   predicate(!UseAPX);
14059   match(Set dst (OrL dst (LoadL src)));
14060   effect(KILL cr);
14061   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14062 
14063   ins_cost(150);
14064   format %{ "orq     $dst, $src\t# long" %}
14065   ins_encode %{
14066     __ orq($dst$$Register, $src$$Address);
14067   %}
14068   ins_pipe(ialu_reg_mem);
14069 %}
14070 
14071 instruct orL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
14072 %{
        // APX-only rule (UseAPX): dst = src1 | (long loaded from src2), emitted
        // as a three-operand eorq with a memory source. cr is declared killed.
14073   predicate(UseAPX);
14074   match(Set dst (OrL src1 (LoadL src2)));
14075   effect(KILL cr);
14076   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
14077 
14078   ins_cost(150);
14079   format %{ "eorq     $dst, $src1, $src2\t# long ndd" %}
14080   ins_encode %{
14081     __ eorq($dst$$Register, $src1$$Register, $src2$$Address, false);
14082   %}
14083   ins_pipe(ialu_reg_mem);
14084 %}
14085 
14086 // Or Memory with Register
      // Matches a StoreL to dst of ((LoadL dst) | src) and emits a single orq
      // whose destination is the memory operand. cr is declared killed.
14087 instruct orL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
14088 %{
14089   match(Set dst (StoreL dst (OrL (LoadL dst) src)));
14090   effect(KILL cr);
14091   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14092 
14093   ins_cost(150);
14094   format %{ "orq     $dst, $src\t# long" %}
14095   ins_encode %{
14096     __ orq($dst$$Address, $src$$Register);
14097   %}
14098   ins_pipe(ialu_mem_reg);
14099 %}
14100 
14101 // Or Memory with Immediate
      // Matches a StoreL to dst of ((LoadL dst) | src) where src is an immL32
      // constant, and emits orq on the memory operand with that constant.
      // cr is declared killed.
14102 instruct orL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
14103 %{
14104   match(Set dst (StoreL dst (OrL (LoadL dst) src)));
14105   effect(KILL cr);
14106   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14107 
14108   ins_cost(125);
14109   format %{ "orq     $dst, $src\t# long" %}
14110   ins_encode %{
14111     __ orq($dst$$Address, $src$$constant);
14112   %}
14113   ins_pipe(ialu_mem_imm);
14114 %}
14115 
14116 instruct btsL_mem_imm(memory dst, immL_Pow2 con, rFlagsReg cr)
14117 %{
        // Sets a single bit of a long in memory: matches a StoreL to dst of
        // ((LoadL dst) | con) and emits btsq with bit index log2(con).
14118   // con should be a pure 64-bit power of 2 immediate
14119   // because AND/OR works well enough for 8/32-bit values.
        // The predicate therefore only accepts constants whose bit index is
        // greater than 31. NOTE(review): n->in(3)->in(2) is presumably the
        // constant input of the OrL feeding the store -- confirm.
14120   predicate(log2i_graceful(n->in(3)->in(2)->get_long()) > 31);
14121 
14122   match(Set dst (StoreL dst (OrL (LoadL dst) con)));
14123   effect(KILL cr);
14124 
14125   ins_cost(125);
14126   format %{ "btsq    $dst, log2($con)\t# long" %}
14127   ins_encode %{
14128     __ btsq($dst$$Address, log2i_exact((julong)$con$$constant));
14129   %}
14130   ins_pipe(ialu_mem_imm);
14131 %}
14132 
14133 // Xor Instructions
14134 // Xor Register with Register
      // Two-operand form used when UseAPX is off: dst = dst ^ src via xorq.
      // cr is declared killed.
14135 instruct xorL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
14136 %{
14137   predicate(!UseAPX);
14138   match(Set dst (XorL dst src));
14139   effect(KILL cr);
14140   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14141 
14142   format %{ "xorq    $dst, $src\t# long" %}
14143   ins_encode %{
14144     __ xorq($dst$$Register, $src$$Register);
14145   %}
14146   ins_pipe(ialu_reg_reg);
14147 %}
14148 
14149 // Xor Register with Register using New Data Destination (NDD)
      // Three-operand form used when UseAPX is on: dst = src1 ^ src2 via exorq,
      // with dst independent of both sources. cr is declared killed.
14150 instruct xorL_rReg_ndd(rRegL dst, rRegL src1, rRegL src2, rFlagsReg cr)
14151 %{
14152   predicate(UseAPX);
14153   match(Set dst (XorL src1 src2));
14154   effect(KILL cr);
14155   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
14156 
14157   format %{ "exorq    $dst, $src1, $src2\t# long ndd" %}
14158   ins_encode %{
14159     __ exorq($dst$$Register, $src1$$Register, $src2$$Register, false);
14160   %}
14161   ins_pipe(ialu_reg_reg);
14162 %}
14163 
14164 // Xor Register with Immediate -1
      // Used when UseAPX is off: dst ^ -1 is emitted as notq on dst.
      // This rule declares no flags operand and no KILL effect.
14165 instruct xorL_rReg_im1(rRegL dst, immL_M1 imm)
14166 %{
14167   predicate(!UseAPX);
14168   match(Set dst (XorL dst imm));
14169 
14170   format %{ "notq   $dst" %}
14171   ins_encode %{
14172      __ notq($dst$$Register);
14173   %}
14174   ins_pipe(ialu_reg);
14175 %}
14176 
14177 instruct xorL_rReg_im1_ndd(rRegL dst,rRegL src, immL_M1 imm)
14178 %{
        // APX-only rule (UseAPX): dst = src ^ -1, emitted as the two-register
        // enotq. This rule declares no flags operand and no KILL effect.
14179   predicate(UseAPX);
14180   match(Set dst (XorL src imm));
14181   flag(PD::Flag_ndd_demotable_opr1);
14182 
14183   format %{ "enotq   $dst, $src" %}
14184   ins_encode %{
14185     __ enotq($dst$$Register, $src$$Register);
14186   %}
14187   ins_pipe(ialu_reg);
14188 %}
14189 
14190 // Xor Register with Immediate
      // Two-operand form used when UseAPX is off: dst = dst ^ src, where src is
      // an immL32 constant other than -1 (the -1 case is left to the notq rule).
      // cr is declared killed.
14191 instruct xorL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
14192 %{
14193   // Strict predicate check to make selection of xorL_rReg_im1 cost agnostic if immL32 src is -1.
14194   predicate(!UseAPX && n->in(2)->bottom_type()->is_long()->get_con() != -1L);
14195   match(Set dst (XorL dst src));
14196   effect(KILL cr);
14197   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14198 
14199   format %{ "xorq    $dst, $src\t# long" %}
14200   ins_encode %{
14201     __ xorq($dst$$Register, $src$$constant);
14202   %}
14203   ins_pipe(ialu_reg);
14204 %}
14205 
14206 instruct xorL_rReg_rReg_imm(rRegL dst, rRegL src1, immL32 src2, rFlagsReg cr)
14207 %{
        // APX-only rule: dst = src1 ^ src2, where src2 is an immL32 constant
        // other than -1, emitted as a three-operand exorq. cr is declared killed.
        // Note the rule name carries no _ndd suffix although it is the NDD form.
14208   // Strict predicate check to make selection of xorL_rReg_im1_ndd cost agnostic if immL32 src2 is -1.
14209   predicate(UseAPX && n->in(2)->bottom_type()->is_long()->get_con() != -1L);
14210   match(Set dst (XorL src1 src2));
14211   effect(KILL cr);
14212   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1);
14213 
14214   format %{ "exorq    $dst, $src1, $src2\t# long ndd" %}
14215   ins_encode %{
14216     __ exorq($dst$$Register, $src1$$Register, $src2$$constant, false);
14217   %}
14218   ins_pipe(ialu_reg);
14219 %}
14220 
14221 // Xor Memory with Immediate
      // APX-only rule (UseAPX): dst = (long loaded from src1) ^ src2, where src2
      // is an immL32 constant. The result goes to the register dst; memory is
      // only read. cr is declared killed.
14222 instruct xorL_rReg_mem_imm(rRegL dst, memory src1, immL32 src2, rFlagsReg cr)
14223 %{
14224   predicate(UseAPX);
14225   match(Set dst (XorL (LoadL src1) src2));
14226   effect(KILL cr);
14227   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14228   ins_cost(150);
14229 
14230   format %{ "exorq    $dst, $src1, $src2\t# long ndd" %}
14231   ins_encode %{
14232     __ exorq($dst$$Register, $src1$$Address, $src2$$constant, false);
14233   %}
14234   ins_pipe(ialu_reg);
14235 %}
14236 
14237 // Xor Register with Memory
      // Two-operand form used when UseAPX is off: dst = dst ^ (long loaded from
      // src), emitted as xorq with a memory source. cr is declared killed.
14238 instruct xorL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
14239 %{
14240   predicate(!UseAPX);
14241   match(Set dst (XorL dst (LoadL src)));
14242   effect(KILL cr);
14243   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14244 
14245   ins_cost(150);
14246   format %{ "xorq    $dst, $src\t# long" %}
14247   ins_encode %{
14248     __ xorq($dst$$Register, $src$$Address);
14249   %}
14250   ins_pipe(ialu_reg_mem);
14251 %}
14252 
14253 instruct xorL_rReg_rReg_mem_ndd(rRegL dst, rRegL src1, memory src2, rFlagsReg cr)
14254 %{
        // APX-only rule (UseAPX): dst = src1 ^ (long loaded from src2), emitted
        // as a three-operand exorq with a memory source. cr is declared killed.
14255   predicate(UseAPX);
14256   match(Set dst (XorL src1 (LoadL src2)));
14257   effect(KILL cr);
14258   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag, PD::Flag_ndd_demotable_opr1, PD::Flag_ndd_demotable_opr2);
14259 
14260   ins_cost(150);
14261   format %{ "exorq    $dst, $src1, $src2\t# long ndd" %}
14262   ins_encode %{
14263     __ exorq($dst$$Register, $src1$$Register, $src2$$Address, false);
14264   %}
14265   ins_pipe(ialu_reg_mem);
14266 %}
14267 
14268 // Xor Memory with Register
      // Matches a StoreL to dst of ((LoadL dst) ^ src) and emits a single xorq
      // whose destination is the memory operand. cr is declared killed.
14269 instruct xorL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
14270 %{
14271   match(Set dst (StoreL dst (XorL (LoadL dst) src)));
14272   effect(KILL cr);
14273   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14274 
14275   ins_cost(150);
14276   format %{ "xorq    $dst, $src\t# long" %}
14277   ins_encode %{
14278     __ xorq($dst$$Address, $src$$Register);
14279   %}
14280   ins_pipe(ialu_mem_reg);
14281 %}
14282 
14283 // Xor Memory with Immediate
      // Matches a StoreL to dst of ((LoadL dst) ^ src) where src is an immL32
      // constant, and emits xorq on the memory operand with that constant.
      // cr is declared killed.
14284 instruct xorL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
14285 %{
14286   match(Set dst (StoreL dst (XorL (LoadL dst) src)));
14287   effect(KILL cr);
14288   flag(PD::Flag_sets_sign_flag, PD::Flag_sets_zero_flag, PD::Flag_sets_parity_flag, PD::Flag_clears_overflow_flag, PD::Flag_clears_carry_flag);
14289 
14290   ins_cost(125);
14291   format %{ "xorq    $dst, $src\t# long" %}
14292   ins_encode %{
14293     __ xorq($dst$$Address, $src$$constant);
14294   %}
14295   ins_pipe(ialu_mem_imm);
14296 %}
14297 
14298 instruct cmpLTMask(rRegI dst, rRegI p, rRegI q, rFlagsReg cr)
14299 %{
        // Materializes CmpLTMask p q into dst: compare p with q, set dst to 1
        // when p < q (signed "less") and 0 otherwise, then negate so the result
        // is all ones or zero. cr is declared killed.
14300   match(Set dst (CmpLTMask p q));
14301   effect(KILL cr);
14302 
14303   ins_cost(400);
        // Each listed instruction is terminated with "\n\t" so that the setcc
        // and negl lines do not run together in the printed listing.
14304   format %{ "cmpl    $p, $q\t# cmpLTMask\n\t"
14305             "setcc   $dst \t# emits setlt + movzbl or setzul for APX\n\t"
14306             "negl    $dst" %}
14307   ins_encode %{
14308     __ cmpl($p$$Register, $q$$Register);
14309     __ setcc(Assembler::less, $dst$$Register);
14310     __ negl($dst$$Register);
14311   %}
14312   ins_pipe(pipe_slow);
14313 %}
14314 
14315 instruct cmpLTMask0(rRegI dst, immI_0 zero, rFlagsReg cr)
14316 %{
        // Special case of CmpLTMask against the constant zero: an arithmetic
        // right shift of dst by 31 is emitted in place of compare/set/negate.
        // Cheaper (cost 100) than the general rule; cr is declared killed.
14317   match(Set dst (CmpLTMask dst zero));
14318   effect(KILL cr);
14319 
14320   ins_cost(100);
14321   format %{ "sarl    $dst, #31\t# cmpLTMask0" %}
14322   ins_encode %{
14323     __ sarl($dst$$Register, 31);
14324   %}
14325   ins_pipe(ialu_reg);
14326 %}
14327 
14328 // Better to save a register than avoid a branch.
      // Fused conditional add: p = (p - q) + ((CmpLTMask p q) & y). Instead of
      // materializing the mask, the subtraction's flags drive a short forward
      // branch around the add of y.
14329 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
14330 %{
14331   match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
14332   effect(KILL cr);
14333   ins_cost(300);
14334   format %{ "subl    $p,$q\t# cadd_cmpLTMask\n\t"
14335             "jge     done\n\t"
14336             "addl    $p,$y\n"
14337             "done:   " %}
14338   ins_encode %{
14342     Label skip_add;
14343     __ subl($p$$Register, $q$$Register);
          // p >= q: nothing to add, p already holds p - q.
14344     __ jccb(Assembler::greaterEqual, skip_add);
14345     __ addl($p$$Register, $y$$Register);
14346     __ bind(skip_add);
14347   %}
14348   ins_pipe(pipe_cmplt);
14349 %}
14350 
14351 // Better to save a register than avoid a branch.
      // Fused mask-and: y = (CmpLTMask p q) & y. Instead of materializing the
      // mask, p and q are compared and y is zeroed unless the "less" branch
      // is taken.
14352 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
14353 %{
14354   match(Set y (AndI (CmpLTMask p q) y));
14355   effect(KILL cr);
14356 
14357   ins_cost(300);
14358 
14359   format %{ "cmpl    $p, $q\t# and_cmpLTMask\n\t"
14360             "jlt     done\n\t"
14361             "xorl    $y, $y\n"
14362             "done:   " %}
14363   ins_encode %{
14367     Label keep_y;
14368     __ cmpl($p$$Register, $q$$Register);
          // p < q: y is left unchanged.
14369     __ jccb(Assembler::less, keep_y);
14370     __ xorl($y$$Register, $y$$Register);
14371     __ bind(keep_y);
14372   %}
14373   ins_pipe(pipe_cmplt);
14374 %}
14375 
14376 
14377 //---------- FP Instructions------------------------------------------------
14378 
14379 // Really expensive, avoid
      // Float compare producing condition codes in an rFlagsRegU operand:
      // ucomiss followed by the code emitted by emit_cmpfp_fixup (defined
      // elsewhere). The format text lists a NaN fixup sequence for that helper;
      // the high cost (500) steers selection away from this rule.
14380 instruct cmpF_cc_reg(rFlagsRegU cr, regF src1, regF src2)
14381 %{
14382   match(Set cr (CmpF src1 src2));
14383 
14384   ins_cost(500);
14385   format %{ "ucomiss $src1, $src2\n\t"
14386             "jnp,s   exit\n\t"
14387             "pushfq\t# saw NaN, set CF\n\t"
14388             "andq    [rsp], #0xffffff2b\n\t"
14389             "popfq\n"
14390     "exit:" %}
14391   ins_encode %{
14392     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14393     emit_cmpfp_fixup(masm);
14394   %}
14395   ins_pipe(pipe_slow);
14396 %}
14397 
14398 instruct cmpF_cc_regCF(rFlagsRegUCF cr, regF src1, regF src2) %{
        // Float register-register compare into an rFlagsRegUCF operand: a bare
        // ucomiss with no fixup code, at cost 100.
14399   match(Set cr (CmpF src1 src2));
14400 
14401   ins_cost(100);
14402   format %{ "ucomiss $src1, $src2" %}
14403   ins_encode %{
14404     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14405   %}
14406   ins_pipe(pipe_slow);
14407 %}
14408 
14409 instruct cmpF_cc_regCFE(rFlagsRegUCFE cr, regF src1, regF src2) %{
        // Float register-register compare into an rFlagsRegUCFE operand, emitted
        // as evucomxss at cost 100.
14410   match(Set cr (CmpF src1 src2));
14411 
14412   ins_cost(100);
14413   format %{ "evucomxss $src1, $src2" %}
14414   ins_encode %{
14415     __ evucomxss($src1$$XMMRegister, $src2$$XMMRegister);
14416   %}
14417   ins_pipe(pipe_slow);
14418 %}
14419 
14420 instruct cmpF_cc_memCF(rFlagsRegUCF cr, regF src1, memory src2) %{
        // Float compare of a register against a float loaded from memory, into
        // an rFlagsRegUCF operand: ucomiss with a memory source, cost 100.
14421   match(Set cr (CmpF src1 (LoadF src2)));
14422 
14423   ins_cost(100);
14424   format %{ "ucomiss $src1, $src2" %}
14425   ins_encode %{
14426     __ ucomiss($src1$$XMMRegister, $src2$$Address);
14427   %}
14428   ins_pipe(pipe_slow);
14429 %}
14430 
14431 instruct cmpF_cc_memCFE(rFlagsRegUCFE cr, regF src1, memory src2) %{
        // Float compare of a register against a float loaded from memory, into
        // an rFlagsRegUCFE operand: evucomxss with a memory source, cost 100.
14432   match(Set cr (CmpF src1 (LoadF src2)));
14433 
14434   ins_cost(100);
14435   format %{ "evucomxss $src1, $src2" %}
14436   ins_encode %{
14437     __ evucomxss($src1$$XMMRegister, $src2$$Address);
14438   %}
14439   ins_pipe(pipe_slow);
14440 %}
14441 
14442 instruct cmpF_cc_immCF(rFlagsRegUCF cr, regF src, immF con) %{
        // Float compare of a register against a float constant, into an
        // rFlagsRegUCF operand. The constant is addressed through
        // $constantaddress($con) and used as the memory operand of ucomiss.
14443   match(Set cr (CmpF src con));
14444 
14445   ins_cost(100);
14446   format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con" %}
14447   ins_encode %{
14448     __ ucomiss($src$$XMMRegister, $constantaddress($con));
14449   %}
14450   ins_pipe(pipe_slow);
14451 %}
14452 
14453 instruct cmpF_cc_immCFE(rFlagsRegUCFE cr, regF src, immF con) %{
        // Float compare of a register against a float constant, into an
        // rFlagsRegUCFE operand. The constant is addressed through
        // $constantaddress($con) and used as the memory operand of evucomxss.
14454   match(Set cr (CmpF src con));
14455 
14456   ins_cost(100);
14457   format %{ "evucomxss $src, [$constantaddress]\t# load from constant table: float=$con" %}
14458   ins_encode %{
14459     __ evucomxss($src$$XMMRegister, $constantaddress($con));
14460   %}
14461   ins_pipe(pipe_slow);
14462 %}
14463 
14464 // Really expensive, avoid
      // Double compare producing condition codes in an rFlagsRegU operand:
      // ucomisd followed by the code emitted by emit_cmpfp_fixup (defined
      // elsewhere). The format text lists a NaN fixup sequence for that helper;
      // the high cost (500) steers selection away from this rule.
14465 instruct cmpD_cc_reg(rFlagsRegU cr, regD src1, regD src2)
14466 %{
14467   match(Set cr (CmpD src1 src2));
14468 
14469   ins_cost(500);
14470   format %{ "ucomisd $src1, $src2\n\t"
14471             "jnp,s   exit\n\t"
14472             "pushfq\t# saw NaN, set CF\n\t"
14473             "andq    [rsp], #0xffffff2b\n\t"
14474             "popfq\n"
14475     "exit:" %}
14476   ins_encode %{
14477     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14478     emit_cmpfp_fixup(masm);
14479   %}
14480   ins_pipe(pipe_slow);
14481 %}
14482 
14483 instruct cmpD_cc_regCF(rFlagsRegUCF cr, regD src1, regD src2) %{
        // Double register-register compare into an rFlagsRegUCF operand: a bare
        // ucomisd with no fixup code, at cost 100.
14484   match(Set cr (CmpD src1 src2));
14485 
14486   ins_cost(100);
        // The listing text names only the emitted instruction, matching the
        // float and memory-source sibling rules.
14487   format %{ "ucomisd $src1, $src2" %}
14488   ins_encode %{
14489     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14490   %}
14491   ins_pipe(pipe_slow);
14492 %}
14493 
14494 instruct cmpD_cc_regCFE(rFlagsRegUCFE cr, regD src1, regD src2) %{
        // Double register-register compare into an rFlagsRegUCFE operand,
        // emitted as evucomxsd at cost 100.
14495   match(Set cr (CmpD src1 src2));
14496 
14497   ins_cost(100);
        // The listing text names only the emitted instruction, matching the
        // float and memory-source sibling rules.
14498   format %{ "evucomxsd $src1, $src2" %}
14499   ins_encode %{
14500     __ evucomxsd($src1$$XMMRegister, $src2$$XMMRegister);
14501   %}
14502   ins_pipe(pipe_slow);
14503 %}
14504 
14505 instruct cmpD_cc_memCF(rFlagsRegUCF cr, regD src1, memory src2) %{
        // Double compare of a register against a double loaded from memory, into
        // an rFlagsRegUCF operand: ucomisd with a memory source, cost 100.
14506   match(Set cr (CmpD src1 (LoadD src2)));
14507 
14508   ins_cost(100);
14509   format %{ "ucomisd $src1, $src2" %}
14510   ins_encode %{
14511     __ ucomisd($src1$$XMMRegister, $src2$$Address);
14512   %}
14513   ins_pipe(pipe_slow);
14514 %}
14515 
14516 instruct cmpD_cc_memCFE(rFlagsRegUCFE cr, regD src1, memory src2) %{
        // Double compare of a register against a double loaded from memory, into
        // an rFlagsRegUCFE operand: evucomxsd with a memory source, cost 100.
14517   match(Set cr (CmpD src1 (LoadD src2)));
14518 
14519   ins_cost(100);
14520   format %{ "evucomxsd $src1, $src2" %}
14521   ins_encode %{
14522     __ evucomxsd($src1$$XMMRegister, $src2$$Address);
14523   %}
14524   ins_pipe(pipe_slow);
14525 %}
14526 
14527 instruct cmpD_cc_immCF(rFlagsRegUCF cr, regD src, immD con) %{
        // Double compare of a register against a double constant, into an
        // rFlagsRegUCF operand. The constant is addressed through
        // $constantaddress($con) and used as the memory operand of ucomisd.
14528   match(Set cr (CmpD src con));
14529   ins_cost(100);
14530   format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con" %}
14531   ins_encode %{
14532     __ ucomisd($src$$XMMRegister, $constantaddress($con));
14533   %}
14534   ins_pipe(pipe_slow);
14535 %}
14536 
14537 instruct cmpD_cc_immCFE(rFlagsRegUCFE cr, regD src, immD con) %{
        // Double compare of a register against a double constant, into an
        // rFlagsRegUCFE operand. The constant is addressed through
        // $constantaddress($con) and used as the memory operand of evucomxsd.
14538   match(Set cr (CmpD src con));
14539 
14540   ins_cost(100);
14541   format %{ "evucomxsd $src, [$constantaddress]\t# load from constant table: double=$con" %}
14542   ins_encode %{
14543     __ evucomxsd($src$$XMMRegister, $constantaddress($con));
14544   %}
14545   ins_pipe(pipe_slow);
14546 %}
14547 
14548 // Compare into -1,0,1
      // Three-way float compare (CmpF3) of two registers into the integer dst:
      // ucomiss sets the flags and emit_cmpfp3 (defined elsewhere) turns them
      // into the result. The format text lists the sequence expected from that
      // helper. cr is declared killed.
14549 instruct cmpF_reg(rRegI dst, regF src1, regF src2, rFlagsReg cr)
14550 %{
14551   match(Set dst (CmpF3 src1 src2));
14552   effect(KILL cr);
14553 
14554   ins_cost(275);
14555   format %{ "ucomiss $src1, $src2\n\t"
14556             "movl    $dst, #-1\n\t"
14557             "jp,s    done\n\t"
14558             "jb,s    done\n\t"
14559             "setne   $dst\n\t"
14560             "movzbl  $dst, $dst\n"
14561     "done:" %}
14562   ins_encode %{
14563     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
14564     emit_cmpfp3(masm, $dst$$Register);
14565   %}
14566   ins_pipe(pipe_slow);
14567 %}
14568 
14569 // Compare into -1,0,1
      // Three-way float compare (CmpF3) of a register against a float loaded
      // from memory: ucomiss with a memory source, then emit_cmpfp3 (defined
      // elsewhere) produces the integer result in dst. cr is declared killed.
14570 instruct cmpF_mem(rRegI dst, regF src1, memory src2, rFlagsReg cr)
14571 %{
14572   match(Set dst (CmpF3 src1 (LoadF src2)));
14573   effect(KILL cr);
14574 
14575   ins_cost(275);
14576   format %{ "ucomiss $src1, $src2\n\t"
14577             "movl    $dst, #-1\n\t"
14578             "jp,s    done\n\t"
14579             "jb,s    done\n\t"
14580             "setne   $dst\n\t"
14581             "movzbl  $dst, $dst\n"
14582     "done:" %}
14583   ins_encode %{
14584     __ ucomiss($src1$$XMMRegister, $src2$$Address);
14585     emit_cmpfp3(masm, $dst$$Register);
14586   %}
14587   ins_pipe(pipe_slow);
14588 %}
14589 
14590 // Compare into -1,0,1
      // Three-way float compare (CmpF3) of a register against a float constant
      // addressed through $constantaddress($con); emit_cmpfp3 (defined
      // elsewhere) produces the integer result in dst. cr is declared killed.
14591 instruct cmpF_imm(rRegI dst, regF src, immF con, rFlagsReg cr) %{
14592   match(Set dst (CmpF3 src con));
14593   effect(KILL cr);
14594 
14595   ins_cost(275);
14596   format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con\n\t"
14597             "movl    $dst, #-1\n\t"
14598             "jp,s    done\n\t"
14599             "jb,s    done\n\t"
14600             "setne   $dst\n\t"
14601             "movzbl  $dst, $dst\n"
14602     "done:" %}
14603   ins_encode %{
14604     __ ucomiss($src$$XMMRegister, $constantaddress($con));
14605     emit_cmpfp3(masm, $dst$$Register);
14606   %}
14607   ins_pipe(pipe_slow);
14608 %}
14609 
14610 // Compare into -1,0,1
      // Three-way double compare (CmpD3) of two registers into the integer dst:
      // ucomisd sets the flags and emit_cmpfp3 (defined elsewhere) turns them
      // into the result. The format text lists the sequence expected from that
      // helper. cr is declared killed.
14611 instruct cmpD_reg(rRegI dst, regD src1, regD src2, rFlagsReg cr)
14612 %{
14613   match(Set dst (CmpD3 src1 src2));
14614   effect(KILL cr);
14615 
14616   ins_cost(275);
14617   format %{ "ucomisd $src1, $src2\n\t"
14618             "movl    $dst, #-1\n\t"
14619             "jp,s    done\n\t"
14620             "jb,s    done\n\t"
14621             "setne   $dst\n\t"
14622             "movzbl  $dst, $dst\n"
14623     "done:" %}
14624   ins_encode %{
14625     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
14626     emit_cmpfp3(masm, $dst$$Register);
14627   %}
14628   ins_pipe(pipe_slow);
14629 %}
14630 
14631 // Compare into -1,0,1
      // Three-way double compare (CmpD3) of a register against a double loaded
      // from memory: ucomisd with a memory source, then emit_cmpfp3 (defined
      // elsewhere) produces the integer result in dst. cr is declared killed.
14632 instruct cmpD_mem(rRegI dst, regD src1, memory src2, rFlagsReg cr)
14633 %{
14634   match(Set dst (CmpD3 src1 (LoadD src2)));
14635   effect(KILL cr);
14636 
14637   ins_cost(275);
14638   format %{ "ucomisd $src1, $src2\n\t"
14639             "movl    $dst, #-1\n\t"
14640             "jp,s    done\n\t"
14641             "jb,s    done\n\t"
14642             "setne   $dst\n\t"
14643             "movzbl  $dst, $dst\n"
14644     "done:" %}
14645   ins_encode %{
14646     __ ucomisd($src1$$XMMRegister, $src2$$Address);
14647     emit_cmpfp3(masm, $dst$$Register);
14648   %}
14649   ins_pipe(pipe_slow);
14650 %}
14651 
14652 // Compare into -1,0,1
      // Three-way double compare (CmpD3) of a register against a double constant
      // addressed through $constantaddress($con); emit_cmpfp3 (defined
      // elsewhere) produces the integer result in dst. cr is declared killed.
14653 instruct cmpD_imm(rRegI dst, regD src, immD con, rFlagsReg cr) %{
14654   match(Set dst (CmpD3 src con));
14655   effect(KILL cr);
14656 
14657   ins_cost(275);
14658   format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con\n\t"
14659             "movl    $dst, #-1\n\t"
14660             "jp,s    done\n\t"
14661             "jb,s    done\n\t"
14662             "setne   $dst\n\t"
14663             "movzbl  $dst, $dst\n"
14664     "done:" %}
14665   ins_encode %{
14666     __ ucomisd($src$$XMMRegister, $constantaddress($con));
14667     emit_cmpfp3(masm, $dst$$Register);
14668   %}
14669   ins_pipe(pipe_slow);
14670 %}
14671 
14672 //----------Arithmetic Conversion Instructions---------------------------------
14673 
14674 instruct convF2D_reg_reg(regD dst, regF src)
14675 %{
        // Float-to-double conversion between XMM registers via cvtss2sd.
        // No predicate and no flags operand are declared for this rule.
14676   match(Set dst (ConvF2D src));
14677 
14678   format %{ "cvtss2sd $dst, $src" %}
14679   ins_encode %{
14680     __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
14681   %}
14682   ins_pipe(pipe_slow); // XXX
14683 %}
14684 
14685 instruct convF2D_reg_mem(regD dst, memory src)
14686 %{
        // Float-to-double conversion with the float loaded straight from memory
        // by cvtss2sd. Only selected when UseAVX == 0.
14687   predicate(UseAVX == 0);
14688   match(Set dst (ConvF2D (LoadF src)));
14689 
14690   format %{ "cvtss2sd $dst, $src" %}
14691   ins_encode %{
14692     __ cvtss2sd ($dst$$XMMRegister, $src$$Address);
14693   %}
14694   ins_pipe(pipe_slow); // XXX
14695 %}
14696 
14697 instruct convD2F_reg_reg(regF dst, regD src)
14698 %{
        // Double-to-float conversion between XMM registers via cvtsd2ss.
        // No predicate and no flags operand are declared for this rule.
14699   match(Set dst (ConvD2F src));
14700 
14701   format %{ "cvtsd2ss $dst, $src" %}
14702   ins_encode %{
14703     __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
14704   %}
14705   ins_pipe(pipe_slow); // XXX
14706 %}
14707 
// ConvD2F with the LoadD folded into the memory operand of cvtsd2ss.
// Only selected when UseAVX == 0.
instruct convD2F_reg_mem(regF dst, memory src) %{
  match(Set dst (ConvD2F (LoadD src)));
  predicate(UseAVX == 0);

  format %{ "cvtsd2ss $dst, $src" %}
  ins_encode %{
    XMMRegister dst_xmm = $dst$$XMMRegister;
    __ cvtsd2ss(dst_xmm, $src$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14719 
14720 // XXX do mem variants
// ConvF2I when AVX10.2 is not available: delegates to the convertF2I macro
// (called with T_INT, T_FLOAT). The flags register is killed.
instruct convF2I_reg_reg(rRegI dst, regF src, rFlagsReg cr) %{
  match(Set dst (ConvF2I src));
  predicate(!VM_Version::supports_avx10_2());
  effect(KILL cr);

  format %{ "convert_f2i $dst, $src" %}
  ins_encode %{
    Register    dst_reg = $dst$$Register;
    XMMRegister src_xmm = $src$$XMMRegister;
    __ convertF2I(T_INT, T_FLOAT, dst_reg, src_xmm);
  %}
  ins_pipe(pipe_slow);
%}
14732 
// ConvF2I on AVX10.2 hardware: a single evcvttss2sisl, register source.
// No flags operand is declared for this form.
instruct convF2I_reg_reg_avx10_2(rRegI dst, regF src) %{
  match(Set dst (ConvF2I src));
  predicate(VM_Version::supports_avx10_2());

  format %{ "evcvttss2sisl $dst, $src" %}
  ins_encode %{
    Register    dst_reg = $dst$$Register;
    XMMRegister src_xmm = $src$$XMMRegister;
    __ evcvttss2sisl(dst_reg, src_xmm);
  %}
  ins_pipe(pipe_slow);
%}
14743 
// ConvF2I on AVX10.2 hardware with the LoadF folded into the memory operand
// of evcvttss2sisl.
instruct convF2I_reg_mem_avx10_2(rRegI dst, memory src) %{
  match(Set dst (ConvF2I (LoadF src)));
  predicate(VM_Version::supports_avx10_2());

  format %{ "evcvttss2sisl $dst, $src" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ evcvttss2sisl(dst_reg, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
14754 
// ConvF2L when AVX10.2 is not available: delegates to the convertF2I macro
// (called with T_LONG, T_FLOAT). The flags register is killed.
instruct convF2L_reg_reg(rRegL dst, regF src, rFlagsReg cr) %{
  match(Set dst (ConvF2L src));
  predicate(!VM_Version::supports_avx10_2());
  effect(KILL cr);

  format %{ "convert_f2l $dst, $src"%}
  ins_encode %{
    Register    dst_reg = $dst$$Register;
    XMMRegister src_xmm = $src$$XMMRegister;
    __ convertF2I(T_LONG, T_FLOAT, dst_reg, src_xmm);
  %}
  ins_pipe(pipe_slow);
%}
14766 
// ConvF2L on AVX10.2 hardware: a single evcvttss2sisq, register source.
instruct convF2L_reg_reg_avx10_2(rRegL dst, regF src) %{
  match(Set dst (ConvF2L src));
  predicate(VM_Version::supports_avx10_2());

  format %{ "evcvttss2sisq $dst, $src" %}
  ins_encode %{
    Register    dst_reg = $dst$$Register;
    XMMRegister src_xmm = $src$$XMMRegister;
    __ evcvttss2sisq(dst_reg, src_xmm);
  %}
  ins_pipe(pipe_slow);
%}
14777 
// ConvF2L on AVX10.2 hardware with the LoadF folded into the memory operand
// of evcvttss2sisq.
instruct convF2L_reg_mem_avx10_2(rRegL dst, memory src) %{
  match(Set dst (ConvF2L (LoadF src)));
  predicate(VM_Version::supports_avx10_2());

  format %{ "evcvttss2sisq $dst, $src" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ evcvttss2sisq(dst_reg, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
14788 
// ConvD2I when AVX10.2 is not available: delegates to the convertF2I macro
// (called with T_INT, T_DOUBLE). The flags register is killed.
instruct convD2I_reg_reg(rRegI dst, regD src, rFlagsReg cr) %{
  match(Set dst (ConvD2I src));
  predicate(!VM_Version::supports_avx10_2());
  effect(KILL cr);

  format %{ "convert_d2i $dst, $src"%}
  ins_encode %{
    Register    dst_reg = $dst$$Register;
    XMMRegister src_xmm = $src$$XMMRegister;
    __ convertF2I(T_INT, T_DOUBLE, dst_reg, src_xmm);
  %}
  ins_pipe(pipe_slow);
%}
14800 
// ConvD2I on AVX10.2 hardware: a single evcvttsd2sisl, register source.
instruct convD2I_reg_reg_avx10_2(rRegI dst, regD src) %{
  match(Set dst (ConvD2I src));
  predicate(VM_Version::supports_avx10_2());

  format %{ "evcvttsd2sisl $dst, $src" %}
  ins_encode %{
    Register    dst_reg = $dst$$Register;
    XMMRegister src_xmm = $src$$XMMRegister;
    __ evcvttsd2sisl(dst_reg, src_xmm);
  %}
  ins_pipe(pipe_slow);
%}
14811 
// ConvD2I on AVX10.2 hardware with the LoadD folded into the memory operand
// of evcvttsd2sisl.
instruct convD2I_reg_mem_avx10_2(rRegI dst, memory src) %{
  match(Set dst (ConvD2I (LoadD src)));
  predicate(VM_Version::supports_avx10_2());

  format %{ "evcvttsd2sisl $dst, $src" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ evcvttsd2sisl(dst_reg, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
14822 
// ConvD2L when AVX10.2 is not available: delegates to the convertF2I macro
// (called with T_LONG, T_DOUBLE). The flags register is killed.
instruct convD2L_reg_reg(rRegL dst, regD src, rFlagsReg cr) %{
  match(Set dst (ConvD2L src));
  predicate(!VM_Version::supports_avx10_2());
  effect(KILL cr);

  format %{ "convert_d2l $dst, $src"%}
  ins_encode %{
    Register    dst_reg = $dst$$Register;
    XMMRegister src_xmm = $src$$XMMRegister;
    __ convertF2I(T_LONG, T_DOUBLE, dst_reg, src_xmm);
  %}
  ins_pipe(pipe_slow);
%}
14834 
// ConvD2L on AVX10.2 hardware: a single evcvttsd2sisq, register source.
instruct convD2L_reg_reg_avx10_2(rRegL dst, regD src) %{
  match(Set dst (ConvD2L src));
  predicate(VM_Version::supports_avx10_2());

  format %{ "evcvttsd2sisq $dst, $src" %}
  ins_encode %{
    Register    dst_reg = $dst$$Register;
    XMMRegister src_xmm = $src$$XMMRegister;
    __ evcvttsd2sisq(dst_reg, src_xmm);
  %}
  ins_pipe(pipe_slow);
%}
14845 
// ConvD2L on AVX10.2 hardware with the LoadD folded into the memory operand
// of evcvttsd2sisq.
instruct convD2L_reg_mem_avx10_2(rRegL dst, memory src) %{
  match(Set dst (ConvD2L (LoadD src)));
  predicate(VM_Version::supports_avx10_2());

  format %{ "evcvttsd2sisq $dst, $src" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    __ evcvttsd2sisq(dst_reg, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
14856 
// RoundD: double to long via the round_double macro. Uses a general-purpose
// scratch register plus rcx as temporaries, and kills the flags.
instruct round_double_reg(rRegL dst, regD src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr) %{
  match(Set dst (RoundD src));
  effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);

  format %{ "round_double $dst,$src \t! using $rtmp and $rcx as TEMP"%}
  ins_encode %{
    Register    dst_reg  = $dst$$Register;
    XMMRegister src_xmm  = $src$$XMMRegister;
    Register    scratch  = $rtmp$$Register;
    Register    rcx_temp = $rcx$$Register;
    __ round_double(dst_reg, src_xmm, scratch, rcx_temp);
  %}
  ins_pipe(pipe_slow);
%}
14867 
// RoundF: float to int via the round_float macro. Uses a general-purpose
// scratch register plus rcx as temporaries, and kills the flags.
instruct round_float_reg(rRegI dst, regF src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr) %{
  match(Set dst (RoundF src));
  effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);

  format %{ "round_float $dst,$src" %}
  ins_encode %{
    Register    dst_reg  = $dst$$Register;
    XMMRegister src_xmm  = $src$$XMMRegister;
    Register    scratch  = $rtmp$$Register;
    Register    rcx_temp = $rcx$$Register;
    __ round_float(dst_reg, src_xmm, scratch, rcx_temp);
  %}
  ins_pipe(pipe_slow);
%}
14878 
// ConvI2F via cvtsi2ssl, used when UseXmmI2F is off.
// With UseAVX > 0 the destination is cleared with pxor before the convert
// (presumably to avoid a dependency on the previous contents of dst -- confirm).
instruct convI2F_reg_reg(vlRegF dst, rRegI src) %{
  match(Set dst (ConvI2F src));
  predicate(!UseXmmI2F);

  format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
  ins_encode %{
    XMMRegister dst_xmm = $dst$$XMMRegister;
    if (UseAVX > 0) {
      __ pxor(dst_xmm, dst_xmm);
    }
    __ cvtsi2ssl(dst_xmm, $src$$Register);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14893 
// ConvI2F with the LoadI folded into the memory operand of cvtsi2ssl.
// Only selected when UseAVX == 0.
instruct convI2F_reg_mem(regF dst, memory src) %{
  match(Set dst (ConvI2F (LoadI src)));
  predicate(UseAVX == 0);

  format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
  ins_encode %{
    XMMRegister dst_xmm = $dst$$XMMRegister;
    __ cvtsi2ssl(dst_xmm, $src$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14905 
// ConvI2D via cvtsi2sdl, used when UseXmmI2D is off.
// With UseAVX > 0 the destination is cleared with pxor before the convert
// (presumably to avoid a dependency on the previous contents of dst -- confirm).
instruct convI2D_reg_reg(vlRegD dst, rRegI src) %{
  match(Set dst (ConvI2D src));
  predicate(!UseXmmI2D);

  format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
  ins_encode %{
    XMMRegister dst_xmm = $dst$$XMMRegister;
    if (UseAVX > 0) {
      __ pxor(dst_xmm, dst_xmm);
    }
    __ cvtsi2sdl(dst_xmm, $src$$Register);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14920 
// ConvI2D with the LoadI folded into the memory operand of cvtsi2sdl.
// Only selected when UseAVX == 0.
instruct convI2D_reg_mem(regD dst, memory src) %{
  match(Set dst (ConvI2D (LoadI src)));
  predicate(UseAVX == 0);

  format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
  ins_encode %{
    XMMRegister dst_xmm = $dst$$XMMRegister;
    __ cvtsi2sdl(dst_xmm, $src$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14932 
// ConvI2F used when UseXmmI2F is on: move the int into the XMM register
// with movdl, then convert it in place with cvtdq2ps.
instruct convXI2F_reg(regF dst, rRegI src) %{
  match(Set dst (ConvI2F src));
  predicate(UseXmmI2F);

  format %{ "movdl $dst, $src\n\t"
            "cvtdq2psl $dst, $dst\t# i2f" %}
  ins_encode %{
    XMMRegister dst_xmm = $dst$$XMMRegister;
    __ movdl(dst_xmm, $src$$Register);
    __ cvtdq2ps(dst_xmm, dst_xmm);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14946 
// ConvI2D used when UseXmmI2D is on: move the int into the XMM register
// with movdl, then convert it in place with cvtdq2pd.
instruct convXI2D_reg(regD dst, rRegI src) %{
  match(Set dst (ConvI2D src));
  predicate(UseXmmI2D);

  format %{ "movdl $dst, $src\n\t"
            "cvtdq2pdl $dst, $dst\t# i2d" %}
  ins_encode %{
    XMMRegister dst_xmm = $dst$$XMMRegister;
    __ movdl(dst_xmm, $src$$Register);
    __ cvtdq2pd(dst_xmm, dst_xmm);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14960 
// ConvL2F via cvtsi2ssq.
// With UseAVX > 0 the destination is cleared with pxor before the convert
// (presumably to avoid a dependency on the previous contents of dst -- confirm).
instruct convL2F_reg_reg(vlRegF dst, rRegL src) %{
  match(Set dst (ConvL2F src));

  format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
  ins_encode %{
    XMMRegister dst_xmm = $dst$$XMMRegister;
    if (UseAVX > 0) {
      __ pxor(dst_xmm, dst_xmm);
    }
    __ cvtsi2ssq(dst_xmm, $src$$Register);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14974 
// ConvL2F with the LoadL folded into the memory operand of cvtsi2ssq.
// Only selected when UseAVX == 0.
instruct convL2F_reg_mem(regF dst, memory src) %{
  match(Set dst (ConvL2F (LoadL src)));
  predicate(UseAVX == 0);

  format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
  ins_encode %{
    XMMRegister dst_xmm = $dst$$XMMRegister;
    __ cvtsi2ssq(dst_xmm, $src$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}
14986 
// ConvL2D via cvtsi2sdq.
// With UseAVX > 0 the destination is cleared with pxor before the convert
// (presumably to avoid a dependency on the previous contents of dst -- confirm).
instruct convL2D_reg_reg(vlRegD dst, rRegL src) %{
  match(Set dst (ConvL2D src));

  format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
  ins_encode %{
    XMMRegister dst_xmm = $dst$$XMMRegister;
    if (UseAVX > 0) {
      __ pxor(dst_xmm, dst_xmm);
    }
    __ cvtsi2sdq(dst_xmm, $src$$Register);
  %}
  ins_pipe(pipe_slow); // XXX
%}
15000 
// ConvL2D with the LoadL folded into the memory operand of cvtsi2sdq.
// Only selected when UseAVX == 0.
instruct convL2D_reg_mem(regD dst, memory src) %{
  match(Set dst (ConvL2D (LoadL src)));
  predicate(UseAVX == 0);

  format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
  ins_encode %{
    XMMRegister dst_xmm = $dst$$XMMRegister;
    __ cvtsi2sdq(dst_xmm, $src$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}
15012 
// ConvI2L: sign-extending int to long conversion using movslq.
instruct convI2L_reg_reg(rRegL dst, rRegI src) %{
  match(Set dst (ConvI2L src));
  ins_cost(125);

  format %{ "movslq  $dst, $src\t# i2l" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    Register src_reg = $src$$Register;
    __ movslq(dst_reg, src_reg);
  %}
  ins_pipe(ialu_reg_reg);
%}
15024 
// Zero-extend convert int to long: matches (ConvI2L src) masked with a
// 32-bit mask and implements it as a 32-bit register move.
// NOTE(review): the move is skipped when dst and src are the same register,
// which relies on the upper 32 bits of an int register already being zero --
// confirm this invariant against the rest of the port.
instruct convI2L_reg_reg_zex(rRegL dst, rRegI src, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L src) mask));

  // The format used to end in a stray "\n\t", which printed a dangling
  // blank line after this single-instruction pattern.
  format %{ "movl    $dst, $src\t# i2l zero-extend" %}
  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ movl($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg);
%}
15038 
// Zero-extend convert int to long, memory source: matches a LoadI that is
// converted to long and masked with a 32-bit mask, and implements the whole
// tree as a single 32-bit load.
instruct convI2L_reg_mem_zex(rRegL dst, memory src, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L (LoadI src)) mask));

  // The format used to end in a stray "\n\t", which printed a dangling
  // blank line after this single-instruction pattern.
  format %{ "movl    $dst, $src\t# i2l zero-extend" %}
  ins_encode %{
    __ movl($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
15050 
// Long AND with a 32-bit mask, implemented as a 32-bit register move.
// Unlike the int-source zero-extend pattern, the move is always emitted.
instruct zerox_long_reg_reg(rRegL dst, rRegL src, immL_32bits mask) %{
  match(Set dst (AndL src mask));

  format %{ "movl    $dst, $src\t# zero-extend long" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    Register src_reg = $src$$Register;
    __ movl(dst_reg, src_reg);
  %}
  ins_pipe(ialu_reg_reg);
%}
15061 
// ConvL2I: long to int conversion implemented as a 32-bit register move.
instruct convL2I_reg_reg(rRegI dst, rRegL src) %{
  match(Set dst (ConvL2I src));

  format %{ "movl    $dst, $src\t# l2i" %}
  ins_encode %{
    Register dst_reg = $dst$$Register;
    Register src_reg = $src$$Register;
    __ movl(dst_reg, src_reg);
  %}
  ins_pipe(ialu_reg_reg);
%}
15072 
15073 
// MoveF2I, stack slot to register: 32-bit load of a float stack slot
// (rsp-relative) into an integer register.
instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src)
%{
  match(Set dst (MoveF2I src));
  ins_cost(125);
  effect(DEF dst, USE src);

  format %{ "movl    $dst, $src\t# MoveF2I_stack_reg" %}
  ins_encode %{
    Address slot(rsp, $src$$disp);
    __ movl($dst$$Register, slot);
  %}
  ins_pipe(ialu_reg_mem);
%}
15085 
// MoveI2F, stack slot to register: load of an int stack slot (rsp-relative)
// into an XMM register using movflt.
instruct MoveI2F_stack_reg(regF dst, stackSlotI src)
%{
  match(Set dst (MoveI2F src));
  ins_cost(125);
  effect(DEF dst, USE src);

  format %{ "movss   $dst, $src\t# MoveI2F_stack_reg" %}
  ins_encode %{
    Address slot(rsp, $src$$disp);
    __ movflt($dst$$XMMRegister, slot);
  %}
  ins_pipe(pipe_slow);
%}
15097 
// MoveD2L, stack slot to register: 64-bit load of a double stack slot
// (rsp-relative) into a long register.
instruct MoveD2L_stack_reg(rRegL dst, stackSlotD src)
%{
  match(Set dst (MoveD2L src));
  ins_cost(125);
  effect(DEF dst, USE src);

  format %{ "movq    $dst, $src\t# MoveD2L_stack_reg" %}
  ins_encode %{
    Address slot(rsp, $src$$disp);
    __ movq($dst$$Register, slot);
  %}
  ins_pipe(ialu_reg_mem);
%}
15109 
// MoveL2D, stack slot to register, selected when UseXmmLoadAndClearUpper is
// off. The load is emitted through movdbl; the format prints it as movlpd.
instruct MoveL2D_stack_reg_partial(regD dst, stackSlotL src)
%{
  match(Set dst (MoveL2D src));
  predicate(!UseXmmLoadAndClearUpper);
  ins_cost(125);
  effect(DEF dst, USE src);

  format %{ "movlpd  $dst, $src\t# MoveL2D_stack_reg" %}
  ins_encode %{
    Address slot(rsp, $src$$disp);
    __ movdbl($dst$$XMMRegister, slot);
  %}
  ins_pipe(pipe_slow);
%}
15122 
// MoveL2D, stack slot to register, selected when UseXmmLoadAndClearUpper is
// on. The load is emitted through movdbl; the format prints it as movsd.
instruct MoveL2D_stack_reg(regD dst, stackSlotL src)
%{
  match(Set dst (MoveL2D src));
  predicate(UseXmmLoadAndClearUpper);
  ins_cost(125);
  effect(DEF dst, USE src);

  format %{ "movsd   $dst, $src\t# MoveL2D_stack_reg" %}
  ins_encode %{
    Address slot(rsp, $src$$disp);
    __ movdbl($dst$$XMMRegister, slot);
  %}
  ins_pipe(pipe_slow);
%}
15135 
15136 
// MoveF2I, register to stack slot: store an XMM float into an int stack
// slot (rsp-relative) using movflt.
instruct MoveF2I_reg_stack(stackSlotI dst, regF src)
%{
  match(Set dst (MoveF2I src));
  ins_cost(95); // XXX
  effect(DEF dst, USE src);

  format %{ "movss   $dst, $src\t# MoveF2I_reg_stack" %}
  ins_encode %{
    Address slot(rsp, $dst$$disp);
    __ movflt(slot, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
15148 
// MoveI2F, register to stack slot: 32-bit store of an int register into a
// float stack slot (rsp-relative).
instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src)
%{
  match(Set dst (MoveI2F src));
  ins_cost(100);
  effect(DEF dst, USE src);

  format %{ "movl    $dst, $src\t# MoveI2F_reg_stack" %}
  ins_encode %{
    Address slot(rsp, $dst$$disp);
    __ movl(slot, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
15160 
// MoveD2L, register to stack slot: store an XMM double into a long stack
// slot (rsp-relative) using movdbl.
instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(95); // XXX
  // The trailing tag used to read "MoveL2D_reg_stack" (copy/paste from the
  // sibling pattern), which mislabelled this instruction in printed assembly.
  format %{ "movsd   $dst, $src\t# MoveD2L_reg_stack" %}
  ins_encode %{
    __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
15172 
// MoveL2D, register to stack slot: 64-bit store of a long register into a
// double stack slot (rsp-relative).
instruct MoveL2D_reg_stack(stackSlotD dst, rRegL src)
%{
  match(Set dst (MoveL2D src));
  ins_cost(100);
  effect(DEF dst, USE src);

  format %{ "movq    $dst, $src\t# MoveL2D_reg_stack" %}
  ins_encode %{
    Address slot(rsp, $dst$$disp);
    __ movq(slot, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
15184 
// MoveF2I, register to register: copy from an XMM register to an integer
// register with movdl.
instruct MoveF2I_reg_reg(rRegI dst, regF src)
%{
  match(Set dst (MoveF2I src));
  ins_cost(85);
  effect(DEF dst, USE src);

  format %{ "movd    $dst,$src\t# MoveF2I" %}
  ins_encode %{
    Register    dst_reg = $dst$$Register;
    XMMRegister src_xmm = $src$$XMMRegister;
    __ movdl(dst_reg, src_xmm);
  %}
  ins_pipe(pipe_slow);
%}
15195 
// MoveD2L, register to register: copy from an XMM register to a long
// register with movdq.
instruct MoveD2L_reg_reg(rRegL dst, regD src)
%{
  match(Set dst (MoveD2L src));
  ins_cost(85);
  effect(DEF dst, USE src);

  format %{ "movd    $dst,$src\t# MoveD2L" %}
  ins_encode %{
    Register    dst_reg = $dst$$Register;
    XMMRegister src_xmm = $src$$XMMRegister;
    __ movdq(dst_reg, src_xmm);
  %}
  ins_pipe(pipe_slow);
%}
15206 
// MoveI2F, register to register: copy from an integer register to an XMM
// register with movdl.
instruct MoveI2F_reg_reg(regF dst, rRegI src)
%{
  match(Set dst (MoveI2F src));
  ins_cost(100);
  effect(DEF dst, USE src);

  format %{ "movd    $dst,$src\t# MoveI2F" %}
  ins_encode %{
    XMMRegister dst_xmm = $dst$$XMMRegister;
    Register    src_reg = $src$$Register;
    __ movdl(dst_xmm, src_reg);
  %}
  ins_pipe(pipe_slow);
%}
15217 
// MoveL2D, register to register: copy from a long register to an XMM
// register with movdq.
instruct MoveL2D_reg_reg(regD dst, rRegL src)
%{
  match(Set dst (MoveL2D src));
  ins_cost(100);
  effect(DEF dst, USE src);

  format %{ "movd    $dst,$src\t# MoveL2D" %}
  ins_encode %{
    XMMRegister dst_xmm = $dst$$XMMRegister;
    Register    src_reg = $src$$Register;
    __ movdq(dst_xmm, src_reg);
  %}
  ins_pipe(pipe_slow);
%}
15228 
// Fast clearing of an array
// Small non-constant length ClearArray for non-AVX512 targets (UseAVX <= 2).
// The count lives in rcx and the base in rdi; both are consumed, and rax
// and the flags are killed. The work is done by the clear_mem macro; the
// format template below describes the code it is expected to produce.
instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
                  Universe dummy, rFlagsReg cr) %{
  match(Set dummy (ClearArray cnt base));
  predicate((UseAVX <= 2) && !((ClearArrayNode*)n)->is_large());
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);

  format %{ $$template
    $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
    $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
    $$emit$$"jg      LARGE\n\t"
    $$emit$$"dec     rcx\n\t"
    $$emit$$"js      DONE\t# Zero length\n\t"
    $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec     rcx\n\t"
    $$emit$$"jge     LOOP\n\t"
    $$emit$$"jmp     DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
      $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov     rdi,rax\n\t"
      $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq    L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
      $$emit$$"add     0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub     0x8,rcx\n\t"
      $$emit$$"jge     L_loop\n\t"
      $$emit$$"add     0x4,rcx\n\t"
      $$emit$$"jl      L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add     0x20,rax\n\t"
      $$emit$$"sub     0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add     0x4,rcx\n\t"
      $$emit$$"jle     L_end\n\t"
      $$emit$$"dec     rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq   xmm0,(rax)\n\t"
      $$emit$$"add     0x8,rax\n\t"
      $$emit$$"dec     rcx\n\t"
      $$emit$$"jge     L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // The false flag mirrors the not-large predicate; no opmask register
    // is supplied on this path (knoreg).
    const bool large_array = false;
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, large_array, knoreg);
  %}
  ins_pipe(pipe_slow);
%}
15289 
// Small non-constant length ClearArray for AVX512 targets (UseAVX > 2).
// Same register contract as the non-AVX512 form, plus an opmask temporary
// that is handed to clear_mem. The format template below describes the code
// the macro is expected to produce.
instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
                       Universe dummy, rFlagsReg cr) %{
  match(Set dummy (ClearArray cnt base));
  predicate((UseAVX > 2) && !((ClearArrayNode*)n)->is_large());
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
  ins_cost(125);

  format %{ $$template
    $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
    $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
    $$emit$$"jg      LARGE\n\t"
    $$emit$$"dec     rcx\n\t"
    $$emit$$"js      DONE\t# Zero length\n\t"
    $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec     rcx\n\t"
    $$emit$$"jge     LOOP\n\t"
    $$emit$$"jmp     DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
      $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov     rdi,rax\n\t"
      $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq    L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
      $$emit$$"add     0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub     0x8,rcx\n\t"
      $$emit$$"jge     L_loop\n\t"
      $$emit$$"add     0x4,rcx\n\t"
      $$emit$$"jl      L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add     0x20,rax\n\t"
      $$emit$$"sub     0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add     0x4,rcx\n\t"
      $$emit$$"jle     L_end\n\t"
      $$emit$$"dec     rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq   xmm0,(rax)\n\t"
      $$emit$$"add     0x8,rax\n\t"
      $$emit$$"dec     rcx\n\t"
      $$emit$$"jge     L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // The false flag mirrors the not-large predicate; the opmask temporary
    // is passed through to the macro.
    const bool large_array = false;
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, large_array, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
15350 
// Large non-constant length ClearArray for non-AVX512 targets (UseAVX <= 2).
// Selected when the ClearArray node reports is_large(). The count lives in
// rcx and the base in rdi; both are consumed, and rax and the flags are
// killed. The format template has no short-array prologue.
instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
                        Universe dummy, rFlagsReg cr) %{
  match(Set dummy (ClearArray cnt base));
  predicate(((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);

  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
      $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
      $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq    L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
      $$emit$$"add     0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub     0x8,rcx\n\t"
      $$emit$$"jge     L_loop\n\t"
      $$emit$$"add     0x4,rcx\n\t"
      $$emit$$"jl      L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add     0x20,rax\n\t"
      $$emit$$"sub     0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add     0x4,rcx\n\t"
      $$emit$$"jle     L_end\n\t"
      $$emit$$"dec     rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq   xmm0,(rax)\n\t"
      $$emit$$"add     0x8,rax\n\t"
      $$emit$$"dec     rcx\n\t"
      $$emit$$"jge     L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
      $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // The true flag mirrors the is_large predicate; no opmask register is
    // supplied on this path (knoreg).
    const bool large_array = true;
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, large_array, knoreg);
  %}
  ins_pipe(pipe_slow);
%}
15401 
// Large non-constant length ClearArray for AVX512 targets (UseAVX > 2).
// Selected when the ClearArray node reports is_large(). Same register
// contract as the non-AVX512 large form, plus an opmask temporary that is
// handed to clear_mem.
instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
                             Universe dummy, rFlagsReg cr) %{
  match(Set dummy (ClearArray cnt base));
  predicate(((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);

  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
      $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
      $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
      $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
      $$emit$$"jmpq    L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
      $$emit$$"add     0x40,rax\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"sub     0x8,rcx\n\t"
      $$emit$$"jge     L_loop\n\t"
      $$emit$$"add     0x4,rcx\n\t"
      $$emit$$"jl      L_tail\n\t"
      $$emit$$"vmovdqu ymm0,(rax)\n\t"
      $$emit$$"add     0x20,rax\n\t"
      $$emit$$"sub     0x4,rcx\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"add     0x4,rcx\n\t"
      $$emit$$"jle     L_end\n\t"
      $$emit$$"dec     rcx\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"vmovq   xmm0,(rax)\n\t"
      $$emit$$"add     0x8,rax\n\t"
      $$emit$$"dec     rcx\n\t"
      $$emit$$"jge     L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
      $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    // The true flag mirrors the is_large predicate; the opmask temporary
    // is passed through to the macro.
    const bool large_array = true;
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, large_array, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
15452 
// Small constant length ClearArray for AVX512 targets.
// Selected when the node is not large, MaxVectorSize >= 32 and AVX512VL is
// supported. The element count is an immediate, so the constant-count
// overload of clear_mem is used; base may be any pointer register, and the
// zero, vector and opmask registers are plain temporaries.
instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rRegI zero, kReg ktmp, Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() && (MaxVectorSize >= 32) && VM_Version::supports_avx512vl());
  match(Set dummy (ClearArray cnt base));
  ins_cost(100);
  effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
  // The format used to carry stray spaces and a trailing "\n\t", which
  // printed a dangling blank line after this pattern.
  format %{ "clear_mem_imm $base, $cnt" %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
15466 
// StrComp intrinsic for the LL encoding, used when
// VM_Version::supports_avx512vlbw() is false. Operands are pinned to
// rdi/rcx (first string and count) and rsi/rdx (second string and count);
// all four are consumed and the result is produced in rax.
instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                         rax_RegI result, legRegD tmp1, rFlagsReg cr) %{
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL && !VM_Version::supports_avx512vlbw());
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    // No opmask register on this path.
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      $result$$Register, $tmp1$$XMMRegister,
                      StrIntrinsicNode::LL, knoreg);
  %}
  ins_pipe(pipe_slow);
%}
15482 
// StrComp intrinsic for the LL encoding, used when
// VM_Version::supports_avx512vlbw() is true. Same register contract as the
// non-EVEX form, plus an opmask temporary handed to string_compare.
instruct string_compareL_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                              rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr) %{
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL && VM_Version::supports_avx512vlbw());
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      $result$$Register, $tmp1$$XMMRegister,
                      StrIntrinsicNode::LL, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
15498 
// StrComp intrinsic for the UU encoding, used when
// VM_Version::supports_avx512vlbw() is false. Operands are pinned to
// rdi/rcx (first string and count) and rsi/rdx (second string and count);
// all four are consumed and the result is produced in rax.
instruct string_compareU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                         rax_RegI result, legRegD tmp1, rFlagsReg cr) %{
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU && !VM_Version::supports_avx512vlbw());
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    // No opmask register on this path.
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      $result$$Register, $tmp1$$XMMRegister,
                      StrIntrinsicNode::UU, knoreg);
  %}
  ins_pipe(pipe_slow);
%}
15514 
// StrComp intrinsic for the UU encoding, used when
// VM_Version::supports_avx512vlbw() is true. Same register contract as the
// non-EVEX form, plus an opmask temporary handed to string_compare.
instruct string_compareU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                              rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr) %{
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU && VM_Version::supports_avx512vlbw());
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      $result$$Register, $tmp1$$XMMRegister,
                      StrIntrinsicNode::UU, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
15530 
// StrComp intrinsic for the LU encoding, used when
// VM_Version::supports_avx512vlbw() is false. Operands are pinned to
// rdi/rcx (first string and count) and rsi/rdx (second string and count);
// all four are consumed and the result is produced in rax.
instruct string_compareLU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                          rax_RegI result, legRegD tmp1, rFlagsReg cr) %{
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU && !VM_Version::supports_avx512vlbw());
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    // No opmask register on this path.
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      $result$$Register, $tmp1$$XMMRegister,
                      StrIntrinsicNode::LU, knoreg);
  %}
  ins_pipe(pipe_slow);
%}
15546 
// StrComp intrinsic for the LU encoding, used when
// VM_Version::supports_avx512vlbw() is true. Same register contract as the
// non-EVEX form, plus an opmask temporary handed to string_compare.
instruct string_compareLU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                               rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr) %{
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU && VM_Version::supports_avx512vlbw());
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      $result$$Register, $tmp1$$XMMRegister,
                      StrIntrinsicNode::LU, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
15562 
15563 instruct string_compareUL(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
15564                           rax_RegI result, legRegD tmp1, rFlagsReg cr)
15565 %{
15566   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
15567   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15568   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15569 
15570   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15571   ins_encode %{
15572     __ string_compare($str2$$Register, $str1$$Register,
15573                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
15574                       $tmp1$$XMMRegister, StrIntrinsicNode::UL, knoreg);
15575   %}
15576   ins_pipe( pipe_slow );
15577 %}
15578 
15579 instruct string_compareUL_evex(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
15580                                rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
15581 %{
15582   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
15583   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15584   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15585 
15586   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
15587   ins_encode %{
15588     __ string_compare($str2$$Register, $str1$$Register,
15589                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
15590                       $tmp1$$XMMRegister, StrIntrinsicNode::UL, $ktmp$$KRegister);
15591   %}
15592   ins_pipe( pipe_slow );
15593 %}
15594 
15595 // fast search of substring with known size.
15596 instruct string_indexof_conL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15597                              rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15598 %{
15599   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
15600   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15601   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15602 
15603   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15604   ins_encode %{
15605     int icnt2 = (int)$int_cnt2$$constant;
15606     if (icnt2 >= 16) {
15607       // IndexOf for constant substrings with size >= 16 elements
15608       // which don't need to be loaded through stack.
15609       __ string_indexofC8($str1$$Register, $str2$$Register,
15610                           $cnt1$$Register, $cnt2$$Register,
15611                           icnt2, $result$$Register,
15612                           $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15613     } else {
15614       // Small strings are loaded through stack if they cross page boundary.
15615       __ string_indexof($str1$$Register, $str2$$Register,
15616                         $cnt1$$Register, $cnt2$$Register,
15617                         icnt2, $result$$Register,
15618                         $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15619     }
15620   %}
15621   ins_pipe( pipe_slow );
15622 %}
15623 
15624 // fast search of substring with known size.
15625 instruct string_indexof_conU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15626                              rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15627 %{
15628   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
15629   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15630   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15631 
15632   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15633   ins_encode %{
15634     int icnt2 = (int)$int_cnt2$$constant;
15635     if (icnt2 >= 8) {
15636       // IndexOf for constant substrings with size >= 8 elements
15637       // which don't need to be loaded through stack.
15638       __ string_indexofC8($str1$$Register, $str2$$Register,
15639                           $cnt1$$Register, $cnt2$$Register,
15640                           icnt2, $result$$Register,
15641                           $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15642     } else {
15643       // Small strings are loaded through stack if they cross page boundary.
15644       __ string_indexof($str1$$Register, $str2$$Register,
15645                         $cnt1$$Register, $cnt2$$Register,
15646                         icnt2, $result$$Register,
15647                         $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15648     }
15649   %}
15650   ins_pipe( pipe_slow );
15651 %}
15652 
15653 // fast search of substring with known size.
15654 instruct string_indexof_conUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
15655                               rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
15656 %{
15657   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
15658   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15659   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
15660 
15661   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
15662   ins_encode %{
15663     int icnt2 = (int)$int_cnt2$$constant;
15664     if (icnt2 >= 8) {
15665       // IndexOf for constant substrings with size >= 8 elements
15666       // which don't need to be loaded through stack.
15667       __ string_indexofC8($str1$$Register, $str2$$Register,
15668                           $cnt1$$Register, $cnt2$$Register,
15669                           icnt2, $result$$Register,
15670                           $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15671     } else {
15672       // Small strings are loaded through stack if they cross page boundary.
15673       __ string_indexof($str1$$Register, $str2$$Register,
15674                         $cnt1$$Register, $cnt2$$Register,
15675                         icnt2, $result$$Register,
15676                         $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15677     }
15678   %}
15679   ins_pipe( pipe_slow );
15680 %}
15681 
15682 instruct string_indexofL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15683                          rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15684 %{
15685   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
15686   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15687   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15688 
15689   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
15690   ins_encode %{
15691     __ string_indexof($str1$$Register, $str2$$Register,
15692                       $cnt1$$Register, $cnt2$$Register,
15693                       (-1), $result$$Register,
15694                       $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
15695   %}
15696   ins_pipe( pipe_slow );
15697 %}
15698 
15699 instruct string_indexofU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15700                          rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15701 %{
15702   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
15703   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15704   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15705 
15706   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
15707   ins_encode %{
15708     __ string_indexof($str1$$Register, $str2$$Register,
15709                       $cnt1$$Register, $cnt2$$Register,
15710                       (-1), $result$$Register,
15711                       $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
15712   %}
15713   ins_pipe( pipe_slow );
15714 %}
15715 
15716 instruct string_indexofUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
15717                           rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
15718 %{
15719   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
15720   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15721   effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
15722 
15723   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
15724   ins_encode %{
15725     __ string_indexof($str1$$Register, $str2$$Register,
15726                       $cnt1$$Register, $cnt2$$Register,
15727                       (-1), $result$$Register,
15728                       $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
15729   %}
15730   ins_pipe( pipe_slow );
15731 %}
15732 
15733 instruct string_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
15734                               rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr)
15735 %{
15736   predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
15737   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
15738   effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
15739   format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
15740   ins_encode %{
15741     __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
15742                            $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register);
15743   %}
15744   ins_pipe( pipe_slow );
15745 %}
15746 
15747 instruct stringL_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
15748                               rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr)
15749 %{
15750   predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
15751   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
15752   effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
15753   format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
15754   ins_encode %{
15755     __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
15756                            $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register);
15757   %}
15758   ins_pipe( pipe_slow );
15759 %}
15760 
15761 // fast string equals
15762 instruct string_equals(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
15763                        legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
15764 %{
15765   predicate(!VM_Version::supports_avx512vlbw());
15766   match(Set result (StrEquals (Binary str1 str2) cnt));
15767   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
15768 
15769   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
15770   ins_encode %{
15771     __ arrays_equals(false, $str1$$Register, $str2$$Register,
15772                      $cnt$$Register, $result$$Register, $tmp3$$Register,
15773                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
15774   %}
15775   ins_pipe( pipe_slow );
15776 %}
15777 
15778 instruct string_equals_evex(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
15779                            legRegD tmp1, legRegD tmp2, kReg ktmp, rbx_RegI tmp3, rFlagsReg cr)
15780 %{
15781   predicate(VM_Version::supports_avx512vlbw());
15782   match(Set result (StrEquals (Binary str1 str2) cnt));
15783   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
15784 
15785   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
15786   ins_encode %{
15787     __ arrays_equals(false, $str1$$Register, $str2$$Register,
15788                      $cnt$$Register, $result$$Register, $tmp3$$Register,
15789                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
15790   %}
15791   ins_pipe( pipe_slow );
15792 %}
15793 
15794 // fast array equals
15795 instruct array_equalsB(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15796                        legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15797 %{
15798   predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
15799   match(Set result (AryEq ary1 ary2));
15800   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15801 
15802   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15803   ins_encode %{
15804     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15805                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
15806                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
15807   %}
15808   ins_pipe( pipe_slow );
15809 %}
15810 
15811 instruct array_equalsB_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15812                             legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15813 %{
15814   predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
15815   match(Set result (AryEq ary1 ary2));
15816   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15817 
15818   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15819   ins_encode %{
15820     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15821                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
15822                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
15823   %}
15824   ins_pipe( pipe_slow );
15825 %}
15826 
15827 instruct array_equalsC(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15828                        legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15829 %{
15830   predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
15831   match(Set result (AryEq ary1 ary2));
15832   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15833 
15834   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15835   ins_encode %{
15836     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15837                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
15838                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, knoreg);
15839   %}
15840   ins_pipe( pipe_slow );
15841 %}
15842 
15843 instruct array_equalsC_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
15844                             legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
15845 %{
15846   predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
15847   match(Set result (AryEq ary1 ary2));
15848   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
15849 
15850   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
15851   ins_encode %{
15852     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
15853                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
15854                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, $ktmp$$KRegister);
15855   %}
15856   ins_pipe( pipe_slow );
15857 %}
15858 
15859 instruct arrays_hashcode(rdi_RegP ary1, rdx_RegI cnt1, rbx_RegI result, immU8 basic_type,
15860                          legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, legRegD tmp_vec4,
15861                          legRegD tmp_vec5, legRegD tmp_vec6, legRegD tmp_vec7, legRegD tmp_vec8,
15862                          legRegD tmp_vec9, legRegD tmp_vec10, legRegD tmp_vec11, legRegD tmp_vec12,
15863                          legRegD tmp_vec13, rRegI tmp1, rRegI tmp2, rRegI tmp3, rFlagsReg cr)
15864 %{
15865   predicate(UseAVX >= 2);
15866   match(Set result (VectorizedHashCode (Binary ary1 cnt1) (Binary result basic_type)));
15867   effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, TEMP tmp_vec4, TEMP tmp_vec5, TEMP tmp_vec6,
15868          TEMP tmp_vec7, TEMP tmp_vec8, TEMP tmp_vec9, TEMP tmp_vec10, TEMP tmp_vec11, TEMP tmp_vec12,
15869          TEMP tmp_vec13, TEMP tmp1, TEMP tmp2, TEMP tmp3, USE_KILL ary1, USE_KILL cnt1,
15870          USE basic_type, KILL cr);
15871 
15872   format %{ "Array HashCode array[] $ary1,$cnt1,$result,$basic_type -> $result   // KILL all" %}
15873   ins_encode %{
15874     __ arrays_hashcode($ary1$$Register, $cnt1$$Register, $result$$Register,
15875                        $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
15876                        $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister,
15877                        $tmp_vec4$$XMMRegister, $tmp_vec5$$XMMRegister, $tmp_vec6$$XMMRegister,
15878                        $tmp_vec7$$XMMRegister, $tmp_vec8$$XMMRegister, $tmp_vec9$$XMMRegister,
15879                        $tmp_vec10$$XMMRegister, $tmp_vec11$$XMMRegister, $tmp_vec12$$XMMRegister,
15880                        $tmp_vec13$$XMMRegister, (BasicType)$basic_type$$constant);
15881   %}
15882   ins_pipe( pipe_slow );
15883 %}
15884 
15885 instruct count_positives(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
15886                          legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr,)
15887 %{
15888   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
15889   match(Set result (CountPositives ary1 len));
15890   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
15891 
15892   format %{ "countPositives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
15893   ins_encode %{
15894     __ count_positives($ary1$$Register, $len$$Register,
15895                        $result$$Register, $tmp3$$Register,
15896                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
15897   %}
15898   ins_pipe( pipe_slow );
15899 %}
15900 
15901 instruct count_positives_evex(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
15902                               legRegD tmp1, legRegD tmp2, kReg ktmp1, kReg ktmp2, rbx_RegI tmp3, rFlagsReg cr,)
15903 %{
15904   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
15905   match(Set result (CountPositives ary1 len));
15906   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
15907 
15908   format %{ "countPositives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
15909   ins_encode %{
15910     __ count_positives($ary1$$Register, $len$$Register,
15911                        $result$$Register, $tmp3$$Register,
15912                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
15913   %}
15914   ins_pipe( pipe_slow );
15915 %}
15916 
15917 // fast char[] to byte[] compression
15918 instruct string_compress(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
15919                          legRegD tmp4, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
15920   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
15921   match(Set result (StrCompressedCopy src (Binary dst len)));
15922   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst,
15923          USE_KILL len, KILL tmp5, KILL cr);
15924 
15925   format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
15926   ins_encode %{
15927     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
15928                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
15929                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
15930                            knoreg, knoreg);
15931   %}
15932   ins_pipe( pipe_slow );
15933 %}
15934 
15935 instruct string_compress_evex(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
15936                               legRegD tmp4, kReg ktmp1, kReg ktmp2, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
15937   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
15938   match(Set result (StrCompressedCopy src (Binary dst len)));
15939   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst,
15940          USE_KILL len, KILL tmp5, KILL cr);
15941 
15942   format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
15943   ins_encode %{
15944     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
15945                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
15946                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
15947                            $ktmp1$$KRegister, $ktmp2$$KRegister);
15948   %}
15949   ins_pipe( pipe_slow );
15950 %}
15951 // fast byte[] to char[] inflation
15952 instruct string_inflate(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15953                         legRegD tmp1, rcx_RegI tmp2, rFlagsReg cr) %{
15954   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
15955   match(Set dummy (StrInflatedCopy src (Binary dst len)));
15956   effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
15957 
15958   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
15959   ins_encode %{
15960     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
15961                           $tmp1$$XMMRegister, $tmp2$$Register, knoreg);
15962   %}
15963   ins_pipe( pipe_slow );
15964 %}
15965 
15966 instruct string_inflate_evex(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15967                              legRegD tmp1, kReg ktmp, rcx_RegI tmp2, rFlagsReg cr) %{
15968   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
15969   match(Set dummy (StrInflatedCopy src (Binary dst len)));
15970   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
15971 
15972   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
15973   ins_encode %{
15974     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
15975                           $tmp1$$XMMRegister, $tmp2$$Register, $ktmp$$KRegister);
15976   %}
15977   ins_pipe( pipe_slow );
15978 %}
15979 
15980 // encode char[] to byte[] in ISO_8859_1
15981 instruct encode_iso_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15982                           legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
15983                           rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
15984   predicate(!((EncodeISOArrayNode*)n)->is_ascii());
15985   match(Set result (EncodeISOArray src (Binary dst len)));
15986   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
15987 
15988   format %{ "Encode iso array $src,$dst,$len -> $result    // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
15989   ins_encode %{
15990     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
15991                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
15992                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, false);
15993   %}
15994   ins_pipe( pipe_slow );
15995 %}
15996 
15997 // encode char[] to byte[] in ASCII
15998 instruct encode_ascii_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
15999                             legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
16000                             rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
16001   predicate(((EncodeISOArrayNode*)n)->is_ascii());
16002   match(Set result (EncodeISOArray src (Binary dst len)));
16003   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
16004 
16005   format %{ "Encode ascii array $src,$dst,$len -> $result    // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
16006   ins_encode %{
16007     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
16008                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
16009                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, true);
16010   %}
16011   ins_pipe( pipe_slow );
16012 %}
16013 
16014 //----------Overflow Math Instructions-----------------------------------------
16015 
16016 instruct overflowAddI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
16017 %{
16018   match(Set cr (OverflowAddI op1 op2));
16019   effect(DEF cr, USE_KILL op1, USE op2);
16020 
16021   format %{ "addl    $op1, $op2\t# overflow check int" %}
16022 
16023   ins_encode %{
16024     __ addl($op1$$Register, $op2$$Register);
16025   %}
16026   ins_pipe(ialu_reg_reg);
16027 %}
16028 
16029 instruct overflowAddI_rReg_imm(rFlagsReg cr, rax_RegI op1, immI op2)
16030 %{
16031   match(Set cr (OverflowAddI op1 op2));
16032   effect(DEF cr, USE_KILL op1, USE op2);
16033 
16034   format %{ "addl    $op1, $op2\t# overflow check int" %}
16035 
16036   ins_encode %{
16037     __ addl($op1$$Register, $op2$$constant);
16038   %}
16039   ins_pipe(ialu_reg_reg);
16040 %}
16041 
16042 instruct overflowAddL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
16043 %{
16044   match(Set cr (OverflowAddL op1 op2));
16045   effect(DEF cr, USE_KILL op1, USE op2);
16046 
16047   format %{ "addq    $op1, $op2\t# overflow check long" %}
16048   ins_encode %{
16049     __ addq($op1$$Register, $op2$$Register);
16050   %}
16051   ins_pipe(ialu_reg_reg);
16052 %}
16053 
16054 instruct overflowAddL_rReg_imm(rFlagsReg cr, rax_RegL op1, immL32 op2)
16055 %{
16056   match(Set cr (OverflowAddL op1 op2));
16057   effect(DEF cr, USE_KILL op1, USE op2);
16058 
16059   format %{ "addq    $op1, $op2\t# overflow check long" %}
16060   ins_encode %{
16061     __ addq($op1$$Register, $op2$$constant);
16062   %}
16063   ins_pipe(ialu_reg_reg);
16064 %}
16065 
16066 instruct overflowSubI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
16067 %{
16068   match(Set cr (OverflowSubI op1 op2));
16069 
16070   format %{ "cmpl    $op1, $op2\t# overflow check int" %}
16071   ins_encode %{
16072     __ cmpl($op1$$Register, $op2$$Register);
16073   %}
16074   ins_pipe(ialu_reg_reg);
16075 %}
16076 
16077 instruct overflowSubI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
16078 %{
16079   match(Set cr (OverflowSubI op1 op2));
16080 
16081   format %{ "cmpl    $op1, $op2\t# overflow check int" %}
16082   ins_encode %{
16083     __ cmpl($op1$$Register, $op2$$constant);
16084   %}
16085   ins_pipe(ialu_reg_reg);
16086 %}
16087 
16088 instruct overflowSubL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
16089 %{
16090   match(Set cr (OverflowSubL op1 op2));
16091 
16092   format %{ "cmpq    $op1, $op2\t# overflow check long" %}
16093   ins_encode %{
16094     __ cmpq($op1$$Register, $op2$$Register);
16095   %}
16096   ins_pipe(ialu_reg_reg);
16097 %}
16098 
16099 instruct overflowSubL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
16100 %{
16101   match(Set cr (OverflowSubL op1 op2));
16102 
16103   format %{ "cmpq    $op1, $op2\t# overflow check long" %}
16104   ins_encode %{
16105     __ cmpq($op1$$Register, $op2$$constant);
16106   %}
16107   ins_pipe(ialu_reg_reg);
16108 %}
16109 
16110 instruct overflowNegI_rReg(rFlagsReg cr, immI_0 zero, rax_RegI op2)
16111 %{
16112   match(Set cr (OverflowSubI zero op2));
16113   effect(DEF cr, USE_KILL op2);
16114 
16115   format %{ "negl    $op2\t# overflow check int" %}
16116   ins_encode %{
16117     __ negl($op2$$Register);
16118   %}
16119   ins_pipe(ialu_reg_reg);
16120 %}
16121 
16122 instruct overflowNegL_rReg(rFlagsReg cr, immL0 zero, rax_RegL op2)
16123 %{
16124   match(Set cr (OverflowSubL zero op2));
16125   effect(DEF cr, USE_KILL op2);
16126 
16127   format %{ "negq    $op2\t# overflow check long" %}
16128   ins_encode %{
16129     __ negq($op2$$Register);
16130   %}
16131   ins_pipe(ialu_reg_reg);
16132 %}
16133 
16134 instruct overflowMulI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
16135 %{
16136   match(Set cr (OverflowMulI op1 op2));
16137   effect(DEF cr, USE_KILL op1, USE op2);
16138 
16139   format %{ "imull    $op1, $op2\t# overflow check int" %}
16140   ins_encode %{
16141     __ imull($op1$$Register, $op2$$Register);
16142   %}
16143   ins_pipe(ialu_reg_reg_alu0);
16144 %}
16145 
16146 instruct overflowMulI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
16147 %{
16148   match(Set cr (OverflowMulI op1 op2));
16149   effect(DEF cr, TEMP tmp, USE op1, USE op2);
16150 
16151   format %{ "imull    $tmp, $op1, $op2\t# overflow check int" %}
16152   ins_encode %{
16153     __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
16154   %}
16155   ins_pipe(ialu_reg_reg_alu0);
16156 %}
16157 
16158 instruct overflowMulL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
16159 %{
16160   match(Set cr (OverflowMulL op1 op2));
16161   effect(DEF cr, USE_KILL op1, USE op2);
16162 
16163   format %{ "imulq    $op1, $op2\t# overflow check long" %}
16164   ins_encode %{
16165     __ imulq($op1$$Register, $op2$$Register);
16166   %}
16167   ins_pipe(ialu_reg_reg_alu0);
16168 %}
16169 
16170 instruct overflowMulL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2, rRegL tmp)
16171 %{
16172   match(Set cr (OverflowMulL op1 op2));
16173   effect(DEF cr, TEMP tmp, USE op1, USE op2);
16174 
16175   format %{ "imulq    $tmp, $op1, $op2\t# overflow check long" %}
16176   ins_encode %{
16177     __ imulq($tmp$$Register, $op1$$Register, $op2$$constant);
16178   %}
16179   ins_pipe(ialu_reg_reg_alu0);
16180 %}
16181 
16182 
16183 //----------Control Flow Instructions------------------------------------------
16184 // Signed compare Instructions
16185 
16186 // XXX more variants!!
16187 instruct compI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
16188 %{
16189   match(Set cr (CmpI op1 op2));
16190   effect(DEF cr, USE op1, USE op2);
16191 
16192   format %{ "cmpl    $op1, $op2" %}
16193   ins_encode %{
16194     __ cmpl($op1$$Register, $op2$$Register);
16195   %}
16196   ins_pipe(ialu_cr_reg_reg);
16197 %}
16198 
16199 instruct compI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
16200 %{
16201   match(Set cr (CmpI op1 op2));
16202 
16203   format %{ "cmpl    $op1, $op2" %}
16204   ins_encode %{
16205     __ cmpl($op1$$Register, $op2$$constant);
16206   %}
16207   ins_pipe(ialu_cr_reg_imm);
16208 %}
16209 
// Signed int compare, register vs memory: folds the LoadI of op2 into the
// cmpl instruction's memory operand. Carries an explicit ins_cost of 500
// (flagged XXX by the original author).
16210 instruct compI_rReg_mem(rFlagsReg cr, rRegI op1, memory op2)
16211 %{
16212   match(Set cr (CmpI op1 (LoadI op2)));
16213 
16214   ins_cost(500); // XXX
16215   format %{ "cmpl    $op1, $op2" %}
16216   ins_encode %{
16217     __ cmpl($op1$$Register, $op2$$Address);
16218   %}
16219   ins_pipe(ialu_cr_reg_mem);
16220 %}
16221 
// Int compare against the constant zero (immI_0): emitted as
// testl src, src instead of a cmpl with an immediate.
16222 instruct testI_reg(rFlagsReg cr, rRegI src, immI_0 zero)
16223 %{
16224   match(Set cr (CmpI src zero));
16225 
16226   format %{ "testl   $src, $src" %}
16227   ins_encode %{
16228     __ testl($src$$Register, $src$$Register);
16229   %}
16230   ins_pipe(ialu_cr_reg_imm);
16231 %}
16232 
// Folds (CmpI (AndI src con) 0) into a single testl src, con, so the AND
// result is never materialized in a register.
16233 instruct testI_reg_imm(rFlagsReg cr, rRegI src, immI con, immI_0 zero)
16234 %{
16235   match(Set cr (CmpI (AndI src con) zero));
16236 
16237   format %{ "testl   $src, $con" %}
16238   ins_encode %{
16239     __ testl($src$$Register, $con$$constant);
16240   %}
16241   ins_pipe(ialu_cr_reg_imm);
16242 %}
16243 
// Folds (CmpI (AndI src1 src2) 0) into a single testl src1, src2.
16244 instruct testI_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2, immI_0 zero)
16245 %{
16246   match(Set cr (CmpI (AndI src1 src2) zero));
16247 
16248   format %{ "testl   $src1, $src2" %}
16249   ins_encode %{
16250     __ testl($src1$$Register, $src2$$Register);
16251   %}
16252   ins_pipe(ialu_cr_reg_imm);
16253 %}
16254 
// Folds (CmpI (AndI src (LoadI mem)) 0) into a single testl with a memory
// operand, absorbing both the load and the AND.
16255 instruct testI_reg_mem(rFlagsReg cr, rRegI src, memory mem, immI_0 zero)
16256 %{
16257   match(Set cr (CmpI (AndI src (LoadI mem)) zero));
16258 
16259   format %{ "testl   $src, $mem" %}
16260   ins_encode %{
16261     __ testl($src$$Register, $mem$$Address);
16262   %}
16263   ins_pipe(ialu_cr_reg_mem);
16264 %}
16265 
16266 // Unsigned compare Instructions; really, same as signed except they
16267 // produce an rFlagsRegU instead of rFlagsReg.
// Unsigned int compare, register vs register: same cmpl encoding as the
// signed form, but the result is typed rFlagsRegU.
16268 instruct compU_rReg(rFlagsRegU cr, rRegI op1, rRegI op2)
16269 %{
16270   match(Set cr (CmpU op1 op2));
16271 
16272   format %{ "cmpl    $op1, $op2\t# unsigned" %}
16273   ins_encode %{
16274     __ cmpl($op1$$Register, $op2$$Register);
16275   %}
16276   ins_pipe(ialu_cr_reg_reg);
16277 %}
16278 
// Unsigned int compare, register vs immI constant; result is rFlagsRegU.
16279 instruct compU_rReg_imm(rFlagsRegU cr, rRegI op1, immI op2)
16280 %{
16281   match(Set cr (CmpU op1 op2));
16282 
16283   format %{ "cmpl    $op1, $op2\t# unsigned" %}
16284   ins_encode %{
16285     __ cmpl($op1$$Register, $op2$$constant);
16286   %}
16287   ins_pipe(ialu_cr_reg_imm);
16288 %}
16289 
// Unsigned int compare, register vs memory: folds the LoadI into cmpl's
// memory operand. Explicit ins_cost of 500 (flagged XXX by the author).
16290 instruct compU_rReg_mem(rFlagsRegU cr, rRegI op1, memory op2)
16291 %{
16292   match(Set cr (CmpU op1 (LoadI op2)));
16293 
16294   ins_cost(500); // XXX
16295   format %{ "cmpl    $op1, $op2\t# unsigned" %}
16296   ins_encode %{
16297     __ cmpl($op1$$Register, $op2$$Address);
16298   %}
16299   ins_pipe(ialu_cr_reg_mem);
16300 %}
16301 
// Unsigned int compare against zero: emitted as testl src, src with an
// rFlagsRegU result.
16302 instruct testU_reg(rFlagsRegU cr, rRegI src, immI_0 zero)
16303 %{
16304   match(Set cr (CmpU src zero));
16305 
16306   format %{ "testl   $src, $src\t# unsigned" %}
16307   ins_encode %{
16308     __ testl($src$$Register, $src$$Register);
16309   %}
16310   ins_pipe(ialu_cr_reg_imm);
16311 %}
16312 
// Pointer compare, register vs register: 64-bit cmpq producing an unsigned
// flags result (rFlagsRegU).
16313 instruct compP_rReg(rFlagsRegU cr, rRegP op1, rRegP op2)
16314 %{
16315   match(Set cr (CmpP op1 op2));
16316 
16317   format %{ "cmpq    $op1, $op2\t# ptr" %}
16318   ins_encode %{
16319     __ cmpq($op1$$Register, $op2$$Register);
16320   %}
16321   ins_pipe(ialu_cr_reg_reg);
16322 %}
16323 
// Pointer compare, register vs memory: folds the LoadP into cmpq's memory
// operand. Only applies when the load carries no barrier data (predicate),
// i.e. barrier_data() == 0 on the CmpP's second input. Explicit ins_cost
// of 500 (flagged XXX by the author).
16324 instruct compP_rReg_mem(rFlagsRegU cr, rRegP op1, memory op2)
16325 %{
16326   match(Set cr (CmpP op1 (LoadP op2)));
16327   predicate(n->in(2)->as_Load()->barrier_data() == 0);
16328 
16329   ins_cost(500); // XXX
16330   format %{ "cmpq    $op1, $op2\t# ptr" %}
16331   ins_encode %{
16332     __ cmpq($op1$$Register, $op2$$Address);
16333   %}
16334   ins_pipe(ialu_cr_reg_mem);
16335 %}
16336 
16337 // XXX this is generalized by compP_rReg_mem???
16338 // Compare raw pointer (used in out-of-heap check).
16339 // Only works because non-oop pointers must be raw pointers
16340 // and raw pointers have no anti-dependencies.
// Same match rule and encoding as compP_rReg_mem, but without the explicit
// ins_cost and with a narrower predicate: the type of the load's second
// input must report reloc() == relocInfo::none, and the load must carry no
// barrier data.
16341 instruct compP_mem_rReg(rFlagsRegU cr, rRegP op1, memory op2)
16342 %{
16343   predicate(n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none &&
16344             n->in(2)->as_Load()->barrier_data() == 0);
16345   match(Set cr (CmpP op1 (LoadP op2)));
16346 
16347   format %{ "cmpq    $op1, $op2\t# raw ptr" %}
16348   ins_encode %{
16349     __ cmpq($op1$$Register, $op2$$Address);
16350   %}
16351   ins_pipe(ialu_cr_reg_mem);
16352 %}
16353 
16354 // This will generate a signed flags result. This should be OK since
16355 // any compare to a zero should be eq/neq.
// Null check of a pointer held in a register: (CmpP src null) emitted as
// testq src, src. Note the result is typed rFlagsReg (signed), unlike the
// other CmpP rules.
16356 instruct testP_reg(rFlagsReg cr, rRegP src, immP0 zero)
16357 %{
16358   match(Set cr (CmpP src zero));
16359 
16360   format %{ "testq   $src, $src\t# ptr" %}
16361   ins_encode %{
16362     __ testq($src$$Register, $src$$Register);
16363   %}
16364   ins_pipe(ialu_cr_reg_imm);
16365 %}
16366 
16367 // This will generate a signed flags result. This should be OK since
16368 // any compare to a zero should be eq/neq.
// Null check of a pointer loaded from memory, done with testq directly on
// the memory operand. Selected when compressed oops are off or the
// compressed-oop base is non-null, and the load carries no barrier data;
// the complementary case is handled by testP_mem_reg0.
// NOTE(review): the encoding passes 0xFFFFFFFF while the format string
// prints a 64-bit all-ones mask; presumably the immediate is sign-extended
// by testq(Address, imm32) -- confirm against the assembler.
16369 instruct testP_mem(rFlagsReg cr, memory op, immP0 zero)
16370 %{
16371   predicate((!UseCompressedOops || (CompressedOops::base() != nullptr)) &&
16372             n->in(1)->as_Load()->barrier_data() == 0);
16373   match(Set cr (CmpP (LoadP op) zero));
16374 
16375   ins_cost(500); // XXX
16376   format %{ "testq   $op, 0xffffffffffffffff\t# ptr" %}
16377   ins_encode %{
16378     __ testq($op$$Address, 0xFFFFFFFF);
16379   %}
16380   ins_pipe(ialu_cr_reg_imm);
16381 %}
16382 
// Null check of a pointer loaded from memory for the configuration where
// compressed oops are enabled with a null base: compares r12 against the
// memory operand instead of using an immediate. Per the format string, r12
// (the heap-base register) is expected to hold zero in this configuration.
// Also requires that the load carries no barrier data.
16383 instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero)
16384 %{
16385   predicate(UseCompressedOops && (CompressedOops::base() == nullptr) &&
16386             n->in(1)->as_Load()->barrier_data() == 0);
16387   match(Set cr (CmpP (LoadP mem) zero));
16388 
16389   format %{ "cmpq    R12, $mem\t# ptr (R12_heapbase==0)" %}
16390   ins_encode %{
16391     __ cmpq(r12, $mem$$Address);
16392   %}
16393   ins_pipe(ialu_cr_reg_mem);
16394 %}
16395 
// Compressed-pointer compare, register vs register: 32-bit cmpl with an
// unsigned flags result.
16396 instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2)
16397 %{
16398   match(Set cr (CmpN op1 op2));
16399 
16400   format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
16401   ins_encode %{ __ cmpl($op1$$Register, $op2$$Register); %}
16402   ins_pipe(ialu_cr_reg_reg);
16403 %}
16404 
// Compressed-pointer compare, register vs memory: folds the LoadN into
// cmpl's memory operand. Only applies when the load carries no barrier
// data.
16405 instruct compN_rReg_mem(rFlagsRegU cr, rRegN src, memory mem)
16406 %{
16407   predicate(n->in(2)->as_Load()->barrier_data() == 0);
16408   match(Set cr (CmpN src (LoadN mem)));
16409 
16410   format %{ "cmpl    $src, $mem\t# compressed ptr" %}
16411   ins_encode %{
16412     __ cmpl($src$$Register, $mem$$Address);
16413   %}
16414   ins_pipe(ialu_cr_reg_mem);
16415 %}
16416 
// Compressed-pointer compare, register vs narrow-oop constant (immN).
// The constant is handed to cmp_narrow_oop as a jobject.
16417 instruct compN_rReg_imm(rFlagsRegU cr, rRegN op1, immN op2) %{
16418   match(Set cr (CmpN op1 op2));
16419 
16420   format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
16421   ins_encode %{
16422     __ cmp_narrow_oop($op1$$Register, (jobject)$op2$$constant);
16423   %}
16424   ins_pipe(ialu_cr_reg_imm);
16425 %}
16426 
// Compressed-pointer compare, narrow-oop constant vs memory: the match rule
// has the constant as the first CmpN input and the LoadN as the second,
// while the emitted cmp_narrow_oop takes the memory operand first. Only
// applies when the load carries no barrier data.
16427 instruct compN_mem_imm(rFlagsRegU cr, memory mem, immN src)
16428 %{
16429   predicate(n->in(2)->as_Load()->barrier_data() == 0);
16430   match(Set cr (CmpN src (LoadN mem)));
16431 
16432   format %{ "cmpl    $mem, $src\t# compressed ptr" %}
16433   ins_encode %{
16434     __ cmp_narrow_oop($mem$$Address, (jobject)$src$$constant);
16435   %}
16436   ins_pipe(ialu_cr_reg_mem);
16437 %}
16438 
// Compressed-klass compare, register vs narrow-klass constant (immNKlass).
// The constant is handed to cmp_narrow_klass as a Klass*.
16439 instruct compN_rReg_imm_klass(rFlagsRegU cr, rRegN op1, immNKlass op2) %{
16440   match(Set cr (CmpN op1 op2));
16441 
16442   format %{ "cmpl    $op1, $op2\t# compressed klass ptr" %}
16443   ins_encode %{
16444     __ cmp_narrow_klass($op1$$Register, (Klass*)$op2$$constant);
16445   %}
16446   ins_pipe(ialu_cr_reg_imm);
16447 %}
16448 
// Compressed-klass compare, narrow-klass constant vs a LoadNKlass folded
// into the memory operand of cmp_narrow_klass. Disabled when
// UseCompactObjectHeaders is set (predicate).
16449 instruct compN_mem_imm_klass(rFlagsRegU cr, memory mem, immNKlass src)
16450 %{
16451   predicate(!UseCompactObjectHeaders);
16452   match(Set cr (CmpN src (LoadNKlass mem)));
16453 
16454   format %{ "cmpl    $mem, $src\t# compressed klass ptr" %}
16455   ins_encode %{
16456     __ cmp_narrow_klass($mem$$Address, (Klass*)$src$$constant);
16457   %}
16458   ins_pipe(ialu_cr_reg_mem);
16459 %}
16460 
// Null check of a compressed pointer in a register: (CmpN src 0) emitted as
// testl src, src.
16461 instruct testN_reg(rFlagsReg cr, rRegN src, immN0 zero) %{
16462   match(Set cr (CmpN src zero));
16463 
16464   format %{ "testl   $src, $src\t# compressed ptr" %}
16465   ins_encode %{ __ testl($src$$Register, $src$$Register); %}
16466   ins_pipe(ialu_cr_reg_imm);
16467 %}
16468 
// Null check of a compressed pointer loaded from memory, used when the
// compressed-oop base is non-null and the load carries no barrier data
// (the null-base case is handled by testN_mem_reg0).
// The match rule compares (LoadN mem) against zero, so the encoding must
// TEST the field against an all-ones mask: ZF is then set exactly when the
// field is zero. A CMP against 0xFFFFFFFF would instead set ZF when the
// field equals -1 and would disagree with the format string.
16469 instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero)
16470 %{
16471   predicate(CompressedOops::base() != nullptr &&
16472             n->in(1)->as_Load()->barrier_data() == 0);
16473   match(Set cr (CmpN (LoadN mem) zero));
16474 
16475   ins_cost(500); // XXX
16476   format %{ "testl   $mem, 0xffffffff\t# compressed ptr" %}
16477   ins_encode %{
16478     __ testl($mem$$Address, (int)0xFFFFFFFF);
16479   %}
16480   ins_pipe(ialu_cr_reg_mem);
16481 %}
16482 
// Null check of a compressed pointer loaded from memory for the null-base
// configuration: compares r12 against the memory operand with cmpl. Per the
// format string, r12 (the heap-base register) is expected to hold zero
// here. Also requires that the load carries no barrier data.
16483 instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero)
16484 %{
16485   predicate(CompressedOops::base() == nullptr &&
16486             n->in(1)->as_Load()->barrier_data() == 0);
16487   match(Set cr (CmpN (LoadN mem) zero));
16488 
16489   format %{ "cmpl    R12, $mem\t# compressed ptr (R12_heapbase==0)" %}
16490   ins_encode %{
16491     __ cmpl(r12, $mem$$Address);
16492   %}
16493   ins_pipe(ialu_cr_reg_mem);
16494 %}
16495 
16496 // Yanked all unsigned pointer compare operations.
16497 // Pointer compares are done with CmpP which is already unsigned.
16498 
// Signed long compare, register vs register: matches (CmpL op1 op2) and
// emits cmpq.
16499 instruct compL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
16500 %{
16501   match(Set cr (CmpL op1 op2));
16502 
16503   format %{ "cmpq    $op1, $op2" %}
16504   ins_encode %{
16505     __ cmpq($op1$$Register, $op2$$Register);
16506   %}
16507   ins_pipe(ialu_cr_reg_reg);
16508 %}
16509 
// Signed long compare, register vs constant. The constant operand is
// immL32, i.e. only long constants accepted by that operand class match.
16510 instruct compL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
16511 %{
16512   match(Set cr (CmpL op1 op2));
16513 
16514   format %{ "cmpq    $op1, $op2" %}
16515   ins_encode %{
16516     __ cmpq($op1$$Register, $op2$$constant);
16517   %}
16518   ins_pipe(ialu_cr_reg_imm);
16519 %}
16520 
// Signed long compare, register vs memory: folds the LoadL into cmpq's
// memory operand. Unlike the int/pointer memory forms, no explicit
// ins_cost is given.
16521 instruct compL_rReg_mem(rFlagsReg cr, rRegL op1, memory op2)
16522 %{
16523   match(Set cr (CmpL op1 (LoadL op2)));
16524 
16525   format %{ "cmpq    $op1, $op2" %}
16526   ins_encode %{
16527     __ cmpq($op1$$Register, $op2$$Address);
16528   %}
16529   ins_pipe(ialu_cr_reg_mem);
16530 %}
16531 
// Long compare against zero (immL0): emitted as testq src, src.
16532 instruct testL_reg(rFlagsReg cr, rRegL src, immL0 zero)
16533 %{
16534   match(Set cr (CmpL src zero));
16535 
16536   format %{ "testq   $src, $src" %}
16537   ins_encode %{
16538     __ testq($src$$Register, $src$$Register);
16539   %}
16540   ins_pipe(ialu_cr_reg_imm);
16541 %}
16542 
// Folds (CmpL (AndL src con) 0) into a single testq src, con, where con is
// an immL32 constant.
16543 instruct testL_reg_imm(rFlagsReg cr, rRegL src, immL32 con, immL0 zero)
16544 %{
16545   match(Set cr (CmpL (AndL src con) zero));
16546 
16547   format %{ "testq   $src, $con\t# long" %}
16548   ins_encode %{
16549     __ testq($src$$Register, $con$$constant);
16550   %}
16551   ins_pipe(ialu_cr_reg_imm);
16552 %}
16553 
// Folds (CmpL (AndL src1 src2) 0) into a single testq src1, src2.
16554 instruct testL_reg_reg(rFlagsReg cr, rRegL src1, rRegL src2, immL0 zero)
16555 %{
16556   match(Set cr (CmpL (AndL src1 src2) zero));
16557 
16558   format %{ "testq   $src1, $src2\t# long" %}
16559   ins_encode %{
16560     __ testq($src1$$Register, $src2$$Register);
16561   %}
16562   ins_pipe(ialu_cr_reg_imm);
16563 %}
16564 
// Folds (CmpL (AndL src (LoadL mem)) 0) into a single testq with a memory
// operand, absorbing both the load and the AND.
16565 instruct testL_reg_mem(rFlagsReg cr, rRegL src, memory mem, immL0 zero)
16566 %{
16567   match(Set cr (CmpL (AndL src (LoadL mem)) zero));
16568 
16569   format %{ "testq   $src, $mem" %}
16570   ins_encode %{
16571     __ testq($src$$Register, $mem$$Address);
16572   %}
16573   ins_pipe(ialu_cr_reg_mem);
16574 %}
16575 
// Variant of testL_reg_mem where the register operand is a pointer that the
// ideal graph converts with CastP2X: the cast is absorbed and the pointer
// register is fed to testq directly.
16576 instruct testL_reg_mem2(rFlagsReg cr, rRegP src, memory mem, immL0 zero)
16577 %{
16578   match(Set cr (CmpL (AndL (CastP2X src) (LoadL mem)) zero));
16579 
16580   format %{ "testq   $src, $mem" %}
16581   ins_encode %{
16582     __ testq($src$$Register, $mem$$Address);
16583   %}
16584   ins_pipe(ialu_cr_reg_mem);
16585 %}
16586 
16587 // Manifest a CmpU result in an integer register.  Very painful.
16588 // This is the test to avoid.
// Three-way unsigned int compare materialized in dst (matches CmpU3).
// Sequence: cmpl sets the flags, dst is preloaded with -1, and the short
// branch on "below" keeps that -1; otherwise setcc(notZero) overwrites dst
// with the not-equal outcome of the compare. The flags register is
// clobbered (KILL flags). The movl is placed after the cmpl, so dst is only
// written once both sources have been read.
16589 instruct cmpU3_reg_reg(rRegI dst, rRegI src1, rRegI src2, rFlagsReg flags)
16590 %{
16591   match(Set dst (CmpU3 src1 src2));
16592   effect(KILL flags);
16593 
16594   ins_cost(275); // XXX
16595   format %{ "cmpl    $src1, $src2\t# CmpU3\n\t"
16596             "movl    $dst, -1\n\t"
16597             "jb,u    done\n\t"
16598             "setcc   $dst \t# emits setne + movzbl or setzune for APX\n\t"
16599     "done:" %}
16600   ins_encode %{
16601     Label done;
16602     __ cmpl($src1$$Register, $src2$$Register);
16603     __ movl($dst$$Register, -1);
16604     __ jccb(Assembler::below, done);
16605     __ setcc(Assembler::notZero, $dst$$Register);
16606     __ bind(done);
16607   %}
16608   ins_pipe(pipe_slow);
16609 %}
16610 
16611 // Manifest a CmpL result in an integer register.  Very painful.
16612 // This is the test to avoid.
// Three-way signed long compare materialized in dst (matches CmpL3).
// Sequence: cmpq sets the flags, dst is preloaded with -1, and the short
// branch on "less" keeps that -1; otherwise setcc(notZero) overwrites dst
// with the not-equal outcome of the compare. The flags register is
// clobbered (KILL flags).
16613 instruct cmpL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
16614 %{
16615   match(Set dst (CmpL3 src1 src2));
16616   effect(KILL flags);
16617 
16618   ins_cost(275); // XXX
16619   format %{ "cmpq    $src1, $src2\t# CmpL3\n\t"
16620             "movl    $dst, -1\n\t"
16621             "jl,s    done\n\t"
16622             "setcc   $dst \t# emits setne + movzbl or setzune for APX\n\t"
16623     "done:" %}
16624   ins_encode %{
16625     Label done;
16626     __ cmpq($src1$$Register, $src2$$Register);
16627     __ movl($dst$$Register, -1);
16628     __ jccb(Assembler::less, done);
16629     __ setcc(Assembler::notZero, $dst$$Register);
16630     __ bind(done);
16631   %}
16632   ins_pipe(pipe_slow);
16633 %}
16634 
16635 // Manifest a CmpUL result in an integer register.  Very painful.
16636 // This is the test to avoid.
// Three-way unsigned long compare materialized in dst (matches CmpUL3).
// Sequence: cmpq sets the flags, dst is preloaded with -1, and the short
// branch on "below" keeps that -1; otherwise setcc(notZero) overwrites dst
// with the not-equal outcome of the compare. The flags register is
// clobbered (KILL flags).
16637 instruct cmpUL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
16638 %{
16639   match(Set dst (CmpUL3 src1 src2));
16640   effect(KILL flags);
16641 
16642   ins_cost(275); // XXX
16643   format %{ "cmpq    $src1, $src2\t# CmpUL3\n\t"
16644             "movl    $dst, -1\n\t"
16645             "jb,u    done\n\t"
16646             "setcc   $dst \t# emits setne + movzbl or setzune for APX\n\t"
16647     "done:" %}
16648   ins_encode %{
16649     Label done;
16650     __ cmpq($src1$$Register, $src2$$Register);
16651     __ movl($dst$$Register, -1);
16652     __ jccb(Assembler::below, done);
16653     __ setcc(Assembler::notZero, $dst$$Register);
16654     __ bind(done);
16655   %}
16656   ins_pipe(pipe_slow);
16657 %}
16658 
16659 // Unsigned long compare Instructions; really, same as signed long except they
16660 // produce an rFlagsRegU instead of rFlagsReg.
// Unsigned long compare, register vs register: same cmpq encoding as the
// signed form, but the result is typed rFlagsRegU.
16661 instruct compUL_rReg(rFlagsRegU cr, rRegL op1, rRegL op2)
16662 %{
16663   match(Set cr (CmpUL op1 op2));
16664 
16665   format %{ "cmpq    $op1, $op2\t# unsigned" %}
16666   ins_encode %{
16667     __ cmpq($op1$$Register, $op2$$Register);
16668   %}
16669   ins_pipe(ialu_cr_reg_reg);
16670 %}
16671 
// Unsigned long compare, register vs immL32 constant; result is rFlagsRegU.
16672 instruct compUL_rReg_imm(rFlagsRegU cr, rRegL op1, immL32 op2)
16673 %{
16674   match(Set cr (CmpUL op1 op2));
16675 
16676   format %{ "cmpq    $op1, $op2\t# unsigned" %}
16677   ins_encode %{
16678     __ cmpq($op1$$Register, $op2$$constant);
16679   %}
16680   ins_pipe(ialu_cr_reg_imm);
16681 %}
16682 
// Unsigned long compare, register vs memory: folds the LoadL into cmpq's
// memory operand; result is rFlagsRegU.
16683 instruct compUL_rReg_mem(rFlagsRegU cr, rRegL op1, memory op2)
16684 %{
16685   match(Set cr (CmpUL op1 (LoadL op2)));
16686 
16687   format %{ "cmpq    $op1, $op2\t# unsigned" %}
16688   ins_encode %{
16689     __ cmpq($op1$$Register, $op2$$Address);
16690   %}
16691   ins_pipe(ialu_cr_reg_mem);
16692 %}
16693 
// Unsigned long compare against zero: emitted as testq src, src with an
// rFlagsRegU result.
16694 instruct testUL_reg(rFlagsRegU cr, rRegL src, immL0 zero)
16695 %{
16696   match(Set cr (CmpUL src zero));
16697 
16698   format %{ "testq   $src, $src\t# unsigned" %}
16699   ins_encode %{
16700     __ testq($src$$Register, $src$$Register);
16701   %}
16702   ins_pipe(ialu_cr_reg_imm);
16703 %}
16704 
// Compares a signed byte in memory against an 8-bit immediate without
// loading it into a register: (CmpI (LoadB mem) imm) becomes cmpb mem, imm.
16705 instruct compB_mem_imm(rFlagsReg cr, memory mem, immI8 imm)
16706 %{
16707   match(Set cr (CmpI (LoadB mem) imm));
16708 
16709   ins_cost(125);
16710   format %{ "cmpb    $mem, $imm" %}
16711   ins_encode %{ __ cmpb($mem$$Address, $imm$$constant); %}
16712   ins_pipe(ialu_cr_reg_mem);
16713 %}
16714 
// Bit test on an unsigned byte in memory: (CmpI (AndI (LoadUB mem) imm) 0)
// becomes testb mem, imm. The mask operand is restricted to immU7.
16715 instruct testUB_mem_imm(rFlagsReg cr, memory mem, immU7 imm, immI_0 zero)
16716 %{
16717   match(Set cr (CmpI (AndI (LoadUB mem) imm) zero));
16718 
16719   ins_cost(125);
16720   format %{ "testb   $mem, $imm\t# ubyte" %}
16721   ins_encode %{ __ testb($mem$$Address, $imm$$constant); %}
16722   ins_pipe(ialu_cr_reg_mem);
16723 %}
16724 
// Bit test on a signed byte in memory: (CmpI (AndI (LoadB mem) imm) 0)
// becomes testb mem, imm. The mask operand is an immI8.
16725 instruct testB_mem_imm(rFlagsReg cr, memory mem, immI8 imm, immI_0 zero)
16726 %{
16727   match(Set cr (CmpI (AndI (LoadB mem) imm) zero));
16728 
16729   ins_cost(125);
16730   format %{ "testb   $mem, $imm\t# byte" %}
16731   ins_encode %{ __ testb($mem$$Address, $imm$$constant); %}
16732   ins_pipe(ialu_cr_reg_mem);
16733 %}
16734 
16735 //----------Max and Min--------------------------------------------------------
16736 // Min Instructions
16737 
// Helper with no match rule; instantiated from the expand in minI_rReg.
// Conditionally moves src into dst when the preceding compare reported
// "greater". Non-APX form: dst is both read and written (USE_DEF).
16738 instruct cmovI_reg_g(rRegI dst, rRegI src, rFlagsReg cr)
16739 %{
16740   predicate(!UseAPX);
16741   effect(USE_DEF dst, USE src, USE cr);
16742 
16743   format %{ "cmovlgt $dst, $src\t# min" %}
16744   ins_encode %{
16745     __ cmovl(Assembler::greater, $dst$$Register, $src$$Register);
16746   %}
16747   ins_pipe(pipe_cmov_reg);
16748 %}
16749 
// APX counterpart of cmovI_reg_g, with no match rule; instantiated from the
// expand in minI_rReg_ndd. Uses the three-operand ecmovl with a separate
// destination, so dst is a pure definition and src1/src2 are only read.
16750 instruct cmovI_reg_g_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
16751 %{
16752   predicate(UseAPX);
16753   effect(DEF dst, USE src1, USE src2, USE cr);
16754 
16755   format %{ "ecmovlgt $dst, $src1, $src2\t# min ndd" %}
16756   ins_encode %{
16757     __ ecmovl(Assembler::greater, $dst$$Register, $src1$$Register, $src2$$Register);
16758   %}
16759   ins_pipe(pipe_cmov_reg);
16760 %}
16761 
// Integer minimum, non-APX two-operand form: dst = min(dst, src).
// Expands into a signed compare of dst with src followed by a conditional
// move of src into dst when dst is greater. The flags register is a local
// of the expansion.
16762 instruct minI_rReg(rRegI dst, rRegI src)
16763 %{
16764   predicate(!UseAPX);
16765   match(Set dst (MinI dst src));
16766 
16767   ins_cost(200);
16768   expand %{
16769     rFlagsReg cr;
16770     compI_rReg(cr, dst, src);
16771     cmovI_reg_g(dst, src, cr);
16772   %}
16773 %}
16774 
// Integer minimum, APX three-operand form: dst = min(src1, src2).
// Expands into a signed compare of src1 with src2 followed by the
// three-operand conditional move cmovI_reg_g_ndd. Tagged with
// PD::Flag_ndd_demotable_opr1.
16775 instruct minI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2)
16776 %{
16777   predicate(UseAPX);
16778   match(Set dst (MinI src1 src2));
16779   effect(DEF dst, USE src1, USE src2);
16780   flag(PD::Flag_ndd_demotable_opr1);
16781 
16782   ins_cost(200);
16783   expand %{
16784     rFlagsReg cr;
16785     compI_rReg(cr, src1, src2);
16786     cmovI_reg_g_ndd(dst, src1, src2, cr);
16787   %}
16788 %}
16789 
// Helper with no match rule; instantiated from the expand in maxI_rReg.
// Conditionally moves src into dst when the preceding compare reported
// "less". Non-APX form: dst is both read and written (USE_DEF).
16790 instruct cmovI_reg_l(rRegI dst, rRegI src, rFlagsReg cr)
16791 %{
16792   predicate(!UseAPX);
16793   effect(USE_DEF dst, USE src, USE cr);
16794 
16795   format %{ "cmovllt $dst, $src\t# max" %}
16796   ins_encode %{
16797     __ cmovl(Assembler::less, $dst$$Register, $src$$Register);
16798   %}
16799   ins_pipe(pipe_cmov_reg);
16800 %}
16801 
// APX counterpart of cmovI_reg_l, with no match rule; instantiated from the
// expand in maxI_rReg_ndd. Uses the three-operand ecmovl with a separate
// destination, so dst is a pure definition and src1/src2 are only read.
16802 instruct cmovI_reg_l_ndd(rRegI dst, rRegI src1, rRegI src2, rFlagsReg cr)
16803 %{
16804   predicate(UseAPX);
16805   effect(DEF dst, USE src1, USE src2, USE cr);
16806 
16807   format %{ "ecmovllt $dst, $src1, $src2\t# max ndd" %}
16808   ins_encode %{
16809     __ ecmovl(Assembler::less, $dst$$Register, $src1$$Register, $src2$$Register);
16810   %}
16811   ins_pipe(pipe_cmov_reg);
16812 %}
16813 
// Integer maximum, non-APX two-operand form: dst = max(dst, src).
// Expands into a signed compare of dst with src followed by a conditional
// move of src into dst when dst is less. The flags register is a local of
// the expansion.
16814 instruct maxI_rReg(rRegI dst, rRegI src)
16815 %{
16816   predicate(!UseAPX);
16817   match(Set dst (MaxI dst src));
16818 
16819   ins_cost(200);
16820   expand %{
16821     rFlagsReg cr;
16822     compI_rReg(cr, dst, src);
16823     cmovI_reg_l(dst, src, cr);
16824   %}
16825 %}
16826 
// Integer maximum, APX three-operand form: dst = max(src1, src2).
// Expands into a signed compare of src1 with src2 followed by the
// three-operand conditional move cmovI_reg_l_ndd. Tagged with
// PD::Flag_ndd_demotable_opr1.
16827 instruct maxI_rReg_ndd(rRegI dst, rRegI src1, rRegI src2)
16828 %{
16829   predicate(UseAPX);
16830   match(Set dst (MaxI src1 src2));
16831   effect(DEF dst, USE src1, USE src2);
16832   flag(PD::Flag_ndd_demotable_opr1);
16833 
16834   ins_cost(200);
16835   expand %{
16836     rFlagsReg cr;
16837     compI_rReg(cr, src1, src2);
16838     cmovI_reg_l_ndd(dst, src1, src2, cr);
16839   %}
16840 %}
16841 
16842 // ============================================================================
16843 // Branch Instructions
16844 
16845 // Jump Direct - Label defines a relative address from JMP+1
// Unconditional jump matched for Goto. Always emitted in the long form
// (second argument to jmp is false), with a declared size of 5 bytes; the
// short replacement is jmpDir_short.
16846 instruct jmpDir(label labl)
16847 %{
16848   match(Goto);
16849   effect(USE labl);
16850 
16851   ins_cost(300);
16852   format %{ "jmp     $labl" %}
16853   size(5);
16854   ins_encode %{
16855     Label* L = $labl$$label;
16856     __ jmp(*L, false); // Always long jump
16857   %}
16858   ins_pipe(pipe_jmp);
16859 %}
16860 
16861 // Jump Direct Conditional - Label defines a relative address from Jcc+1
// Conditional jump on a signed flags result, matched for If. The condition
// code comes from the cmpOp operand; always emitted in the long form with a
// declared size of 6 bytes. The short replacement is jmpCon_short.
16862 instruct jmpCon(cmpOp cop, rFlagsReg cr, label labl)
16863 %{
16864   match(If cop cr);
16865   effect(USE labl);
16866 
16867   ins_cost(300);
16868   format %{ "j$cop     $labl" %}
16869   size(6);
16870   ins_encode %{
16871     Label* L = $labl$$label;
16872     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16873   %}
16874   ins_pipe(pipe_jcc);
16875 %}
16876 
16877 // Jump Direct Conditional - Label defines a relative address from Jcc+1
// Conditional back-branch of a counted loop: identical encoding to jmpCon
// but matched for CountedLoopEnd. Long form, declared size of 6 bytes; the
// short replacement is jmpLoopEnd_short.
16878 instruct jmpLoopEnd(cmpOp cop, rFlagsReg cr, label labl)
16879 %{
16880   match(CountedLoopEnd cop cr);
16881   effect(USE labl);
16882 
16883   ins_cost(300);
16884   format %{ "j$cop     $labl\t# loop end" %}
16885   size(6);
16886   ins_encode %{
16887     Label* L = $labl$$label;
16888     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16889   %}
16890   ins_pipe(pipe_jcc);
16891 %}
16892 
16893 // Jump Direct Conditional - using unsigned comparison
// Conditional jump on an unsigned flags result (rFlagsRegU) using the
// unsigned condition operand cmpOpU. Long form, declared size of 6 bytes;
// the short replacement is jmpConU_short.
16894 instruct jmpConU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
16895   match(If cop cmp);
16896   effect(USE labl);
16897 
16898   ins_cost(300);
16899   format %{ "j$cop,u   $labl" %}
16900   size(6);
16901   ins_encode %{
16902     Label* L = $labl$$label;
16903     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16904   %}
16905   ins_pipe(pipe_jcc);
16906 %}
16907 
// Conditional jump on an rFlagsRegUCF result with a cmpOpUCF condition.
// Same single long jcc as jmpConU, but with a lower ins_cost (200 vs 300).
// The short replacement is jmpConUCF_short.
16908 instruct jmpConUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
16909   match(If cop cmp);
16910   effect(USE labl);
16911 
16912   ins_cost(200);
16913   format %{ "j$cop,u   $labl" %}
16914   size(6);
16915   ins_encode %{
16916     Label* L = $labl$$label;
16917     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16918   %}
16919   ins_pipe(pipe_jcc);
16920 %}
16921 
// Conditional jump on an rFlagsRegUCF result for cmpOpUCF2 conditions,
// which need the parity flag consulted as well, so two branches are
// emitted:
//   notEqual: jump to the target on parity, then jump to it on notEqual.
//   equal:    a short jump on parity skips over the long jump on equal.
// Any other condition code hits ShouldNotReachHere(). No size() is declared
// for this instruction, unlike the single-branch forms.
16922 instruct jmpConUCF2(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
16923   match(If cop cmp);
16924   effect(USE labl);
16925 
16926   ins_cost(200);
16927   format %{ $$template
16928     if ($cop$$cmpcode == Assembler::notEqual) {
16929       $$emit$$"jp,u    $labl\n\t"
16930       $$emit$$"j$cop,u   $labl"
16931     } else {
16932       $$emit$$"jp,u    done\n\t"
16933       $$emit$$"j$cop,u   $labl\n\t"
16934       $$emit$$"done:"
16935     }
16936   %}
16937   ins_encode %{
16938     Label* l = $labl$$label;
16939     if ($cop$$cmpcode == Assembler::notEqual) {
16940       __ jcc(Assembler::parity, *l, false);
16941       __ jcc(Assembler::notEqual, *l, false);
16942     } else if ($cop$$cmpcode == Assembler::equal) {
16943       Label done;
16944       __ jccb(Assembler::parity, done);
16945       __ jcc(Assembler::equal, *l, false);
16946       __ bind(done);
16947     } else {
16948        ShouldNotReachHere();
16949     }
16950   %}
16951   ins_pipe(pipe_jcc);
16952 %}
16953 
16954 // Jump Direct Conditional - using signed and unsigned comparison
// Conditional jump on an rFlagsRegUCFE result with a cmpOpUCFE condition.
// Single long jcc with a declared size of 6 bytes and ins_cost 200; the
// short replacement is jmpConUCFE_short.
16955 instruct jmpConUCFE(cmpOpUCFE cop, rFlagsRegUCFE cmp, label labl) %{
16956   match(If cop cmp);
16957   effect(USE labl);
16958 
16959   ins_cost(200);
16960   format %{ "j$cop,su   $labl" %}
16961   size(6);
16962   ins_encode %{
16963     Label* L = $labl$$label;
16964     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
16965   %}
16966   ins_pipe(pipe_jcc);
16967 %}
16968 
16969 // ============================================================================
16970 // The 2nd slow-half of a subtype check.  Scan the subklass's 2ndary
16971 // superklass array for an instance of the superklass.  Set a hidden
16972 // internal cache on a hit (cache is checked with exposed code in
16973 // gen_subtype_check()).  Return NZ for a miss or zero for a hit.  The
16974 // encoding ALSO sets flags.
16975 
// Linear-scan form of the secondary-supers check, used when
// UseSecondarySupersTable is off. Operands are pinned to fixed registers:
// result in rdi, sub in rsi, super in rax, with rcx as a killed scratch
// register; the flags are killed too.
// The work is delegated to check_klass_subtype_slow_path_linear, which
// branches to the local "miss" label on failure. On the fall-through (hit)
// path result is cleared with xorptr; on a miss it is left as the helper
// set it (nonzero, per the note inside the encoding).
16976 instruct partialSubtypeCheck(rdi_RegP result,
16977                              rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
16978                              rFlagsReg cr)
16979 %{
16980   match(Set result (PartialSubtypeCheck sub super));
16981   predicate(!UseSecondarySupersTable);
16982   effect(KILL rcx, KILL cr);
16983 
16984   ins_cost(1100);  // slightly larger than the next version
16985   format %{ "movq    rdi, [$sub + in_bytes(Klass::secondary_supers_offset())]\n\t"
16986             "movl    rcx, [rdi + Array<Klass*>::length_offset_in_bytes()]\t# length to scan\n\t"
16987             "addq    rdi, Array<Klass*>::base_offset_in_bytes()\t# Skip to start of data; set NZ in case count is zero\n\t"
16988             "repne   scasq\t# Scan *rdi++ for a match with rax while rcx--\n\t"
16989             "jne,s   miss\t\t# Missed: rdi not-zero\n\t"
16990             "movq    [$sub + in_bytes(Klass::secondary_super_cache_offset())], $super\t# Hit: update cache\n\t"
16991             "xorq    $result, $result\t\t Hit: rdi zero\n\t"
16992     "miss:\t" %}
16993 
16994   ins_encode %{
16995     Label miss;
16996     // NB: Callers may assume that, when $result is a valid register,
16997     // check_klass_subtype_slow_path_linear sets it to a nonzero
16998     // value.
16999     __ check_klass_subtype_slow_path_linear($sub$$Register, $super$$Register,
17000                                             $rcx$$Register, $result$$Register,
17001                                             nullptr, &miss,
17002                                             /*set_cond_codes:*/ true);
17003     __ xorptr($result$$Register, $result$$Register);
17004     __ bind(miss);
17005   %}
17006 
17007   ins_pipe(pipe_slow);
17008 %}
17009 
17010 // ============================================================================
17011 // Two versions of hashtable-based partialSubtypeCheck, both used when
17012 // we need to search for a super class in the secondary supers array.
17013 // The first is used when we don't know _a priori_ the class being
17014 // searched for. The second, far more common, is used when we do know:
17015 // this is used for instanceof, checkcast, and any case where C2 can
17016 // determine it by constant propagation.
17017 
// Table-based secondary-supers check for a superclass that is only known at
// run time (super is a register operand). Enabled by
// UseSecondarySupersTable. sub/super/result are pinned to rsi/rax/rdi and
// four fixed scratch registers (rdx, rcx, rbx, r11) are reserved as TEMPs;
// the flags are killed. All work is delegated to
// lookup_secondary_supers_table_var.
17018 instruct partialSubtypeCheckVarSuper(rsi_RegP sub, rax_RegP super, rdi_RegP result,
17019                                        rdx_RegL temp1, rcx_RegL temp2, rbx_RegP temp3, r11_RegL temp4,
17020                                        rFlagsReg cr)
17021 %{
17022   match(Set result (PartialSubtypeCheck sub super));
17023   predicate(UseSecondarySupersTable);
17024   effect(KILL cr, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4);
17025 
17026   ins_cost(1000);
17027   format %{ "partialSubtypeCheck $result, $sub, $super" %}
17028 
17029   ins_encode %{
17030     __ lookup_secondary_supers_table_var($sub$$Register, $super$$Register, $temp1$$Register, $temp2$$Register,
17031 					 $temp3$$Register, $temp4$$Register, $result$$Register);
17032   %}
17033 
17034   ins_pipe(pipe_slow);
17035 %}
17036 
// Table-based secondary-supers check for a superclass that is a
// compile-time constant: the match rule carries the superclass both in a
// register (super_reg) and as an immediate (super_con). The hash slot is
// read from the constant Klass at code-emission time. Depending on
// InlineSecondarySupersTest the lookup is either emitted inline via
// lookup_secondary_supers_table_const or performed by calling the per-slot
// stub from StubRoutines. Has the lowest ins_cost (700) of the three
// PartialSubtypeCheck rules.
17037 instruct partialSubtypeCheckConstSuper(rsi_RegP sub, rax_RegP super_reg, immP super_con, rdi_RegP result,
17038                                        rdx_RegL temp1, rcx_RegL temp2, rbx_RegP temp3, r11_RegL temp4,
17039                                        rFlagsReg cr)
17040 %{
17041   match(Set result (PartialSubtypeCheck sub (Binary super_reg super_con)));
17042   predicate(UseSecondarySupersTable);
17043   effect(KILL cr, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4);
17044 
17045   ins_cost(700);  // smaller than the next version
17046   format %{ "partialSubtypeCheck $result, $sub, $super_reg, $super_con" %}
17047 
17048   ins_encode %{
17049     u1 super_klass_slot = ((Klass*)$super_con$$constant)->hash_slot();
17050     if (InlineSecondarySupersTest) {
17051       __ lookup_secondary_supers_table_const($sub$$Register, $super_reg$$Register, $temp1$$Register, $temp2$$Register,
17052                                        $temp3$$Register, $temp4$$Register, $result$$Register,
17053                                        super_klass_slot);
17054     } else {
17055       __ call(RuntimeAddress(StubRoutines::lookup_secondary_supers_table_stub(super_klass_slot)));
17056     }
17057   %}
17058 
17059   ins_pipe(pipe_slow);
17060 %}
17061 
17062 // ============================================================================
17063 // Branch Instructions -- short offset versions
17064 //
17065 // These instructions are used to replace jumps of a long offset (the default
17066 // match) with jumps of a shorter offset.  These instructions are all tagged
17067 // with the ins_short_branch attribute, which causes the ADLC to suppress the
17068 // match rules in general matching.  Instead, the ADLC generates a conversion
17069 // method in the MachNode which can be used to do in-place replacement of the
17070 // long variant with the shorter variant.  The compiler will determine if a
17071 // branch can be taken by the is_short_branch_offset() predicate in the machine
17072 // specific code section of the file.
17073 
17074 // Jump Direct - Label defines a relative address from JMP+1
// Short-offset replacement for jmpDir: emits jmpb with a declared size of
// 2 bytes. Tagged ins_short_branch(1), so it is substituted for the long
// form rather than chosen during general matching.
17075 instruct jmpDir_short(label labl) %{
17076   match(Goto);
17077   effect(USE labl);
17078 
17079   ins_cost(300);
17080   format %{ "jmp,s   $labl" %}
17081   size(2);
17082   ins_encode %{
17083     Label* L = $labl$$label;
17084     __ jmpb(*L);
17085   %}
17086   ins_pipe(pipe_jmp);
17087   ins_short_branch(1);
17088 %}
17089 
17090 // Jump Direct Conditional - Label defines a relative address from Jcc+1
// Short-offset replacement for jmpCon: emits jccb with a declared size of
// 2 bytes. Tagged ins_short_branch(1).
17091 instruct jmpCon_short(cmpOp cop, rFlagsReg cr, label labl) %{
17092   match(If cop cr);
17093   effect(USE labl);
17094 
17095   ins_cost(300);
17096   format %{ "j$cop,s   $labl" %}
17097   size(2);
17098   ins_encode %{
17099     Label* L = $labl$$label;
17100     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17101   %}
17102   ins_pipe(pipe_jcc);
17103   ins_short_branch(1);
17104 %}
17105 
17106 // Jump Direct Conditional - Label defines a relative address from Jcc+1
// Short-offset replacement for jmpLoopEnd (CountedLoopEnd): emits jccb with
// a declared size of 2 bytes. Tagged ins_short_branch(1).
17107 instruct jmpLoopEnd_short(cmpOp cop, rFlagsReg cr, label labl) %{
17108   match(CountedLoopEnd cop cr);
17109   effect(USE labl);
17110 
17111   ins_cost(300);
17112   format %{ "j$cop,s   $labl\t# loop end" %}
17113   size(2);
17114   ins_encode %{
17115     Label* L = $labl$$label;
17116     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17117   %}
17118   ins_pipe(pipe_jcc);
17119   ins_short_branch(1);
17120 %}
17121 
17122 // Jump Direct Conditional - using unsigned comparison
// Short-offset replacement for jmpConU (unsigned condition, rFlagsRegU):
// emits jccb with a declared size of 2 bytes. Tagged ins_short_branch(1).
17123 instruct jmpConU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
17124   match(If cop cmp);
17125   effect(USE labl);
17126 
17127   ins_cost(300);
17128   format %{ "j$cop,us  $labl" %}
17129   size(2);
17130   ins_encode %{
17131     Label* L = $labl$$label;
17132     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17133   %}
17134   ins_pipe(pipe_jcc);
17135   ins_short_branch(1);
17136 %}
17137 
// Short-offset replacement for jmpConUCF: emits jccb with a declared size
// of 2 bytes. Tagged ins_short_branch(1). Note the ins_cost here is 300,
// whereas the long form jmpConUCF declares 200.
17138 instruct jmpConUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
17139   match(If cop cmp);
17140   effect(USE labl);
17141 
17142   ins_cost(300);
17143   format %{ "j$cop,us  $labl" %}
17144   size(2);
17145   ins_encode %{
17146     Label* L = $labl$$label;
17147     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
17148   %}
17149   ins_pipe(pipe_jcc);
17150   ins_short_branch(1);
17151 %}
17152 
// Short-offset replacement for jmpConUCF2. Same two-branch scheme, but
// every branch is a jccb, giving a declared size of 4 bytes:
//   notEqual: jump to the target on parity, then jump to it on notEqual.
//   equal:    a jump on parity skips over the jump on equal.
// Any other condition code hits ShouldNotReachHere(). Tagged
// ins_short_branch(1).
17153 instruct jmpConUCF2_short(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
17154   match(If cop cmp);
17155   effect(USE labl);
17156 
17157   ins_cost(300);
17158   format %{ $$template
17159     if ($cop$$cmpcode == Assembler::notEqual) {
17160       $$emit$$"jp,u,s  $labl\n\t"
17161       $$emit$$"j$cop,u,s  $labl"
17162     } else {
17163       $$emit$$"jp,u,s  done\n\t"
17164       $$emit$$"j$cop,u,s  $labl\n\t"
17165       $$emit$$"done:"
17166     }
17167   %}
17168   size(4);
17169   ins_encode %{
17170     Label* l = $labl$$label;
17171     if ($cop$$cmpcode == Assembler::notEqual) {
17172       __ jccb(Assembler::parity, *l);
17173       __ jccb(Assembler::notEqual, *l);
17174     } else if ($cop$$cmpcode == Assembler::equal) {
17175       Label done;
17176       __ jccb(Assembler::parity, done);
17177       __ jccb(Assembler::equal, *l);
17178       __ bind(done);
17179     } else {
17180        ShouldNotReachHere();
17181     }
17182   %}
17183   ins_pipe(pipe_jcc);
17184   ins_short_branch(1);
17185 %}
17186 
// Jump Direct Conditional, short form - using signed and unsigned comparison
// (cmpOpUCFE / rFlagsRegUCFE). Emits one jccb; size is declared as 2 bytes.
instruct jmpConUCFE_short(cmpOpUCFE cop, rFlagsRegUCFE cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  size(2);
  ins_cost(300);
  format %{ "j$cop,sus  $labl" %}
  ins_encode %{
    const Assembler::Condition cc = static_cast<Assembler::Condition>($cop$$cmpcode);
    Label* target = $labl$$label;
    __ jccb(cc, *target);
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}
17202 
17203 // ============================================================================
17204 // inlined locking and unlocking
17205 
// Inlined monitor enter: sets the flags from FastLock(object, box).
// The box (pinned to rbx) is used and killed; rax and tmp are temporaries.
instruct cmpFastLock(rFlagsReg cr, rRegP object, rbx_RegP box, rax_RegI rax_reg, rRegP tmp) %{
  match(Set cr (FastLock object box));
  effect(USE_KILL box, TEMP rax_reg, TEMP tmp);

  ins_cost(300);
  format %{ "fastlock $object,$box\t! kills $box,$rax_reg,$tmp" %}
  ins_encode %{
    Register obj     = $object$$Register;
    Register lockbox = $box$$Register;
    Register rax_tmp = $rax_reg$$Register;
    Register scratch = $tmp$$Register;
    __ fast_lock(obj, lockbox, rax_tmp, scratch, r15_thread);
  %}
  ins_pipe(pipe_slow);
%}
17216 
// Inlined monitor exit: sets the flags from FastUnlock(object, rax_reg).
// rax_reg is used and killed; tmp is a temporary.
instruct cmpFastUnlock(rFlagsReg cr, rRegP object, rax_RegP rax_reg, rRegP tmp) %{
  match(Set cr (FastUnlock object rax_reg));
  effect(USE_KILL rax_reg, TEMP tmp);

  ins_cost(300);
  format %{ "fastunlock $object,$rax_reg\t! kills $rax_reg,$tmp" %}
  ins_encode %{
    Register obj     = $object$$Register;
    Register rax_tmp = $rax_reg$$Register;
    Register scratch = $tmp$$Register;
    __ fast_unlock(obj, rax_tmp, scratch, r15_thread);
  %}
  ins_pipe(pipe_slow);
%}
17227 
17228 
17229 // ============================================================================
17230 // Safepoint Instructions
// Safepoint poll: a testl of rax against the word at [$poll], tagged with a
// poll_type relocation. The flags register is killed. A debug assert checks
// that the bytes emitted at the recorded pc decode as a safepoint poll.
instruct safePoint_poll_tls(rFlagsReg cr, rRegP poll)
%{
  match(SafePoint poll);
  effect(USE poll, KILL cr);

  ins_cost(125);
  format %{ "testl   rax, [$poll]\t"
            "# Safepoint: poll for GC" %}
  ins_encode %{
    __ relocate(relocInfo::poll_type);
    // Remember where the poll instruction starts so it can be verified below.
    address poll_pc = __ pc();
    Register poll_base = $poll$$Register;
    __ testl(rax, Address(poll_base, 0));
    assert(nativeInstruction_at(poll_pc)->is_safepoint_poll(), "must emit test %%eax [reg]");
  %}
  ins_pipe(ialu_reg_mem);
%}
17247 
// MaskAll from a long source into an opmask register; the mask length is the
// vector length of this node.
instruct mask_all_evexL(kReg dst, rRegL src) %{
  match(Set dst (MaskAll src));

  format %{ "mask_all_evexL $dst, $src \t! mask all operation" %}
  ins_encode %{
    const int lanes = Matcher::vector_length(this);
    KRegister mask  = $dst$$KRegister;
    Register  value = $src$$Register;
    __ vector_maskall_operation(mask, value, lanes);
  %}
  ins_pipe( pipe_slow );
%}
17257 
// MaskAll from an int source when the vector length exceeds 32: the int is
// first sign-extended into the long temporary (movslq), which is then handed
// to the mask-all helper.
instruct mask_all_evexI_GT32(kReg dst, rRegI src, rRegL tmp) %{
  match(Set dst (MaskAll src));
  predicate(Matcher::vector_length(n) > 32);
  effect(TEMP tmp);

  format %{ "mask_all_evexI_GT32 $dst, $src \t! using $tmp as TEMP" %}
  ins_encode %{
    const int lanes = Matcher::vector_length(this);
    Register widened = $tmp$$Register;
    __ movslq(widened, $src$$Register);
    __ vector_maskall_operation($dst$$KRegister, widened, lanes);
  %}
  ins_pipe( pipe_slow );
%}
17270 
17271 // ============================================================================
17272 // Procedure Call/Return Instructions
// Call Java Static Instruction
// Encoding sequence: clear_avx, Java_Static_Call, call_epilog.
// Note: If this code changes, the corresponding ret_addr_offset() and
//       compute_padding() functions will have to be adjusted.
instruct CallStaticJavaDirect(method meth) %{
  match(CallStaticJava);
  effect(USE meth);

  format %{ "call,static " %}
  ins_cost(300);
  ins_alignment(4);
  opcode(0xE8); /* E8 cd */
  ins_encode(clear_avx, Java_Static_Call(meth), call_epilog);
  ins_pipe(pipe_slow);
%}
17287 
// Call Java Dynamic Instruction
// Encoding sequence: clear_avx, Java_Dynamic_Call, call_epilog.
// Note: If this code changes, the corresponding ret_addr_offset() and
//       compute_padding() functions will have to be adjusted.
instruct CallDynamicJavaDirect(method meth)
%{
  match(CallDynamicJava);
  effect(USE meth);

  format %{ "movq    rax, #Universe::non_oop_word()\n\t"
            "call,dynamic " %}
  ins_cost(300);
  ins_alignment(4);
  ins_encode(clear_avx, Java_Dynamic_Call(meth), call_epilog);
  ins_pipe(pipe_slow);
%}
17303 
// Call Runtime Instruction
// Encoding sequence: clear_avx, then Java_To_Runtime.
instruct CallRuntimeDirect(method meth)
%{
  match(CallRuntime);
  effect(USE meth);

  format %{ "call,runtime " %}
  ins_cost(300);
  ins_encode(clear_avx, Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
17315 
// Call runtime without safepoint (CallLeaf).
// Encoding sequence: clear_avx, then Java_To_Runtime.
instruct CallLeafDirect(method meth)
%{
  match(CallLeaf);
  effect(USE meth);

  format %{ "call_leaf,runtime " %}
  ins_cost(300);
  ins_encode(clear_avx, Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
17327 
// Call runtime without safepoint and with vector arguments (CallLeafVector).
// Unlike the other runtime call rules here, the encoding has no clear_avx
// step; only Java_To_Runtime is emitted.
instruct CallLeafDirectVector(method meth)
%{
  match(CallLeafVector);
  effect(USE meth);

  format %{ "call_leaf,vector " %}
  ins_cost(300);
  ins_encode(Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
17339 
// Call runtime without safepoint (CallLeafNoFP).
// Encoding sequence: clear_avx, then Java_To_Runtime.
instruct CallLeafNoFPDirect(method meth)
%{
  match(CallLeafNoFP);
  effect(USE meth);

  format %{ "call_leaf_nofp,runtime " %}
  ins_cost(300);
  ins_encode(clear_avx, Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
17351 
// Return Instruction
// Remove the return address & jump to it.
// NOTE(review): an earlier comment here stated that a nop is always emitted
// after the ret to leave room for safepoint patching; the encoding below
// emits only ret(0). Confirm whether that padding is still needed/provided.
instruct Ret()
%{
  match(Return);

  format %{ "ret" %}
  ins_encode %{
    const int stack_bytes_to_pop = 0;
    __ ret(stack_bytes_to_pop);
  %}
  ins_pipe(pipe_jmp);
%}
17366 
// Tail Call; Jump from runtime stub to Java code.
// Also known as an 'interprocedural jump'.
// The jump target will eventually return to the caller, so the return address
// is left in place (the TailJump rule is the one that removes it).
// rbp must not be used for 'jump_target': a MachEpilogNode has already been
// emitted just above the TailCall and has reset rbp to the caller state.
instruct TailCalljmpInd(no_rbp_RegP jump_target, rbx_RegP method_ptr)
%{
  match(TailCall jump_target method_ptr);

  format %{ "jmp     $jump_target\t# rbx holds method" %}
  ins_cost(300);
  ins_encode %{
    Register target = $jump_target$$Register;
    __ jmp(target);
  %}
  ins_pipe(pipe_jmp);
%}
17384 
// Tail Jump; remove the return address; jump to target.
// The return address is popped into rdx before the indirect jump.
// (The TailCall rule, by contrast, leaves the return address around.)
instruct tailjmpInd(no_rbp_RegP jump_target, rax_RegP ex_oop)
%{
  match(TailJump jump_target ex_oop);

  format %{ "popq    rdx\t# pop return address\n\t"
            "jmp     $jump_target" %}
  ins_cost(300);
  ins_encode %{
    Register target = $jump_target$$Register;
    __ popq(as_Register(RDX_enc));
    __ jmp(target);
  %}
  ins_pipe(pipe_jmp);
%}
17400 
// Forward exception: jump to the StubRoutines forward-exception entry.
instruct ForwardExceptionjmp()
%{
  match(ForwardException);

  format %{ "jmp     forward_exception_stub" %}
  ins_encode %{
    address stub_entry = StubRoutines::forward_exception_entry();
    __ jump(RuntimeAddress(stub_entry), noreg);
  %}
  ins_pipe(pipe_jmp);
%}
17412 
// Create exception oop: created by stack-crawling runtime code.
// By the time control reaches this handler the exception oop has already
// been set up (in rax), so the rule has size 0 and an empty encoding.
instruct CreateException(rax_RegP ex_oop)
%{
  match(Set ex_oop (CreateEx));

  // use the following format syntax
  format %{ "# exception oop is in rax; no code emitted" %}
  size(0);
  ins_encode();
  ins_pipe(empty);
%}
17426 
// Rethrow exception:
// The exception oop will come in the first argument position.
// Control is transferred with a JUMP (not a call) to the rethrow stub.
instruct RethrowException()
%{
  match(Rethrow);

  // use the following format syntax
  format %{ "jmp     rethrow_stub" %}
  ins_encode %{
    address stub_entry = OptoRuntime::rethrow_stub();
    __ jump(RuntimeAddress(stub_entry), noreg);
  %}
  ins_pipe(pipe_jmp);
%}
17441 
17442 // ============================================================================
// This name is KNOWN by the ADLC and cannot be changed.
// The ADLC forces a 'TypeRawPtr::BOTTOM' output type for this rule.
// The thread-local pointer already lives in r15, so nothing is emitted
// (size 0, empty encoding).
instruct tlsLoadP(r15_RegP dst) %{
  match(Set dst (ThreadLocal));
  effect(DEF dst);

  format %{ "# TLS is in R15" %}
  size(0);
  ins_encode( /*empty encoding*/ );
  ins_pipe(ialu_reg_reg);
%}
17455 
// Float add, two-operand form used when UseAVX == 0: dst <- dst + src.
instruct addF_reg(regF dst, regF src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddF dst src));

  ins_cost(150);
  format %{ "addss   $dst, $src" %}
  ins_encode %{
    XMMRegister acc = $dst$$XMMRegister;
    XMMRegister rhs = $src$$XMMRegister;
    __ addss(acc, rhs);
  %}
  ins_pipe(pipe_slow);
%}
17467 
// Float add with the second input folded from memory (LoadF), UseAVX == 0.
instruct addF_mem(regF dst, memory src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddF dst (LoadF src)));

  ins_cost(150);
  format %{ "addss   $dst, $src" %}
  ins_encode %{
    XMMRegister acc = $dst$$XMMRegister;
    __ addss(acc, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17479 
// Float add of a constant taken from the constant table, UseAVX == 0.
instruct addF_imm(regF dst, immF con) %{
  predicate(UseAVX == 0);
  match(Set dst (AddF dst con));

  ins_cost(150);
  format %{ "addss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    XMMRegister acc = $dst$$XMMRegister;
    __ addss(acc, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17490 
// Float add, three-operand form used when UseAVX > 0: dst <- src1 + src2.
instruct addF_reg_reg(regF dst, regF src1, regF src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddF src1 src2));

  ins_cost(150);
  format %{ "vaddss  $dst, $src1, $src2" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs    = $src1$$XMMRegister;
    XMMRegister rhs    = $src2$$XMMRegister;
    __ vaddss(result, lhs, rhs);
  %}
  ins_pipe(pipe_slow);
%}
17502 
// Float add (UseAVX > 0) with the second input folded from memory (LoadF).
instruct addF_reg_mem(regF dst, regF src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddF src1 (LoadF src2)));

  ins_cost(150);
  format %{ "vaddss  $dst, $src1, $src2" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs    = $src1$$XMMRegister;
    __ vaddss(result, lhs, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17514 
// Float add (UseAVX > 0) of a constant taken from the constant table.
instruct addF_reg_imm(regF dst, regF src, immF con) %{
  predicate(UseAVX > 0);
  match(Set dst (AddF src con));

  ins_cost(150);
  format %{ "vaddss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs    = $src$$XMMRegister;
    __ vaddss(result, lhs, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17526 
// Double add, two-operand form used when UseAVX == 0: dst <- dst + src.
instruct addD_reg(regD dst, regD src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddD dst src));

  ins_cost(150);
  format %{ "addsd   $dst, $src" %}
  ins_encode %{
    XMMRegister acc = $dst$$XMMRegister;
    XMMRegister rhs = $src$$XMMRegister;
    __ addsd(acc, rhs);
  %}
  ins_pipe(pipe_slow);
%}
17538 
// Double add with the second input folded from memory (LoadD), UseAVX == 0.
instruct addD_mem(regD dst, memory src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddD dst (LoadD src)));

  ins_cost(150);
  format %{ "addsd   $dst, $src" %}
  ins_encode %{
    XMMRegister acc = $dst$$XMMRegister;
    __ addsd(acc, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17550 
// Double add of a constant taken from the constant table, UseAVX == 0.
instruct addD_imm(regD dst, immD con) %{
  predicate(UseAVX == 0);
  match(Set dst (AddD dst con));

  ins_cost(150);
  format %{ "addsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    XMMRegister acc = $dst$$XMMRegister;
    __ addsd(acc, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17561 
// Double add, three-operand form used when UseAVX > 0: dst <- src1 + src2.
instruct addD_reg_reg(regD dst, regD src1, regD src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddD src1 src2));

  ins_cost(150);
  format %{ "vaddsd  $dst, $src1, $src2" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs    = $src1$$XMMRegister;
    XMMRegister rhs    = $src2$$XMMRegister;
    __ vaddsd(result, lhs, rhs);
  %}
  ins_pipe(pipe_slow);
%}
17573 
// Double add (UseAVX > 0) with the second input folded from memory (LoadD).
instruct addD_reg_mem(regD dst, regD src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddD src1 (LoadD src2)));

  ins_cost(150);
  format %{ "vaddsd  $dst, $src1, $src2" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs    = $src1$$XMMRegister;
    __ vaddsd(result, lhs, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17585 
// Double add (UseAVX > 0) of a constant taken from the constant table.
instruct addD_reg_imm(regD dst, regD src, immD con) %{
  predicate(UseAVX > 0);
  match(Set dst (AddD src con));

  ins_cost(150);
  format %{ "vaddsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs    = $src$$XMMRegister;
    __ vaddsd(result, lhs, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17597 
// Float subtract, two-operand form used when UseAVX == 0: dst <- dst - src.
instruct subF_reg(regF dst, regF src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubF dst src));

  ins_cost(150);
  format %{ "subss   $dst, $src" %}
  ins_encode %{
    XMMRegister acc = $dst$$XMMRegister;
    XMMRegister rhs = $src$$XMMRegister;
    __ subss(acc, rhs);
  %}
  ins_pipe(pipe_slow);
%}
17609 
// Float subtract with the subtrahend folded from memory (LoadF), UseAVX == 0.
instruct subF_mem(regF dst, memory src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubF dst (LoadF src)));

  ins_cost(150);
  format %{ "subss   $dst, $src" %}
  ins_encode %{
    XMMRegister acc = $dst$$XMMRegister;
    __ subss(acc, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17621 
// Float subtract of a constant taken from the constant table, UseAVX == 0.
instruct subF_imm(regF dst, immF con) %{
  predicate(UseAVX == 0);
  match(Set dst (SubF dst con));

  ins_cost(150);
  format %{ "subss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    XMMRegister acc = $dst$$XMMRegister;
    __ subss(acc, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17632 
// Float subtract, three-operand form used when UseAVX > 0: dst <- src1 - src2.
instruct subF_reg_reg(regF dst, regF src1, regF src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubF src1 src2));

  ins_cost(150);
  format %{ "vsubss  $dst, $src1, $src2" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs    = $src1$$XMMRegister;
    XMMRegister rhs    = $src2$$XMMRegister;
    __ vsubss(result, lhs, rhs);
  %}
  ins_pipe(pipe_slow);
%}
17644 
// Float subtract (UseAVX > 0) with the subtrahend folded from memory (LoadF).
instruct subF_reg_mem(regF dst, regF src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubF src1 (LoadF src2)));

  ins_cost(150);
  format %{ "vsubss  $dst, $src1, $src2" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs    = $src1$$XMMRegister;
    __ vsubss(result, lhs, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17656 
// Float subtract (UseAVX > 0) of a constant taken from the constant table.
instruct subF_reg_imm(regF dst, regF src, immF con) %{
  predicate(UseAVX > 0);
  match(Set dst (SubF src con));

  ins_cost(150);
  format %{ "vsubss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs    = $src$$XMMRegister;
    __ vsubss(result, lhs, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17668 
// Double subtract, two-operand form used when UseAVX == 0: dst <- dst - src.
instruct subD_reg(regD dst, regD src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubD dst src));

  ins_cost(150);
  format %{ "subsd   $dst, $src" %}
  ins_encode %{
    XMMRegister acc = $dst$$XMMRegister;
    XMMRegister rhs = $src$$XMMRegister;
    __ subsd(acc, rhs);
  %}
  ins_pipe(pipe_slow);
%}
17680 
// Double subtract with the subtrahend folded from memory (LoadD), UseAVX == 0.
instruct subD_mem(regD dst, memory src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubD dst (LoadD src)));

  ins_cost(150);
  format %{ "subsd   $dst, $src" %}
  ins_encode %{
    XMMRegister acc = $dst$$XMMRegister;
    __ subsd(acc, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17692 
// Double subtract of a constant taken from the constant table, UseAVX == 0.
instruct subD_imm(regD dst, immD con) %{
  predicate(UseAVX == 0);
  match(Set dst (SubD dst con));

  ins_cost(150);
  format %{ "subsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    XMMRegister acc = $dst$$XMMRegister;
    __ subsd(acc, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17703 
// Double subtract, three-operand form used when UseAVX > 0: dst <- src1 - src2.
instruct subD_reg_reg(regD dst, regD src1, regD src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubD src1 src2));

  ins_cost(150);
  format %{ "vsubsd  $dst, $src1, $src2" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs    = $src1$$XMMRegister;
    XMMRegister rhs    = $src2$$XMMRegister;
    __ vsubsd(result, lhs, rhs);
  %}
  ins_pipe(pipe_slow);
%}
17715 
// Double subtract (UseAVX > 0) with the subtrahend folded from memory (LoadD).
instruct subD_reg_mem(regD dst, regD src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubD src1 (LoadD src2)));

  ins_cost(150);
  format %{ "vsubsd  $dst, $src1, $src2" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs    = $src1$$XMMRegister;
    __ vsubsd(result, lhs, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17727 
// Double subtract (UseAVX > 0) of a constant taken from the constant table.
instruct subD_reg_imm(regD dst, regD src, immD con) %{
  predicate(UseAVX > 0);
  match(Set dst (SubD src con));

  ins_cost(150);
  format %{ "vsubsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs    = $src$$XMMRegister;
    __ vsubsd(result, lhs, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17739 
// Float multiply, two-operand form used when UseAVX == 0: dst <- dst * src.
instruct mulF_reg(regF dst, regF src) %{
  predicate(UseAVX == 0);
  match(Set dst (MulF dst src));

  ins_cost(150);
  format %{ "mulss   $dst, $src" %}
  ins_encode %{
    XMMRegister acc = $dst$$XMMRegister;
    XMMRegister rhs = $src$$XMMRegister;
    __ mulss(acc, rhs);
  %}
  ins_pipe(pipe_slow);
%}
17751 
// Float multiply with the second input folded from memory (LoadF), UseAVX == 0.
instruct mulF_mem(regF dst, memory src) %{
  predicate(UseAVX == 0);
  match(Set dst (MulF dst (LoadF src)));

  ins_cost(150);
  format %{ "mulss   $dst, $src" %}
  ins_encode %{
    XMMRegister acc = $dst$$XMMRegister;
    __ mulss(acc, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17763 
// Float multiply by a constant taken from the constant table, UseAVX == 0.
instruct mulF_imm(regF dst, immF con) %{
  predicate(UseAVX == 0);
  match(Set dst (MulF dst con));

  ins_cost(150);
  format %{ "mulss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    XMMRegister acc = $dst$$XMMRegister;
    __ mulss(acc, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17774 
// Float multiply, three-operand form used when UseAVX > 0: dst <- src1 * src2.
instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulF src1 src2));

  ins_cost(150);
  format %{ "vmulss  $dst, $src1, $src2" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs    = $src1$$XMMRegister;
    XMMRegister rhs    = $src2$$XMMRegister;
    __ vmulss(result, lhs, rhs);
  %}
  ins_pipe(pipe_slow);
%}
17786 
// Float multiply (UseAVX > 0) with the second input folded from memory (LoadF).
instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulF src1 (LoadF src2)));

  ins_cost(150);
  format %{ "vmulss  $dst, $src1, $src2" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs    = $src1$$XMMRegister;
    __ vmulss(result, lhs, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17798 
// Float multiply (UseAVX > 0) by a constant taken from the constant table.
instruct mulF_reg_imm(regF dst, regF src, immF con) %{
  predicate(UseAVX > 0);
  match(Set dst (MulF src con));

  ins_cost(150);
  format %{ "vmulss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs    = $src$$XMMRegister;
    __ vmulss(result, lhs, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17810 
// Double multiply, two-operand form used when UseAVX == 0: dst <- dst * src.
instruct mulD_reg(regD dst, regD src) %{
  predicate(UseAVX == 0);
  match(Set dst (MulD dst src));

  ins_cost(150);
  format %{ "mulsd   $dst, $src" %}
  ins_encode %{
    XMMRegister acc = $dst$$XMMRegister;
    XMMRegister rhs = $src$$XMMRegister;
    __ mulsd(acc, rhs);
  %}
  ins_pipe(pipe_slow);
%}
17822 
// Double multiply with the second input folded from memory (LoadD), UseAVX == 0.
instruct mulD_mem(regD dst, memory src) %{
  predicate(UseAVX == 0);
  match(Set dst (MulD dst (LoadD src)));

  ins_cost(150);
  format %{ "mulsd   $dst, $src" %}
  ins_encode %{
    XMMRegister acc = $dst$$XMMRegister;
    __ mulsd(acc, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17834 
// Double multiply by a constant taken from the constant table, UseAVX == 0.
instruct mulD_imm(regD dst, immD con) %{
  predicate(UseAVX == 0);
  match(Set dst (MulD dst con));

  ins_cost(150);
  format %{ "mulsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    XMMRegister acc = $dst$$XMMRegister;
    __ mulsd(acc, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17845 
// Double multiply, three-operand form used when UseAVX > 0: dst <- src1 * src2.
instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulD src1 src2));

  ins_cost(150);
  format %{ "vmulsd  $dst, $src1, $src2" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs    = $src1$$XMMRegister;
    XMMRegister rhs    = $src2$$XMMRegister;
    __ vmulsd(result, lhs, rhs);
  %}
  ins_pipe(pipe_slow);
%}
17857 
// Double multiply (UseAVX > 0) with the second input folded from memory (LoadD).
instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulD src1 (LoadD src2)));

  ins_cost(150);
  format %{ "vmulsd  $dst, $src1, $src2" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs    = $src1$$XMMRegister;
    __ vmulsd(result, lhs, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17869 
// Double multiply (UseAVX > 0) by a constant taken from the constant table.
instruct mulD_reg_imm(regD dst, regD src, immD con) %{
  predicate(UseAVX > 0);
  match(Set dst (MulD src con));

  ins_cost(150);
  format %{ "vmulsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister lhs    = $src$$XMMRegister;
    __ vmulsd(result, lhs, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17881 
// Float divide, two-operand form used when UseAVX == 0: dst <- dst / src.
instruct divF_reg(regF dst, regF src) %{
  predicate(UseAVX == 0);
  match(Set dst (DivF dst src));

  ins_cost(150);
  format %{ "divss   $dst, $src" %}
  ins_encode %{
    XMMRegister acc     = $dst$$XMMRegister;
    XMMRegister divisor = $src$$XMMRegister;
    __ divss(acc, divisor);
  %}
  ins_pipe(pipe_slow);
%}
17893 
// Float divide with the divisor folded from memory (LoadF), UseAVX == 0.
instruct divF_mem(regF dst, memory src) %{
  predicate(UseAVX == 0);
  match(Set dst (DivF dst (LoadF src)));

  ins_cost(150);
  format %{ "divss   $dst, $src" %}
  ins_encode %{
    XMMRegister acc = $dst$$XMMRegister;
    __ divss(acc, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17905 
// Float divide by a constant taken from the constant table, UseAVX == 0.
instruct divF_imm(regF dst, immF con) %{
  predicate(UseAVX == 0);
  match(Set dst (DivF dst con));

  ins_cost(150);
  format %{ "divss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    XMMRegister acc = $dst$$XMMRegister;
    __ divss(acc, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17916 
// Float divide, three-operand form used when UseAVX > 0: dst <- src1 / src2.
instruct divF_reg_reg(regF dst, regF src1, regF src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivF src1 src2));

  ins_cost(150);
  format %{ "vdivss  $dst, $src1, $src2" %}
  ins_encode %{
    XMMRegister result   = $dst$$XMMRegister;
    XMMRegister dividend = $src1$$XMMRegister;
    XMMRegister divisor  = $src2$$XMMRegister;
    __ vdivss(result, dividend, divisor);
  %}
  ins_pipe(pipe_slow);
%}
17928 
// Float divide (UseAVX > 0) with the divisor folded from memory (LoadF).
instruct divF_reg_mem(regF dst, regF src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivF src1 (LoadF src2)));

  ins_cost(150);
  format %{ "vdivss  $dst, $src1, $src2" %}
  ins_encode %{
    XMMRegister result   = $dst$$XMMRegister;
    XMMRegister dividend = $src1$$XMMRegister;
    __ vdivss(result, dividend, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17940 
// Float divide (UseAVX > 0) by a constant taken from the constant table.
instruct divF_reg_imm(regF dst, regF src, immF con) %{
  predicate(UseAVX > 0);
  match(Set dst (DivF src con));

  ins_cost(150);
  format %{ "vdivss  $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    XMMRegister result   = $dst$$XMMRegister;
    XMMRegister dividend = $src$$XMMRegister;
    __ vdivss(result, dividend, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17952 
// Double divide, two-operand form used when UseAVX == 0: dst <- dst / src.
instruct divD_reg(regD dst, regD src) %{
  predicate(UseAVX == 0);
  match(Set dst (DivD dst src));

  ins_cost(150);
  format %{ "divsd   $dst, $src" %}
  ins_encode %{
    XMMRegister acc     = $dst$$XMMRegister;
    XMMRegister divisor = $src$$XMMRegister;
    __ divsd(acc, divisor);
  %}
  ins_pipe(pipe_slow);
%}
17964 
// Double divide with the divisor folded from memory (LoadD), UseAVX == 0.
instruct divD_mem(regD dst, memory src) %{
  predicate(UseAVX == 0);
  match(Set dst (DivD dst (LoadD src)));

  ins_cost(150);
  format %{ "divsd   $dst, $src" %}
  ins_encode %{
    XMMRegister acc = $dst$$XMMRegister;
    __ divsd(acc, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}
17976 
// Double divide by a constant taken from the constant table, UseAVX == 0.
instruct divD_imm(regD dst, immD con) %{
  predicate(UseAVX == 0);
  match(Set dst (DivD dst con));

  ins_cost(150);
  format %{ "divsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    XMMRegister acc = $dst$$XMMRegister;
    __ divsd(acc, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
17987 
// Double divide, three-operand form used when UseAVX > 0: dst <- src1 / src2.
instruct divD_reg_reg(regD dst, regD src1, regD src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivD src1 src2));

  ins_cost(150);
  format %{ "vdivsd  $dst, $src1, $src2" %}
  ins_encode %{
    XMMRegister result   = $dst$$XMMRegister;
    XMMRegister dividend = $src1$$XMMRegister;
    XMMRegister divisor  = $src2$$XMMRegister;
    __ vdivsd(result, dividend, divisor);
  %}
  ins_pipe(pipe_slow);
%}
17999 
// Double divide (UseAVX > 0) with the divisor folded from memory (LoadD).
instruct divD_reg_mem(regD dst, regD src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivD src1 (LoadD src2)));

  ins_cost(150);
  format %{ "vdivsd  $dst, $src1, $src2" %}
  ins_encode %{
    XMMRegister result   = $dst$$XMMRegister;
    XMMRegister dividend = $src1$$XMMRegister;
    __ vdivsd(result, dividend, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
18011 
// Double divide (UseAVX > 0) by a constant taken from the constant table.
instruct divD_reg_imm(regD dst, regD src, immD con) %{
  predicate(UseAVX > 0);
  match(Set dst (DivD src con));

  ins_cost(150);
  format %{ "vdivsd  $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    XMMRegister result   = $dst$$XMMRegister;
    XMMRegister dividend = $src$$XMMRegister;
    __ vdivsd(result, dividend, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
18023 
// Float absolute value in place (UseAVX == 0): AND dst with the float
// sign mask.
instruct absF_reg(regF dst) %{
  predicate(UseAVX == 0);
  match(Set dst (AbsF dst));

  format %{ "andps   $dst, [0x7fffffff]\t# abs float by sign masking" %}
  ins_cost(150);
  ins_encode %{
    XMMRegister value = $dst$$XMMRegister;
    __ andps(value, ExternalAddress(float_signmask()));
  %}
  ins_pipe(pipe_slow);
%}
18034 
// Float absolute value (UseAVX > 0): dst <- src AND float sign mask, using
// the 128-bit vector length encoding.
instruct absF_reg_reg(vlRegF dst, vlRegF src) %{
  predicate(UseAVX > 0);
  match(Set dst (AbsF src));

  format %{ "vandps  $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
  ins_cost(150);
  ins_encode %{
    const int vec_enc = Assembler::AVX_128bit;
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister input  = $src$$XMMRegister;
    __ vandps(result, input, ExternalAddress(float_signmask()), vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
18047 
// Double absolute value in place (UseAVX == 0): AND dst with the double
// sign mask.
instruct absD_reg(regD dst) %{
  predicate(UseAVX == 0);
  match(Set dst (AbsD dst));

  format %{ "andpd   $dst, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_cost(150);
  ins_encode %{
    XMMRegister value = $dst$$XMMRegister;
    __ andpd(value, ExternalAddress(double_signmask()));
  %}
  ins_pipe(pipe_slow);
%}
18059 
// Double absolute value (UseAVX > 0): dst <- src AND double sign mask, using
// the 128-bit vector length encoding.
instruct absD_reg_reg(vlRegD dst, vlRegD src) %{
  predicate(UseAVX > 0);
  match(Set dst (AbsD src));

  format %{ "vandpd  $dst, $src, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_cost(150);
  ins_encode %{
    const int vec_enc = Assembler::AVX_128bit;
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister input  = $src$$XMMRegister;
    __ vandpd(result, input, ExternalAddress(double_signmask()), vec_enc);
  %}
  ins_pipe(pipe_slow);
%}
18073 
// Float negate in place (UseAVX == 0): XOR dst with the float sign-flip
// constant.
instruct negF_reg(regF dst) %{
  predicate(UseAVX == 0);
  match(Set dst (NegF dst));

  format %{ "xorps   $dst, [0x80000000]\t# neg float by sign flipping" %}
  ins_cost(150);
  ins_encode %{
    XMMRegister value = $dst$$XMMRegister;
    __ xorps(value, ExternalAddress(float_signflip()));
  %}
  ins_pipe(pipe_slow);
%}
18084 
// Float negate (UseAVX > 0): delegates to vnegatess with the float sign-flip
// constant.
instruct negF_reg_reg(vlRegF dst, vlRegF src) %{
  predicate(UseAVX > 0);
  match(Set dst (NegF src));

  format %{ "vnegatess  $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
  ins_cost(150);
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister input  = $src$$XMMRegister;
    __ vnegatess(result, input, ExternalAddress(float_signflip()));
  %}
  ins_pipe(pipe_slow);
%}
18096 
// Double negate in place (UseAVX == 0): XOR dst with the double sign-flip
// constant.
instruct negD_reg(regD dst) %{
  predicate(UseAVX == 0);
  match(Set dst (NegD dst));

  format %{ "xorpd   $dst, [0x8000000000000000]\t"
            "# neg double by sign flipping" %}
  ins_cost(150);
  ins_encode %{
    XMMRegister value = $dst$$XMMRegister;
    __ xorpd(value, ExternalAddress(double_signflip()));
  %}
  ins_pipe(pipe_slow);
%}
18108 
// Double negate (UseAVX > 0): delegates to vnegatesd with the double
// sign-flip constant.
instruct negD_reg_reg(vlRegD dst, vlRegD src) %{
  predicate(UseAVX > 0);
  match(Set dst (NegD src));

  format %{ "vnegatesd  $dst, $src, [0x8000000000000000]\t"
            "# neg double by sign flipping" %}
  ins_cost(150);
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister input  = $src$$XMMRegister;
    __ vnegatesd(result, input, ExternalAddress(double_signflip()));
  %}
  ins_pipe(pipe_slow);
%}
18121 
// Float square root, in place.
// The sqrtss instruction performs best when its destination register is
// already initialized, so only the rule whose input is pre-loaded into dst
// is defined.
instruct sqrtF_reg(regF dst) %{
  match(Set dst (SqrtF dst));

  format %{ "sqrtss  $dst, $dst" %}
  ins_encode %{
    XMMRegister value = $dst$$XMMRegister;
    __ sqrtss(value, value);
  %}
  ins_pipe(pipe_slow);
%}
18132 
// Double square root, in place.
// The sqrtsd instruction performs best when its destination register is
// already initialized, so only the rule whose input is pre-loaded into dst
// is defined.
instruct sqrtD_reg(regD dst) %{
  match(Set dst (SqrtD dst));

  format %{ "sqrtsd  $dst, $dst" %}
  ins_encode %{
    XMMRegister value = $dst$$XMMRegister;
    __ sqrtsd(value, value);
  %}
  ins_pipe(pipe_slow);
%}
18143 
// Float -> half-float conversion into an integer register; an XMM temporary
// is handed to the flt_to_flt16 helper.
instruct convF2HF_reg_reg(rRegI dst, vlRegF src, vlRegF tmp) %{
  match(Set dst (ConvF2HF src));
  effect(TEMP tmp);

  ins_cost(125);
  format %{ "vcvtps2ph $dst,$src \t using $tmp as TEMP" %}
  ins_encode %{
    Register    result  = $dst$$Register;
    XMMRegister input   = $src$$XMMRegister;
    XMMRegister scratch = $tmp$$XMMRegister;
    __ flt_to_flt16(result, input, scratch);
  %}
  ins_pipe( pipe_slow );
%}
18154 
// Fused float to half-float conversion and 16-bit store (StoreC of ConvF2HF),
// available with UseAVX > 2 and AVX512VL. The converted value is written
// straight to memory by a masked evcvtps2ph.
instruct convF2HF_mem_reg(memory mem, regF src, kReg ktmp, rRegI rtmp) %{
  predicate((UseAVX > 2) && VM_Version::supports_avx512vl());
  match(Set mem (StoreC mem (ConvF2HF src)));
  effect(TEMP ktmp, TEMP rtmp);
  format %{ "evcvtps2ph $mem,$src \t using $ktmp and $rtmp as TEMP" %}
  ins_encode %{
    Register  one_gpr   = $rtmp$$Register;
    KRegister lane_mask = $ktmp$$KRegister;
    // Build an opmask whose only set bit is bit 0.
    __ movl(one_gpr, 0x1);
    __ kmovwl(lane_mask, one_gpr);
    // NOTE(review): presumably the single-bit mask limits the store to the
    // first half-float element - confirm against evcvtps2ph masking rules.
    __ evcvtps2ph($mem$$Address, lane_mask, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
  %}
  ins_pipe(pipe_slow);
%}
18167 
// Vector float to half-float cast, register to register.
// The vector length encoding is taken from the source operand.
instruct vconvF2HF(vec dst, vec src) %{
  match(Set dst (VectorCastF2HF src));
  format %{ "vector_conv_F2HF $dst $src" %}
  ins_encode %{
    int src_enc = vector_length_encoding(this, $src);
    XMMRegister halves = $dst$$XMMRegister;
    XMMRegister floats = $src$$XMMRegister;
    __ vcvtps2ph(halves, floats, 0x04, src_enc);
  %}
  ins_pipe(pipe_slow);
%}
18177 
// Vector float to half-float cast fused with the vector store.
// Only used when the StoreVector writes at least 16 bytes.
instruct vconvF2HF_mem_reg(memory mem, vec src) %{
  predicate(n->as_StoreVector()->memory_size() >= 16);
  match(Set mem (StoreVector mem (VectorCastF2HF src)));
  format %{ "vcvtps2ph $mem,$src" %}
  ins_encode %{
    // Length encoding comes from the float source vector.
    int src_enc = vector_length_encoding(this, $src);
    XMMRegister floats = $src$$XMMRegister;
    __ vcvtps2ph($mem$$Address, floats, 0x04, src_enc);
  %}
  ins_pipe(pipe_slow);
%}
18188 
// Scalar half-float (held in a general register) to float conversion,
// delegated to the flt16_to_flt macro.
instruct convHF2F_reg_reg(vlRegF dst, rRegI src) %{
  match(Set dst (ConvHF2F src));
  format %{ "vcvtph2ps $dst,$src" %}
  ins_encode %{
    XMMRegister float_out = $dst$$XMMRegister;
    Register    half_in   = $src$$Register;
    __ flt16_to_flt(float_out, half_in);
  %}
  ins_pipe(pipe_slow);
%}
18197 
// Vector half-float to float cast with the vector load folded into vcvtph2ps.
instruct vconvHF2F_reg_mem(vec dst, memory mem) %{
  match(Set dst (VectorCastHF2F (LoadVector mem)));
  format %{ "vcvtph2ps $dst,$mem" %}
  ins_encode %{
    // Length encoding is that of the float result vector.
    int dst_enc = vector_length_encoding(this);
    XMMRegister floats = $dst$$XMMRegister;
    __ vcvtph2ps(floats, $mem$$Address, dst_enc);
  %}
  ins_pipe(pipe_slow);
%}
18207 
// Vector half-float to float cast, register to register.
instruct vconvHF2F(vec dst, vec src) %{
  match(Set dst (VectorCastHF2F src));
  format %{ "vector_conv_HF2F $dst,$src" %}
  ins_cost(125);
  ins_encode %{
    // Length encoding is that of the float result vector.
    int dst_enc = vector_length_encoding(this);
    XMMRegister floats = $dst$$XMMRegister;
    XMMRegister halves = $src$$XMMRegister;
    __ vcvtph2ps(floats, halves, dst_enc);
  %}
  ins_pipe(pipe_slow);
%}
18218 
18219 // ---------------------------------------- VectorReinterpret ------------------------------------
// Reinterpret of a predicate (opmask) vector whose lane count is unchanged.
// The match rule uses dst as both input and result, and the encoding is
// empty, so this rule emits no instructions.
instruct reinterpret_mask(kReg dst) %{
  predicate(n->bottom_type()->isa_pvectmask() &&
            Matcher::vector_length(n) == Matcher::vector_length(n->in(1))); // dst == src
  match(Set dst (VectorReinterpret dst));
  ins_cost(125);
  format %{ "vector_reinterpret $dst\t!" %}
  ins_encode %{
    // empty
  %}
  ins_pipe( pipe_slow );
%}
18231 
// Opmask reinterpret from SHORT lanes to BYTE lanes (UseAVX > 2).
// The lane counts differ but the vector size in bytes is the same (asserted
// below). The source mask is expanded into a word vector in xtmp and then
// re-extracted as a byte mask.
instruct reinterpret_mask_W2B(kReg dst, kReg src, vec xtmp) %{
  predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
            n->bottom_type()->isa_pvectmask() &&
            n->in(1)->bottom_type()->isa_pvectmask() &&
            n->in(1)->bottom_type()->is_pvectmask()->element_basic_type() == T_SHORT &&
            n->bottom_type()->is_pvectmask()->element_basic_type() == T_BYTE); // lane counts differ
  match(Set dst (VectorReinterpret src));
  effect(TEMP xtmp);
  format %{ "vector_mask_reinterpret_W2B $dst $src\t!" %}
  ins_encode %{
    int src_sz = Matcher::vector_length(this, $src) * type2aelembytes(T_SHORT);
    int dst_sz = Matcher::vector_length(this) * type2aelembytes(T_BYTE);
    assert(src_sz == dst_sz, "src and dst size mismatch");
    int size_enc = vector_length_encoding(src_sz);
    XMMRegister lanes = $xtmp$$XMMRegister;
    // Mask to word vector, then byte vector to mask.
    __ evpmovm2w(lanes, $src$$KRegister, size_enc);
    __ evpmovb2m($dst$$KRegister, lanes, size_enc);
  %}
  ins_pipe(pipe_slow);
%}
18251 
// Opmask reinterpret from INT/FLOAT lanes to BYTE lanes (UseAVX > 2).
// The lane counts differ but the vector size in bytes is the same (asserted
// below). The source mask is expanded into a doubleword vector in xtmp and
// then re-extracted as a byte mask.
instruct reinterpret_mask_D2B(kReg dst, kReg src, vec xtmp) %{
  predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
            n->bottom_type()->isa_pvectmask() &&
            n->in(1)->bottom_type()->isa_pvectmask() &&
            (n->in(1)->bottom_type()->is_pvectmask()->element_basic_type() == T_INT ||
             n->in(1)->bottom_type()->is_pvectmask()->element_basic_type() == T_FLOAT) &&
            n->bottom_type()->is_pvectmask()->element_basic_type() == T_BYTE); // lane counts differ
  match(Set dst (VectorReinterpret src));
  effect(TEMP xtmp);
  format %{ "vector_mask_reinterpret_D2B $dst $src\t!" %}
  ins_encode %{
    int src_sz = Matcher::vector_length(this, $src) * type2aelembytes(T_INT);
    int dst_sz = Matcher::vector_length(this) * type2aelembytes(T_BYTE);
    assert(src_sz == dst_sz, "src and dst size mismatch");
    int size_enc = vector_length_encoding(src_sz);
    XMMRegister lanes = $xtmp$$XMMRegister;
    // Mask to doubleword vector, then byte vector to mask.
    __ evpmovm2d(lanes, $src$$KRegister, size_enc);
    __ evpmovb2m($dst$$KRegister, lanes, size_enc);
  %}
  ins_pipe(pipe_slow);
%}
18272 
// Opmask reinterpret from LONG/DOUBLE lanes to BYTE lanes (UseAVX > 2).
// The lane counts differ but the vector size in bytes is the same (asserted
// below). The source mask is expanded into a quadword vector in xtmp and
// then re-extracted as a byte mask.
instruct reinterpret_mask_Q2B(kReg dst, kReg src, vec xtmp) %{
  predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
            n->bottom_type()->isa_pvectmask() &&
            n->in(1)->bottom_type()->isa_pvectmask() &&
            (n->in(1)->bottom_type()->is_pvectmask()->element_basic_type() == T_LONG ||
             n->in(1)->bottom_type()->is_pvectmask()->element_basic_type() == T_DOUBLE) &&
            n->bottom_type()->is_pvectmask()->element_basic_type() == T_BYTE); // lane counts differ
  match(Set dst (VectorReinterpret src));
  effect(TEMP xtmp);
  format %{ "vector_mask_reinterpret_Q2B $dst $src\t!" %}
  ins_encode %{
    int src_sz = Matcher::vector_length(this, $src) * type2aelembytes(T_LONG);
    int dst_sz = Matcher::vector_length(this) * type2aelembytes(T_BYTE);
    assert(src_sz == dst_sz, "src and dst size mismatch");
    int size_enc = vector_length_encoding(src_sz);
    XMMRegister lanes = $xtmp$$XMMRegister;
    // Mask to quadword vector, then byte vector to mask.
    __ evpmovm2q(lanes, $src$$KRegister, size_enc);
    __ evpmovb2m($dst$$KRegister, lanes, size_enc);
  %}
  ins_pipe(pipe_slow);
%}
18293 
// Reinterpret of a non-mask vector whose size in bytes is unchanged.
// The match rule uses dst as both input and result, and the encoding is
// empty, so this rule emits no instructions.
instruct reinterpret(vec dst) %{
  predicate(!n->bottom_type()->isa_pvectmask() &&
            Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1))); // dst == src
  match(Set dst (VectorReinterpret dst));
  ins_cost(125);
  format %{ "vector_reinterpret $dst\t!" %}
  ins_encode %{
    // empty
  %}
  ins_pipe( pipe_slow );
%}
18305 
// Widening reinterpret for the non-AVX case (UseAVX == 0): a 4- or 8-byte
// source becomes a vector of at most 16 bytes. dst is first loaded with a
// 32-bit or 64-bit mask constant and then ANDed with src, so only the low
// source bytes are kept.
instruct reinterpret_expand(vec dst, vec src) %{
  predicate(UseAVX == 0 &&
            (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
  match(Set dst (VectorReinterpret src));
  effect(TEMP dst);
  format %{ "vector_reinterpret_expand $dst,$src" %}
  ins_cost(125);
  ins_encode %{
    assert(Matcher::vector_length_in_bytes(this) <= 16, "required");
    assert(Matcher::vector_length_in_bytes(this, $src) <= 8, "required");

    int src_vlen_in_bytes = Matcher::vector_length_in_bytes(this, $src);
    XMMRegister widened = $dst$$XMMRegister;
    if (src_vlen_in_bytes != 4) {
      assert(src_vlen_in_bytes == 8, "");
      __ movdqu(widened, ExternalAddress(vector_64_bit_mask()), noreg);
    } else {
      __ movdqu(widened, ExternalAddress(vector_32_bit_mask()), noreg);
    }
    __ pand(widened, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
18328 
// Widening reinterpret of a 4-byte non-mask vector when UseAVX > 0.
// A single vpand with the 32-bit mask constant copies the low 4 bytes of src
// into dst.
instruct vreinterpret_expand4(legVec dst, vec src) %{
  predicate(UseAVX > 0 &&
            !n->bottom_type()->isa_pvectmask() &&
            (Matcher::vector_length_in_bytes(n->in(1)) == 4) && // src
            (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
  match(Set dst (VectorReinterpret src));
  format %{ "vector_reinterpret_expand $dst,$src" %}
  ins_cost(125);
  ins_encode %{
    XMMRegister widened = $dst$$XMMRegister;
    XMMRegister narrow  = $src$$XMMRegister;
    __ vpand(widened, narrow, ExternalAddress(vector_32_bit_mask()), 0, noreg);
  %}
  ins_pipe(pipe_slow);
%}
18342 
18343 
// Widening reinterpret of a non-mask vector larger than 4 bytes when
// UseAVX > 0. The move is chosen by the size of the source (8, 16 or 32
// bytes); any other source size is unexpected.
instruct vreinterpret_expand(legVec dst, vec src) %{
  predicate(UseAVX > 0 &&
            !n->bottom_type()->isa_pvectmask() &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 4) && // src
            (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
  match(Set dst (VectorReinterpret src));
  format %{ "vector_reinterpret_expand $dst,$src\t!" %}
  ins_cost(125);
  ins_encode %{
    int src_bytes = Matcher::vector_length_in_bytes(this, $src);
    XMMRegister widened = $dst$$XMMRegister;
    XMMRegister narrow  = $src$$XMMRegister;
    if (src_bytes == 8) {
      __ movq(widened, narrow);
    } else if (src_bytes == 16) {
      __ movdqu(widened, narrow);
    } else if (src_bytes == 32) {
      __ vmovdqu(widened, narrow);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_slow);
%}
18362 
// Narrowing reinterpret of a non-mask vector (source larger than result).
// The move is chosen by the size of the result (4, 8, 16 or 32 bytes); any
// other result size is unexpected.
instruct reinterpret_shrink(vec dst, legVec src) %{
  predicate(!n->bottom_type()->isa_pvectmask() &&
            Matcher::vector_length_in_bytes(n->in(1)) > Matcher::vector_length_in_bytes(n)); // src > dst
  match(Set dst (VectorReinterpret src));
  format %{ "vector_reinterpret_shrink $dst,$src\t!" %}
  ins_cost(125);
  ins_encode %{
    int dst_bytes = Matcher::vector_length_in_bytes(this);
    XMMRegister narrow = $dst$$XMMRegister;
    XMMRegister wide   = $src$$XMMRegister;
    if (dst_bytes == 4) {
      __ movfltz(narrow, wide);
    } else if (dst_bytes == 8) {
      __ movq(narrow, wide);
    } else if (dst_bytes == 16) {
      __ movdqu(narrow, wide);
    } else if (dst_bytes == 32) {
      __ vmovdqu(narrow, wide);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_slow);
%}
18380 
18381 // ----------------------------------------------------------------------------------------------------
18382 
// Scalar double rounding with an immediate rounding mode, register source.
// Asserts UseSSE >= 4.
instruct roundD_reg(legRegD dst, legRegD src, immU8 rmode) %{
  match(Set dst (RoundDoubleMode src rmode));
  ins_cost(150);
  format %{ "roundsd $dst,$src" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    XMMRegister rounded = $dst$$XMMRegister;
    XMMRegister input   = $src$$XMMRegister;
    // Without AVX, dst is zeroed first when it is a different register.
    // NOTE(review): presumably this avoids a dependency on the old dst
    // value - confirm.
    if (UseAVX == 0 && rounded != input) {
      __ pxor(rounded, rounded);
    }
    __ roundsd(rounded, input, $rmode$$constant);
  %}
  ins_pipe( pipe_slow );
%}
18396 
// Scalar double rounding of a constant: the operand is read from the
// constant table. Asserts UseSSE >= 4.
instruct roundD_imm(legRegD dst, immD con, immU8 rmode) %{
  match(Set dst (RoundDoubleMode con rmode));
  ins_cost(150);
  format %{ "roundsd $dst,[$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    XMMRegister rounded = $dst$$XMMRegister;
    __ roundsd(rounded, $constantaddress($con), $rmode$$constant, noreg);
  %}
  ins_pipe( pipe_slow );
%}
18407 
// Packed double rounding for vectors of fewer than 8 lanes, register source.
// Asserts UseAVX > 0.
instruct vroundD_reg(legVec dst, legVec src, immU8 rmode) %{
  predicate(Matcher::vector_length(n) < 8);
  match(Set dst (RoundDoubleModeV src rmode));
  format %{ "vroundpd $dst,$src,$rmode\t! round packedD" %}
  ins_encode %{
    assert(UseAVX > 0, "required");
    int enc = vector_length_encoding(this);
    XMMRegister rounded = $dst$$XMMRegister;
    XMMRegister input   = $src$$XMMRegister;
    __ vroundpd(rounded, input, $rmode$$constant, enc);
  %}
  ins_pipe(pipe_slow);
%}
18419 
// Packed double rounding for 8-lane vectors, register source.
// Uses vrndscalepd at 512-bit length and asserts UseAVX > 2.
instruct vround8D_reg(vec dst, vec src, immU8 rmode) %{
  predicate(Matcher::vector_length(n) == 8);
  match(Set dst (RoundDoubleModeV src rmode));
  format %{ "vrndscalepd $dst,$src,$rmode\t! round packed8D" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    XMMRegister rounded = $dst$$XMMRegister;
    XMMRegister input   = $src$$XMMRegister;
    __ vrndscalepd(rounded, input, $rmode$$constant, Assembler::AVX_512bit);
  %}
  ins_pipe(pipe_slow);
%}
18430 
// Packed double rounding for vectors of fewer than 8 lanes with the vector
// load folded into vroundpd. Asserts UseAVX > 0.
instruct vroundD_mem(legVec dst, memory mem, immU8 rmode) %{
  predicate(Matcher::vector_length(n) < 8);
  match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
  format %{ "vroundpd $dst, $mem, $rmode\t! round packedD" %}
  ins_encode %{
    assert(UseAVX > 0, "required");
    int enc = vector_length_encoding(this);
    XMMRegister rounded = $dst$$XMMRegister;
    __ vroundpd(rounded, $mem$$Address, $rmode$$constant, enc);
  %}
  ins_pipe(pipe_slow);
%}
18442 
// Packed double rounding for 8-lane vectors with the vector load folded into
// vrndscalepd at 512-bit length. Asserts UseAVX > 2.
instruct vround8D_mem(vec dst, memory mem, immU8 rmode) %{
  predicate(Matcher::vector_length(n) == 8);
  match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
  format %{ "vrndscalepd $dst,$mem,$rmode\t! round packed8D" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    XMMRegister rounded = $dst$$XMMRegister;
    __ vrndscalepd(rounded, $mem$$Address, $rmode$$constant, Assembler::AVX_512bit);
  %}
  ins_pipe(pipe_slow);
%}
18453 
// Spin-wait hint: matches the OnSpinWait node and emits a single pause
// instruction. The rule takes no operands.
instruct onspinwait() %{
  match(OnSpinWait);
  ins_cost(200);

  format %{
    $$template
    $$emit$$"pause\t! membar_onspinwait"
  %}
  ins_encode %{
    __ pause();
  %}
  ins_pipe(pipe_slow);
%}
18467 
// Fused multiply-add on doubles: c = a * b + c.
// c is both the addend and the result register; asserts UseFMA.
instruct fmaD_reg(regD a, regD b, regD c) %{
  match(Set c (FmaD  c (Binary a b)));
  ins_cost(150);
  format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %}
  ins_encode %{
    assert(UseFMA, "Needs FMA instructions support.");
    XMMRegister accum = $c$$XMMRegister;
    __ fmad(accum, $a$$XMMRegister, $b$$XMMRegister, accum);
  %}
  ins_pipe(pipe_slow);
%}
18479 
// Fused multiply-add on floats: c = a * b + c.
// c is both the addend and the result register; asserts UseFMA.
instruct fmaF_reg(regF a, regF b, regF c) %{
  match(Set c (FmaF  c (Binary a b)));
  ins_cost(150);
  format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %}
  ins_encode %{
    assert(UseFMA, "Needs FMA instructions support.");
    XMMRegister accum = $c$$XMMRegister;
    __ fmaf(accum, $a$$XMMRegister, $b$$XMMRegister, accum);
  %}
  ins_pipe(pipe_slow);
%}
18491 
18492 // ====================VECTOR INSTRUCTIONS=====================================
18493 
18494 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
// Placeholder move from a vec operand to a legVec operand.
// It has an empty format and its encoding is ShouldNotReachHere(), so the
// node is never expected to reach code emission.
instruct MoveVec2Leg(legVec dst, vec src) %{
  match(Set dst src);
  format %{ "" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}
18503 
// Placeholder move from a legVec operand to a vec operand.
// It has an empty format and its encoding is ShouldNotReachHere(), so the
// node is never expected to reach code emission.
instruct MoveLeg2Vec(vec dst, legVec src) %{
  match(Set dst src);
  format %{ "" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}
18512 
18513 // ============================================================================
18514 
// Generic vector load: one rule for every element type and vector size.
// The element type and size in bytes are handed to the load_vector macro.
instruct loadV(vec dst, memory mem) %{
  match(Set dst (LoadVector mem));
  format %{ "load_vector $dst,$mem" %}
  ins_cost(125);
  ins_encode %{
    BasicType elem_type = Matcher::vector_element_basic_type(this);
    XMMRegister loaded = $dst$$XMMRegister;
    __ load_vector(elem_type, loaded, $mem$$Address, Matcher::vector_length_in_bytes(this));
  %}
  ins_pipe(pipe_slow);
%}
18526 
// Generic vector store: one rule for every vector size.
// The store instruction is chosen by the size in bytes of the source vector
// (4, 8, 16, 32 or 64); any other size is unexpected.
instruct storeV(memory mem, vec src) %{
  match(Set mem (StoreVector mem src));
  format %{ "store_vector $mem,$src\n\t" %}
  ins_cost(145);
  ins_encode %{
    int src_bytes = Matcher::vector_length_in_bytes(this, $src);
    XMMRegister value = $src$$XMMRegister;
    if (src_bytes == 4) {
      __ movdl($mem$$Address, value);
    } else if (src_bytes == 8) {
      __ movq($mem$$Address, value);
    } else if (src_bytes == 16) {
      __ movdqu($mem$$Address, value);
    } else if (src_bytes == 32) {
      __ vmovdqu($mem$$Address, value);
    } else if (src_bytes == 64) {
      __ evmovdqul($mem$$Address, value, Assembler::AVX_512bit);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_slow);
%}
18544 
18545 // ---------------------------------------- Gather ------------------------------------
18546 
18547 // Gather BYTE, SHORT, INT, LONG, FLOAT, DOUBLE
18548 
// Unmasked gather of non-subword elements for vectors of at most 32 bytes,
// used when AVX512VL is not available. The mask temp is filled with all
// ones (a register compared for equality with itself) before the gather,
// and tmp receives the effective base address.
instruct gather(legVec dst, memory mem, legVec idx, rRegP tmp, legVec mask) %{
  predicate(!VM_Version::supports_avx512vl() && !is_subword_type(Matcher::vector_element_basic_type(n)) &&
            Matcher::vector_length_in_bytes(n) <= 32);
  match(Set dst (LoadVectorGather mem idx));
  effect(TEMP dst, TEMP tmp, TEMP mask);
  format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $mask as TEMP" %}
  ins_encode %{
    int enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
    XMMRegister all_lanes = $mask$$XMMRegister;
    Register    base_reg  = $tmp$$Register;
    __ vpcmpeqd(all_lanes, all_lanes, all_lanes, enc);
    __ lea(base_reg, $mem$$Address);
    __ vgather(elem_bt, $dst$$XMMRegister, base_reg, $idx$$XMMRegister, all_lanes, enc);
  %}
  ins_pipe(pipe_slow);
%}
18565 
18566 
// Unmasked gather of non-subword elements using the EVEX gather, selected
// when AVX512VL is available or the vector is 64 bytes wide.
// ktmp is a scratch opmask and tmp receives the effective base address.
instruct evgather(vec dst, memory mem, vec idx, rRegP tmp, kReg ktmp) %{
  predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
            !is_subword_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (LoadVectorGather mem idx));
  effect(TEMP dst, TEMP tmp, TEMP ktmp);
  // The format previously printed the literal text "ktmp"; it now prints the
  // allocated opmask register.
  format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $ktmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    // XNOR of the opmask with itself sets its bits, enabling the lanes.
    __ kxnorwl($ktmp$$KRegister, $ktmp$$KRegister, $ktmp$$KRegister);
    // Materialize the base address for the gather.
    __ lea($tmp$$Register, $mem$$Address);
    __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
18582 
// Masked gather of non-subword elements using the EVEX gather, selected when
// AVX512VL is available or the vector is 64 bytes wide.
// dst is zeroed before the gather; ktmp holds a copy of the mask and tmp
// receives the effective base address.
instruct evgather_masked(vec dst, memory mem, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
  predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
            !is_subword_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (LoadVectorGatherMasked mem (Binary idx mask)));
  effect(TEMP_DEF dst, TEMP tmp, TEMP ktmp);
  // The format previously printed the literal text "ktmp"; it now prints the
  // allocated opmask register.
  format %{ "load_vector_gather_masked $dst, $mem, $idx, $mask\t! using $tmp and $ktmp as TEMP" %}
  ins_encode %{
    assert(UseAVX > 2, "sanity");
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
    // Note: The gather instruction partially updates the opmask register used
    // for predication, hence the mask operand is copied to a temporary.
    __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
    // Zero the destination before gathering into it.
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ lea($tmp$$Register, $mem$$Address);
    __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
18603 
// Unmasked gather of subword elements for vectors of at most 8 bytes.
// tmp receives the effective base address; the work is done by the
// vgather8b macro, which also gets rtmp as a scratch register.
instruct vgather_subwordLE8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegI rtmp) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
  match(Set dst (LoadVectorGather mem idx_base));
  effect(TEMP tmp, TEMP rtmp);
  format %{ "vector_gatherLE8 $dst, $mem, $idx_base\t! using $tmp and $rtmp as TEMP" %}
  ins_encode %{
    int enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    Register base_reg = $tmp$$Register;
    __ lea(base_reg, $mem$$Address);
    __ vgather8b(elem_bt, $dst$$XMMRegister, base_reg, $idx_base$$Register, $rtmp$$Register, enc);
  %}
  ins_pipe(pipe_slow);
%}
18617 
// Unmasked gather of subword elements for vectors larger than 8 bytes.
// tmp receives the effective base address and idx_base is copied into
// idx_base_temp before the vgather_subword macro is called with noreg for
// both the mask and the mask index.
// NOTE(review): presumably the copy exists because the macro modifies the
// index pointer - confirm against vgather_subword.
instruct vgather_subwordGT8B(vec dst, memory mem, rRegP idx_base, rRegP tmp, rRegP idx_base_temp,
                             vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI length, rFlagsReg cr) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
  match(Set dst (LoadVectorGather mem idx_base));
  effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP length, KILL cr);
  format %{ "vector_gatherGT8 $dst, $mem, $idx_base\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp and $length as TEMP" %}
  ins_encode %{
    int enc = vector_length_encoding(this);
    int lane_count = Matcher::vector_length(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    Register base_reg = $tmp$$Register;
    Register index_ptr = $idx_base_temp$$Register;
    __ lea(base_reg, $mem$$Address);
    __ movptr(index_ptr, $idx_base$$Register);
    __ vgather_subword(elem_bt, $dst$$XMMRegister, base_reg, index_ptr, noreg,
                       $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister,
                       $rtmp$$Register, noreg, $length$$Register, lane_count, enc);
  %}
  ins_pipe(pipe_slow);
%}
18635 
// Masked gather of subword elements for vectors of at most 8 bytes when
// AVX512BW is available (the mask is an opmask register).
// mask_idx is zeroed, tmp receives the effective base address and the
// opmask is copied into the general register rtmp2 for vgather8b_masked.
instruct vgather_masked_subwordLE8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegL mask_idx, rRegP tmp, rRegI rtmp, rRegL rtmp2, rFlagsReg cr) %{
  predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
  match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
  effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
  format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
  ins_encode %{
    int enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    Register bit_pos   = $mask_idx$$Register;
    Register base_reg  = $tmp$$Register;
    Register mask_bits = $rtmp2$$Register;
    __ xorq(bit_pos, bit_pos);
    __ lea(base_reg, $mem$$Address);
    __ kmovql(mask_bits, $mask$$KRegister);
    __ vgather8b_masked(elem_bt, $dst$$XMMRegister, base_reg, $idx_base$$Register,
                        mask_bits, bit_pos, $rtmp$$Register, enc);
  %}
  ins_pipe(pipe_slow);
%}
18651 
// Masked gather of subword elements for vectors larger than 8 bytes when
// AVX512BW is available (the mask is an opmask register).
// mask_idx is zeroed, tmp receives the effective base address, idx_base is
// copied into idx_base_temp, and the opmask is copied into the general
// register rtmp2 before vgather_subword is called.
instruct vgather_masked_subwordGT8B_avx3(vec dst, memory mem, rRegP idx_base, kReg mask, rRegP tmp, rRegP idx_base_temp,
                                         vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegL rtmp2, rRegL mask_idx, rRegI length, rFlagsReg cr) %{
  predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
  match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
  effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
  format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
  ins_encode %{
    int enc = vector_length_encoding(this);
    int lane_count = Matcher::vector_length(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    Register bit_pos   = $mask_idx$$Register;
    Register base_reg  = $tmp$$Register;
    Register index_ptr = $idx_base_temp$$Register;
    Register mask_bits = $rtmp2$$Register;
    __ xorq(bit_pos, bit_pos);
    __ lea(base_reg, $mem$$Address);
    __ movptr(index_ptr, $idx_base$$Register);
    __ kmovql(mask_bits, $mask$$KRegister);
    __ vgather_subword(elem_bt, $dst$$XMMRegister, base_reg, index_ptr, mask_bits,
                       $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister,
                       $rtmp$$Register, bit_pos, $length$$Register, lane_count, enc);
  %}
  ins_pipe(pipe_slow);
%}
18671 
// Masked gather of subword elements for vectors of at most 8 bytes when
// AVX512VL+BW is not available (the mask is a vector register).
// The vector mask is reduced to a bit mask in rtmp2 with vpmovmskb; for
// T_SHORT every second bit is then extracted with pext (pattern 0x55555555).
// mask_idx serves as scratch for that pattern and is zeroed afterwards.
instruct vgather_masked_subwordLE8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegI mask_idx, rRegP tmp, rRegI rtmp, rRegI rtmp2, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
  match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
  effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
  format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
  ins_encode %{
    int enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    Register base_reg  = $tmp$$Register;
    Register mask_bits = $rtmp2$$Register;
    Register bit_pos   = $mask_idx$$Register;
    __ lea(base_reg, $mem$$Address);
    __ vpmovmskb(mask_bits, $mask$$XMMRegister, enc);
    if (elem_bt == T_SHORT) {
      __ movl(bit_pos, 0x55555555);
      __ pextl(mask_bits, mask_bits, bit_pos);
    }
    __ xorl(bit_pos, bit_pos);
    __ vgather8b_masked(elem_bt, $dst$$XMMRegister, base_reg, $idx_base$$Register,
                        mask_bits, bit_pos, $rtmp$$Register, enc);
  %}
  ins_pipe(pipe_slow);
%}
18691 
// Masked gather of subword elements for vectors larger than 8 bytes when
// AVX512VL+BW is not available (the mask is a vector register).
// The vector mask is reduced to a bit mask in rtmp2 with vpmovmskb; for
// T_SHORT every second bit is then extracted with pext (pattern 0x55555555).
// mask_idx serves as scratch for that pattern and is zeroed afterwards.
instruct vgather_masked_subwordGT8B_avx2(vec dst, memory mem, rRegP idx_base, vec mask, rRegP tmp, rRegP idx_base_temp,
                                         vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI rtmp2, rRegI mask_idx, rRegI length, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
  match(Set dst (LoadVectorGatherMasked mem (Binary idx_base mask)));
  effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
  format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
  ins_encode %{
    int enc = vector_length_encoding(this);
    int lane_count = Matcher::vector_length(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    Register base_reg  = $tmp$$Register;
    Register index_ptr = $idx_base_temp$$Register;
    Register mask_bits = $rtmp2$$Register;
    Register bit_pos   = $mask_idx$$Register;
    __ lea(base_reg, $mem$$Address);
    __ movptr(index_ptr, $idx_base$$Register);
    __ vpmovmskb(mask_bits, $mask$$XMMRegister, enc);
    if (elem_bt == T_SHORT) {
      __ movl(bit_pos, 0x55555555);
      __ pextl(mask_bits, mask_bits, bit_pos);
    }
    __ xorl(bit_pos, bit_pos);
    __ vgather_subword(elem_bt, $dst$$XMMRegister, base_reg, index_ptr, mask_bits,
                       $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister,
                       $rtmp$$Register, bit_pos, $length$$Register, lane_count, enc);
  %}
  ins_pipe(pipe_slow);
%}
18715 
18716 // ====================Scatter=======================================
18717 
18718 // Scatter INT, LONG, FLOAT, DOUBLE
18719 
// Unmasked scatter of non-subword elements (UseAVX > 2).
// ktmp is loaded from the all-bits-set constant so that lanes are enabled,
// and tmp receives the effective base address.
instruct scatter(memory mem, vec src, vec idx, rRegP tmp, kReg ktmp) %{
  predicate(UseAVX > 2);
  match(Set mem (StoreVectorScatter mem (Binary src idx)));
  effect(TEMP tmp, TEMP ktmp);
  // The format previously named a fixed "k2"; the temp opmask is whatever
  // register the allocator assigns to ktmp, so print that operand instead.
  format %{ "store_vector_scatter $mem, $idx, $src\t! using $ktmp and $tmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);

    assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
    assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE

    // Load the opmask from the all-bits-set constant.
    __ kmovwl($ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), noreg);
    // Materialize the base address for the scatter.
    __ lea($tmp$$Register, $mem$$Address);
    __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
18738 
// Masked scatter of non-subword elements.
// ktmp holds a copy of the mask and tmp receives the effective base address.
instruct scatter_masked(memory mem, vec src, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
  match(Set mem (StoreVectorScatterMasked mem (Binary src (Binary idx mask))));
  effect(TEMP tmp, TEMP ktmp);
  format %{ "store_vector_scatter_masked $mem, $idx, $src, $mask\t!" %}
  ins_encode %{
    int enc = vector_length_encoding(this, $src);
    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
    assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
    KRegister live_mask = $ktmp$$KRegister;
    Register  base_reg  = $tmp$$Register;
    // The scatter instruction partially updates the opmask register used for
    // predication, hence it operates on a copy of the mask operand.
    __ kmovwl(live_mask, $mask$$KRegister);
    __ lea(base_reg, $mem$$Address);
    __ evscatter(elem_bt, base_reg, $idx$$XMMRegister, live_mask, $src$$XMMRegister, enc);
  %}
  ins_pipe(pipe_slow);
%}
18756 
18757 // ====================REPLICATE=======================================
18758 
// Broadcast a byte held in a general register to every lane of a vector.
// Three code shapes are used:
//   - UseAVX < 2: movdl followed by unpack/shuffle steps;
//   - UseAVX >= 2 with a 64-lane vector or AVX512VL+BW: evpbroadcastb
//     directly from the general register;
//   - otherwise: movdl into dst followed by vpbroadcastb.
instruct vReplB_reg(vec dst, rRegI src) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE);
  match(Set dst (Replicate src));
  format %{ "replicateB $dst,$src" %}
  ins_encode %{
    uint vlen = Matcher::vector_length(this);
    XMMRegister lanes  = $dst$$XMMRegister;
    Register    scalar = $src$$Register;
    if (UseAVX < 2) {
      assert(UseAVX < 2, "");
      __ movdl(lanes, scalar);
      __ punpcklbw(lanes, lanes);
      __ pshuflw(lanes, lanes, 0x00);
      if (vlen >= 16) {
        assert(vlen == 16, "");
        __ punpcklqdq(lanes, lanes);
      }
    } else {
      int vlen_enc = vector_length_encoding(this);
      if (vlen == 64 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
        assert(VM_Version::supports_avx512bw(), "required"); // 512-bit byte vectors assume AVX512BW
        __ evpbroadcastb(lanes, scalar, vlen_enc);
      } else {
        __ movdl(lanes, scalar);
        __ vpbroadcastb(lanes, lanes, vlen_enc);
      }
    }
  %}
  ins_pipe(pipe_slow);
%}
18788 
// Broadcast a byte loaded from memory to every lane (UseAVX >= 2).
// The LoadB is folded into vpbroadcastb.
instruct ReplB_mem(vec dst, memory mem) %{
  predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_BYTE);
  match(Set dst (Replicate (LoadB mem)));
  format %{ "replicateB $dst,$mem" %}
  ins_encode %{
    int enc = vector_length_encoding(this);
    XMMRegister lanes = $dst$$XMMRegister;
    __ vpbroadcastb(lanes, $mem$$Address, enc);
  %}
  ins_pipe(pipe_slow);
%}
18799 
18800 // ====================ReplicateS=======================================
18801 
// Replicate a short held in a general purpose register into every lane of dst.
// The emitted sequence depends on the CPU configuration:
//   UseAVX >= 2 and (32 lanes or supports_avx512vlbw()) : evpbroadcastw from the GPR
//   UseAVX >= 2 otherwise                               : movdl into dst, then vpbroadcastw
//   UseAVX <  2                                         : movdl, pshuflw and, for 8 lanes,
//                                                         a final punpcklqdq
instruct vReplS_reg(vec dst, rRegI src) %{
  predicate(Matcher::vector_element_basic_type(n) == T_SHORT);
  match(Set dst (Replicate src));
  format %{ "replicateS $dst,$src" %}
  ins_encode %{
    const uint lane_cnt = Matcher::vector_length(this);
    const int enc = vector_length_encoding(this);
    if (UseAVX < 2) {
      // Pre-AVX2 path built from shuffle and unpack instructions.
      __ movdl($dst$$XMMRegister, $src$$Register);
      __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
      if (lane_cnt >= 8) {
        assert(lane_cnt == 8, "");
        __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
      }
    } else {
      // AVX512VL for <512bit operands
      const bool broadcast_from_gpr = (lane_cnt == 32) || VM_Version::supports_avx512vlbw();
      if (broadcast_from_gpr) {
        assert(VM_Version::supports_avx512bw(), "required"); // 512-bit short vectors assume AVX512BW
        __ evpbroadcastw($dst$$XMMRegister, $src$$Register, enc);
      } else {
        // Stage the scalar in dst first, then broadcast register to register.
        __ movdl($dst$$XMMRegister, $src$$Register);
        __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, enc);
      }
    }
  %}
  ins_pipe( pipe_slow );
%}
18829 
// Replicate an immH constant into every lane of dst.
// The constant is first materialized in the scratch register rtmp with movl
// and then broadcast from that register with evpbroadcastw.
// This rule has no predicate; the encoding asserts AVX512-FP16 support and a
// T_SHORT element type instead.
instruct ReplHF_imm(vec dst, immH con, rRegI rtmp) %{
  match(Set dst (Replicate con));
  effect(TEMP rtmp);
  format %{ "replicateHF $dst, $con \t! using $rtmp as TEMP" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    assert(VM_Version::supports_avx512_fp16() && Matcher::vector_element_basic_type(this) == T_SHORT, "");
    const Register scratch = $rtmp$$Register;
    __ movl(scratch, $con$$constant);
    __ evpbroadcastw($dst$$XMMRegister, scratch, enc);
  %}
  ins_pipe( pipe_slow );
%}
18843 
// Replicate a value held in an XMM register (regF src) into every lane of a
// T_SHORT vector. Only selected when VM_Version::supports_avx512_fp16().
// The value is moved to the scratch GPR rtmp with evmovw and broadcast from
// there with evpbroadcastw.
instruct ReplHF_reg(vec dst, regF src, rRegI rtmp) %{
  predicate(VM_Version::supports_avx512_fp16() && Matcher::vector_element_basic_type(n) == T_SHORT);
  match(Set dst (Replicate src));
  effect(TEMP rtmp);
  format %{ "replicateHF $dst, $src \t! using $rtmp as TEMP" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    const Register scratch = $rtmp$$Register;
    __ evmovw(scratch, $src$$XMMRegister);
    __ evpbroadcastw($dst$$XMMRegister, scratch, enc);
  %}
  ins_pipe( pipe_slow );
%}
18856 
// Replicate a short loaded from memory (Replicate of LoadS) into every lane of dst.
// Only selected when UseAVX >= 2; the memory operand is handed directly to
// vpbroadcastw.
instruct ReplS_mem(vec dst, memory mem) %{
  predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_SHORT);
  match(Set dst (Replicate (LoadS mem)));
  format %{ "replicateS $dst,$mem" %}
  ins_encode %{
    __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_length_encoding(this));
  %}
  ins_pipe( pipe_slow );
%}
18867 
18868 // ====================ReplicateI=======================================
18869 
// Replicate an int held in a general purpose register into every lane of dst.
//   16 lanes or supports_avx512vl() : evpbroadcastd straight from the GPR
//   otherwise                       : movdl into dst, followed by vpbroadcastd
//                                     when AVX2 is available, else pshufd
instruct ReplI_reg(vec dst, rRegI src) %{
  predicate(Matcher::vector_element_basic_type(n) == T_INT);
  match(Set dst (Replicate src));
  format %{ "replicateI $dst,$src" %}
  ins_encode %{
    const uint lane_cnt = Matcher::vector_length(this);
    const int enc = vector_length_encoding(this);
    // AVX512VL for <512bit operands
    const bool broadcast_from_gpr = (lane_cnt == 16) || VM_Version::supports_avx512vl();
    if (broadcast_from_gpr) {
      __ evpbroadcastd($dst$$XMMRegister, $src$$Register, enc);
    } else {
      // Both remaining variants start by staging the scalar in dst.
      __ movdl($dst$$XMMRegister, $src$$Register);
      if (VM_Version::supports_avx2()) {
        __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, enc);
      } else {
        __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
      }
    }
  %}
  ins_pipe( pipe_slow );
%}
18889 
// Replicate an int loaded from memory (Replicate of LoadI) into every lane of dst.
//   AVX2      : vpbroadcastd with the memory operand
//   AVX only  : vbroadcastss with the memory operand
//   otherwise : movdl from memory followed by pshufd
instruct ReplI_mem(vec dst, memory mem) %{
  predicate(Matcher::vector_element_basic_type(n) == T_INT);
  match(Set dst (Replicate (LoadI mem)));
  format %{ "replicateI $dst,$mem" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    if (VM_Version::supports_avx2()) {
      __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, enc);
    } else {
      if (VM_Version::supports_avx()) {
        __ vbroadcastss($dst$$XMMRegister, $mem$$Address, enc);
      } else {
        // No broadcast instruction is used here: load, then shuffle.
        __ movdl($dst$$XMMRegister, $mem$$Address);
        __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
      }
    }
  %}
  ins_pipe( pipe_slow );
%}
18907 
// Replicate an int immediate into a byte, short or int vector
// (is_non_long_integral_vector) by loading from a constant produced by
// vreplicate_imm. The constant covers 4 bytes when AVX is supported, 8 bytes
// with SSE3 only and 16 bytes otherwise; that byte count is divided by the
// element size to obtain the element count passed to vreplicate_imm.
instruct ReplI_imm(vec dst, immI con) %{
  predicate(Matcher::is_non_long_integral_vector(n));
  match(Set dst (Replicate con));
  format %{ "replicateI $dst,$con" %}
  ins_encode %{
    // NOTE(review): the constantaddress argument is kept free of encode-block
    // locals; presumably ADLC evaluates it outside this block - confirm before
    // refactoring it.
    InternalAddress const_addr = $constantaddress(vreplicate_imm(Matcher::vector_element_basic_type(this), $con$$constant,
                                                           (VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 4 : 8) : 16) /
                                                                   type2aelembytes(Matcher::vector_element_basic_type(this))));
    const int size_in_bytes = Matcher::vector_length_in_bytes(this);
    __ load_constant_vector(Matcher::vector_element_basic_type(this), $dst$$XMMRegister, const_addr, size_in_bytes);
  %}
  ins_pipe( pipe_slow );
%}
18922 
// Replicate scalar zero to be vector.
// Applies to byte, short and int vectors (is_non_long_integral_vector).
// dst is cleared by xor-ing it with itself: pxor is emitted unless EVEX is
// supported without AVX512VL, in which case vpxor with the vector length
// encoding is emitted instead.
instruct ReplI_zero(vec dst, immI_0 zero) %{
  predicate(Matcher::is_non_long_integral_vector(n));
  match(Set dst (Replicate zero));
  format %{ "replicateI $dst,$zero" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    const bool use_pxor = !VM_Version::supports_evex() || VM_Version::supports_avx512vl();
    if (use_pxor) {
      __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
    } else {
      __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, enc);
    }
  %}
  ins_pipe( fpu_reg_reg );
%}
18938 
// Replicate the int constant matched by immI_M1 into a byte, short or int
// vector (is_non_long_integral_vector). No constant load is emitted; the
// vallones macro-assembler routine fills dst.
instruct ReplI_M1(vec dst, immI_M1 con) %{
  predicate(Matcher::is_non_long_integral_vector(n));
  match(Set dst (Replicate con));
  format %{ "vallones $dst" %}
  ins_encode %{
    __ vallones($dst$$XMMRegister, vector_length_encoding(this));
  %}
  ins_pipe( pipe_slow );
%}
18949 
18950 // ====================ReplicateL=======================================
18951 
// Replicate long (8 byte) scalar to be vector.
//   8 lanes or supports_avx512vl() : evpbroadcastq straight from the GPR
//   otherwise                      : movdq into dst, followed by vpbroadcastq
//                                    when AVX2 is available, else punpcklqdq
instruct ReplL_reg(vec dst, rRegL src) %{
  predicate(Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (Replicate src));
  format %{ "replicateL $dst,$src" %}
  ins_encode %{
    const int lane_cnt = Matcher::vector_length(this);
    const int enc = vector_length_encoding(this);
    // AVX512VL for <512bit operands
    const bool broadcast_from_gpr = (lane_cnt == 8) || VM_Version::supports_avx512vl();
    if (broadcast_from_gpr) {
      __ evpbroadcastq($dst$$XMMRegister, $src$$Register, enc);
    } else {
      // Both remaining variants start by staging the scalar in dst.
      __ movdq($dst$$XMMRegister, $src$$Register);
      if (VM_Version::supports_avx2()) {
        __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, enc);
      } else {
        __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
      }
    }
  %}
  ins_pipe( pipe_slow );
%}
18972 
// Replicate a long loaded from memory (Replicate of LoadL) into every lane of dst.
//   AVX2      : vpbroadcastq with the memory operand
//   SSE3      : movddup with the memory operand
//   otherwise : movq from memory followed by punpcklqdq
instruct ReplL_mem(vec dst, memory mem) %{
  predicate(Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (Replicate (LoadL mem)));
  format %{ "replicateL $dst,$mem" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    if (VM_Version::supports_avx2()) {
      __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, enc);
    } else {
      if (VM_Version::supports_sse3()) {
        __ movddup($dst$$XMMRegister, $mem$$Address);
      } else {
        // Load the quadword, then copy it into the upper half of dst.
        __ movq($dst$$XMMRegister, $mem$$Address);
        __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
      }
    }
  %}
  ins_pipe( pipe_slow );
%}
18990 
// Replicate long (8 byte) scalar immediate to be vector by loading from const table.
// vreplicate_imm is asked for one element when SSE3 is supported and for two
// elements otherwise; load_constant_vector then fills dst from that constant.
instruct ReplL_imm(vec dst, immL con) %{
  predicate(Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (Replicate con));
  format %{ "replicateL $dst,$con" %}
  ins_encode %{
    // NOTE(review): the constantaddress argument is kept free of encode-block
    // locals; presumably ADLC evaluates it outside this block - confirm before
    // refactoring it.
    InternalAddress const_addr = $constantaddress(vreplicate_imm(T_LONG, $con$$constant, VM_Version::supports_sse3() ? 1 : 2));
    const int size_in_bytes = Matcher::vector_length_in_bytes(this);
    __ load_constant_vector(T_LONG, $dst$$XMMRegister, const_addr, size_in_bytes);
  %}
  ins_pipe( pipe_slow );
%}
19003 
// Replicate the long constant zero into every lane of dst.
// dst is cleared by xor-ing it with itself: pxor is emitted unless EVEX is
// supported without AVX512VL, in which case vpxor with the vector length
// encoding is emitted instead.
instruct ReplL_zero(vec dst, immL0 zero) %{
  predicate(Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (Replicate zero));
  format %{ "replicateL $dst,$zero" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    const bool use_pxor = !VM_Version::supports_evex() || VM_Version::supports_avx512vl();
    if (use_pxor) {
      __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
    } else {
      __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, enc);
    }
  %}
  ins_pipe( fpu_reg_reg );
%}
19018 
// Replicate the long constant matched by immL_M1 into every lane of dst.
// No constant load is emitted; the vallones macro-assembler routine fills dst.
instruct ReplL_M1(vec dst, immL_M1 con) %{
  predicate(Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (Replicate con));
  format %{ "vallones $dst" %}
  ins_encode %{
    __ vallones($dst$$XMMRegister, vector_length_encoding(this));
  %}
  ins_pipe( pipe_slow );
%}
19029 
19030 // ====================ReplicateF=======================================
19031 
// Replicate a float held in an XMM register into every lane of dst (UseAVX > 0).
//   up to 4 lanes            : vpermilps with a 128-bit encoding
//   more lanes, AVX2         : register-to-register vbroadcastss
//   more lanes, without AVX2 : 8 lanes expected; vpermilps on 128 bits, then
//                              vinsertf128_high of dst into itself
instruct vReplF_reg(vec dst, vlRegF src) %{
  predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
  match(Set dst (Replicate src));
  format %{ "replicateF $dst,$src" %}
  ins_encode %{
    const uint lane_cnt = Matcher::vector_length(this);
    const int enc = vector_length_encoding(this);
    const bool wider_than_128 = lane_cnt > 4;
    if (wider_than_128 && VM_Version::supports_avx2()) {
      __ vbroadcastss($dst$$XMMRegister, $src$$XMMRegister, enc); // reg-to-reg variant requires AVX2
    } else {
      assert(!wider_than_128 || lane_cnt == 8, "sanity");
      // Fill the low 128 bits first; wider vectors get them copied upwards.
      __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit);
      if (wider_than_128) {
        __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
      }
    }
  %}
  ins_pipe( pipe_slow );
%}
19051 
// Replicate a float held in an XMM register into every lane of dst when
// UseAVX == 0. A single pshufd with selector 0x00 is emitted.
instruct ReplF_reg(vec dst, vlRegF src) %{
  predicate(UseAVX == 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
  match(Set dst (Replicate src));
  format %{ "replicateF $dst,$src" %}
  ins_encode %{
    const int lane_select = 0x00;
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, lane_select);
  %}
  ins_pipe( pipe_slow );
%}
19061 
// Replicate a float loaded from memory (Replicate of LoadF) into every lane of dst.
// Only selected when UseAVX > 0; the memory operand is handed directly to
// vbroadcastss.
instruct ReplF_mem(vec dst, memory mem) %{
  predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
  match(Set dst (Replicate (LoadF mem)));
  format %{ "replicateF $dst,$mem" %}
  ins_encode %{
    __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vector_length_encoding(this));
  %}
  ins_pipe( pipe_slow );
%}
19072 
// Replicate float scalar immediate to be vector by loading from const table.
// vreplicate_imm is asked for one element when AVX is supported, two elements
// with SSE3 only and four elements otherwise; load_constant_vector then fills
// dst from that constant.
instruct ReplF_imm(vec dst, immF con) %{
  predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
  match(Set dst (Replicate con));
  format %{ "replicateF $dst,$con" %}
  ins_encode %{
    // NOTE(review): the constantaddress argument is kept free of encode-block
    // locals; presumably ADLC evaluates it outside this block - confirm before
    // refactoring it.
    InternalAddress const_addr = $constantaddress(vreplicate_imm(T_FLOAT, $con$$constant,
                                                           VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 1 : 2) : 4));
    const int size_in_bytes = Matcher::vector_length_in_bytes(this);
    __ load_constant_vector(T_FLOAT, $dst$$XMMRegister, const_addr, size_in_bytes);
  %}
  ins_pipe( pipe_slow );
%}
19086 
// Replicate the float constant matched by immF0 into every lane of dst.
// dst is cleared by xor-ing it with itself: xorps is emitted unless EVEX is
// supported without supports_avx512vldq(), in which case vpxor with the vector
// length encoding is emitted instead.
instruct ReplF_zero(vec dst, immF0 zero) %{
  predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
  match(Set dst (Replicate zero));
  format %{ "replicateF $dst,$zero" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    const bool use_xorps = !VM_Version::supports_evex() || VM_Version::supports_avx512vldq();
    if (use_xorps) {
      __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
    } else {
      __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, enc);
    }
  %}
  ins_pipe( fpu_reg_reg );
%}
19101 
19102 // ====================ReplicateD=======================================
19103 
// Replicate double (8 bytes) scalar to be vector (UseSSE >= 3).
//   up to 2 lanes            : movddup
//   more lanes, AVX2         : register-to-register vbroadcastsd
//   more lanes, without AVX2 : 4 lanes expected; movddup, then
//                              vinsertf128_high of dst into itself
instruct vReplD_reg(vec dst, vlRegD src) %{
  predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
  match(Set dst (Replicate src));
  format %{ "replicateD $dst,$src" %}
  ins_encode %{
    const uint lane_cnt = Matcher::vector_length(this);
    const int enc = vector_length_encoding(this);
    const bool wider_than_128 = lane_cnt > 2;
    if (wider_than_128 && VM_Version::supports_avx2()) {
      __ vbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, enc); // reg-to-reg variant requires AVX2
    } else {
      assert(!wider_than_128 || lane_cnt == 4, "sanity");
      // Fill the low 128 bits first; wider vectors get them copied upwards.
      __ movddup($dst$$XMMRegister, $src$$XMMRegister);
      if (wider_than_128) {
        __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
      }
    }
  %}
  ins_pipe( pipe_slow );
%}
19124 
// Replicate a double held in an XMM register into every lane of dst when
// UseSSE < 3. A single pshufd with selector 0x44 is emitted.
instruct ReplD_reg(vec dst, vlRegD src) %{
  predicate(UseSSE < 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
  match(Set dst (Replicate src));
  format %{ "replicateD $dst,$src" %}
  ins_encode %{
    const int lane_select = 0x44;
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, lane_select);
  %}
  ins_pipe( pipe_slow );
%}
19134 
// Replicate a double loaded from memory (Replicate of LoadD) into every lane
// of dst (UseSSE >= 3). Vectors with fewer than 4 lanes use movddup; vectors
// with 4 or more lanes use vbroadcastsd with the vector length encoding.
instruct ReplD_mem(vec dst, memory mem) %{
  predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
  match(Set dst (Replicate (LoadD mem)));
  format %{ "replicateD $dst,$mem" %}
  ins_encode %{
    if (Matcher::vector_length(this) < 4) {
      __ movddup($dst$$XMMRegister, $mem$$Address);
    } else {
      __ vbroadcastsd($dst$$XMMRegister, $mem$$Address, vector_length_encoding(this));
    }
  %}
  ins_pipe( pipe_slow );
%}
19149 
// Replicate double (8 byte) scalar immediate to be vector by loading from const table.
// vreplicate_imm is asked for one element when SSE3 is supported and for two
// elements otherwise; load_constant_vector then fills dst from that constant.
instruct ReplD_imm(vec dst, immD con) %{
  predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
  match(Set dst (Replicate con));
  format %{ "replicateD $dst,$con" %}
  ins_encode %{
    // NOTE(review): the constantaddress argument is kept free of encode-block
    // locals; presumably ADLC evaluates it outside this block - confirm before
    // refactoring it.
    InternalAddress const_addr = $constantaddress(vreplicate_imm(T_DOUBLE, $con$$constant, VM_Version::supports_sse3() ? 1 : 2));
    const int size_in_bytes = Matcher::vector_length_in_bytes(this);
    __ load_constant_vector(T_DOUBLE, $dst$$XMMRegister, const_addr, size_in_bytes);
  %}
  ins_pipe( pipe_slow );
%}
19162 
// Replicate the double constant matched by immD0 into every lane of dst.
// dst is cleared by xor-ing it with itself: xorps is emitted unless EVEX is
// supported without supports_avx512vldq(), in which case vpxor with the vector
// length encoding is emitted instead.
instruct ReplD_zero(vec dst, immD0 zero) %{
  predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
  match(Set dst (Replicate zero));
  format %{ "replicateD $dst,$zero" %}
  ins_encode %{
    const int enc = vector_length_encoding(this);
    const bool use_xorps = !VM_Version::supports_evex() || VM_Version::supports_avx512vldq();
    if (use_xorps) {
      __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
    } else {
      __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, enc);
    }
  %}
  ins_pipe( fpu_reg_reg );
%}
19177 
19178 // ====================VECTOR INSERT=======================================
19179 
// Insert a general purpose register value into lane idx of a vector shorter
// than 32 bytes. dst is both the input vector and the result (the match rule
// names dst on both sides), so the update happens in place through the
// insert macro-assembler routine.
// Debug builds check: UseSSE >= 4, a vector of at least 8 bytes, an integral
// element type and an in-range index.
instruct insert(vec dst, rRegI val, immU8 idx) %{
  predicate(Matcher::vector_length_in_bytes(n) < 32);
  match(Set dst (VectorInsert (Binary dst val) idx));
  format %{ "vector_insert $dst,$val,$idx" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    assert(Matcher::vector_length_in_bytes(this) >= 8, "required");

    const BasicType lane_type = Matcher::vector_element_basic_type(this);
    assert(is_integral_type(lane_type), "");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    __ insert(lane_type, $dst$$XMMRegister, $val$$Register, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}
19197 
// Insert a general purpose register value into lane idx of a 32-byte vector.
// The index is split into a 128-bit lane number (y_idx, 0 or 1) and a position
// inside that lane (x_idx). The selected 128-bit lane of src is extracted into
// vtmp, patched there, and written back into a copy of src that lands in dst.
instruct insert32(vec dst, vec src, rRegI val, immU8 idx, vec vtmp) %{
  predicate(Matcher::vector_length_in_bytes(n) == 32);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
  ins_encode %{
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    // Number of elements that fit into one 128-bit (16-byte) lane.
    int elem_per_lane = 16/type2aelembytes(elem_bt);
    int log2epr = log2(elem_per_lane);

    assert(is_integral_type(elem_bt), "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    // Low index bits select the element inside the lane, the next bit selects the lane.
    uint x_idx = $idx$$constant & right_n_bits(log2epr);
    uint y_idx = ($idx$$constant >> log2epr) & 1;
    __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
    __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}
19220 
// Insert a general purpose register value into lane idx of a 64-byte vector.
// The index is split into a 128-bit lane number (0..3) and a position inside
// that lane. The selected lane of src is extracted into vtmp, patched there,
// and written back into a copy of src that lands in dst.
instruct insert64(vec dst, vec src, rRegI val, immU8 idx, legVec vtmp) %{
  predicate(Matcher::vector_length_in_bytes(n) == 64);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
  ins_encode %{
    assert(UseAVX > 2, "sanity");

    const BasicType lane_type = Matcher::vector_element_basic_type(this);
    // Number of elements that fit into one 128-bit (16-byte) lane.
    const int per_lane = 16/type2aelembytes(lane_type);
    const int per_lane_shift = log2(per_lane);

    assert(is_integral_type(lane_type), "");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    // Low index bits select the element inside the lane, the next two bits the lane.
    const uint pos_in_lane = $idx$$constant & right_n_bits(per_lane_shift);
    const uint lane_no = ($idx$$constant >> per_lane_shift) & 3;
    __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, lane_no);
    __ vinsert(lane_type, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, pos_in_lane);
    __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, lane_no);
  %}
  ins_pipe( pipe_slow );
%}
19244 
// Insert a long from a general purpose register into lane idx of a 2-element
// vector. dst is both input and result; a single pinsrq is emitted.
// Debug builds check UseSSE >= 4, a T_LONG element type and an in-range index.
instruct insert2L(vec dst, rRegL val, immU8 idx) %{
  predicate(Matcher::vector_length(n) == 2);
  match(Set dst (VectorInsert (Binary dst val) idx));
  format %{ "vector_insert $dst,$val,$idx" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    const XMMRegister vec_reg = $dst$$XMMRegister;
    __ pinsrq(vec_reg, $val$$Register, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}
19258 
// Insert a long from a general purpose register into lane idx of a 4-element
// vector. Bit 0 of the index selects the position inside a 128-bit lane
// (x_idx), bit 1 selects the lane (y_idx). The selected lane of src is
// extracted into vtmp, patched with vpinsrq, and written back into a copy of
// src that lands in dst.
instruct insert4L(vec dst, vec src, rRegL val, immU8 idx, vec vtmp) %{
  predicate(Matcher::vector_length(n) == 4);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
  ins_encode %{
    assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    uint x_idx = $idx$$constant & right_n_bits(1);
    uint y_idx = ($idx$$constant >> 1) & 1;
    __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
    __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}
19277 
// Insert a long from a general purpose register into lane idx of an 8-element
// vector. Bit 0 of the index selects the position inside a 128-bit lane, bits
// 1-2 select the lane. The selected lane of src is extracted into vtmp,
// patched with vpinsrq, and written back into a copy of src that lands in dst.
instruct insert8L(vec dst, vec src, rRegL val, immU8 idx, legVec vtmp) %{
  predicate(Matcher::vector_length(n) == 8);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
  ins_encode %{
    assert(Matcher::vector_element_basic_type(this) == T_LONG, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    const uint pos_in_lane = $idx$$constant & right_n_bits(1);
    const uint lane_no = ($idx$$constant >> 1) & 3;
    const XMMRegister lane_copy = $vtmp$$XMMRegister;
    __ vextracti32x4(lane_copy, $src$$XMMRegister, lane_no);
    __ vpinsrq(lane_copy, lane_copy, $val$$Register, pos_in_lane);
    __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, lane_copy, lane_no);
  %}
  ins_pipe( pipe_slow );
%}
19295 
// Insert a float held in an XMM register into lane idx of a vector with fewer
// than 8 elements. dst is both input and result; a single insertps is emitted.
// The low two bits of the index are shifted left by 4 to form the insertps
// immediate.
instruct insertF(vec dst, regF val, immU8 idx) %{
  predicate(Matcher::vector_length(n) < 8);
  match(Set dst (VectorInsert (Binary dst val) idx));
  format %{ "vector_insert $dst,$val,$idx" %}
  ins_encode %{
    assert(UseSSE >= 4, "sanity");

    assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    const uint pos_in_lane = $idx$$constant & right_n_bits(2);
    __ insertps($dst$$XMMRegister, $val$$XMMRegister, pos_in_lane << 4);
  %}
  ins_pipe( pipe_slow );
%}
19311 
// Insert a float held in an XMM register into lane idx of a vector with 8 or
// 16 elements. The low two index bits give the position inside a 128-bit lane
// (x_idx); the remaining bits give the lane (y_idx). The selected lane of src
// is extracted into vtmp, patched with vinsertps, and written back into a copy
// of src that lands in dst.
//   8 elements  : vextracti128 / vinserti128, lane number 0..1
//   16 elements : vextracti32x4 / vinserti32x4, lane number 0..3
instruct vinsertF(vec dst, vec src, regF val, immU8 idx, vec vtmp) %{
  predicate(Matcher::vector_length(n) >= 8);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
  ins_encode %{
    assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    int vlen = Matcher::vector_length(this);
    uint x_idx = $idx$$constant & right_n_bits(2);
    if (vlen == 8) {
      uint y_idx = ($idx$$constant >> 2) & 1;
      __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
      __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
      __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
    } else {
      assert(vlen == 16, "sanity");
      uint y_idx = ($idx$$constant >> 2) & 3;
      __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
      __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
      __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
    }
  %}
  ins_pipe( pipe_slow );
%}
19339 
// Insert a double held in an XMM register into lane idx of a 2-element vector.
// dst is both input and result. The value is first moved into the scratch
// general purpose register tmp with movq and then inserted with pinsrq.
instruct insert2D(vec dst, regD val, immU8 idx, rRegL tmp) %{
  predicate(Matcher::vector_length(n) == 2);
  match(Set dst (VectorInsert (Binary dst val) idx));
  effect(TEMP tmp);
  format %{ "vector_insert $dst,$val,$idx\t!using $tmp as TEMP" %}
  ins_encode %{
    assert(UseSSE >= 4, "sanity");
    assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    const Register scratch = $tmp$$Register;
    __ movq(scratch, $val$$XMMRegister);
    __ pinsrq($dst$$XMMRegister, scratch, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}
19355 
// Insert a double held in an XMM register into lane idx of a 4-element vector.
// The value is moved into the scratch general purpose register tmp first.
// Bit 0 of the index selects the position inside a 128-bit lane (x_idx), bit 1
// selects the lane (y_idx). The selected lane of src is extracted into vtmp,
// patched with vpinsrq, and written back into a copy of src that lands in dst.
instruct insert4D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, vec vtmp) %{
  predicate(Matcher::vector_length(n) == 4);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp, TEMP tmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %}
  ins_encode %{
    assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    uint x_idx = $idx$$constant & right_n_bits(1);
    uint y_idx = ($idx$$constant >> 1) & 1;
    __ movq($tmp$$Register, $val$$XMMRegister);
    __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
    __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}
19375 
// Insert a double held in an XMM register into lane idx of an 8-element vector.
// The value is moved into the scratch general purpose register tmp first.
// Bit 0 of the index selects the position inside a 128-bit lane (x_idx), bits
// 1-2 select the lane (y_idx). The selected lane of src is extracted into
// vtmp, patched with vpinsrq, and written back into a copy of src that lands
// in dst.
// NOTE(review): idx is declared immI here while the other insert rules in this
// section use immU8 - confirm whether that difference is intended.
instruct insert8D(vec dst, vec src, regD val, immI idx, rRegL tmp, legVec vtmp) %{
  predicate(Matcher::vector_length(n) == 8);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP tmp, TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %}
  ins_encode %{
    assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    uint x_idx = $idx$$constant & right_n_bits(1);
    uint y_idx = ($idx$$constant >> 1) & 3;
    __ movq($tmp$$Register, $val$$XMMRegister);
    __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
    __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}
19394 
19395 // ====================REDUCTION ARITHMETIC=======================================
19396 
19397 // =======================Int Reduction==========================================
19398 
// Reduce an int vector (src2) together with a scalar input (src1) into the
// scalar dst. One rule covers add, mul, and, or, xor, min, max and the
// unsigned min/max reductions; the ideal opcode of the matched node is passed
// to reduceI to select the operation.
instruct reductionI(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_INT); // src2
  match(Set dst (AddReductionVI src1 src2));
  match(Set dst (MulReductionVI src1 src2));
  match(Set dst (AndReductionV  src1 src2));
  match(Set dst ( OrReductionV  src1 src2));
  match(Set dst (XorReductionV  src1 src2));
  match(Set dst (MinReductionV  src1 src2));
  match(Set dst (MaxReductionV  src1 src2));
  match(Set dst (UMinReductionV  src1 src2));
  match(Set dst (UMaxReductionV  src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_int $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    // The lane count comes from the vector input, since this node yields a scalar.
    const int lane_cnt = Matcher::vector_length(this, $src2);
    const int reduction_op = this->ideal_Opcode();
    __ reduceI(reduction_op, lane_cnt, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19419 
19420 // =======================Long Reduction==========================================
19421 
// Reduce a long vector (src2) together with a scalar input (src1) into the
// scalar dst when AVX512DQ is not supported. The vector and temp operands use
// the legVec operand class in this variant. The ideal opcode of the matched
// node is passed to reduceL to select the operation.
instruct reductionL(rRegL dst, rRegL src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && !VM_Version::supports_avx512dq());
  match(Set dst (AddReductionVL src1 src2));
  match(Set dst (MulReductionVL src1 src2));
  match(Set dst (AndReductionV  src1 src2));
  match(Set dst ( OrReductionV  src1 src2));
  match(Set dst (XorReductionV  src1 src2));
  match(Set dst (MinReductionV  src1 src2));
  match(Set dst (MaxReductionV  src1 src2));
  match(Set dst (UMinReductionV  src1 src2));
  match(Set dst (UMaxReductionV  src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    // The lane count comes from the vector input, since this node yields a scalar.
    const int lane_cnt = Matcher::vector_length(this, $src2);
    const int reduction_op = this->ideal_Opcode();
    __ reduceL(reduction_op, lane_cnt, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19442 
// Reduce a long vector (src2) together with a scalar input (src1) into the
// scalar dst when AVX512DQ is supported. Same encoding as the non-AVX512DQ
// rule, but the vector and temp operands use the vec operand class instead of
// legVec.
instruct reductionL_avx512dq(rRegL dst, rRegL src1, vec src2, vec vtmp1, vec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && VM_Version::supports_avx512dq());
  match(Set dst (AddReductionVL src1 src2));
  match(Set dst (MulReductionVL src1 src2));
  match(Set dst (AndReductionV  src1 src2));
  match(Set dst ( OrReductionV  src1 src2));
  match(Set dst (XorReductionV  src1 src2));
  match(Set dst (MinReductionV  src1 src2));
  match(Set dst (MaxReductionV  src1 src2));
  match(Set dst (UMinReductionV  src1 src2));
  match(Set dst (UMaxReductionV  src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    // The lane count comes from the vector input, since this node yields a scalar.
    const int lane_cnt = Matcher::vector_length(this, $src2);
    const int reduction_op = this->ideal_Opcode();
    __ reduceL(reduction_op, lane_cnt, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19463 
19464 // =======================Float Reduction==========================================
19465 
// Strictly ordered float add/mul reduction for vectors of at most 4 elements.
// dst is both the scalar input and the result (the match rules name dst on
// both sides) and is additionally declared TEMP, together with one vector
// temp. The ideal opcode selects add or mul inside reduce_fp.
instruct reductionF128(regF dst, vec src, vec vtmp) %{
  predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) <= 4); // src
  match(Set dst (AddReductionVF dst src));
  match(Set dst (MulReductionVF dst src));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_reduction_float  $dst,$src ; using $vtmp as TEMP" %}
  ins_encode %{
    // The lane count comes from the vector input, since this node yields a scalar.
    const int lane_cnt = Matcher::vector_length(this, $src);
    const int reduction_op = this->ideal_Opcode();
    __ reduce_fp(reduction_op, lane_cnt, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19479 
// Strictly ordered float add/mul reduction for vectors of exactly 8 elements.
// dst is both the scalar input and the result and is additionally declared
// TEMP, together with two vector temps that are handed to reduce_fp.
instruct reduction8F(regF dst, vec src, vec vtmp1, vec vtmp2) %{
  predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src
  match(Set dst (AddReductionVF dst src));
  match(Set dst (MulReductionVF dst src));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    // The lane count comes from the vector input, since this node yields a scalar.
    const int lane_cnt = Matcher::vector_length(this, $src);
    const int reduction_op = this->ideal_Opcode();
    __ reduce_fp(reduction_op, lane_cnt, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19493 
// Strictly ordered float add/mul reduction for vectors of exactly 16 elements.
// dst is both the scalar input and the result and is additionally declared
// TEMP. The vector input and both vector temps use the legVec operand class.
instruct reduction16F(regF dst, legVec src, legVec vtmp1, legVec vtmp2) %{
  predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src
  match(Set dst (AddReductionVF dst src));
  match(Set dst (MulReductionVF dst src));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    // The lane count comes from the vector input, since this node yields a scalar.
    const int lane_cnt = Matcher::vector_length(this, $src);
    const int reduction_op = this->ideal_Opcode();
    __ reduce_fp(reduction_op, lane_cnt, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
19507 
19508 
19509 instruct unordered_reduction2F(regF dst, regF src1, vec src2) %{
19510   // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19511   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19512   // src1 contains reduction identity
        // This variant handles a 2-lane vector input and needs no vector temporary;
        // only dst is declared TEMP.
19513   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2
19514   match(Set dst (AddReductionVF src1 src2));
19515   match(Set dst (MulReductionVF src1 src2));
19516   effect(TEMP dst);
19517   format %{ "vector_reduction_float  $dst,$src1,$src2 ;" %}
19518   ins_encode %{
          // src1 is matched but not passed to the helper: only dst and src2 are used.
19519     int opcode = this->ideal_Opcode();
19520     int vlen = Matcher::vector_length(this, $src2);
19521     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister);
19522   %}
19523   ins_pipe( pipe_slow );
19524 %}
19525 
19526 instruct unordered_reduction4F(regF dst, regF src1, vec src2, vec vtmp) %{
19527   // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19528   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19529   // src1 contains reduction identity
        // This variant handles a 4-lane vector input and uses one vector temporary.
19530   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2
19531   match(Set dst (AddReductionVF src1 src2));
19532   match(Set dst (MulReductionVF src1 src2));
19533   effect(TEMP dst, TEMP vtmp);
19534   format %{ "vector_reduction_float  $dst,$src1,$src2 ; using $vtmp as TEMP" %}
19535   ins_encode %{
          // src1 is matched but not passed to the helper: only dst, src2 and vtmp are used.
19536     int opcode = this->ideal_Opcode();
19537     int vlen = Matcher::vector_length(this, $src2);
19538     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
19539   %}
19540   ins_pipe( pipe_slow );
19541 %}
19542 
19543 instruct unordered_reduction8F(regF dst, regF src1, vec src2, vec vtmp1, vec vtmp2) %{
19544   // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19545   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19546   // src1 contains reduction identity
        // This variant handles an 8-lane vector input and uses two vector temporaries.
19547   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2
19548   match(Set dst (AddReductionVF src1 src2));
19549   match(Set dst (MulReductionVF src1 src2));
19550   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19551   format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19552   ins_encode %{
          // src1 is matched but not passed to the helper.
19553     int opcode = this->ideal_Opcode();
19554     int vlen = Matcher::vector_length(this, $src2);
19555     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19556   %}
19557   ins_pipe( pipe_slow );
19558 %}
19559 
19560 instruct unordered_reduction16F(regF dst, regF src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19561   // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
19562   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19563   // src1 contains reduction identity
        // This variant handles a 16-lane vector input; its vector operands are legVec
        // rather than vec.
19564   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src2
19565   match(Set dst (AddReductionVF src1 src2));
19566   match(Set dst (MulReductionVF src1 src2));
19567   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19568   format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19569   ins_encode %{
          // src1 is matched but not passed to the helper.
19570     int opcode = this->ideal_Opcode();
19571     int vlen = Matcher::vector_length(this, $src2);
19572     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19573   %}
19574   ins_pipe( pipe_slow );
19575 %}
19576 
19577 // =======================Double Reduction==========================================
19578 
19579 instruct reduction2D(regD dst, vec src, vec vtmp) %{
        // Strict-order double add/mul reduction for a 2-lane vector input.
        // dst is both the result and the first match input, and is declared TEMP
        // together with vtmp.
19580   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src
19581   match(Set dst (AddReductionVD dst src));
19582   match(Set dst (MulReductionVD dst src));
19583   effect(TEMP dst, TEMP vtmp);
19584   format %{ "vector_reduction_double $dst,$src ; using $vtmp as TEMP" %}
19585   ins_encode %{
          // Add vs. Mul is distinguished by the ideal opcode passed to reduce_fp.
19586     int opcode = this->ideal_Opcode();
19587     int vlen = Matcher::vector_length(this, $src);
19588     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
19589   %}
19590   ins_pipe( pipe_slow );
19591 %}
19592 
19593 instruct reduction4D(regD dst, vec src, vec vtmp1, vec vtmp2) %{
        // Strict-order double add/mul reduction for a 4-lane vector input,
        // using two vector temporaries.
19594   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src
19595   match(Set dst (AddReductionVD dst src));
19596   match(Set dst (MulReductionVD dst src));
19597   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19598   format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19599   ins_encode %{
          // Add vs. Mul is distinguished by the ideal opcode passed to reduce_fp.
19600     int opcode = this->ideal_Opcode();
19601     int vlen = Matcher::vector_length(this, $src);
19602     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19603   %}
19604   ins_pipe( pipe_slow );
19605 %}
19606 
19607 instruct reduction8D(regD dst, legVec src, legVec vtmp1, legVec vtmp2) %{
        // Strict-order double add/mul reduction for an 8-lane vector input.
        // The vector operands are legVec rather than vec in this widest variant.
19608   predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src
19609   match(Set dst (AddReductionVD dst src));
19610   match(Set dst (MulReductionVD dst src));
19611   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19612   format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
19613   ins_encode %{
          // Add vs. Mul is distinguished by the ideal opcode passed to reduce_fp.
19614     int opcode = this->ideal_Opcode();
19615     int vlen = Matcher::vector_length(this, $src);
19616     __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19617   %}
19618   ins_pipe( pipe_slow );
19619 %}
19620 
19621 instruct unordered_reduction2D(regD dst, regD src1, vec src2) %{
19622   // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19623   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19624   // src1 contains reduction identity
        // This variant handles a 2-lane vector input and needs no vector temporary;
        // only dst is declared TEMP.
19625   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2
19626   match(Set dst (AddReductionVD src1 src2));
19627   match(Set dst (MulReductionVD src1 src2));
19628   effect(TEMP dst);
19629   format %{ "vector_reduction_double $dst,$src1,$src2 ;" %}
19630   ins_encode %{
          // src1 is matched but not passed to the helper: only dst and src2 are used.
19631     int opcode = this->ideal_Opcode();
19632     int vlen = Matcher::vector_length(this, $src2);
19633     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister);
19634   %}
19635   ins_pipe( pipe_slow );
19636 %}
19637 
19638 instruct unordered_reduction4D(regD dst, regD src1, vec src2, vec vtmp) %{
19639   // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19640   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19641   // src1 contains reduction identity
        // This variant handles a 4-lane vector input and uses one vector temporary.
19642   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2
19643   match(Set dst (AddReductionVD src1 src2));
19644   match(Set dst (MulReductionVD src1 src2));
19645   effect(TEMP dst, TEMP vtmp);
19646   format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp as TEMP" %}
19647   ins_encode %{
          // src1 is matched but not passed to the helper.
19648     int opcode = this->ideal_Opcode();
19649     int vlen = Matcher::vector_length(this, $src2);
19650     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
19651   %}
19652   ins_pipe( pipe_slow );
19653 %}
19654 
19655 instruct unordered_reduction8D(regD dst, regD src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
19656   // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
19657   // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
19658   // src1 contains reduction identity
        // This variant handles an 8-lane vector input; its vector operands are legVec
        // rather than vec.
19659   predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2
19660   match(Set dst (AddReductionVD src1 src2));
19661   match(Set dst (MulReductionVD src1 src2));
19662   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19663   format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19664   ins_encode %{
          // src1 is matched but not passed to the helper.
19665     int opcode = this->ideal_Opcode();
19666     int vlen = Matcher::vector_length(this, $src2);
19667     __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19668   %}
19669   ins_pipe( pipe_slow );
19670 %}
19671 
19672 // =======================Byte Reduction==========================================
19673 
19674 instruct reductionB(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
        // Reduction of a byte vector (src2) combined with a scalar int (src1) into dst,
        // used when AVX512BW is not supported. Covers add, and, or, xor, signed min/max
        // and unsigned min/max; multiplication is not in this match list.
19675   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && !VM_Version::supports_avx512bw());
19676   match(Set dst (AddReductionVI src1 src2));
19677   match(Set dst (AndReductionV  src1 src2));
19678   match(Set dst ( OrReductionV  src1 src2));
19679   match(Set dst (XorReductionV  src1 src2));
19680   match(Set dst (MinReductionV  src1 src2));
19681   match(Set dst (MaxReductionV  src1 src2));
19682   match(Set dst (UMinReductionV  src1 src2));
19683   match(Set dst (UMaxReductionV  src1 src2));
        // Only the two vector temporaries are TEMP; dst is not.
19684   effect(TEMP vtmp1, TEMP vtmp2);
19685   format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19686   ins_encode %{
          // The ideal opcode selects which of the matched reductions reduceB performs.
19687     int opcode = this->ideal_Opcode();
19688     int vlen = Matcher::vector_length(this, $src2);
19689     __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19690   %}
19691   ins_pipe( pipe_slow );
19692 %}
19693 
19694 instruct reductionB_avx512bw(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
        // Byte-vector reduction used when AVX512BW is supported. It matches the same
        // operations and emits the same reduceB call as the non-AVX512BW rule; the
        // visible difference is that the vector operands are vec instead of legVec.
19695   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && VM_Version::supports_avx512bw());
19696   match(Set dst (AddReductionVI src1 src2));
19697   match(Set dst (AndReductionV  src1 src2));
19698   match(Set dst ( OrReductionV  src1 src2));
19699   match(Set dst (XorReductionV  src1 src2));
19700   match(Set dst (MinReductionV  src1 src2));
19701   match(Set dst (MaxReductionV  src1 src2));
19702   match(Set dst (UMinReductionV  src1 src2));
19703   match(Set dst (UMaxReductionV  src1 src2));
19704   effect(TEMP vtmp1, TEMP vtmp2);
19705   format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19706   ins_encode %{
          // The ideal opcode selects which of the matched reductions reduceB performs.
19707     int opcode = this->ideal_Opcode();
19708     int vlen = Matcher::vector_length(this, $src2);
19709     __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19710   %}
19711   ins_pipe( pipe_slow );
19712 %}
19713 
19714 // =======================Short Reduction==========================================
19715 
19716 instruct reductionS(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
        // Reduction of a short vector (src2) combined with a scalar int (src1) into dst.
        // Unlike the byte rules, this single rule also matches the multiply reduction
        // and has no CPU-feature condition in its predicate.
19717   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_SHORT); // src2
19718   match(Set dst (AddReductionVI src1 src2));
19719   match(Set dst (MulReductionVI src1 src2));
19720   match(Set dst (AndReductionV  src1 src2));
19721   match(Set dst ( OrReductionV  src1 src2));
19722   match(Set dst (XorReductionV  src1 src2));
19723   match(Set dst (MinReductionV  src1 src2));
19724   match(Set dst (MaxReductionV  src1 src2));
19725   match(Set dst (UMinReductionV  src1 src2));
19726   match(Set dst (UMaxReductionV  src1 src2));
19727   effect(TEMP vtmp1, TEMP vtmp2);
19728   format %{ "vector_reduction_short $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
19729   ins_encode %{
          // The ideal opcode selects which of the matched reductions reduceS performs.
19730     int opcode = this->ideal_Opcode();
19731     int vlen = Matcher::vector_length(this, $src2);
19732     __ reduceS(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19733   %}
19734   ins_pipe( pipe_slow );
19735 %}
19736 
19737 // =======================Mul Reduction==========================================
19738 
19739 instruct mul_reductionB(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
        // Multiply reduction of a byte vector with at most 32 lanes, combined with the
        // scalar src1. In contrast to the other byte reductions, dst is declared TEMP
        // here in addition to the two vector temporaries.
19740   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
19741             Matcher::vector_length(n->in(2)) <= 32); // src2
19742   match(Set dst (MulReductionVI src1 src2));
19743   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19744   format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
19745   ins_encode %{
          // Only one opcode can match, but it is still forwarded to the shared helper.
19746     int opcode = this->ideal_Opcode();
19747     int vlen = Matcher::vector_length(this, $src2);
19748     __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19749   %}
19750   ins_pipe( pipe_slow );
19751 %}
19752 
19753 instruct mul_reduction64B(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
        // Multiply reduction of a 64-lane byte vector. Same encoding call as the
        // <= 32 lane rule; the vector operands are legVec instead of vec.
19754   predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
19755             Matcher::vector_length(n->in(2)) == 64); // src2
19756   match(Set dst (MulReductionVI src1 src2));
19757   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
19758   format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
19759   ins_encode %{
          // Only one opcode can match, but it is still forwarded to the shared helper.
19760     int opcode = this->ideal_Opcode();
19761     int vlen = Matcher::vector_length(this, $src2);
19762     __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
19763   %}
19764   ins_pipe( pipe_slow );
19765 %}
19766 
19767 //--------------------Min/Max Float Reduction --------------------
19768 // Float Min/Max Reduction
19769 instruct minmax_reduction2F(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
19770                             legVec btmp, legVec xmm_1, rFlagsReg cr) %{
        // Float min/max reduction of a 2-lane vector when AVX10.2 is not supported.
        // It applies only when the scalar input src1 is an immediate whose type is
        // TypeF::POS_INF for Min or TypeF::NEG_INF for Max, so src1 is not passed to
        // the encoder. The flags register is declared killed.
19771   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19772             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
19773              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
19774             Matcher::vector_length(n->in(2)) == 2);
19775   match(Set dst (MinReductionV src1 src2));
19776   match(Set dst (MaxReductionV src1 src2));
19777   effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
19778   format %{ "vector_minmax2F_reduction $dst,$src1,$src2  ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
19779   ins_encode %{
19780     assert(UseAVX > 0, "sanity");
19781 
19782     int opcode = this->ideal_Opcode();
19783     int vlen = Matcher::vector_length(this, $src2);
          // NOTE(review): the third argument is false here and true in the *_av rules -
          // presumably it says whether dst already holds a value to fold in; confirm
          // against reduceFloatMinMax.
19784     __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
19785                          $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
19786   %}
19787   ins_pipe( pipe_slow );
19788 %}
19789 
19790 instruct minmax_reductionF(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
19791                            legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
        // Float min/max reduction of a vector with 4 or more lanes when AVX10.2 is not
        // supported. As in the 2-lane rule, src1 must be the immediate typed
        // TypeF::POS_INF (Min) or TypeF::NEG_INF (Max) and is not passed to the encoder.
        // One more vector temporary (xmm_0) is used than in the 2-lane rule.
19792   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19793             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
19794              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
19795             Matcher::vector_length(n->in(2)) >= 4);
19796   match(Set dst (MinReductionV src1 src2));
19797   match(Set dst (MaxReductionV src1 src2));
19798   effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
19799   format %{ "vector_minmaxF_reduction $dst,$src1,$src2  ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
19800   ins_encode %{
19801     assert(UseAVX > 0, "sanity");
19802 
19803     int opcode = this->ideal_Opcode();
19804     int vlen = Matcher::vector_length(this, $src2);
          // NOTE(review): third argument false - presumably "dst holds no incoming
          // value"; confirm against reduceFloatMinMax.
19805     __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
19806                          $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
19807   %}
19808   ins_pipe( pipe_slow );
19809 %}
19810 
19811 instruct minmax_reduction2F_av(legRegF dst, legVec src, legVec tmp, legVec atmp,
19812                                legVec btmp, legVec xmm_1, rFlagsReg cr) %{
        // Float min/max reduction of a 2-lane vector when AVX10.2 is not supported, in
        // the form where dst is also the scalar input of the matched node. The
        // predicate places no condition on that scalar input.
19813   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19814             Matcher::vector_length(n->in(2)) == 2);
19815   match(Set dst (MinReductionV dst src));
19816   match(Set dst (MaxReductionV dst src));
19817   effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
19818   format %{ "vector_minmax2F_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
19819   ins_encode %{
19820     assert(UseAVX > 0, "sanity");
19821 
19822     int opcode = this->ideal_Opcode();
19823     int vlen = Matcher::vector_length(this, $src);
          // NOTE(review): third argument true - presumably "dst carries a value that
          // must take part in the reduction"; confirm against reduceFloatMinMax.
19824     __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister,
19825                          $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
19826   %}
19827   ins_pipe( pipe_slow );
19828 %}
19829 
19830 
19831 instruct minmax_reductionF_av(legRegF dst, legVec src, legVec tmp, legVec atmp, legVec btmp,
19832                               legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
        // Float min/max reduction of a vector with 4 or more lanes when AVX10.2 is not
        // supported, in the form where dst is also the scalar input of the matched node.
19833   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19834             Matcher::vector_length(n->in(2)) >= 4);
19835   match(Set dst (MinReductionV dst src));
19836   match(Set dst (MaxReductionV dst src));
19837   effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
19838   format %{ "vector_minmaxF_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
19839   ins_encode %{
19840     assert(UseAVX > 0, "sanity");
19841 
19842     int opcode = this->ideal_Opcode();
19843     int vlen = Matcher::vector_length(this, $src);
          // NOTE(review): third argument true - presumably "dst carries a value that
          // must take part in the reduction"; confirm against reduceFloatMinMax.
19844     __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister,
19845                          $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
19846   %}
19847   ins_pipe( pipe_slow );
19848 %}
19849 
19850 instruct minmax_reduction2F_avx10_2(regF dst, immF src1, vec src2, vec xtmp1) %{
        // Float min/max reduction of a 2-lane vector when AVX10.2 is supported.
        // It applies only when src1 is the immediate typed TypeF::POS_INF (Min) or
        // TypeF::NEG_INF (Max); src1 is not passed to the encoder. Compared with the
        // non-AVX10.2 rule it needs a single vector temporary and does not kill flags.
19851   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19852             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
19853              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
19854             Matcher::vector_length(n->in(2)) == 2);
19855   match(Set dst (MinReductionV src1 src2));
19856   match(Set dst (MaxReductionV src1 src2));
19857   effect(TEMP dst, TEMP xtmp1);
        // Label aligned with the other 2-lane float min/max rules so listings identify
        // the element type and width.
19858   format %{ "vector_minmax2F_reduction $dst, $src1, $src2 \t; using $xtmp1 as TEMP" %}
19859   ins_encode %{
19860     int opcode = this->ideal_Opcode();
19861     int vlen = Matcher::vector_length(this, $src2);
          // The three temp slots used by the non-AVX10.2 rules are passed as xnoreg.
19862     __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
19863                          xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister);
19864   %}
19865   ins_pipe( pipe_slow );
19866 %}
19867 
19868 instruct minmax_reductionF_avx10_2(regF dst, immF src1, vec src2, vec xtmp1, vec xtmp2) %{
        // Float min/max reduction of a vector with 4 or more lanes when AVX10.2 is
        // supported. It applies only when src1 is the immediate typed TypeF::POS_INF
        // (Min) or TypeF::NEG_INF (Max); src1 is not passed to the encoder.
19869   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19870             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
19871              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
19872             Matcher::vector_length(n->in(2)) >= 4);
19873   match(Set dst (MinReductionV src1 src2));
19874   match(Set dst (MaxReductionV src1 src2));
19875   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
        // Label aligned with the other >= 4 lane float min/max rules so listings
        // identify the element type.
19876   format %{ "vector_minmaxF_reduction $dst, $src1, $src2 \t; using $xtmp1 and $xtmp2 as TEMP" %}
19877   ins_encode %{
19878     int opcode = this->ideal_Opcode();
19879     int vlen = Matcher::vector_length(this, $src2);
          // The three temp slots used by the non-AVX10.2 rules are passed as xnoreg.
19880     __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg,
19881                          xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
19882   %}
19883   ins_pipe( pipe_slow );
19884 %}
19885 
19886 instruct minmax_reduction2F_av_avx10_2(regF dst, vec src, vec xtmp1) %{
        // Float min/max reduction of a 2-lane vector when AVX10.2 is supported, in the
        // form where dst is also the scalar input of the matched node.
19887   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19888             Matcher::vector_length(n->in(2)) == 2);
19889   match(Set dst (MinReductionV dst src));
19890   match(Set dst (MaxReductionV dst src));
19891   effect(TEMP dst, TEMP xtmp1);
19892   format %{ "vector_minmax2F_reduction $dst, $src \t; using $xtmp1 as TEMP" %}
19893   ins_encode %{
19894     int opcode = this->ideal_Opcode();
19895     int vlen = Matcher::vector_length(this, $src);
          // The three temp slots used by the non-AVX10.2 rules are passed as xnoreg.
19896     __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg,
19897                          $xtmp1$$XMMRegister);
19898   %}
19899   ins_pipe( pipe_slow );
19900 %}
19901 
19902 instruct minmax_reductionF_av_avx10_2(regF dst, vec src, vec xtmp1, vec xtmp2) %{
        // Float min/max reduction of a vector with 4 or more lanes when AVX10.2 is
        // supported, in the form where dst is also the scalar input of the matched node.
19903   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
19904             Matcher::vector_length(n->in(2)) >= 4);
19905   match(Set dst (MinReductionV dst src));
19906   match(Set dst (MaxReductionV dst src));
19907   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
        // The label must not say "2F": this rule is selected for >= 4 lanes.
19908   format %{ "vector_minmaxF_reduction $dst, $src \t; using $xtmp1 and $xtmp2 as TEMP" %}
19909   ins_encode %{
19910     int opcode = this->ideal_Opcode();
19911     int vlen = Matcher::vector_length(this, $src);
          // The three temp slots used by the non-AVX10.2 rules are passed as xnoreg.
19912     __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg,
19913                          $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
19914   %}
19915   ins_pipe( pipe_slow );
19916 %}
19917 
19918 //--------------------Min/Max Double Reduction --------------------
19919 instruct minmax_reduction2D(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2,
19920                             legVec tmp3, legVec tmp4, rFlagsReg cr) %{
        // Double min/max reduction of a 2-lane vector when AVX10.2 is not supported.
        // It applies only when src1 is an immediate whose type is TypeD::POS_INF for
        // Min or TypeD::NEG_INF for Max, so src1 is not passed to the encoder. The
        // flags register is declared killed.
19921   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19922             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
19923              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
19924             Matcher::vector_length(n->in(2)) == 2);
19925   match(Set dst (MinReductionV src1 src2));
19926   match(Set dst (MaxReductionV src1 src2));
19927   effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
19928   format %{ "vector_minmax2D_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
19929   ins_encode %{
19930     assert(UseAVX > 0, "sanity");
19931 
19932     int opcode = this->ideal_Opcode();
19933     int vlen = Matcher::vector_length(this, $src2);
          // NOTE(review): third argument false - presumably "dst holds no incoming
          // value"; confirm against reduceDoubleMinMax.
19934     __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
19935                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister);
19936   %}
19937   ins_pipe( pipe_slow );
19938 %}
19939 
19940 instruct minmax_reductionD(legRegD dst, immD src1, legVec src2, legVec tmp1, legVec tmp2,
19941                            legVec tmp3, legVec tmp4, legVec tmp5, rFlagsReg cr) %{
        // Double min/max reduction of a vector with 4 or more lanes when AVX10.2 is not
        // supported. src1 must be the immediate typed TypeD::POS_INF (Min) or
        // TypeD::NEG_INF (Max) and is not passed to the encoder. One more vector
        // temporary (tmp5) is used than in the 2-lane rule.
19942   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19943             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
19944              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
19945             Matcher::vector_length(n->in(2)) >= 4);
19946   match(Set dst (MinReductionV src1 src2));
19947   match(Set dst (MaxReductionV src1 src2));
19948   effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
19949   format %{ "vector_minmaxD_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
19950   ins_encode %{
19951     assert(UseAVX > 0, "sanity");
19952 
19953     int opcode = this->ideal_Opcode();
19954     int vlen = Matcher::vector_length(this, $src2);
          // NOTE(review): third argument false - presumably "dst holds no incoming
          // value"; confirm against reduceDoubleMinMax.
19955     __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
19956                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister);
19957   %}
19958   ins_pipe( pipe_slow );
19959 %}
19960 
19961 
19962 instruct minmax_reduction2D_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2,
19963                                legVec tmp3, legVec tmp4, rFlagsReg cr) %{
        // Double min/max reduction of a 2-lane vector when AVX10.2 is not supported, in
        // the form where dst is also the scalar input of the matched node. The
        // predicate places no condition on that scalar input.
19964   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19965             Matcher::vector_length(n->in(2)) == 2);
19966   match(Set dst (MinReductionV dst src));
19967   match(Set dst (MaxReductionV dst src));
19968   effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
19969   format %{ "vector_minmax2D_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
19970   ins_encode %{
19971     assert(UseAVX > 0, "sanity");
19972 
19973     int opcode = this->ideal_Opcode();
19974     int vlen = Matcher::vector_length(this, $src);
          // NOTE(review): third argument true - presumably "dst carries a value that
          // must take part in the reduction"; confirm against reduceDoubleMinMax.
19975     __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
19976                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister);
19977   %}
19978   ins_pipe( pipe_slow );
19979 %}
19980 
19981 instruct minmax_reductionD_av(legRegD dst, legVec src, legVec tmp1, legVec tmp2, legVec tmp3,
19982                               legVec tmp4, legVec tmp5, rFlagsReg cr) %{
        // Double min/max reduction of a vector with 4 or more lanes when AVX10.2 is not
        // supported, in the form where dst is also the scalar input of the matched node.
19983   predicate(!VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
19984             Matcher::vector_length(n->in(2)) >= 4);
19985   match(Set dst (MinReductionV dst src));
19986   match(Set dst (MaxReductionV dst src));
19987   effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
19988   format %{ "vector_minmaxD_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
19989   ins_encode %{
19990     assert(UseAVX > 0, "sanity");
19991 
19992     int opcode = this->ideal_Opcode();
19993     int vlen = Matcher::vector_length(this, $src);
          // NOTE(review): third argument true - presumably "dst carries a value that
          // must take part in the reduction"; confirm against reduceDoubleMinMax.
19994     __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
19995                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister);
19996   %}
19997   ins_pipe( pipe_slow );
19998 %}
19999 
20000 instruct minmax_reduction2D_avx10_2(regD dst, immD src1, vec src2, vec xtmp1) %{
        // Double min/max reduction of a 2-lane vector when AVX10.2 is supported.
        // It applies only when src1 is the immediate typed TypeD::POS_INF (Min) or
        // TypeD::NEG_INF (Max); src1 is not passed to the encoder. A single vector
        // temporary is used and flags are not declared killed.
20001   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20002             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
20003              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
20004             Matcher::vector_length(n->in(2)) == 2);
20005   match(Set dst (MinReductionV src1 src2));
20006   match(Set dst (MaxReductionV src1 src2));
20007   effect(TEMP dst, TEMP xtmp1);
20008   format %{ "vector_minmax2D_reduction $dst, $src1, $src2 ; using $xtmp1 as TEMP" %}
20009   ins_encode %{
20010     int opcode = this->ideal_Opcode();
20011     int vlen = Matcher::vector_length(this, $src2);
          // The three temp slots used by the non-AVX10.2 rules are passed as xnoreg.
20012     __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg,
20013                           xnoreg, xnoreg, $xtmp1$$XMMRegister);
20014   %}
20015   ins_pipe( pipe_slow );
20016 %}
20017 
20018 instruct minmax_reductionD_avx10_2(regD dst, immD src1, vec src2, vec xtmp1, vec xtmp2) %{
        // Double min/max reduction of a vector with 4 or more lanes when AVX10.2 is
        // supported. It applies only when src1 is the immediate typed TypeD::POS_INF
        // (Min) or TypeD::NEG_INF (Max); src1 is not passed to the encoder.
20019   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20020             ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
20021              (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
20022             Matcher::vector_length(n->in(2)) >= 4);
20023   match(Set dst (MinReductionV src1 src2));
20024   match(Set dst (MaxReductionV src1 src2));
20025   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
20026   format %{ "vector_minmaxD_reduction $dst, $src1, $src2 ; using $xtmp1 and $xtmp2 as TEMP" %}
20027   ins_encode %{
20028     int opcode = this->ideal_Opcode();
20029     int vlen = Matcher::vector_length(this, $src2);
          // The three temp slots used by the non-AVX10.2 rules are passed as xnoreg.
20030     __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg,
20031                           xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
20032   %}
20033   ins_pipe( pipe_slow );
20034 %}
20035 
20036 
20037 instruct minmax_reduction2D_av_avx10_2(regD dst, vec src, vec xtmp1) %{
        // Double min/max reduction of a 2-lane vector when AVX10.2 is supported, in the
        // form where dst is also the scalar input of the matched node.
20038   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20039             Matcher::vector_length(n->in(2)) == 2);
20040   match(Set dst (MinReductionV dst src));
20041   match(Set dst (MaxReductionV dst src));
20042   effect(TEMP dst, TEMP xtmp1);
20043   format %{ "vector_minmax2D_reduction $dst, $src ; using $xtmp1 as TEMP" %}
20044   ins_encode %{
20045     int opcode = this->ideal_Opcode();
20046     int vlen = Matcher::vector_length(this, $src);
          // The three temp slots used by the non-AVX10.2 rules are passed as xnoreg.
20047     __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
20048                           xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister);
20049   %}
20050   ins_pipe( pipe_slow );
20051 %}
20052 
20053 instruct minmax_reductionD_av_avx10_2(regD dst, vec src, vec xtmp1, vec xtmp2) %{
        // Double min/max reduction of a vector with 4 or more lanes when AVX10.2 is
        // supported, in the form where dst is also the scalar input of the matched node.
20054   predicate(VM_Version::supports_avx10_2() && Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
20055             Matcher::vector_length(n->in(2)) >= 4);
20056   match(Set dst (MinReductionV dst src));
20057   match(Set dst (MaxReductionV dst src));
20058   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
20059   format %{ "vector_minmaxD_reduction $dst, $src ; using $xtmp1 and $xtmp2 as TEMP" %}
20060   ins_encode %{
20061     int opcode = this->ideal_Opcode();
20062     int vlen = Matcher::vector_length(this, $src);
          // The three temp slots used by the non-AVX10.2 rules are passed as xnoreg.
20063     __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
20064                           xnoreg, xnoreg, xnoreg, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
20065   %}
20066   ins_pipe( pipe_slow );
20067 %}
20068 
20069 // ====================VECTOR ARITHMETIC=======================================
20070 
20071 // --------------------------------- ADD --------------------------------------
20072 
20073 // Bytes vector add
20074 instruct vaddB(vec dst, vec src) %{
        // Byte vector add used when UseAVX == 0. The match pattern has dst as both the
        // result and the first addend, and the rule emits the two-operand paddb.
20075   predicate(UseAVX == 0);
20076   match(Set dst (AddVB dst src));
20077   format %{ "paddb   $dst,$src\t! add packedB" %}
20078   ins_encode %{
20079     __ paddb($dst$$XMMRegister, $src$$XMMRegister);
20080   %}
20081   ins_pipe( pipe_slow );
20082 %}
20083 
20084 instruct vaddB_reg(vec dst, vec src1, vec src2) %{
        // Byte vector add used when UseAVX > 0: three-operand vpaddb with separate
        // destination and source registers.
20085   predicate(UseAVX > 0);
20086   match(Set dst (AddVB src1 src2));
20087   format %{ "vpaddb  $dst,$src1,$src2\t! add packedB" %}
20088   ins_encode %{
          // The vector length encoding is derived from this node and passed to vpaddb.
20089     int vlen_enc = vector_length_encoding(this);
20090     __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
20091   %}
20092   ins_pipe( pipe_slow );
20093 %}
20094 
// AVX rule with the second input folded in from memory: dst = src + LoadVector(mem).
// Requires UseAVX > 0 and an in(1) vector longer than 8 bytes.
instruct vaddB_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0 &&
            Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb  $dst,$src,$mem\t! add packedB" %}
  ins_encode %{
    const int len_enc = vector_length_encoding(this);
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, len_enc);
  %}
  ins_pipe(pipe_slow);
%}
20106 
20107 // Shorts/Chars vector add
// Non-AVX rule (UseAVX == 0): two-operand paddw, dst = dst + src.
instruct vaddS(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddVS dst src));
  format %{ "paddw   $dst,$src\t! add packedS" %}
  ins_encode %{
    // dst is both the first input and the result register.
    const XMMRegister acc = $dst$$XMMRegister;
    __ paddw(acc, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
20117 
// AVX rule (UseAVX > 0): three-operand vpaddw, dst = src1 + src2.
instruct vaddS_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw  $dst,$src1,$src2\t! add packedS" %}
  ins_encode %{
    // The length encoding is computed from this node.
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
              vector_length_encoding(this));
  %}
  ins_pipe(pipe_slow);
%}
20128 
// AVX rule with the second input folded in from memory: dst = src + LoadVector(mem).
// Requires UseAVX > 0 and an in(1) vector longer than 8 bytes.
instruct vaddS_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0 &&
            Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw  $dst,$src,$mem\t! add packedS" %}
  ins_encode %{
    const int len_enc = vector_length_encoding(this);
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, len_enc);
  %}
  ins_pipe(pipe_slow);
%}
20140 
20141 // Integers vector add
// Non-AVX rule (UseAVX == 0): two-operand paddd, dst = dst + src.
instruct vaddI(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddVI dst src));
  format %{ "paddd   $dst,$src\t! add packedI" %}
  ins_encode %{
    // dst is both the first input and the result register.
    const XMMRegister acc = $dst$$XMMRegister;
    __ paddd(acc, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
20151 
// AVX rule (UseAVX > 0): three-operand vpaddd, dst = src1 + src2.
instruct vaddI_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd  $dst,$src1,$src2\t! add packedI" %}
  ins_encode %{
    // The length encoding is computed from this node.
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
              vector_length_encoding(this));
  %}
  ins_pipe(pipe_slow);
%}
20162 
20163 
// AVX rule with the second input folded in from memory: dst = src + LoadVector(mem).
// Requires UseAVX > 0 and an in(1) vector longer than 8 bytes.
instruct vaddI_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0 &&
            Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd  $dst,$src,$mem\t! add packedI" %}
  ins_encode %{
    const int len_enc = vector_length_encoding(this);
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, len_enc);
  %}
  ins_pipe(pipe_slow);
%}
20175 
20176 // Longs vector add
// Non-AVX rule (UseAVX == 0): two-operand paddq, dst = dst + src.
instruct vaddL(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddVL dst src));
  format %{ "paddq   $dst,$src\t! add packedL" %}
  ins_encode %{
    // dst is both the first input and the result register.
    const XMMRegister acc = $dst$$XMMRegister;
    __ paddq(acc, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
20186 
// AVX rule (UseAVX > 0): three-operand vpaddq, dst = src1 + src2.
instruct vaddL_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq  $dst,$src1,$src2\t! add packedL" %}
  ins_encode %{
    // The length encoding is computed from this node.
    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
              vector_length_encoding(this));
  %}
  ins_pipe(pipe_slow);
%}
20197 
// AVX rule with the second input folded in from memory: dst = src + LoadVector(mem).
// Requires UseAVX > 0 and an in(1) vector longer than 8 bytes.
instruct vaddL_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0 &&
            Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq  $dst,$src,$mem\t! add packedL" %}
  ins_encode %{
    const int len_enc = vector_length_encoding(this);
    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, len_enc);
  %}
  ins_pipe(pipe_slow);
%}
20209 
20210 // Floats vector add
// Non-AVX rule (UseAVX == 0): two-operand addps, dst = dst + src.
instruct vaddF(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddVF dst src));
  format %{ "addps   $dst,$src\t! add packedF" %}
  ins_encode %{
    // dst is both the first input and the result register.
    const XMMRegister acc = $dst$$XMMRegister;
    __ addps(acc, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
20220 
// AVX rule (UseAVX > 0): three-operand vaddps, dst = src1 + src2.
instruct vaddF_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps  $dst,$src1,$src2\t! add packedF" %}
  ins_encode %{
    // The length encoding is computed from this node.
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
              vector_length_encoding(this));
  %}
  ins_pipe(pipe_slow);
%}
20231 
// AVX rule with the second input folded in from memory: dst = src + LoadVector(mem).
// Requires UseAVX > 0 and an in(1) vector longer than 8 bytes.
instruct vaddF_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0 &&
            Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps  $dst,$src,$mem\t! add packedF" %}
  ins_encode %{
    const int len_enc = vector_length_encoding(this);
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, len_enc);
  %}
  ins_pipe(pipe_slow);
%}
20243 
20244 // Doubles vector add
// Non-AVX rule (UseAVX == 0): two-operand addpd, dst = dst + src.
instruct vaddD(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddVD dst src));
  format %{ "addpd   $dst,$src\t! add packedD" %}
  ins_encode %{
    // dst is both the first input and the result register.
    const XMMRegister acc = $dst$$XMMRegister;
    __ addpd(acc, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
20254 
// AVX rule (UseAVX > 0): three-operand vaddpd, dst = src1 + src2.
instruct vaddD_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd  $dst,$src1,$src2\t! add packedD" %}
  ins_encode %{
    // The length encoding is computed from this node.
    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
              vector_length_encoding(this));
  %}
  ins_pipe(pipe_slow);
%}
20265 
// AVX rule with the second input folded in from memory: dst = src + LoadVector(mem).
// Requires UseAVX > 0 and an in(1) vector longer than 8 bytes.
instruct vaddD_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0 &&
            Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd  $dst,$src,$mem\t! add packedD" %}
  ins_encode %{
    const int len_enc = vector_length_encoding(this);
    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, len_enc);
  %}
  ins_pipe(pipe_slow);
%}
20277 
20278 // --------------------------------- SUB --------------------------------------
20279 
20280 // Bytes vector sub
// Non-AVX rule (UseAVX == 0): two-operand psubb, dst = dst - src.
instruct vsubB(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubVB dst src));
  format %{ "psubb   $dst,$src\t! sub packedB" %}
  ins_encode %{
    // dst is both the minuend and the result register.
    const XMMRegister acc = $dst$$XMMRegister;
    __ psubb(acc, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
20290 
// AVX rule (UseAVX > 0): three-operand vpsubb, dst = src1 - src2.
instruct vsubB_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb  $dst,$src1,$src2\t! sub packedB" %}
  ins_encode %{
    // The length encoding is computed from this node.
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
              vector_length_encoding(this));
  %}
  ins_pipe(pipe_slow);
%}
20301 
// AVX rule with the subtrahend folded in from memory: dst = src - LoadVector(mem).
// Requires UseAVX > 0 and an in(1) vector longer than 8 bytes.
instruct vsubB_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0 &&
            Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb  $dst,$src,$mem\t! sub packedB" %}
  ins_encode %{
    const int len_enc = vector_length_encoding(this);
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, len_enc);
  %}
  ins_pipe(pipe_slow);
%}
20313 
20314 // Shorts/Chars vector sub
// Non-AVX rule (UseAVX == 0): two-operand psubw, dst = dst - src.
instruct vsubS(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubVS dst src));
  format %{ "psubw   $dst,$src\t! sub packedS" %}
  ins_encode %{
    // dst is both the minuend and the result register.
    const XMMRegister acc = $dst$$XMMRegister;
    __ psubw(acc, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
20324 
20325 
// AVX rule (UseAVX > 0): three-operand vpsubw, dst = src1 - src2.
instruct vsubS_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw  $dst,$src1,$src2\t! sub packedS" %}
  ins_encode %{
    // The length encoding is computed from this node.
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
              vector_length_encoding(this));
  %}
  ins_pipe(pipe_slow);
%}
20336 
// AVX rule with the subtrahend folded in from memory: dst = src - LoadVector(mem).
// Requires UseAVX > 0 and an in(1) vector longer than 8 bytes.
instruct vsubS_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0 &&
            Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw  $dst,$src,$mem\t! sub packedS" %}
  ins_encode %{
    const int len_enc = vector_length_encoding(this);
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, len_enc);
  %}
  ins_pipe(pipe_slow);
%}
20348 
20349 // Integers vector sub
// Non-AVX rule (UseAVX == 0): two-operand psubd, dst = dst - src.
instruct vsubI(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubVI dst src));
  format %{ "psubd   $dst,$src\t! sub packedI" %}
  ins_encode %{
    // dst is both the minuend and the result register.
    const XMMRegister acc = $dst$$XMMRegister;
    __ psubd(acc, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
20359 
// AVX rule (UseAVX > 0): three-operand vpsubd, dst = src1 - src2.
instruct vsubI_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd  $dst,$src1,$src2\t! sub packedI" %}
  ins_encode %{
    // The length encoding is computed from this node.
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
              vector_length_encoding(this));
  %}
  ins_pipe(pipe_slow);
%}
20370 
// AVX rule with the subtrahend folded in from memory: dst = src - LoadVector(mem).
// Requires UseAVX > 0 and an in(1) vector longer than 8 bytes.
instruct vsubI_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0 &&
            Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd  $dst,$src,$mem\t! sub packedI" %}
  ins_encode %{
    const int len_enc = vector_length_encoding(this);
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, len_enc);
  %}
  ins_pipe(pipe_slow);
%}
20382 
20383 // Longs vector sub
// Non-AVX rule (UseAVX == 0): two-operand psubq, dst = dst - src.
instruct vsubL(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubVL dst src));
  format %{ "psubq   $dst,$src\t! sub packedL" %}
  ins_encode %{
    // dst is both the minuend and the result register.
    const XMMRegister acc = $dst$$XMMRegister;
    __ psubq(acc, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
20393 
// AVX rule (UseAVX > 0): three-operand vpsubq, dst = src1 - src2.
instruct vsubL_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubVL src1 src2));
  format %{ "vpsubq  $dst,$src1,$src2\t! sub packedL" %}
  ins_encode %{
    // The length encoding is computed from this node.
    __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
              vector_length_encoding(this));
  %}
  ins_pipe(pipe_slow);
%}
20404 
20405 
// AVX rule with the subtrahend folded in from memory: dst = src - LoadVector(mem).
// Requires UseAVX > 0 and an in(1) vector longer than 8 bytes.
instruct vsubL_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0 &&
            Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (SubVL src (LoadVector mem)));
  format %{ "vpsubq  $dst,$src,$mem\t! sub packedL" %}
  ins_encode %{
    const int len_enc = vector_length_encoding(this);
    __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, len_enc);
  %}
  ins_pipe(pipe_slow);
%}
20417 
20418 // Floats vector sub
// Non-AVX rule (UseAVX == 0): two-operand subps, dst = dst - src.
instruct vsubF(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubVF dst src));
  format %{ "subps   $dst,$src\t! sub packedF" %}
  ins_encode %{
    // dst is both the minuend and the result register.
    const XMMRegister acc = $dst$$XMMRegister;
    __ subps(acc, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
20428 
// AVX rule (UseAVX > 0): three-operand vsubps, dst = src1 - src2.
instruct vsubF_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps  $dst,$src1,$src2\t! sub packedF" %}
  ins_encode %{
    // The length encoding is computed from this node.
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
              vector_length_encoding(this));
  %}
  ins_pipe(pipe_slow);
%}
20439 
// AVX rule with the subtrahend folded in from memory: dst = src - LoadVector(mem).
// Requires UseAVX > 0 and an in(1) vector longer than 8 bytes.
instruct vsubF_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0 &&
            Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (SubVF src (LoadVector mem)));
  format %{ "vsubps  $dst,$src,$mem\t! sub packedF" %}
  ins_encode %{
    const int len_enc = vector_length_encoding(this);
    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, len_enc);
  %}
  ins_pipe(pipe_slow);
%}
20451 
20452 // Doubles vector sub
// Non-AVX rule (UseAVX == 0): two-operand subpd, dst = dst - src.
instruct vsubD(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubVD dst src));
  format %{ "subpd   $dst,$src\t! sub packedD" %}
  ins_encode %{
    // dst is both the minuend and the result register.
    const XMMRegister acc = $dst$$XMMRegister;
    __ subpd(acc, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
20462 
// AVX rule (UseAVX > 0): three-operand vsubpd, dst = src1 - src2.
instruct vsubD_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubVD src1 src2));
  format %{ "vsubpd  $dst,$src1,$src2\t! sub packedD" %}
  ins_encode %{
    // The length encoding is computed from this node.
    __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
              vector_length_encoding(this));
  %}
  ins_pipe(pipe_slow);
%}
20473 
// AVX rule with the subtrahend folded in from memory: dst = src - LoadVector(mem).
// Requires UseAVX > 0 and an in(1) vector longer than 8 bytes.
instruct vsubD_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0 &&
            Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (SubVD src (LoadVector mem)));
  format %{ "vsubpd  $dst,$src,$mem\t! sub packedD" %}
  ins_encode %{
    const int len_enc = vector_length_encoding(this);
    __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, len_enc);
  %}
  ins_pipe(pipe_slow);
%}
20485 
20486 // --------------------------------- MUL --------------------------------------
20487 
20488 // Byte vector mul
// Byte multiply for vectors of at most 8 bytes. Both inputs are sign-extended
// from bytes to words (pmovsxbw), multiplied as words, and the products are
// masked to their low byte before being packed back with packuswb.
instruct vmul8B(vec dst, vec src1, vec src2, vec xtmp) %{
  predicate(Matcher::vector_length_in_bytes(n) <= 8);
  match(Set dst (MulVB src1 src2));
  effect(TEMP dst, TEMP xtmp);
  format %{ "mulVB   $dst, $src1, $src2\t! using $xtmp as TEMP" %}
  ins_encode %{
    assert(UseSSE > 3, "required");
    const XMMRegister product = $dst$$XMMRegister;
    const XMMRegister widened = $xtmp$$XMMRegister;
    // Widen both operands to 16-bit lanes.
    __ pmovsxbw(product, $src1$$XMMRegister);
    __ pmovsxbw(widened, $src2$$XMMRegister);
    __ pmullw(product, widened);
    // Shift left then right by 8 to clear the upper byte of every word.
    __ psllw(product, 8);
    __ psrlw(product, 8);
    __ packuswb(product, product);
  %}
  ins_pipe(pipe_slow);
%}
20505 
// Non-AVX byte multiply for vectors longer than 8 bytes. The odd-index and
// even-index bytes are multiplied separately with word multiplies and the two
// partial results are OR-ed together.
instruct vmulB(vec dst, vec src1, vec src2, vec xtmp) %{
  predicate(UseAVX == 0 &&
            Matcher::vector_length_in_bytes(n) > 8);
  match(Set dst (MulVB src1 src2));
  effect(TEMP dst, TEMP xtmp);
  format %{ "mulVB   $dst, $src1, $src2\t! using $xtmp as TEMP" %}
  ins_encode %{
    assert(UseSSE > 3, "required");
    const XMMRegister result  = $dst$$XMMRegister;
    const XMMRegister scratch = $xtmp$$XMMRegister;
    const XMMRegister lhs     = $src1$$XMMRegister;
    const XMMRegister rhs     = $src2$$XMMRegister;

    // Odd-index bytes: move them into the low byte of each word, multiply,
    // then shift the product back into the high byte.
    __ movdqu(result, lhs);
    __ psrlw(result, 8);
    __ movdqu(scratch, rhs);
    __ psrlw(scratch, 8);
    __ pmullw(result, scratch);
    __ psllw(result, 8);

    // Even-index bytes: multiply the words as they are and keep only the
    // low byte of each product.
    __ movdqu(scratch, lhs);
    __ pmullw(scratch, rhs);
    __ psllw(scratch, 8);
    __ psrlw(scratch, 8);

    // Merge the odd and even halves.
    __ por(result, scratch);
  %}
  ins_pipe(pipe_slow);
%}
20530 
// AVX byte multiply for vectors longer than 8 bytes. Same odd/even split as
// the non-AVX rule, but using three-operand instructions and two temporaries
// so that dst is only written by the final vpor.
instruct vmulB_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
  predicate(UseAVX > 0 &&
            Matcher::vector_length_in_bytes(n) > 8);
  match(Set dst (MulVB src1 src2));
  effect(TEMP xtmp1, TEMP xtmp2);
  format %{ "vmulVB  $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
  ins_encode %{
    const int len_enc = vector_length_encoding(this);
    const XMMRegister even_part = $xtmp1$$XMMRegister;
    const XMMRegister odd_part  = $xtmp2$$XMMRegister;
    const XMMRegister lhs       = $src1$$XMMRegister;
    const XMMRegister rhs       = $src2$$XMMRegister;

    // Odd-index bytes: shift down, multiply, shift the product back up.
    __ vpsrlw(odd_part, lhs, 8, len_enc);
    __ vpsrlw(even_part, rhs, 8, len_enc);
    __ vpmullw(odd_part, even_part, odd_part, len_enc);
    __ vpsllw(odd_part, odd_part, 8, len_enc);

    // Even-index bytes: multiply in place and keep the low byte of each word.
    __ vpmullw(even_part, lhs, rhs, len_enc);
    __ vpsllw(even_part, even_part, 8, len_enc);
    __ vpsrlw(even_part, even_part, 8, len_enc);

    // Merge the odd and even halves into the destination.
    __ vpor($dst$$XMMRegister, even_part, odd_part, len_enc);
  %}
  ins_pipe(pipe_slow);
%}
20552 
20553 // Shorts/Chars vector mul
// Non-AVX rule (UseAVX == 0): two-operand pmullw, dst = dst * src.
instruct vmulS(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (MulVS dst src));
  format %{ "pmullw  $dst,$src\t! mul packedS" %}
  ins_encode %{
    // dst is both the first input and the result register.
    const XMMRegister acc = $dst$$XMMRegister;
    __ pmullw(acc, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
20563 
// AVX rule (UseAVX > 0): three-operand vpmullw, dst = src1 * src2.
instruct vmulS_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packedS" %}
  ins_encode %{
    // The length encoding is computed from this node.
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
               vector_length_encoding(this));
  %}
  ins_pipe(pipe_slow);
%}
20574 
// AVX rule with the second input folded in from memory: dst = src * LoadVector(mem).
// Requires UseAVX > 0 and an in(1) vector longer than 8 bytes.
instruct vmulS_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0 &&
            Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packedS" %}
  ins_encode %{
    const int len_enc = vector_length_encoding(this);
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, len_enc);
  %}
  ins_pipe(pipe_slow);
%}
20586 
20587 // Integers vector mul
// Non-AVX rule (UseAVX == 0): two-operand pmulld, dst = dst * src.
// The encoding asserts UseSSE > 3.
instruct vmulI(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (MulVI dst src));
  format %{ "pmulld  $dst,$src\t! mul packedI" %}
  ins_encode %{
    assert(UseSSE > 3, "required");
    // dst is both the first input and the result register.
    const XMMRegister acc = $dst$$XMMRegister;
    __ pmulld(acc, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
20598 
// AVX rule (UseAVX > 0): three-operand vpmulld, dst = src1 * src2.
instruct vmulI_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! mul packedI" %}
  ins_encode %{
    // The length encoding is computed from this node.
    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
               vector_length_encoding(this));
  %}
  ins_pipe(pipe_slow);
%}
20609 
// AVX rule with the second input folded in from memory: dst = src * LoadVector(mem).
// Requires UseAVX > 0 and an in(1) vector longer than 8 bytes.
instruct vmulI_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0 &&
            Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (MulVI src (LoadVector mem)));
  format %{ "vpmulld $dst,$src,$mem\t! mul packedI" %}
  ins_encode %{
    const int len_enc = vector_length_encoding(this);
    __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, len_enc);
  %}
  ins_pipe(pipe_slow);
%}
20621 
20622 // Longs vector mul
// Long multiply using a single evpmullq. Selected for 64-byte vectors when
// AVX512DQ is supported, or for any length when supports_avx512vldq() holds.
instruct evmulL_reg(vec dst, vec src1, vec src2) %{
  predicate((Matcher::vector_length_in_bytes(n) == 64 && VM_Version::supports_avx512dq()) ||
            VM_Version::supports_avx512vldq());
  match(Set dst (MulVL src1 src2));
  ins_cost(500);
  format %{ "evpmullq $dst,$src1,$src2\t! mul packedL" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    // The length encoding is computed from this node.
    __ evpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
                vector_length_encoding(this));
  %}
  ins_pipe(pipe_slow);
%}
20637 
// Long multiply using evpmullq with the second input folded in from memory.
// Selected for 64-byte vectors when AVX512DQ is supported, or for vectors
// longer than 8 bytes when supports_avx512vldq() holds.
instruct evmulL_mem(vec dst, vec src, memory mem) %{
  predicate((Matcher::vector_length_in_bytes(n) == 64 && VM_Version::supports_avx512dq()) ||
            (Matcher::vector_length_in_bytes(n) > 8 && VM_Version::supports_avx512vldq()));
  match(Set dst (MulVL src (LoadVector mem)));
  format %{ "evpmullq $dst,$src,$mem\t! mul packedL" %}
  ins_cost(500);
  ins_encode %{
    assert(UseAVX > 2, "required");
    const int len_enc = vector_length_encoding(this);
    __ evpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, len_enc);
  %}
  ins_pipe(pipe_slow);
%}
20653 
// Non-AVX long multiply built from 32-bit pieces: the two cross products
// (lo*hi and hi*lo) are summed and shifted into the upper half, then the
// unsigned lo*lo product is added on top.
instruct vmulL(vec dst, vec src1, vec src2, vec xtmp) %{
  predicate(UseAVX == 0);
  match(Set dst (MulVL src1 src2));
  ins_cost(500);
  effect(TEMP dst, TEMP xtmp);
  format %{ "mulVL   $dst, $src1, $src2\t! using $xtmp as TEMP" %}
  ins_encode %{
    assert(VM_Version::supports_sse4_1(), "required");
    const XMMRegister result  = $dst$$XMMRegister;
    const XMMRegister scratch = $xtmp$$XMMRegister;
    const XMMRegister lhs     = $src1$$XMMRegister;
    const XMMRegister rhs     = $src2$$XMMRegister;

    // Cross (lo-hi) products; only their low 32 bits contribute.
    __ pshufd(scratch, rhs, 0xB1);
    __ pmulld(scratch, lhs);
    __ pshufd(result, scratch, 0xB1);
    __ paddd(result, scratch);
    __ psllq(result, 32);

    // lo-lo products, added to the shifted cross terms.
    __ movdqu(scratch, lhs);
    __ pmuludq(scratch, rhs);
    __ paddq(result, scratch);
  %}
  ins_pipe(pipe_slow);
%}
20675 
// AVX long multiply built from 32-bit pieces, used when the matching AVX-512
// capability is missing: 64-byte vectors without AVX512DQ, or shorter vectors
// when supports_avx512vldq() is false.
instruct vmulL_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
  predicate(UseAVX > 0 &&
            ((Matcher::vector_length_in_bytes(n) == 64 && !VM_Version::supports_avx512dq()) ||
             (Matcher::vector_length_in_bytes(n) < 64 && !VM_Version::supports_avx512vldq())));
  match(Set dst (MulVL src1 src2));
  effect(TEMP xtmp1, TEMP xtmp2);
  ins_cost(500);
  format %{ "vmulVL  $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
  ins_encode %{
    const int len_enc = vector_length_encoding(this);
    const XMMRegister work  = $xtmp1$$XMMRegister;
    const XMMRegister cross = $xtmp2$$XMMRegister;
    const XMMRegister lhs   = $src1$$XMMRegister;
    const XMMRegister rhs   = $src2$$XMMRegister;

    // Cross (lo-hi) products; only their low 32 bits contribute.
    __ vpshufd(work, rhs, 0xB1, len_enc);
    __ vpmulld(work, lhs, work, len_enc);
    __ vpshufd(cross, work, 0xB1, len_enc);
    __ vpaddd(cross, cross, work, len_enc);
    __ vpsllq(cross, cross, 32, len_enc);

    // lo-lo products, added to the shifted cross terms.
    __ vpmuludq(work, lhs, rhs, len_enc);
    __ vpaddq($dst$$XMMRegister, work, cross, len_enc);
  %}
  ins_pipe(pipe_slow);
%}
20700 
// Lower-cost (ins_cost 100) MulVL rule, selected under AVX when the node
// reports has_uint_inputs(); the multiply is emitted as one vpmuludq.
instruct vmuludq_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0 &&
            n->as_MulVL()->has_uint_inputs());
  match(Set dst (MulVL src1 src2));
  ins_cost(100);
  format %{ "vpmuludq $dst,$src1,$src2\t! muludq packedL" %}
  ins_encode %{
    // The length encoding is computed from this node.
    __ vpmuludq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
                vector_length_encoding(this));
  %}
  ins_pipe(pipe_slow);
%}
20712 
// Lower-cost (ins_cost 100) MulVL rule, selected under AVX when the node
// reports has_int_inputs(); the multiply is emitted as one vpmuldq.
instruct vmuldq_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0 &&
            n->as_MulVL()->has_int_inputs());
  match(Set dst (MulVL src1 src2));
  ins_cost(100);
  format %{ "vpmuldq $dst,$src1,$src2\t! muldq packedL" %}
  ins_encode %{
    // The length encoding is computed from this node.
    __ vpmuldq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
               vector_length_encoding(this));
  %}
  ins_pipe(pipe_slow);
%}
20724 
20725 // Floats vector mul
// Non-AVX rule (UseAVX == 0): two-operand mulps, dst = dst * src.
instruct vmulF(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (MulVF dst src));
  format %{ "mulps   $dst,$src\t! mul packedF" %}
  ins_encode %{
    // dst is both the first input and the result register.
    const XMMRegister acc = $dst$$XMMRegister;
    __ mulps(acc, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
20735 
// AVX rule (UseAVX > 0): three-operand vmulps, dst = src1 * src2.
instruct vmulF_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps  $dst,$src1,$src2\t! mul packedF" %}
  ins_encode %{
    // The length encoding is computed from this node.
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
              vector_length_encoding(this));
  %}
  ins_pipe(pipe_slow);
%}
20746 
// AVX rule with the second input folded in from memory: dst = src * LoadVector(mem).
// Requires UseAVX > 0 and an in(1) vector longer than 8 bytes.
instruct vmulF_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0 &&
            Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (MulVF src (LoadVector mem)));
  format %{ "vmulps  $dst,$src,$mem\t! mul packedF" %}
  ins_encode %{
    const int len_enc = vector_length_encoding(this);
    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, len_enc);
  %}
  ins_pipe(pipe_slow);
%}
20758 
20759 // Doubles vector mul
// Non-AVX rule (UseAVX == 0): two-operand mulpd, dst = dst * src.
instruct vmulD(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (MulVD dst src));
  format %{ "mulpd   $dst,$src\t! mul packedD" %}
  ins_encode %{
    // dst is both the first input and the result register.
    const XMMRegister acc = $dst$$XMMRegister;
    __ mulpd(acc, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
20769 
// AVX rule (UseAVX > 0): three-operand vmulpd, dst = src1 * src2.
instruct vmulD_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulVD src1 src2));
  format %{ "vmulpd  $dst,$src1,$src2\t! mul packedD" %}
  ins_encode %{
    // The length encoding is computed from this node.
    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
              vector_length_encoding(this));
  %}
  ins_pipe(pipe_slow);
%}
20780 
// AVX rule with the second input folded in from memory: dst = src * LoadVector(mem).
// Requires UseAVX > 0 and an in(1) vector longer than 8 bytes.
instruct vmulD_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0 &&
            Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (MulVD src (LoadVector mem)));
  format %{ "vmulpd  $dst,$src,$mem\t! mul packedD" %}
  ins_encode %{
    const int len_enc = vector_length_encoding(this);
    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, len_enc);
  %}
  ins_pipe(pipe_slow);
%}
20792 
20793 // --------------------------------- DIV --------------------------------------
20794 
20795 // Floats vector div
// Non-AVX rule (UseAVX == 0): two-operand divps, dst = dst / src.
instruct vdivF(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (DivVF dst src));
  format %{ "divps   $dst,$src\t! div packedF" %}
  ins_encode %{
    // dst is both the dividend and the result register.
    const XMMRegister acc = $dst$$XMMRegister;
    __ divps(acc, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
20805 
// AVX rule (UseAVX > 0): three-operand vdivps, dst = src1 / src2.
instruct vdivF_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps  $dst,$src1,$src2\t! div packedF" %}
  ins_encode %{
    // The length encoding is computed from this node.
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
              vector_length_encoding(this));
  %}
  ins_pipe(pipe_slow);
%}
20816 
// AVX rule with the divisor folded in from memory: dst = src / LoadVector(mem).
// Requires UseAVX > 0 and an in(1) vector longer than 8 bytes.
instruct vdivF_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0 &&
            Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps  $dst,$src,$mem\t! div packedF" %}
  ins_encode %{
    const int len_enc = vector_length_encoding(this);
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, len_enc);
  %}
  ins_pipe(pipe_slow);
%}
20828 
20829 // Doubles vector div
// Non-AVX rule (UseAVX == 0): two-operand divpd, dst = dst / src.
instruct vdivD(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (DivVD dst src));
  format %{ "divpd   $dst,$src\t! div packedD" %}
  ins_encode %{
    // dst is both the dividend and the result register.
    const XMMRegister acc = $dst$$XMMRegister;
    __ divpd(acc, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
20839 
// AVX rule (UseAVX > 0): three-operand vdivpd, dst = src1 / src2.
instruct vdivD_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivVD src1 src2));
  format %{ "vdivpd  $dst,$src1,$src2\t! div packedD" %}
  ins_encode %{
    // The length encoding is computed from this node.
    __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
              vector_length_encoding(this));
  %}
  ins_pipe(pipe_slow);
%}
20850 
// AVX rule with the divisor folded in from memory: dst = src / LoadVector(mem).
// Requires UseAVX > 0 and an in(1) vector longer than 8 bytes.
instruct vdivD_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0 &&
            Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (DivVD src (LoadVector mem)));
  format %{ "vdivpd  $dst,$src,$mem\t! div packedD" %}
  ins_encode %{
    const int len_enc = vector_length_encoding(this);
    __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, len_enc);
  %}
  ins_pipe(pipe_slow);
%}
20862 
20863 // ------------------------------ MinMax ---------------------------------------
20864 
20865 // Byte, Short, Int vector Min/Max
// Non-AVX min/max for integral element types other than T_LONG
// (T_BYTE, T_SHORT, T_INT). One rule serves both MinV and MaxV: the ideal
// opcode and the element type are passed to pminmax.
instruct minmax_reg_sse(vec dst, vec src) %{
  predicate(is_integral_type(Matcher::vector_element_basic_type(n)) &&
            Matcher::vector_element_basic_type(n) != T_LONG &&
            UseAVX == 0);
  match(Set dst (MinV dst src));
  match(Set dst (MaxV dst src));
  format %{ "vector_minmax  $dst,$src\t!  " %}
  ins_encode %{
    assert(UseSSE >= 4, "required");

    const int ideal_op = this->ideal_Opcode();
    const BasicType lane_type = Matcher::vector_element_basic_type(this);
    __ pminmax(ideal_op, lane_type, $dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
20881 
// AVX min/max for integral element types other than T_LONG
// (T_BYTE, T_SHORT, T_INT). One rule serves both MinV and MaxV: the ideal
// opcode and the element type are passed to vpminmax.
instruct vminmax_reg(vec dst, vec src1, vec src2) %{
  predicate(is_integral_type(Matcher::vector_element_basic_type(n)) &&
            Matcher::vector_element_basic_type(n) != T_LONG &&
            UseAVX > 0);
  match(Set dst (MinV src1 src2));
  match(Set dst (MaxV src1 src2));
  format %{ "vector_minmax  $dst,$src1,$src2\t!  " %}
  ins_encode %{
    const int ideal_op = this->ideal_Opcode();
    const int len_enc  = vector_length_encoding(this);
    const BasicType lane_type = Matcher::vector_element_basic_type(this);

    __ vpminmax(ideal_op, lane_type,
                $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, len_enc);
  %}
  ins_pipe(pipe_slow);
%}
20897 
20898 // Long vector Min/Max
// Non-AVX min/max for 16-byte vectors of T_LONG. The temporary uses the
// rxmm0 operand class and is passed to pminmax as an extra argument.
instruct minmaxL_reg_sse(vec dst, vec src, rxmm0 tmp) %{
  predicate(Matcher::vector_length_in_bytes(n) == 16 &&
            Matcher::vector_element_basic_type(n) == T_LONG &&
            UseAVX == 0);
  match(Set dst (MinV dst src));
  match(Set dst (MaxV src dst));
  effect(TEMP dst, TEMP tmp);
  format %{ "vector_minmaxL  $dst,$src\t!using $tmp as TEMP" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");

    const int ideal_op = this->ideal_Opcode();
    const BasicType lane_type = Matcher::vector_element_basic_type(this);
    assert(lane_type == T_LONG, "sanity");

    __ pminmax(ideal_op, lane_type,
               $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
20917 
// AVX Min/Max for long vectors of at most 32 bytes when AVX512VL is not
// available. Operands use the legVec class and dst is marked TEMP.
instruct vminmaxL_reg_avx(legVec dst, legVec src1, legVec src2) %{
  predicate(UseAVX > 0 && !VM_Version::supports_avx512vl() &&
            Matcher::vector_element_basic_type(n) == T_LONG &&
            Matcher::vector_length_in_bytes(n) <= 32);
  match(Set dst (MinV src1 src2));
  match(Set dst (MaxV src1 src2));
  effect(TEMP dst);
  format %{ "vector_minmaxL  $dst,$src1,$src2\t! " %}
  ins_encode %{
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    assert(elem_bt == T_LONG, "sanity");
    const int ideal_opc = this->ideal_Opcode();
    const int vec_enc = vector_length_encoding(this);

    __ vpminmax(ideal_opc, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
20935 
// EVEX Min/Max for long vectors: selected for 64-byte vectors, or for any
// vector length when AVX512VL is available.
instruct vminmaxL_reg_evex(vec dst, vec src1, vec src2) %{
  predicate((Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()) &&
            Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (MinV src1 src2));
  match(Set dst (MaxV src1 src2));
  // Every operand needs its '$' so the disassembly shows the allocated register.
  format %{ "vector_minmaxL  $dst,$src1,$src2\t! " %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    int vlen_enc = vector_length_encoding(this);
    int opcode = this->ideal_Opcode();
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    assert(elem_bt == T_LONG, "sanity");

    // The ideal opcode (MinV or MaxV) picks the operation inside vpminmax.
    __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
20954 
20955 // Float/Double vector Min/Max
// Float/double Min/Max when AVX10.2 is supported; no temporaries are needed.
instruct minmaxFP_reg_avx10_2(vec dst, vec a, vec b) %{
  predicate(is_floating_point_type(Matcher::vector_element_basic_type(n)) && // T_FLOAT, T_DOUBLE
            VM_Version::supports_avx10_2());
  match(Set dst (MinV a b));
  match(Set dst (MaxV a b));
  format %{ "vector_minmaxFP  $dst, $a, $b" %}
  ins_encode %{
    const BasicType bt = Matcher::vector_element_basic_type(this);
    const int ideal_opc = this->ideal_Opcode();
    const int vec_enc = vector_length_encoding(this);
    // k0 is handed in as the mask register argument
    // (NOTE(review): presumably meaning "unmasked" - confirm in vminmax_fp_avx10_2).
    __ vminmax_fp_avx10_2(ideal_opc, bt, $dst$$XMMRegister, k0, $a$$XMMRegister, $b$$XMMRegister, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
20970 
20971 // Float/Double vector Min/Max
// Float/double Min/Max for vectors of at most 32 bytes on AVX hardware
// without AVX10.2. Needs three vector temporaries.
instruct minmaxFP_reg(legVec dst, legVec a, legVec b, legVec tmp, legVec atmp, legVec btmp) %{
  predicate(UseAVX > 0 && !VM_Version::supports_avx10_2() &&
            Matcher::vector_length_in_bytes(n) <= 32 &&
            is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
  match(Set dst (MinV a b));
  match(Set dst (MaxV a b));
  effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
  format %{ "vector_minmaxFP  $dst,$a,$b\t!using $tmp, $atmp, $btmp as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "required");

    const BasicType bt = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    const int ideal_opc = this->ideal_Opcode();

    // Result and inputs first, then the three scratch registers.
    __ vminmax_fp(ideal_opc, bt,
                  $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
                  $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister,
                  vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
20993 
// Float/double Min/Max for 64-byte vectors without AVX10.2.
// Uses two vector temporaries and one opmask temporary.
instruct evminmaxFP_reg_evex(vec dst, vec a, vec b, vec atmp, vec btmp, kReg ktmp) %{
  predicate(!VM_Version::supports_avx10_2() && Matcher::vector_length_in_bytes(n) == 64 &&
            is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
  match(Set dst (MinV a b));
  match(Set dst (MaxV a b));
  effect(TEMP dst, USE a, USE b, TEMP atmp, TEMP btmp, TEMP ktmp);
  // List every TEMP operand, including the opmask register.
  format %{ "vector_minmaxFP  $dst,$a,$b\t!using $atmp, $btmp, $ktmp as TEMP" %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);

    // Result and inputs first, then the opmask and vector scratch registers.
    __ evminmax_fp(opcode, elem_bt,
                   $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
                   $ktmp$$KRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
21014 
21015 // ------------------------------ Unsigned vector Min/Max ----------------------
21016 
// Unsigned Min/Max, register-register. Long elements are only handled here
// when AVX512VL is available; every other element type always matches.
instruct vector_uminmax_reg(vec dst, vec a, vec b) %{
  predicate(Matcher::vector_element_basic_type(n) != T_LONG || VM_Version::supports_avx512vl());
  match(Set dst (UMinV a b));
  match(Set dst (UMaxV a b));
  format %{ "vector_uminmax $dst,$a,$b\t!" %}
  ins_encode %{
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    assert(is_integral_type(elem_bt), "");
    const int vec_enc = vector_length_encoding(this);
    const int ideal_opc = this->ideal_Opcode();
    __ vpuminmax(ideal_opc, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
21031 
// Unsigned Min/Max with the second input loaded directly from memory.
// Same applicability rule as the register-register form.
instruct vector_uminmax_mem(vec dst, vec a, memory b) %{
  predicate(Matcher::vector_element_basic_type(n) != T_LONG || VM_Version::supports_avx512vl());
  match(Set dst (UMinV a (LoadVector b)));
  match(Set dst (UMaxV a (LoadVector b)));
  format %{ "vector_uminmax $dst,$a,$b\t!" %}
  ins_encode %{
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    assert(is_integral_type(elem_bt), "");
    const int vec_enc = vector_length_encoding(this);
    const int ideal_opc = this->ideal_Opcode();
    __ vpuminmax(ideal_opc, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$Address, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
21046 
// Unsigned Min/Max for long vectors when AVX512VL is not available.
// Needs two vector temporaries.
instruct vector_uminmaxq_reg(vec dst, vec a, vec b, vec xtmp1, vec xtmp2) %{
  predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (UMinV a b));
  match(Set dst (UMaxV a b));
  effect(TEMP xtmp1, TEMP xtmp2);
  // The temporaries need a '$' so the allocated registers are printed
  // rather than the literal operand names.
  format %{ "vector_uminmaxq $dst,$a,$b\t! using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    // The ideal opcode (UMinV or UMaxV) picks the operation inside vpuminmaxq.
    __ vpuminmaxq(opcode, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
21060 
// Masked unsigned Min/Max, register form. dst is the first input as well as
// the destination; mask is an opmask register.
instruct vector_uminmax_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (UMinV (Binary dst src2) mask));
  match(Set dst (UMaxV (Binary dst src2) mask));
  format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_type = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    // NOTE(review): the boolean argument is passed as true - presumably it
    // selects merge-masking; confirm against evmasked_op.
    __ evmasked_op(ideal_opc, elem_type, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister,
                   true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
21074 
// Masked unsigned Min/Max with the second input loaded from memory.
// dst is the first input as well as the destination.
instruct vector_uminmax_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (UMinV (Binary dst (LoadVector src2)) mask));
  match(Set dst (UMaxV (Binary dst (LoadVector src2)) mask));
  format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %}
  ins_encode %{
    const int ideal_opc = this->ideal_Opcode();
    const BasicType elem_type = Matcher::vector_element_basic_type(this);
    const int vec_enc = vector_length_encoding(this);
    // NOTE(review): the boolean argument is passed as true - presumably it
    // selects merge-masking; confirm against evmasked_op.
    __ evmasked_op(ideal_opc, elem_type, $mask$$KRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, $src2$$Address,
                   true, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
21088 
21089 // --------------------------------- Signum/CopySign ---------------------------
21090 
// Scalar float signum. dst holds the argument and receives the result; the
// constants zero and one arrive in registers. The flags register is killed.
instruct signumF_reg(regF dst, regF zero, regF one, rFlagsReg cr) %{
  match(Set dst (SignumF dst (Binary zero one)));
  effect(KILL cr);
  format %{ "signumF $dst, $dst" %}
  ins_encode %{
    XMMRegister value = $dst$$XMMRegister;
    // The ideal opcode lets the shared helper tell float from double.
    __ signum_fp(this->ideal_Opcode(), value, $zero$$XMMRegister, $one$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
21101 
// Scalar double signum. dst holds the argument and receives the result; the
// constants zero and one arrive in registers. The flags register is killed.
instruct signumD_reg(regD dst, regD zero, regD one, rFlagsReg cr) %{
  match(Set dst (SignumD dst (Binary zero one)));
  effect(KILL cr);
  format %{ "signumD $dst, $dst" %}
  ins_encode %{
    XMMRegister value = $dst$$XMMRegister;
    // The ideal opcode lets the shared helper tell float from double.
    __ signum_fp(this->ideal_Opcode(), value, $zero$$XMMRegister, $one$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
21112 
// Vector signum (float and double) for vectors of at most 32 bytes when
// AVX512VL is not available. Uses one vector temporary.
instruct signumV_reg_avx(vec dst, vec src, vec zero, vec one, vec xtmp1) %{
  predicate(Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
  match(Set dst (SignumVF src (Binary zero one)));
  match(Set dst (SignumVD src (Binary zero one)));
  effect(TEMP dst, TEMP xtmp1);
  format %{ "vector_signum_avx $dst, $src\t! using $xtmp1 as TEMP" %}
  ins_encode %{
    const int vlen_enc = vector_length_encoding(this);
    const int ideal_opc = this->ideal_Opcode();
    __ vector_signum_avx(ideal_opc,
                         $dst$$XMMRegister, $src$$XMMRegister,
                         $zero$$XMMRegister, $one$$XMMRegister,
                         $xtmp1$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
21127 
// Vector signum (float and double) for 64-byte vectors, or for any length
// when AVX512VL is available. Uses one opmask temporary.
instruct signumV_reg_evex(vec dst, vec src, vec zero, vec one, kReg ktmp1) %{
  predicate(Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl());
  match(Set dst (SignumVF src (Binary zero one)));
  match(Set dst (SignumVD src (Binary zero one)));
  effect(TEMP dst, TEMP ktmp1);
  format %{ "vector_signum_evex $dst, $src\t! using $ktmp1 as TEMP" %}
  ins_encode %{
    const int vlen_enc = vector_length_encoding(this);
    const int ideal_opc = this->ideal_Opcode();
    __ vector_signum_evex(ideal_opc,
                          $dst$$XMMRegister, $src$$XMMRegister,
                          $zero$$XMMRegister, $one$$XMMRegister,
                          $ktmp1$$KRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
21142 
21143 // ---------------------------------------
// For copySign use 0xE4 as the imm8 truth table for vpternlog
21145 // Desired Truth Table: A -> xmm0 bit, B -> xmm1 bit, C -> xmm2 bit
21146 // C (xmm2) is set to 0x7FFFFFFF
21147 // Wherever xmm2 is 0, we want to pick from B (sign)
21148 // Wherever xmm2 is 1, we want to pick from A (src)
21149 //
21150 // A B C Result
21151 // 0 0 0 0
21152 // 0 0 1 0
21153 // 0 1 0 1
21154 // 0 1 1 0
21155 // 1 0 0 0
21156 // 1 0 1 1
21157 // 1 1 0 1
21158 // 1 1 1 1
21159 //
// Result going from high bit to low bit is 0b11100100 = 0xE4
21161 // ---------------------------------------
21162 
// Scalar float copySign. dst is an input and the result; src is the second
// input. One XMM and one general-purpose temporary are used to build the
// 0x7FFFFFFF bit mask.
instruct copySignF_reg(regF dst, regF src, regF tmp1, rRegI tmp2) %{
  match(Set dst (CopySignF dst src));
  effect(TEMP tmp1, TEMP tmp2);
  format %{ "CopySignF $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
  ins_encode %{
    Register    mask_gpr = $tmp2$$Register;
    XMMRegister mask_xmm = $tmp1$$XMMRegister;

    // Materialize the mask in a GPR, then move it into the XMM temporary.
    __ movl(mask_gpr, 0x7FFFFFFF);
    __ movdl(mask_xmm, mask_gpr);
    // Bitwise select between dst and src under control of the mask;
    // 0xE4 is the ternary-logic immediate.
    __ vpternlogd($dst$$XMMRegister, 0xE4, $src$$XMMRegister, mask_xmm, Assembler::AVX_128bit);
  %}
  ins_pipe( pipe_slow );
%}
21174 
// Scalar double copySign, matched when the ideal node carries an immediate
// double operand (zero) next to src. One XMM and one general-purpose temporary
// are used to build the 0x7FFFFFFFFFFFFFFF bit mask.
instruct copySignD_imm(regD dst, regD src, regD tmp1, rRegL tmp2, immD zero) %{
  match(Set dst (CopySignD dst (Binary src zero)));
  ins_cost(100);
  effect(TEMP tmp1, TEMP tmp2);
  format %{ "CopySignD  $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
  ins_encode %{
    Register    mask_gpr = $tmp2$$Register;
    XMMRegister mask_xmm = $tmp1$$XMMRegister;

    // Materialize the mask in a GPR, then move it into the XMM temporary.
    __ mov64(mask_gpr, 0x7FFFFFFFFFFFFFFF);
    __ movq(mask_xmm, mask_gpr);
    // Bitwise select between dst and src under control of the mask;
    // 0xE4 is the ternary-logic immediate.
    __ vpternlogq($dst$$XMMRegister, 0xE4, $src$$XMMRegister, mask_xmm, Assembler::AVX_128bit);
  %}
  ins_pipe( pipe_slow );
%}
21187 
21188 //----------------------------- CompressBits/ExpandBits ------------------------
21189 
// 32-bit CompressBits with the mask in a register (parallel bit extract).
instruct compressBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
  predicate(n->bottom_type()->isa_int());
  match(Set dst (CompressBits src mask));
  format %{ "pextl  $dst, $src, $mask\t! parallel bit extract" %}
  ins_encode %{
    Register result   = $dst$$Register;
    Register value    = $src$$Register;
    Register selector = $mask$$Register;
    __ pextl(result, value, selector);
  %}
  ins_pipe( pipe_slow );
%}
21199 
// 32-bit ExpandBits with the mask in a register (parallel bit deposit).
instruct expandBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
  predicate(n->bottom_type()->isa_int());
  match(Set dst (ExpandBits src mask));
  format %{ "pdepl  $dst, $src, $mask\t! parallel bit deposit" %}
  ins_encode %{
    Register result   = $dst$$Register;
    Register value    = $src$$Register;
    Register selector = $mask$$Register;
    __ pdepl(result, value, selector);
  %}
  ins_pipe( pipe_slow );
%}
21209 
// 32-bit CompressBits with the mask folded in as a memory operand
// (parallel bit extract).
instruct compressBitsI_mem(rRegI dst, rRegI src, memory mask) %{
  predicate(n->bottom_type()->isa_int());
  match(Set dst (CompressBits src (LoadI mask)));
  format %{ "pextl  $dst, $src, $mask\t! parallel bit extract" %}
  ins_encode %{
    Register result = $dst$$Register;
    Register value  = $src$$Register;
    __ pextl(result, value, $mask$$Address);
  %}
  ins_pipe( pipe_slow );
%}
21219 
// 32-bit ExpandBits with the mask folded in as a memory operand
// (parallel bit deposit).
instruct expandBitsI_mem(rRegI dst, rRegI src, memory mask) %{
  predicate(n->bottom_type()->isa_int());
  match(Set dst (ExpandBits src (LoadI mask)));
  format %{ "pdepl  $dst, $src, $mask\t! parallel bit deposit" %}
  ins_encode %{
    Register result = $dst$$Register;
    Register value  = $src$$Register;
    __ pdepl(result, value, $mask$$Address);
  %}
  ins_pipe( pipe_slow );
%}
21229 
21230 // --------------------------------- Sqrt --------------------------------------
21231 
// Packed float square root, register source. Requires AVX.
instruct vsqrtF_reg(vec dst, vec src) %{
  match(Set dst (SqrtVF src));
  format %{ "vsqrtps  $dst,$src\t! sqrt packedF" %}
  ins_encode %{
    assert(UseAVX > 0, "required");
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister input  = $src$$XMMRegister;
    __ vsqrtps(result, input, vector_length_encoding(this));
  %}
  ins_pipe( pipe_slow );
%}
21242 
// Packed float square root with the source loaded from memory. Only used
// when the loaded vector is wider than 8 bytes. Requires AVX.
instruct vsqrtF_mem(vec dst, memory mem) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (SqrtVF (LoadVector mem)));
  format %{ "vsqrtps  $dst,$mem\t! sqrt packedF" %}
  ins_encode %{
    assert(UseAVX > 0, "required");
    XMMRegister result = $dst$$XMMRegister;
    __ vsqrtps(result, $mem$$Address, vector_length_encoding(this));
  %}
  ins_pipe( pipe_slow );
%}
21254 
21255 // Floating point vector sqrt
// Packed double square root, register source. Requires AVX.
instruct vsqrtD_reg(vec dst, vec src) %{
  match(Set dst (SqrtVD src));
  format %{ "vsqrtpd  $dst,$src\t! sqrt packedD" %}
  ins_encode %{
    assert(UseAVX > 0, "required");
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister input  = $src$$XMMRegister;
    __ vsqrtpd(result, input, vector_length_encoding(this));
  %}
  ins_pipe( pipe_slow );
%}
21266 
// Packed double square root with the source loaded from memory. Only used
// when the loaded vector is wider than 8 bytes. Requires AVX.
instruct vsqrtD_mem(vec dst, memory mem) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (SqrtVD (LoadVector mem)));
  format %{ "vsqrtpd  $dst,$mem\t! sqrt packedD" %}
  ins_encode %{
    assert(UseAVX > 0, "required");
    XMMRegister result = $dst$$XMMRegister;
    __ vsqrtpd(result, $mem$$Address, vector_length_encoding(this));
  %}
  ins_pipe( pipe_slow );
%}
21278 
21279 // ------------------------------ Shift ---------------------------------------
21280 
21281 // Left and right shift count vectors are the same on x86
21282 // (only lowest bits of xmm reg are used for count).
// Moves a scalar shift count from a general-purpose register into a vector
// register; one rule covers both the left and the right count nodes.
instruct vshiftcnt(vec dst, rRegI cnt) %{
  match(Set dst (LShiftCntV cnt));
  match(Set dst (RShiftCntV cnt));
  format %{ "movdl    $dst,$cnt\t! load shift count" %}
  ins_encode %{
    XMMRegister count_vec = $dst$$XMMRegister;
    Register    count_gpr = $cnt$$Register;
    __ movdl(count_vec, count_gpr);
  %}
  ins_pipe( pipe_slow );
%}
21292 
21293 // Byte vector shift
// Uniform-count shift of a byte vector with at most 8 elements.
// The shift is carried out with the word-shift helper on an extended copy
// of src, and the result is masked and packed back down into dst.
instruct vshiftB(vec dst, vec src, vec shift, vec tmp) %{
  predicate(!n->as_ShiftV()->is_var_shift() && Matcher::vector_length(n) <= 8);
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, USE src, USE shift, TEMP tmp);
  format %{"vector_byte_shift $dst,$src,$shift" %}
  ins_encode %{
    assert(UseSSE > 3, "required");
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister wide   = $tmp$$XMMRegister;
    XMMRegister count  = $shift$$XMMRegister;

    const int ideal_opc = this->ideal_Opcode();
    // Every shift kind except the unsigned right shift extends with sign.
    const bool with_sign = (ideal_opc != Op_URShiftVB);

    __ vextendbw(with_sign, wide, $src$$XMMRegister);
    __ vshiftw(ideal_opc, wide, count);
    // Load the short-to-byte mask, apply it to the shifted words, then pack.
    __ movdqu(result, ExternalAddress(vector_short_to_byte_mask()), noreg);
    __ pand(result, wide);
    __ packuswb(result, result);
  %}
  ins_pipe( pipe_slow );
%}
21313 
// Uniform-count shift of a 16-element byte vector for UseAVX <= 1.
// The two halves of src are processed separately in tmp1 and tmp2 and then
// packed together into dst.
instruct vshift16B(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
  predicate(UseAVX <= 1 &&
            Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, USE src, USE shift, TEMP tmp1, TEMP tmp2);
  format %{"vector_byte_shift $dst,$src,$shift" %}
  ins_encode %{
    assert(UseSSE > 3, "required");
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister input  = $src$$XMMRegister;
    XMMRegister count  = $shift$$XMMRegister;
    XMMRegister first  = $tmp1$$XMMRegister;
    XMMRegister second = $tmp2$$XMMRegister;

    const int ideal_opc = this->ideal_Opcode();
    // Every shift kind except the unsigned right shift extends with sign.
    const bool with_sign = (ideal_opc != Op_URShiftVB);

    // First half: extend straight from src and shift.
    __ vextendbw(with_sign, first, input);
    __ vshiftw(ideal_opc, first, count);
    // Second half: shuffle it into place (0xE), then extend and shift.
    __ pshufd(second, input, 0xE);
    __ vextendbw(with_sign, second, second);
    __ vshiftw(ideal_opc, second, count);
    // Mask both halves with the short-to-byte mask and pack them into dst.
    __ movdqu(result, ExternalAddress(vector_short_to_byte_mask()), noreg);
    __ pand(second, result);
    __ pand(result, first);
    __ packuswb(result, second);
  %}
  ins_pipe( pipe_slow );
%}
21338 
// Uniform-count shift of a 16-element byte vector for UseAVX > 1.
// All 16 bytes are extended into one 256-bit temporary, shifted and masked
// there, and the two 128-bit halves are packed into dst.
instruct vshift16B_avx(vec dst, vec src, vec shift, vec tmp) %{
  predicate(UseAVX > 1 &&
            Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP tmp);
  format %{"vector_byte_shift $dst,$src,$shift" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister wide   = $tmp$$XMMRegister;

    const int ideal_opc = this->ideal_Opcode();
    // Every shift kind except the unsigned right shift extends with sign.
    const bool with_sign = (ideal_opc != Op_URShiftVB);
    const int enc256 = Assembler::AVX_256bit;

    __ vextendbw(with_sign, wide, $src$$XMMRegister, enc256);
    __ vshiftw(ideal_opc, wide, wide, $shift$$XMMRegister, enc256);
    __ vpand(wide, wide, ExternalAddress(vector_short_to_byte_mask()), enc256, noreg);
    // Pull the upper 128 bits into dst and pack them with the lower half.
    __ vextracti128_high(result, wide);
    __ vpackuswb(result, wide, result, 0);
  %}
  ins_pipe( pipe_slow );
%}
21359 
// Uniform-count shift of a 32-element byte vector. Requires UseAVX > 1.
// The upper 128 bits of src go through tmp, the rest through dst; both are
// shifted, masked, packed and finally permuted with vpermq (0xD8).
instruct vshift32B_avx(vec dst, vec src, vec shift, vec tmp) %{
  predicate(!n->as_ShiftV()->is_var_shift() && Matcher::vector_length(n) == 32);
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP tmp);
  format %{"vector_byte_shift $dst,$src,$shift" %}
  ins_encode %{
    assert(UseAVX > 1, "required");
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister input  = $src$$XMMRegister;
    XMMRegister count  = $shift$$XMMRegister;
    XMMRegister upper  = $tmp$$XMMRegister;

    const int ideal_opc = this->ideal_Opcode();
    // Every shift kind except the unsigned right shift extends with sign.
    const bool with_sign = (ideal_opc != Op_URShiftVB);
    const int enc256 = Assembler::AVX_256bit;

    __ vextracti128_high(upper, input);
    __ vextendbw(with_sign, upper, upper, enc256);
    __ vextendbw(with_sign, result, input, enc256);
    __ vshiftw(ideal_opc, upper, upper, count, enc256);
    __ vshiftw(ideal_opc, result, result, count, enc256);
    __ vpand(upper, upper, ExternalAddress(vector_short_to_byte_mask()), enc256, noreg);
    __ vpand(result, result, ExternalAddress(vector_short_to_byte_mask()), enc256, noreg);
    __ vpackuswb(result, result, upper, enc256);
    // Reorder the 64-bit lanes after the pack.
    __ vpermq(result, result, 0xD8, enc256);
  %}
  ins_pipe( pipe_slow );
%}
21384 
// Uniform-count shift of a 64-element byte vector. Requires UseAVX > 2.
// The upper 256 bits of src go through tmp1, the rest through tmp2; dst first
// holds the broadcast short-to-byte mask and then the packed, permuted result.
instruct vshift64B_avx(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
  predicate(!n->as_ShiftV()->is_var_shift() && Matcher::vector_length(n) == 64);
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2);
  format %{"vector_byte_shift $dst,$src,$shift" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister input  = $src$$XMMRegister;
    XMMRegister count  = $shift$$XMMRegister;
    XMMRegister upper  = $tmp1$$XMMRegister;
    XMMRegister lower  = $tmp2$$XMMRegister;   // later reused for the permute mask

    const int ideal_opc = this->ideal_Opcode();
    // Every shift kind except the unsigned right shift extends with sign.
    const bool with_sign = (ideal_opc != Op_URShiftVB);
    const int enc512 = Assembler::AVX_512bit;

    __ vextracti64x4(upper, input, 1);
    __ vextendbw(with_sign, upper, upper, enc512);
    __ vextendbw(with_sign, lower, input, enc512);
    __ vshiftw(ideal_opc, upper, upper, count, enc512);
    __ vshiftw(ideal_opc, lower, lower, count, enc512);
    // Load the short-to-byte mask and broadcast it across the full vector.
    __ vmovdqu(result, ExternalAddress(vector_short_to_byte_mask()), noreg);
    __ vpbroadcastd(result, result, enc512);
    __ vpand(upper, upper, result, enc512);
    __ vpand(lower, lower, result, enc512);
    __ vpackuswb(result, upper, lower, enc512);
    // Permute the packed result using the byte permutation mask.
    __ evmovdquq(lower, ExternalAddress(vector_byte_perm_mask()), enc512, noreg);
    __ vpermq(result, lower, result, enc512);
  %}
  ins_pipe( pipe_slow );
%}
21412 
// A logical right shift of a short vector produces an incorrect Java result
// for negative data, because Java code converts a short value to int with
// sign extension before the shift. Char vectors are fine since chars are
// unsigned values.
// Shorts/Chars vector shift (left, arithmetic right and logical right)
// Uniform-count shift of a short/char vector.
// With AVX a single three-operand shift is emitted. Without AVX, src is first
// copied into dst with a move sized to the vector length and dst is then
// shifted in place.
instruct vshiftS(vec dst, vec src, vec shift) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVS src shift));
  match(Set dst ( RShiftVS src shift));
  match(Set dst (URShiftVS src shift));
  effect(TEMP dst, USE src, USE shift);
  format %{ "vshiftw  $dst,$src,$shift\t! shift packedS" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister input  = $src$$XMMRegister;
    XMMRegister count  = $shift$$XMMRegister;
    const int ideal_opc = this->ideal_Opcode();

    if (UseAVX > 0) {
      __ vshiftw(ideal_opc, result, input, count, vector_length_encoding(this));
    } else {
      int vlen = Matcher::vector_length(this);
      // Pick the copy that matches the vector size: 2, 4 or 8 shorts.
      if (vlen == 2) {
        __ movflt(result, input);
      } else if (vlen == 4) {
        __ movdbl(result, input);
      } else {
        assert (vlen == 8, "sanity");
        __ movdqu(result, input);
      }
      // The in-place shift is the same for every length.
      __ vshiftw(ideal_opc, result, count);
    }
  %}
  ins_pipe( pipe_slow );
%}
21447 
// Integers vector shift (left, arithmetic right and logical right)
// Uniform-count shift of an int vector with the count in a vector register.
// With AVX a single three-operand shift is emitted. Without AVX, src is first
// copied into dst and dst is then shifted in place.
instruct vshiftI(vec dst, vec src, vec shift) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVI src shift));
  match(Set dst ( RShiftVI src shift));
  match(Set dst (URShiftVI src shift));
  effect(TEMP dst, USE src, USE shift);
  format %{ "vshiftd  $dst,$src,$shift\t! shift packedI" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister input  = $src$$XMMRegister;
    XMMRegister count  = $shift$$XMMRegister;
    const int ideal_opc = this->ideal_Opcode();

    if (UseAVX > 0) {
      __ vshiftd(ideal_opc, result, input, count, vector_length_encoding(this));
    } else {
      int vlen = Matcher::vector_length(this);
      // Pick the copy that matches the vector size: 2 or 4 ints.
      if (vlen == 2) {
        __ movdbl(result, input);
      } else {
        assert(vlen == 4, "sanity");
        __ movdqu(result, input);
      }
      // The in-place shift is the same for both lengths.
      __ vshiftd(ideal_opc, result, count);
    }
  %}
  ins_pipe( pipe_slow );
%}
21475 
// Integers vector constant shift (left, arithmetic right and logical right)
// Shift of an int vector by an 8-bit immediate count.
// With AVX a single three-operand shift is emitted. Without AVX, src is first
// copied into dst and dst is then shifted in place.
instruct vshiftI_imm(vec dst, vec src, immI8 shift) %{
  match(Set dst (LShiftVI src (LShiftCntV shift)));
  match(Set dst (RShiftVI src (RShiftCntV shift)));
  match(Set dst (URShiftVI src (RShiftCntV shift)));
  format %{ "vshiftd_imm  $dst,$src,$shift\t! shift packedI" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister input  = $src$$XMMRegister;
    const int ideal_opc = this->ideal_Opcode();

    if (UseAVX > 0) {
      __ vshiftd_imm(ideal_opc, result, input, $shift$$constant, vector_length_encoding(this));
    } else {
      int vlen = Matcher::vector_length(this);
      // Pick the copy that matches the vector size: 2 or 4 ints.
      if (vlen == 2) {
        __ movdbl(result, input);
      } else {
        assert(vlen == 4, "sanity");
        __ movdqu(result, input);
      }
      // The in-place shift is the same for both lengths.
      __ vshiftd_imm(ideal_opc, result, $shift$$constant);
    }
  %}
  ins_pipe( pipe_slow );
%}
21501 
21502 // Longs vector shift
// Uniform-count left / logical-right shift of a long vector with the count
// in a vector register. (The arithmetic right shift has its own rules.)
instruct vshiftL(vec dst, vec src, vec shift) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVL src shift));
  match(Set dst (URShiftVL src shift));
  effect(TEMP dst, USE src, USE shift);
  format %{ "vshiftq  $dst,$src,$shift\t! shift packedL" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister input  = $src$$XMMRegister;
    XMMRegister count  = $shift$$XMMRegister;
    const int ideal_opc = this->ideal_Opcode();

    if (UseAVX > 0) {
      __ vshiftq(ideal_opc, result, input, count, vector_length_encoding(this));
    } else {
      // Without AVX only two-element vectors reach this point:
      // copy src into dst, then shift in place.
      assert(Matcher::vector_length(this) == 2, "");
      __ movdqu(result, input);
      __ vshiftq(ideal_opc, result, count);
    }
  %}
  ins_pipe( pipe_slow );
%}
21522 
21523 // Longs vector constant shift
// Left / logical-right shift of a long vector by an 8-bit immediate count.
instruct vshiftL_imm(vec dst, vec src, immI8 shift) %{
  match(Set dst (LShiftVL src (LShiftCntV shift)));
  match(Set dst (URShiftVL src (RShiftCntV shift)));
  format %{ "vshiftq_imm  $dst,$src,$shift\t! shift packedL" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister input  = $src$$XMMRegister;
    const int ideal_opc = this->ideal_Opcode();

    if (UseAVX > 0) {
      __ vshiftq_imm(ideal_opc, result, input, $shift$$constant, vector_length_encoding(this));
    } else {
      // Without AVX only two-element vectors reach this point:
      // copy src into dst, then shift in place.
      assert(Matcher::vector_length(this) == 2, "");
      __ movdqu(result, input);
      __ vshiftq_imm(ideal_opc, result, $shift$$constant);
    }
  %}
  ins_pipe( pipe_slow );
%}
21541 
21542 // -------------------ArithmeticRightShift -----------------------------------
21543 // Long vector arithmetic right shift
// Uniform-count arithmetic right shift of a long vector for UseAVX <= 2.
// The result is built from logical shifts:
//   m   = sign_mask >>> shift
//   dst = ((src >>> shift) ^ m) - m
// where sign_mask is the constant loaded from vector_long_sign_mask().
instruct vshiftL_arith_reg(vec dst, vec src, vec shift, vec tmp) %{
  predicate(UseAVX <= 2 && !n->as_ShiftV()->is_var_shift());
  match(Set dst (RShiftVL src shift));
  effect(TEMP dst, TEMP tmp);
  format %{ "vshiftq $dst,$src,$shift" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister input  = $src$$XMMRegister;
    XMMRegister count  = $shift$$XMMRegister;
    XMMRegister mask   = $tmp$$XMMRegister;

    uint vlen = Matcher::vector_length(this);
    if (vlen == 2) {
      // Two longs: two-operand forms, so copy src into dst first.
      __ movdqu(result, input);
      __ psrlq(result, count);
      __ movdqu(mask, ExternalAddress(vector_long_sign_mask()), noreg);
      __ psrlq(mask, count);
      __ pxor(result, mask);
      __ psubq(result, mask);
    } else {
      // Four longs: same sequence with the 256-bit three-operand forms.
      assert(vlen == 4, "sanity");
      assert(UseAVX > 1, "required");
      const int enc256 = Assembler::AVX_256bit;
      __ vpsrlq(result, input, count, enc256);
      __ vmovdqu(mask, ExternalAddress(vector_long_sign_mask()), noreg);
      __ vpsrlq(mask, mask, count, enc256);
      __ vpxor(result, result, mask, enc256);
      __ vpsubq(result, result, mask, enc256);
    }
  %}
  ins_pipe( pipe_slow );
%}
21571 
// Uniform-count arithmetic right shift of a long vector for UseAVX > 2,
// emitted as a single evpsraq.
instruct vshiftL_arith_reg_evex(vec dst, vec src, vec shift) %{
  predicate(UseAVX > 2 && !n->as_ShiftV()->is_var_shift());
  match(Set dst (RShiftVL src shift));
  format %{ "vshiftq $dst,$src,$shift" %}
  ins_encode %{
    XMMRegister result = $dst$$XMMRegister;
    XMMRegister input  = $src$$XMMRegister;
    XMMRegister count  = $shift$$XMMRegister;
    __ evpsraq(result, input, count, vector_length_encoding(this));
  %}
  ins_pipe( pipe_slow );
%}
21582 
21583 // ------------------- Variable Shift -----------------------------
21584 // Byte variable shift
// Per-element (variable) shift of a byte vector with at most 8 elements when
// AVX512BW is not available. Requires UseAVX >= 2 and one vector temporary.
instruct vshift8B_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
  predicate(n->as_ShiftV()->is_var_shift() &&
            Matcher::vector_length(n) <= 8 &&
            !VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    XMMRegister result = $dst$$XMMRegister;
    const int ideal_opc = this->ideal_Opcode();
    const int enc128 = Assembler::AVX_128bit;

    // varshiftbw leaves its result in dst; the pack then narrows it in place.
    __ varshiftbw(ideal_opc, result, $src$$XMMRegister, $shift$$XMMRegister, enc128, $vtmp$$XMMRegister);
    __ vpackuswb(result, result, result, 0);
  %}
  ins_pipe( pipe_slow );
%}
21604 
21605 instruct vshift16B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21606   predicate(Matcher::vector_length(n) == 16 &&
21607             n->as_ShiftV()->is_var_shift() &&
21608             !VM_Version::supports_avx512bw());
21609   match(Set dst ( LShiftVB src shift));
21610   match(Set dst ( RShiftVB src shift));
21611   match(Set dst (URShiftVB src shift));
21612   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
21613   format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
21614   ins_encode %{
21615     assert(UseAVX >= 2, "required");
21616 
21617     int opcode = this->ideal_Opcode();
21618     int vlen_enc = Assembler::AVX_128bit;
21619     // Shift lower half and get word result in dst
21620     __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21621 
21622     // Shift upper half and get word result in vtmp1
21623     __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0);
21624     __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0);
21625     __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21626 
21627     // Merge and down convert the two word results to byte in dst
21628     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0);
21629   %}
21630   ins_pipe( pipe_slow );
21631 %}
21632 
21633 instruct vshift32B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, vec vtmp3, vec vtmp4) %{
21634   predicate(Matcher::vector_length(n) == 32 &&
21635             n->as_ShiftV()->is_var_shift() &&
21636             !VM_Version::supports_avx512bw());
21637   match(Set dst ( LShiftVB src shift));
21638   match(Set dst ( RShiftVB src shift));
21639   match(Set dst (URShiftVB src shift));
21640   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4);
21641   format %{ "vector_varshift_byte $dst, $src, $shift\n\t using $vtmp1, $vtmp2, $vtmp3, $vtmp4 as TEMP" %}
21642   ins_encode %{
21643     assert(UseAVX >= 2, "required");
21644 
21645     int opcode = this->ideal_Opcode();
21646     int vlen_enc = Assembler::AVX_128bit;
21647     // Process lower 128 bits and get result in dst
21648     __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21649     __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0);
21650     __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0);
21651     __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21652     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0);
21653 
21654     // Process higher 128 bits and get result in vtmp3
21655     __ vextracti128_high($vtmp1$$XMMRegister, $src$$XMMRegister);
21656     __ vextracti128_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
21657     __ varshiftbw(opcode, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp4$$XMMRegister);
21658     __ vpshufd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, 0xE, 0);
21659     __ vpshufd($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, 0xE, 0);
21660     __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21661     __ vpackuswb($vtmp1$$XMMRegister, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, 0);
21662 
21663     // Merge the two results in dst
21664     __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
21665   %}
21666   ins_pipe( pipe_slow );
21667 %}
21668 
21669 instruct vshiftB_var_evex_bw(vec dst, vec src, vec shift, vec vtmp) %{
21670   predicate(Matcher::vector_length(n) <= 32 &&
21671             n->as_ShiftV()->is_var_shift() &&
21672             VM_Version::supports_avx512bw());
21673   match(Set dst ( LShiftVB src shift));
21674   match(Set dst ( RShiftVB src shift));
21675   match(Set dst (URShiftVB src shift));
21676   effect(TEMP dst, TEMP vtmp);
21677   format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
21678   ins_encode %{
21679     assert(UseAVX > 2, "required");
21680 
21681     int opcode = this->ideal_Opcode();
21682     int vlen_enc = vector_length_encoding(this);
21683     __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister);
21684   %}
21685   ins_pipe( pipe_slow );
21686 %}
21687 
21688 instruct vshift64B_var_evex_bw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21689   predicate(Matcher::vector_length(n) == 64 &&
21690             n->as_ShiftV()->is_var_shift() &&
21691             VM_Version::supports_avx512bw());
21692   match(Set dst ( LShiftVB src shift));
21693   match(Set dst ( RShiftVB src shift));
21694   match(Set dst (URShiftVB src shift));
21695   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
21696   format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
21697   ins_encode %{
21698     assert(UseAVX > 2, "required");
21699 
21700     int opcode = this->ideal_Opcode();
21701     int vlen_enc = Assembler::AVX_256bit;
21702     __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
21703     __ vextracti64x4_high($vtmp1$$XMMRegister, $src$$XMMRegister);
21704     __ vextracti64x4_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
21705     __ evarshiftb(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
21706     __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
21707   %}
21708   ins_pipe( pipe_slow );
21709 %}
21710 
21711 // Short variable shift
21712 instruct vshift8S_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
21713   predicate(Matcher::vector_length(n) <= 8 &&
21714             n->as_ShiftV()->is_var_shift() &&
21715             !VM_Version::supports_avx512bw());
21716   match(Set dst ( LShiftVS src shift));
21717   match(Set dst ( RShiftVS src shift));
21718   match(Set dst (URShiftVS src shift));
21719   effect(TEMP dst, TEMP vtmp);
21720   format %{ "vector_var_shift_left_short $dst, $src, $shift\n\t" %}
21721   ins_encode %{
21722     assert(UseAVX >= 2, "required");
21723 
21724     int opcode = this->ideal_Opcode();
21725     bool sign = (opcode != Op_URShiftVS);
21726     int vlen_enc = Assembler::AVX_256bit;
21727     __ vextendwd(sign, $dst$$XMMRegister, $src$$XMMRegister, 1);
21728     __ vpmovzxwd($vtmp$$XMMRegister, $shift$$XMMRegister, 1);
21729     __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
21730     __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21731     __ vextracti128_high($vtmp$$XMMRegister, $dst$$XMMRegister);
21732     __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0);
21733   %}
21734   ins_pipe( pipe_slow );
21735 %}
21736 
21737 instruct vshift16S_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
21738   predicate(Matcher::vector_length(n) == 16 &&
21739             n->as_ShiftV()->is_var_shift() &&
21740             !VM_Version::supports_avx512bw());
21741   match(Set dst ( LShiftVS src shift));
21742   match(Set dst ( RShiftVS src shift));
21743   match(Set dst (URShiftVS src shift));
21744   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
21745   format %{ "vector_var_shift_left_short $dst, $src, $shift\n\t" %}
21746   ins_encode %{
21747     assert(UseAVX >= 2, "required");
21748 
21749     int opcode = this->ideal_Opcode();
21750     bool sign = (opcode != Op_URShiftVS);
21751     int vlen_enc = Assembler::AVX_256bit;
21752     // Shift lower half, with result in vtmp2 using vtmp1 as TEMP
21753     __ vextendwd(sign, $vtmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
21754     __ vpmovzxwd($vtmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21755     __ varshiftd(opcode, $vtmp2$$XMMRegister, $vtmp2$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
21756     __ vpand($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21757 
21758     // Shift upper half, with result in dst using vtmp1 as TEMP
21759     __ vextracti128_high($dst$$XMMRegister, $src$$XMMRegister);
21760     __ vextracti128_high($vtmp1$$XMMRegister, $shift$$XMMRegister);
21761     __ vextendwd(sign, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21762     __ vpmovzxwd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
21763     __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
21764     __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
21765 
21766     // Merge lower and upper half result into dst
21767     __ vpackusdw($dst$$XMMRegister, $vtmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
21768     __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
21769   %}
21770   ins_pipe( pipe_slow );
21771 %}
21772 
21773 instruct vshift16S_var_evex_bw(vec dst, vec src, vec shift) %{
21774   predicate(n->as_ShiftV()->is_var_shift() &&
21775             VM_Version::supports_avx512bw());
21776   match(Set dst ( LShiftVS src shift));
21777   match(Set dst ( RShiftVS src shift));
21778   match(Set dst (URShiftVS src shift));
21779   format %{ "vector_varshift_short $dst,$src,$shift\t!" %}
21780   ins_encode %{
21781     assert(UseAVX > 2, "required");
21782 
21783     int opcode = this->ideal_Opcode();
21784     int vlen_enc = vector_length_encoding(this);
21785     if (!VM_Version::supports_avx512vl()) {
21786       vlen_enc = Assembler::AVX_512bit;
21787     }
21788     __ varshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21789   %}
21790   ins_pipe( pipe_slow );
21791 %}
21792 
21793 //Integer variable shift
21794 instruct vshiftI_var(vec dst, vec src, vec shift) %{
21795   predicate(n->as_ShiftV()->is_var_shift());
21796   match(Set dst ( LShiftVI src shift));
21797   match(Set dst ( RShiftVI src shift));
21798   match(Set dst (URShiftVI src shift));
21799   format %{ "vector_varshift_int $dst,$src,$shift\t!" %}
21800   ins_encode %{
21801     assert(UseAVX >= 2, "required");
21802 
21803     int opcode = this->ideal_Opcode();
21804     int vlen_enc = vector_length_encoding(this);
21805     __ varshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21806   %}
21807   ins_pipe( pipe_slow );
21808 %}
21809 
21810 //Long variable shift
21811 instruct vshiftL_var(vec dst, vec src, vec shift) %{
21812   predicate(n->as_ShiftV()->is_var_shift());
21813   match(Set dst ( LShiftVL src shift));
21814   match(Set dst (URShiftVL src shift));
21815   format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
21816   ins_encode %{
21817     assert(UseAVX >= 2, "required");
21818 
21819     int opcode = this->ideal_Opcode();
21820     int vlen_enc = vector_length_encoding(this);
21821     __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21822   %}
21823   ins_pipe( pipe_slow );
21824 %}
21825 
21826 //Long variable right shift arithmetic
21827 instruct vshiftL_arith_var(vec dst, vec src, vec shift, vec vtmp) %{
21828   predicate(Matcher::vector_length(n) <= 4 &&
21829             n->as_ShiftV()->is_var_shift() &&
21830             UseAVX == 2);
21831   match(Set dst (RShiftVL src shift));
21832   effect(TEMP dst, TEMP vtmp);
21833   format %{ "vector_varshift_long  $dst,$src,$shift\n\t! using $vtmp as TEMP" %}
21834   ins_encode %{
21835     int opcode = this->ideal_Opcode();
21836     int vlen_enc = vector_length_encoding(this);
21837     __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc,
21838                  $vtmp$$XMMRegister);
21839   %}
21840   ins_pipe( pipe_slow );
21841 %}
21842 
21843 instruct vshiftL_arith_var_evex(vec dst, vec src, vec shift) %{
21844   predicate(n->as_ShiftV()->is_var_shift() &&
21845             UseAVX > 2);
21846   match(Set dst (RShiftVL src shift));
21847   format %{ "vector_varfshift_long $dst,$src,$shift\t!" %}
21848   ins_encode %{
21849     int opcode = this->ideal_Opcode();
21850     int vlen_enc = vector_length_encoding(this);
21851     __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
21852   %}
21853   ins_pipe( pipe_slow );
21854 %}
21855 
21856 // --------------------------------- AND --------------------------------------
21857 
21858 instruct vand(vec dst, vec src) %{
21859   predicate(UseAVX == 0);
21860   match(Set dst (AndV dst src));
21861   format %{ "pand    $dst,$src\t! and vectors" %}
21862   ins_encode %{
21863     __ pand($dst$$XMMRegister, $src$$XMMRegister);
21864   %}
21865   ins_pipe( pipe_slow );
21866 %}
21867 
21868 instruct vand_reg(vec dst, vec src1, vec src2) %{
21869   predicate(UseAVX > 0);
21870   match(Set dst (AndV src1 src2));
21871   format %{ "vpand   $dst,$src1,$src2\t! and vectors" %}
21872   ins_encode %{
21873     int vlen_enc = vector_length_encoding(this);
21874     __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21875   %}
21876   ins_pipe( pipe_slow );
21877 %}
21878 
21879 instruct vand_mem(vec dst, vec src, memory mem) %{
21880   predicate((UseAVX > 0) &&
21881             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21882   match(Set dst (AndV src (LoadVector mem)));
21883   format %{ "vpand   $dst,$src,$mem\t! and vectors" %}
21884   ins_encode %{
21885     int vlen_enc = vector_length_encoding(this);
21886     __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21887   %}
21888   ins_pipe( pipe_slow );
21889 %}
21890 
21891 // --------------------------------- OR ---------------------------------------
21892 
21893 instruct vor(vec dst, vec src) %{
21894   predicate(UseAVX == 0);
21895   match(Set dst (OrV dst src));
21896   format %{ "por     $dst,$src\t! or vectors" %}
21897   ins_encode %{
21898     __ por($dst$$XMMRegister, $src$$XMMRegister);
21899   %}
21900   ins_pipe( pipe_slow );
21901 %}
21902 
21903 instruct vor_reg(vec dst, vec src1, vec src2) %{
21904   predicate(UseAVX > 0);
21905   match(Set dst (OrV src1 src2));
21906   format %{ "vpor    $dst,$src1,$src2\t! or vectors" %}
21907   ins_encode %{
21908     int vlen_enc = vector_length_encoding(this);
21909     __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21910   %}
21911   ins_pipe( pipe_slow );
21912 %}
21913 
21914 instruct vor_mem(vec dst, vec src, memory mem) %{
21915   predicate((UseAVX > 0) &&
21916             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21917   match(Set dst (OrV src (LoadVector mem)));
21918   format %{ "vpor    $dst,$src,$mem\t! or vectors" %}
21919   ins_encode %{
21920     int vlen_enc = vector_length_encoding(this);
21921     __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21922   %}
21923   ins_pipe( pipe_slow );
21924 %}
21925 
21926 // --------------------------------- XOR --------------------------------------
21927 
21928 instruct vxor(vec dst, vec src) %{
21929   predicate(UseAVX == 0);
21930   match(Set dst (XorV dst src));
21931   format %{ "pxor    $dst,$src\t! xor vectors" %}
21932   ins_encode %{
21933     __ pxor($dst$$XMMRegister, $src$$XMMRegister);
21934   %}
21935   ins_pipe( pipe_slow );
21936 %}
21937 
21938 instruct vxor_reg(vec dst, vec src1, vec src2) %{
21939   predicate(UseAVX > 0);
21940   match(Set dst (XorV src1 src2));
21941   format %{ "vpxor   $dst,$src1,$src2\t! xor vectors" %}
21942   ins_encode %{
21943     int vlen_enc = vector_length_encoding(this);
21944     __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
21945   %}
21946   ins_pipe( pipe_slow );
21947 %}
21948 
21949 instruct vxor_mem(vec dst, vec src, memory mem) %{
21950   predicate((UseAVX > 0) &&
21951             (Matcher::vector_length_in_bytes(n->in(1)) > 8));
21952   match(Set dst (XorV src (LoadVector mem)));
21953   format %{ "vpxor   $dst,$src,$mem\t! xor vectors" %}
21954   ins_encode %{
21955     int vlen_enc = vector_length_encoding(this);
21956     __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
21957   %}
21958   ins_pipe( pipe_slow );
21959 %}
21960 
21961 // --------------------------------- VectorCast --------------------------------------
21962 
21963 instruct vcastBtoX(vec dst, vec src) %{
21964   predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_DOUBLE);
21965   match(Set dst (VectorCastB2X src));
21966   format %{ "vector_cast_b2x $dst,$src\t!" %}
21967   ins_encode %{
21968     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
21969     int vlen_enc = vector_length_encoding(this);
21970     __ vconvert_b2x(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21971   %}
21972   ins_pipe( pipe_slow );
21973 %}
21974 
21975 instruct vcastBtoD(legVec dst, legVec src) %{
21976   predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_DOUBLE);
21977   match(Set dst (VectorCastB2X src));
21978   format %{ "vector_cast_b2x $dst,$src\t!" %}
21979   ins_encode %{
21980     int vlen_enc = vector_length_encoding(this);
21981     __ vconvert_b2x(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
21982   %}
21983   ins_pipe( pipe_slow );
21984 %}
21985 
21986 instruct castStoX(vec dst, vec src) %{
21987   predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
21988             Matcher::vector_length(n->in(1)) <= 8 && // src
21989             Matcher::vector_element_basic_type(n) == T_BYTE);
21990   match(Set dst (VectorCastS2X src));
21991   format %{ "vector_cast_s2x $dst,$src" %}
21992   ins_encode %{
21993     assert(UseAVX > 0, "required");
21994 
21995     __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), 0, noreg);
21996     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
21997   %}
21998   ins_pipe( pipe_slow );
21999 %}
22000 
22001 instruct vcastStoX(vec dst, vec src, vec vtmp) %{
22002   predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
22003             Matcher::vector_length(n->in(1)) == 16 && // src
22004             Matcher::vector_element_basic_type(n) == T_BYTE);
22005   effect(TEMP dst, TEMP vtmp);
22006   match(Set dst (VectorCastS2X src));
22007   format %{ "vector_cast_s2x $dst,$src\t! using $vtmp as TEMP" %}
22008   ins_encode %{
22009     assert(UseAVX > 0, "required");
22010 
22011     int vlen_enc = vector_length_encoding(Matcher::vector_length_in_bytes(this, $src));
22012     __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
22013     __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
22014     __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0);
22015   %}
22016   ins_pipe( pipe_slow );
22017 %}
22018 
22019 instruct vcastStoX_evex(vec dst, vec src) %{
22020   predicate((UseAVX > 2 && VM_Version::supports_avx512vlbw()) ||
22021             (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
22022   match(Set dst (VectorCastS2X src));
22023   format %{ "vector_cast_s2x $dst,$src\t!" %}
22024   ins_encode %{
22025     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22026     int src_vlen_enc = vector_length_encoding(this, $src);
22027     int vlen_enc = vector_length_encoding(this);
22028     switch (to_elem_bt) {
22029       case T_BYTE:
22030         if (!VM_Version::supports_avx512vl()) {
22031           vlen_enc = Assembler::AVX_512bit;
22032         }
22033         __ evpmovwb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
22034         break;
22035       case T_INT:
22036         __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22037         break;
22038       case T_FLOAT:
22039         __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22040         __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22041         break;
22042       case T_LONG:
22043         __ vpmovsxwq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22044         break;
22045       case T_DOUBLE: {
22046         int mid_vlen_enc = (vlen_enc == Assembler::AVX_512bit) ? Assembler::AVX_256bit : Assembler::AVX_128bit;
22047         __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, mid_vlen_enc);
22048         __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22049         break;
22050       }
22051       default:
22052         ShouldNotReachHere();
22053     }
22054   %}
22055   ins_pipe( pipe_slow );
22056 %}
22057 
22058 instruct castItoX(vec dst, vec src) %{
22059   predicate(UseAVX <= 2 &&
22060             (Matcher::vector_length_in_bytes(n->in(1)) <= 16) &&
22061             (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
22062   match(Set dst (VectorCastI2X src));
22063   format %{ "vector_cast_i2x $dst,$src" %}
22064   ins_encode %{
22065     assert(UseAVX > 0, "required");
22066 
22067     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22068     int vlen_enc = vector_length_encoding(this, $src);
22069 
22070     if (to_elem_bt == T_BYTE) {
22071       __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg);
22072       __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22073       __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22074     } else {
22075       assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
22076       __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
22077       __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22078     }
22079   %}
22080   ins_pipe( pipe_slow );
22081 %}
22082 
22083 instruct vcastItoX(vec dst, vec src, vec vtmp) %{
22084   predicate(UseAVX <= 2 &&
22085             (Matcher::vector_length_in_bytes(n->in(1)) == 32) &&
22086             (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
22087   match(Set dst (VectorCastI2X src));
22088   format %{ "vector_cast_i2x $dst,$src\t! using $vtmp as TEMP" %}
22089   effect(TEMP dst, TEMP vtmp);
22090   ins_encode %{
22091     assert(UseAVX > 0, "required");
22092 
22093     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22094     int vlen_enc = vector_length_encoding(this, $src);
22095 
22096     if (to_elem_bt == T_BYTE) {
22097       __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg);
22098       __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1);
22099       __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22100       __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22101     } else {
22102       assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
22103       __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
22104       __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1);
22105       __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
22106     }
22107   %}
22108   ins_pipe( pipe_slow );
22109 %}
22110 
22111 instruct vcastItoX_evex(vec dst, vec src) %{
22112   predicate(UseAVX > 2 ||
22113             (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
22114   match(Set dst (VectorCastI2X src));
22115   format %{ "vector_cast_i2x $dst,$src\t!" %}
22116   ins_encode %{
22117     assert(UseAVX > 0, "required");
22118 
22119     BasicType dst_elem_bt = Matcher::vector_element_basic_type(this);
22120     int src_vlen_enc = vector_length_encoding(this, $src);
22121     int dst_vlen_enc = vector_length_encoding(this);
22122     switch (dst_elem_bt) {
22123       case T_BYTE:
22124         if (!VM_Version::supports_avx512vl()) {
22125           src_vlen_enc = Assembler::AVX_512bit;
22126         }
22127         __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
22128         break;
22129       case T_SHORT:
22130         if (!VM_Version::supports_avx512vl()) {
22131           src_vlen_enc = Assembler::AVX_512bit;
22132         }
22133         __ evpmovdw($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
22134         break;
22135       case T_FLOAT:
22136         __ vcvtdq2ps($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
22137         break;
22138       case T_LONG:
22139         __ vpmovsxdq($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
22140         break;
22141       case T_DOUBLE:
22142         __ vcvtdq2pd($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
22143         break;
22144       default:
22145         ShouldNotReachHere();
22146     }
22147   %}
22148   ins_pipe( pipe_slow );
22149 %}
22150 
22151 instruct vcastLtoBS(vec dst, vec src) %{
22152   predicate((Matcher::vector_element_basic_type(n) == T_BYTE || Matcher::vector_element_basic_type(n) == T_SHORT) &&
22153             UseAVX <= 2);
22154   match(Set dst (VectorCastL2X src));
22155   format %{ "vector_cast_l2x  $dst,$src" %}
22156   ins_encode %{
22157     assert(UseAVX > 0, "required");
22158 
22159     int vlen = Matcher::vector_length_in_bytes(this, $src);
22160     BasicType to_elem_bt  = Matcher::vector_element_basic_type(this);
22161     AddressLiteral mask_addr = (to_elem_bt == T_BYTE) ? ExternalAddress(vector_int_to_byte_mask())
22162                                                       : ExternalAddress(vector_int_to_short_mask());
22163     if (vlen <= 16) {
22164       __ vpshufd($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_128bit);
22165       __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg);
22166       __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22167     } else {
22168       assert(vlen <= 32, "required");
22169       __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_256bit);
22170       __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, Assembler::AVX_256bit);
22171       __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg);
22172       __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22173     }
22174     if (to_elem_bt == T_BYTE) {
22175       __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
22176     }
22177   %}
22178   ins_pipe( pipe_slow );
22179 %}
22180 
22181 instruct vcastLtoX_evex(vec dst, vec src) %{
22182   predicate(UseAVX > 2 ||
22183             (Matcher::vector_element_basic_type(n) == T_INT ||
22184              Matcher::vector_element_basic_type(n) == T_FLOAT ||
22185              Matcher::vector_element_basic_type(n) == T_DOUBLE));
22186   match(Set dst (VectorCastL2X src));
22187   format %{ "vector_cast_l2x  $dst,$src\t!" %}
22188   ins_encode %{
22189     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22190     int vlen = Matcher::vector_length_in_bytes(this, $src);
22191     int vlen_enc = vector_length_encoding(this, $src);
22192     switch (to_elem_bt) {
22193       case T_BYTE:
22194         if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
22195           vlen_enc = Assembler::AVX_512bit;
22196         }
22197         __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22198         break;
22199       case T_SHORT:
22200         if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
22201           vlen_enc = Assembler::AVX_512bit;
22202         }
22203         __ evpmovqw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22204         break;
22205       case T_INT:
22206         if (vlen == 8) {
22207           if ($dst$$XMMRegister != $src$$XMMRegister) {
22208             __ movflt($dst$$XMMRegister, $src$$XMMRegister);
22209           }
22210         } else if (vlen == 16) {
22211           __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 8);
22212         } else if (vlen == 32) {
22213           if (UseAVX > 2) {
22214             if (!VM_Version::supports_avx512vl()) {
22215               vlen_enc = Assembler::AVX_512bit;
22216             }
22217             __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22218           } else {
22219             __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, vlen_enc);
22220             __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc);
22221           }
22222         } else { // vlen == 64
22223           __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22224         }
22225         break;
22226       case T_FLOAT:
22227         assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
22228         __ evcvtqq2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22229         break;
22230       case T_DOUBLE:
22231         assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
22232         __ evcvtqq2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22233         break;
22234 
22235       default: assert(false, "%s", type2name(to_elem_bt));
22236     }
22237   %}
22238   ins_pipe( pipe_slow );
22239 %}
22240 
22241 instruct vcastFtoD_reg(vec dst, vec src) %{
22242   predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
22243   match(Set dst (VectorCastF2X src));
22244   format %{ "vector_cast_f2d  $dst,$src\t!" %}
22245   ins_encode %{
22246     int vlen_enc = vector_length_encoding(this);
22247     __ vcvtps2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22248   %}
22249   ins_pipe( pipe_slow );
22250 %}
22251 
22252 
22253 instruct castFtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
22254   predicate(!VM_Version::supports_avx10_2() &&
22255             !VM_Version::supports_avx512vl() &&
22256             Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
22257             type2aelembytes(Matcher::vector_element_basic_type(n)) <= 4 &&
22258             is_integral_type(Matcher::vector_element_basic_type(n)));
22259   match(Set dst (VectorCastF2X src));
22260   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
22261   format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
22262   ins_encode %{
22263     int vlen_enc = vector_length_encoding(this, $src);
22264     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22265     // JDK-8292878 removed the need for an explicit scratch register needed to load greater than
22266     // 32 bit addresses for register indirect addressing mode since stub constants
22267     // are part of code cache and there is a cap of 2G on ReservedCodeCacheSize currently.
22268     // However, targets are free to increase this limit, but having a large code cache size
22269     // greater than 2G looks unreasonable in practical scenario, on the hind side with given
22270     // cap we save a temporary register allocation which in limiting case can prevent
22271     // spilling in high register pressure blocks.
22272     __ vector_castF2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22273                           $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
22274                           ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22275   %}
22276   ins_pipe( pipe_slow );
22277 %}
22278 
22279 instruct castFtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22280   predicate(!VM_Version::supports_avx10_2() &&
22281             (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
22282             is_integral_type(Matcher::vector_element_basic_type(n)));
22283   match(Set dst (VectorCastF2X src));
22284   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22285   format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
22286   ins_encode %{
22287     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22288     if (to_elem_bt == T_LONG) {
22289       int vlen_enc = vector_length_encoding(this);
22290       __ vector_castF2L_evex($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22291                              $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
22292                              ExternalAddress(vector_double_signflip()), noreg, vlen_enc);
22293     } else {
22294       int vlen_enc = vector_length_encoding(this, $src);
22295       __ vector_castF2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22296                              $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
22297                              ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22298     }
22299   %}
22300   ins_pipe( pipe_slow );
22301 %}
22302 
22303 instruct castFtoX_reg_avx10_2(vec dst, vec src) %{
22304   predicate(VM_Version::supports_avx10_2() &&
22305             is_integral_type(Matcher::vector_element_basic_type(n)));
22306   match(Set dst (VectorCastF2X src));
22307   format %{ "vector_cast_f2x_avx10_2 $dst, $src\t!" %}
22308   ins_encode %{
22309     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22310     int vlen_enc = (to_elem_bt == T_LONG) ? vector_length_encoding(this) : vector_length_encoding(this, $src);
22311     __ vector_castF2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22312   %}
22313   ins_pipe( pipe_slow );
22314 %}
22315 
22316 instruct castFtoX_mem_avx10_2(vec dst, memory src) %{
22317   predicate(VM_Version::supports_avx10_2() &&
22318             is_integral_type(Matcher::vector_element_basic_type(n)));
22319   match(Set dst (VectorCastF2X (LoadVector src)));
22320   format %{ "vector_cast_f2x_avx10_2 $dst, $src\t!" %}
22321   ins_encode %{
22322     int vlen = Matcher::vector_length(this);
22323     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22324     int vlen_enc = (to_elem_bt == T_LONG) ? vector_length_encoding(this) : vector_length_encoding(vlen * sizeof(jfloat));
22325     __ vector_castF2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$Address, vlen_enc);
22326   %}
22327   ins_pipe( pipe_slow );
22328 %}
22329 
22330 instruct vcastDtoF_reg(vec dst, vec src) %{
22331   predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
22332   match(Set dst (VectorCastD2X src));
22333   format %{ "vector_cast_d2x  $dst,$src\t!" %}
22334   ins_encode %{
22335     int vlen_enc = vector_length_encoding(this, $src);
22336     __ vcvtpd2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22337   %}
22338   ins_pipe( pipe_slow );
22339 %}
22340 
22341 instruct castDtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, vec xtmp5, rFlagsReg cr) %{
22342   predicate(!VM_Version::supports_avx10_2() &&
22343             !VM_Version::supports_avx512vl() &&
22344             Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
22345             is_integral_type(Matcher::vector_element_basic_type(n)));
22346   match(Set dst (VectorCastD2X src));
22347   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP xtmp5, KILL cr);
22348   format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4 and $xtmp5 as TEMP" %}
22349   ins_encode %{
22350     int vlen_enc = vector_length_encoding(this, $src);
22351     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22352     __ vector_castD2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22353                           $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, $xtmp5$$XMMRegister,
22354                           ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
22355   %}
22356   ins_pipe( pipe_slow );
22357 %}
22358 
22359 instruct castDtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22360   predicate(!VM_Version::supports_avx10_2() &&
22361             (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
22362             is_integral_type(Matcher::vector_element_basic_type(n)));
22363   match(Set dst (VectorCastD2X src));
22364   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22365   format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
22366   ins_encode %{
22367     int vlen_enc = vector_length_encoding(this, $src);
22368     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22369     AddressLiteral signflip = VM_Version::supports_avx512dq() ? ExternalAddress(vector_double_signflip()) :
22370                               ExternalAddress(vector_float_signflip());
22371     __ vector_castD2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
22372                            $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister, signflip, noreg, vlen_enc);
22373   %}
22374   ins_pipe( pipe_slow );
22375 %}
22376 
22377 instruct castDtoX_reg_avx10_2(vec dst, vec src) %{
22378   predicate(VM_Version::supports_avx10_2() &&
22379             is_integral_type(Matcher::vector_element_basic_type(n)));
22380   match(Set dst (VectorCastD2X src));
22381   format %{ "vector_cast_d2x_avx10_2 $dst, $src\t!" %}
22382   ins_encode %{
22383     int vlen_enc = vector_length_encoding(this, $src);
22384     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22385     __ vector_castD2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22386   %}
22387   ins_pipe( pipe_slow );
22388 %}
22389 
22390 instruct castDtoX_mem_avx10_2(vec dst, memory src) %{
22391   predicate(VM_Version::supports_avx10_2() &&
22392             is_integral_type(Matcher::vector_element_basic_type(n)));
22393   match(Set dst (VectorCastD2X (LoadVector src)));
22394   format %{ "vector_cast_d2x_avx10_2 $dst, $src\t!" %}
22395   ins_encode %{
22396     int vlen = Matcher::vector_length(this);
22397     int vlen_enc = vector_length_encoding(vlen * sizeof(jdouble));
22398     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22399     __ vector_castD2X_avx10_2(to_elem_bt, $dst$$XMMRegister, $src$$Address, vlen_enc);
22400   %}
22401   ins_pipe( pipe_slow );
22402 %}
22403 
22404 instruct vucast(vec dst, vec src) %{
22405   match(Set dst (VectorUCastB2X src));
22406   match(Set dst (VectorUCastS2X src));
22407   match(Set dst (VectorUCastI2X src));
22408   format %{ "vector_ucast $dst,$src\t!" %}
22409   ins_encode %{
22410     assert(UseAVX > 0, "required");
22411 
22412     BasicType from_elem_bt = Matcher::vector_element_basic_type(this, $src);
22413     BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
22414     int vlen_enc = vector_length_encoding(this);
22415     __ vector_unsigned_cast($dst$$XMMRegister, $src$$XMMRegister, vlen_enc, from_elem_bt, to_elem_bt);
22416   %}
22417   ins_pipe( pipe_slow );
22418 %}
22419 
22420 instruct vround_float_avx(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
22421   predicate(!VM_Version::supports_avx512vl() &&
22422             Matcher::vector_length_in_bytes(n) < 64 &&
22423             Matcher::vector_element_basic_type(n) == T_INT);
22424   match(Set dst (RoundVF src));
22425   effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
22426   format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $xtmp3, $xtmp4 as TEMP" %}
22427   ins_encode %{
22428     int vlen_enc = vector_length_encoding(this);
22429     InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22430     __ vector_round_float_avx($dst$$XMMRegister, $src$$XMMRegister,
22431                               ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
22432                               $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister);
22433   %}
22434   ins_pipe( pipe_slow );
22435 %}
22436 
22437 instruct vround_float_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22438   predicate((VM_Version::supports_avx512vl() ||
22439              Matcher::vector_length_in_bytes(n) == 64) &&
22440              Matcher::vector_element_basic_type(n) == T_INT);
22441   match(Set dst (RoundVF src));
22442   effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
22443   format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
22444   ins_encode %{
22445     int vlen_enc = vector_length_encoding(this);
22446     InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22447     __ vector_round_float_evex($dst$$XMMRegister, $src$$XMMRegister,
22448                                ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
22449                                $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
22450   %}
22451   ins_pipe( pipe_slow );
22452 %}
22453 
22454 instruct vround_reg_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
22455   predicate(Matcher::vector_element_basic_type(n) == T_LONG);
22456   match(Set dst (RoundVD src));
22457   effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2,  KILL cr);
22458   format %{ "vector_round_long $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
22459   ins_encode %{
22460     int vlen_enc = vector_length_encoding(this);
22461     InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
22462     __ vector_round_double_evex($dst$$XMMRegister, $src$$XMMRegister,
22463                                 ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), new_mxcsr, vlen_enc,
22464                                 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
22465   %}
22466   ins_pipe( pipe_slow );
22467 %}
22468 
22469 // --------------------------------- VectorMaskCmp --------------------------------------
22470 
22471 instruct vcmpFD(legVec dst, legVec src1, legVec src2, immI8 cond) %{
22472   predicate(n->bottom_type()->isa_pvectmask() == nullptr &&
22473             Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  8 && // src1
22474             Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22475             is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22476   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22477   format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
22478   ins_encode %{
22479     int vlen_enc = vector_length_encoding(this, $src1);
22480     Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22481     if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22482       __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22483     } else {
22484       __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22485     }
22486   %}
22487   ins_pipe( pipe_slow );
22488 %}
22489 
22490 instruct evcmpFD64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
22491   predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64 && // src1
22492             n->bottom_type()->isa_pvectmask() == nullptr &&
22493             is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22494   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22495   effect(TEMP ktmp);
22496   format %{ "vector_compare $dst,$src1,$src2,$cond" %}
22497   ins_encode %{
22498     int vlen_enc = Assembler::AVX_512bit;
22499     Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22500     KRegister mask = k0; // The comparison itself is not being masked.
22501     if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22502       __ evcmpps($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22503       __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
22504     } else {
22505       __ evcmppd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22506       __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
22507     }
22508   %}
22509   ins_pipe( pipe_slow );
22510 %}
22511 
22512 instruct evcmpFD(kReg dst, vec src1, vec src2, immI8 cond) %{
22513   predicate(n->bottom_type()->isa_pvectmask() &&
22514             is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
22515   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22516   format %{ "vector_compare_evex $dst,$src1,$src2,$cond\t!" %}
22517   ins_encode %{
22518     assert(bottom_type()->isa_pvectmask(), "TypePVectMask expected");
22519     int vlen_enc = vector_length_encoding(this, $src1);
22520     Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
22521     KRegister mask = k0; // The comparison itself is not being masked.
22522     if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
22523       __ evcmpps($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22524     } else {
22525       __ evcmppd($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
22526     }
22527   %}
22528   ins_pipe( pipe_slow );
22529 %}
22530 
22531 instruct vcmp_direct(legVec dst, legVec src1, legVec src2, immI8 cond) %{
22532   predicate(n->bottom_type()->isa_pvectmask() == nullptr &&
22533             !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22534             Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  4 && // src1
22535             Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22536             is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
22537             (n->in(2)->get_int() == BoolTest::eq ||
22538              n->in(2)->get_int() == BoolTest::lt ||
22539              n->in(2)->get_int() == BoolTest::gt)); // cond
22540   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22541   format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
22542   ins_encode %{
22543     int vlen_enc = vector_length_encoding(this, $src1);
22544     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22545     Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22546     __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, cmp, ww, vlen_enc);
22547   %}
22548   ins_pipe( pipe_slow );
22549 %}
22550 
22551 instruct vcmp_negate(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
22552   predicate(n->bottom_type()->isa_pvectmask() == nullptr &&
22553             !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22554             Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  4 && // src1
22555             Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22556             is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
22557             (n->in(2)->get_int() == BoolTest::ne ||
22558              n->in(2)->get_int() == BoolTest::le ||
22559              n->in(2)->get_int() == BoolTest::ge)); // cond
22560   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22561   effect(TEMP dst, TEMP xtmp);
22562   format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
22563   ins_encode %{
22564     int vlen_enc = vector_length_encoding(this, $src1);
22565     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22566     Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22567     __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
22568   %}
22569   ins_pipe( pipe_slow );
22570 %}
22571 
22572 instruct vcmpu(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
22573   predicate(n->bottom_type()->isa_pvectmask() == nullptr &&
22574             Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
22575             Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  4 && // src1
22576             Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
22577             is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22578   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22579   effect(TEMP dst, TEMP xtmp);
22580   format %{ "vector_compareu $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
22581   ins_encode %{
22582     InternalAddress flip_bit = $constantaddress(high_bit_set(Matcher::vector_element_basic_type(this, $src1)));
22583     int vlen_enc = vector_length_encoding(this, $src1);
22584     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22585     Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
22586 
22587     if (vlen_enc == Assembler::AVX_128bit) {
22588       __ vmovddup($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
22589     } else {
22590       __ vbroadcastsd($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
22591     }
22592     __ vpxor($dst$$XMMRegister, $xtmp$$XMMRegister, $src1$$XMMRegister, vlen_enc);
22593     __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $src2$$XMMRegister, vlen_enc);
22594     __ vpcmpCCW($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
22595   %}
22596   ins_pipe( pipe_slow );
22597 %}
22598 
22599 instruct vcmp64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
22600   predicate((n->bottom_type()->isa_pvectmask() == nullptr &&
22601              Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64) && // src1
22602              is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22603   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22604   effect(TEMP ktmp);
22605   format %{ "vector_compare $dst,$src1,$src2,$cond" %}
22606   ins_encode %{
22607     assert(UseAVX > 2, "required");
22608 
22609     int vlen_enc = vector_length_encoding(this, $src1);
22610     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22611     bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
22612     KRegister mask = k0; // The comparison itself is not being masked.
22613     bool merge = false;
22614     BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
22615 
22616     switch (src1_elem_bt) {
22617       case T_INT: {
22618         __ evpcmpd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22619         __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
22620         break;
22621       }
22622       case T_LONG: {
22623         __ evpcmpq($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22624         __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
22625         break;
22626       }
22627       default: assert(false, "%s", type2name(src1_elem_bt));
22628     }
22629   %}
22630   ins_pipe( pipe_slow );
22631 %}
22632 
22633 
22634 instruct evcmp(kReg dst, vec src1, vec src2, immI8 cond) %{
22635   predicate(n->bottom_type()->isa_pvectmask() &&
22636             is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
22637   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
22638   format %{ "vector_compared_evex $dst,$src1,$src2,$cond\t!" %}
22639   ins_encode %{
22640     assert(UseAVX > 2, "required");
22641     assert(bottom_type()->isa_pvectmask(), "TypePVectMask expected");
22642 
22643     int vlen_enc = vector_length_encoding(this, $src1);
22644     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
22645     bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
22646     BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
22647 
22648     // Comparison i
22649     switch (src1_elem_bt) {
22650       case T_BYTE: {
22651         __ evpcmpb($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22652         break;
22653       }
22654       case T_SHORT: {
22655         __ evpcmpw($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22656         break;
22657       }
22658       case T_INT: {
22659         __ evpcmpd($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22660         break;
22661       }
22662       case T_LONG: {
22663         __ evpcmpq($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
22664         break;
22665       }
22666       default: assert(false, "%s", type2name(src1_elem_bt));
22667     }
22668   %}
22669   ins_pipe( pipe_slow );
22670 %}
22671 
22672 // Extract
22673 
22674 instruct extractI(rRegI dst, legVec src, immU8 idx) %{
22675   predicate(Matcher::vector_length_in_bytes(n->in(1)) <= 16); // src
22676   match(Set dst (ExtractI src idx));
22677   match(Set dst (ExtractS src idx));
22678   match(Set dst (ExtractB src idx));
22679   format %{ "extractI $dst,$src,$idx\t!" %}
22680   ins_encode %{
22681     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22682 
22683     BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
22684     __ get_elem(elem_bt, $dst$$Register, $src$$XMMRegister, $idx$$constant);
22685   %}
22686   ins_pipe( pipe_slow );
22687 %}
22688 
22689 instruct vextractI(rRegI dst, legVec src, immI idx, legVec vtmp) %{
22690   predicate(Matcher::vector_length_in_bytes(n->in(1)) == 32 || // src
22691             Matcher::vector_length_in_bytes(n->in(1)) == 64);  // src
22692   match(Set dst (ExtractI src idx));
22693   match(Set dst (ExtractS src idx));
22694   match(Set dst (ExtractB src idx));
22695   effect(TEMP vtmp);
22696   format %{ "vextractI $dst,$src,$idx\t! using $vtmp as TEMP" %}
22697   ins_encode %{
22698     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22699 
22700     BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
22701     XMMRegister lane_xmm = __ get_lane(elem_bt, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22702     __ get_elem(elem_bt, $dst$$Register, lane_xmm, $idx$$constant);
22703   %}
22704   ins_pipe( pipe_slow );
22705 %}
22706 
22707 instruct extractL(rRegL dst, legVec src, immU8 idx) %{
22708   predicate(Matcher::vector_length(n->in(1)) <= 2); // src
22709   match(Set dst (ExtractL src idx));
22710   format %{ "extractL $dst,$src,$idx\t!" %}
22711   ins_encode %{
22712     assert(UseSSE >= 4, "required");
22713     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22714 
22715     __ get_elem(T_LONG, $dst$$Register, $src$$XMMRegister, $idx$$constant);
22716   %}
22717   ins_pipe( pipe_slow );
22718 %}
22719 
22720 instruct vextractL(rRegL dst, legVec src, immU8 idx, legVec vtmp) %{
22721   predicate(Matcher::vector_length(n->in(1)) == 4 || // src
22722             Matcher::vector_length(n->in(1)) == 8);  // src
22723   match(Set dst (ExtractL src idx));
22724   effect(TEMP vtmp);
22725   format %{ "vextractL $dst,$src,$idx\t! using $vtmp as TEMP" %}
22726   ins_encode %{
22727     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22728 
22729     XMMRegister lane_reg = __ get_lane(T_LONG, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22730     __ get_elem(T_LONG, $dst$$Register, lane_reg, $idx$$constant);
22731   %}
22732   ins_pipe( pipe_slow );
22733 %}
22734 
22735 instruct extractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
22736   predicate(Matcher::vector_length(n->in(1)) <= 4);
22737   match(Set dst (ExtractF src idx));
22738   effect(TEMP dst, TEMP vtmp);
22739   format %{ "extractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
22740   ins_encode %{
22741     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22742 
22743     __ get_elem(T_FLOAT, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant, $vtmp$$XMMRegister);
22744   %}
22745   ins_pipe( pipe_slow );
22746 %}
22747 
22748 instruct vextractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
22749   predicate(Matcher::vector_length(n->in(1)/*src*/) == 8 ||
22750             Matcher::vector_length(n->in(1)/*src*/) == 16);
22751   match(Set dst (ExtractF src idx));
22752   effect(TEMP vtmp);
22753   format %{ "vextractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
22754   ins_encode %{
22755     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22756 
22757     XMMRegister lane_reg = __ get_lane(T_FLOAT, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22758     __ get_elem(T_FLOAT, $dst$$XMMRegister, lane_reg, $idx$$constant);
22759   %}
22760   ins_pipe( pipe_slow );
22761 %}
22762 
22763 instruct extractD(legRegD dst, legVec src, immU8 idx) %{
22764   predicate(Matcher::vector_length(n->in(1)) == 2); // src
22765   match(Set dst (ExtractD src idx));
22766   format %{ "extractD $dst,$src,$idx\t!" %}
22767   ins_encode %{
22768     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22769 
22770     __ get_elem(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22771   %}
22772   ins_pipe( pipe_slow );
22773 %}
22774 
22775 instruct vextractD(legRegD dst, legVec src, immU8 idx, legVec vtmp) %{
22776   predicate(Matcher::vector_length(n->in(1)) == 4 || // src
22777             Matcher::vector_length(n->in(1)) == 8);  // src
22778   match(Set dst (ExtractD src idx));
22779   effect(TEMP vtmp);
22780   format %{ "vextractD $dst,$src,$idx\t! using $vtmp as TEMP" %}
22781   ins_encode %{
22782     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");
22783 
22784     XMMRegister lane_reg = __ get_lane(T_DOUBLE, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
22785     __ get_elem(T_DOUBLE, $dst$$XMMRegister, lane_reg, $idx$$constant);
22786   %}
22787   ins_pipe( pipe_slow );
22788 %}
22789 
22790 // --------------------------------- Vector Blend --------------------------------------
22791 
22792 instruct blendvp(vec dst, vec src, vec mask, rxmm0 tmp) %{
22793   predicate(UseAVX == 0);
22794   match(Set dst (VectorBlend (Binary dst src) mask));
22795   format %{ "vector_blend  $dst,$src,$mask\t! using $tmp as TEMP" %}
22796   effect(TEMP tmp);
22797   ins_encode %{
22798     assert(UseSSE >= 4, "required");
22799 
22800     if ($mask$$XMMRegister != $tmp$$XMMRegister) {
22801       __ movdqu($tmp$$XMMRegister, $mask$$XMMRegister);
22802     }
22803     __ pblendvb($dst$$XMMRegister, $src$$XMMRegister); // uses xmm0 as mask
22804   %}
22805   ins_pipe( pipe_slow );
22806 %}
22807 
22808 instruct vblendvpI(legVec dst, legVec src1, legVec src2, legVec mask) %{
22809   predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
22810             n->in(2)->bottom_type()->isa_pvectmask() == nullptr &&
22811             Matcher::vector_length_in_bytes(n) <= 32 &&
22812             is_integral_type(Matcher::vector_element_basic_type(n)));
22813   match(Set dst (VectorBlend (Binary src1 src2) mask));
22814   format %{ "vector_blend  $dst,$src1,$src2,$mask\t!" %}
22815   ins_encode %{
22816     int vlen_enc = vector_length_encoding(this);
22817     __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
22818   %}
22819   ins_pipe( pipe_slow );
22820 %}
22821 
22822 instruct vblendvpFD(legVec dst, legVec src1, legVec src2, legVec mask) %{
22823   predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
22824             n->in(2)->bottom_type()->isa_pvectmask() == nullptr &&
22825             Matcher::vector_length_in_bytes(n) <= 32 &&
22826             !is_integral_type(Matcher::vector_element_basic_type(n)));
22827   match(Set dst (VectorBlend (Binary src1 src2) mask));
22828   format %{ "vector_blend  $dst,$src1,$src2,$mask\t!" %}
22829   ins_encode %{
22830     int vlen_enc = vector_length_encoding(this);
22831     __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
22832   %}
22833   ins_pipe( pipe_slow );
22834 %}
22835 
22836 instruct vblendvp(legVec dst, legVec src1, legVec src2, legVec mask, legVec vtmp) %{
22837   predicate(UseAVX > 0 && EnableX86ECoreOpts &&
22838             n->in(2)->bottom_type()->isa_pvectmask() == nullptr &&
22839             Matcher::vector_length_in_bytes(n) <= 32);
22840   match(Set dst (VectorBlend (Binary src1 src2) mask));
22841   format %{ "vector_blend  $dst,$src1,$src2,$mask\t! using $vtmp as TEMP" %}
22842   effect(TEMP vtmp, TEMP dst);
22843   ins_encode %{
22844     int vlen_enc = vector_length_encoding(this);
22845     __ vpandn($vtmp$$XMMRegister, $mask$$XMMRegister, $src1$$XMMRegister, vlen_enc);
22846     __ vpand ($dst$$XMMRegister,  $mask$$XMMRegister, $src2$$XMMRegister, vlen_enc);
22847     __ vpor  ($dst$$XMMRegister,  $dst$$XMMRegister,  $vtmp$$XMMRegister, vlen_enc);
22848   %}
22849   ins_pipe( pipe_slow );
22850 %}
22851 
22852 instruct evblendvp64(vec dst, vec src1, vec src2, vec mask, kReg ktmp) %{
22853   predicate(Matcher::vector_length_in_bytes(n) == 64 &&
22854             n->in(2)->bottom_type()->isa_pvectmask() == nullptr);
22855   match(Set dst (VectorBlend (Binary src1 src2) mask));
22856   format %{ "vector_blend  $dst,$src1,$src2,$mask\t! using k2 as TEMP" %}
22857   effect(TEMP ktmp);
22858   ins_encode %{
22859      int vlen_enc = Assembler::AVX_512bit;
22860      BasicType elem_bt = Matcher::vector_element_basic_type(this);
22861     __ evpcmp(elem_bt, $ktmp$$KRegister, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vlen_enc, noreg);
22862     __ evpblend(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
22863   %}
22864   ins_pipe( pipe_slow );
22865 %}
22866 
22867 
22868 instruct evblendvp64_masked(vec dst, vec src1, vec src2, kReg mask) %{
        // VectorBlend whose mask input is of pvectmask type and therefore already
        // lives in an opmask register; it is passed straight to evpblend.
        // Subword element types additionally require AVX512BW.
22869   predicate(n->in(2)->bottom_type()->isa_pvectmask() &&
22870             (!is_subword_type(Matcher::vector_element_basic_type(n)) ||
22871              VM_Version::supports_avx512bw()));
22872   match(Set dst (VectorBlend (Binary src1 src2) mask));
        // This rule declares no TEMP, so the format must not advertise one.
22873   format %{ "vector_blend  $dst,$src1,$src2,$mask" %}
22874   ins_encode %{
22875     int vlen_enc = vector_length_encoding(this);
22876     BasicType elem_bt = Matcher::vector_element_basic_type(this);
22877     __ evpblend(elem_bt, $dst$$XMMRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
22878   %}
22879   ins_pipe( pipe_slow );
22880 %}
22881 
22882 // --------------------------------- ABS --------------------------------------
22883 // a = |a|
22884 instruct vabsB_reg(vec dst, vec src) %{
        // Absolute value of packed bytes (AbsVB). Vectors of up to 16 elements use
        // pabsb; longer vectors use vpabsb with the length encoding of this node.
22885   match(Set dst (AbsVB  src));
22886   format %{ "vabsb $dst,$src\t# $dst = |$src| abs packedB" %}
22887   ins_encode %{
22888     uint vlen = Matcher::vector_length(this);
22889     if (vlen <= 16) {
22890       __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
22891     } else {
22892       int vlen_enc = vector_length_encoding(this);
22893       __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22894     }
22895   %}
22896   ins_pipe( pipe_slow );
22897 %}
22898 
22899 instruct vabsS_reg(vec dst, vec src) %{
        // Absolute value of packed shorts (AbsVS). Vectors of up to 8 elements use
        // pabsw; longer vectors use vpabsw with the length encoding of this node.
22900   match(Set dst (AbsVS  src));
22901   format %{ "vabsw $dst,$src\t# $dst = |$src| abs packedS" %}
22902   ins_encode %{
22903     uint vlen = Matcher::vector_length(this);
22904     if (vlen <= 8) {
22905       __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
22906     } else {
22907       int vlen_enc = vector_length_encoding(this);
22908       __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22909     }
22910   %}
22911   ins_pipe( pipe_slow );
22912 %}
22913 
22914 instruct vabsI_reg(vec dst, vec src) %{
        // Absolute value of packed ints (AbsVI). Vectors of up to 4 elements use
        // pabsd; longer vectors use vpabsd with the length encoding of this node.
22915   match(Set dst (AbsVI  src));
22916   format %{ "pabsd $dst,$src\t# $dst = |$src| abs packedI" %}
22917   ins_encode %{
22918     uint vlen = Matcher::vector_length(this);
22919     if (vlen <= 4) {
22920       __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
22921     } else {
22922       int vlen_enc = vector_length_encoding(this);
22923       __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22924     }
22925   %}
22926   ins_pipe( pipe_slow );
22927 %}
22928 
22929 instruct vabsL_reg(vec dst, vec src) %{
        // Absolute value of packed longs (AbsVL) via evpabsq; asserts UseAVX > 2.
22930   match(Set dst (AbsVL  src));
22931   format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packedL" %}
22932   ins_encode %{
22933     assert(UseAVX > 2, "required");
22934     int vlen_enc = vector_length_encoding(this);
          // Without AVX512VL the instruction is always emitted with the 512-bit
          // encoding, regardless of the node's own vector length.
22935     if (!VM_Version::supports_avx512vl()) {
22936       vlen_enc = Assembler::AVX_512bit;
22937     }
22938     __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22939   %}
22940   ins_pipe( pipe_slow );
22941 %}
22942 
22943 // --------------------------------- ABSNEG --------------------------------------
22944 
22945 instruct vabsnegF(vec dst, vec src) %{
        // Shared rule for AbsVF and NegVF on packed floats. The ideal opcode is
        // forwarded to the vabsnegf helper, which picks the operation.
        // Covers vector lengths 2, 8 and 16; length 4 is excluded by the predicate.
22946   predicate(Matcher::vector_length(n) != 4); // handled by 1-operand instruction vabsneg4F
22947   match(Set dst (AbsVF src));
22948   match(Set dst (NegVF src));
22949   format %{ "vabsnegf $dst,$src,[mask]\t# absneg packedF" %}
22950   ins_cost(150);
22951   ins_encode %{
22952     int opcode = this->ideal_Opcode();
22953     int vlen = Matcher::vector_length(this);
22954     if (vlen == 2) {
            // Two-element case: helper overload without a vector length encoding.
22955       __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister);
22956     } else {
22957       assert(vlen == 8 || vlen == 16, "required");
22958       int vlen_enc = vector_length_encoding(this);
22959       __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22960     }
22961   %}
22962   ins_pipe( pipe_slow );
22963 %}
22964 
22965 instruct vabsneg4F(vec dst) %{
        // One-operand AbsVF/NegVF rule for vectors of exactly 4 floats: $dst is both
        // the input and the result, and is passed twice to the vabsnegf helper.
22966   predicate(Matcher::vector_length(n) == 4);
22967   match(Set dst (AbsVF dst));
22968   match(Set dst (NegVF dst));
22969   format %{ "vabsnegf $dst,[mask]\t# absneg packed4F" %}
22970   ins_cost(150);
22971   ins_encode %{
22972     int opcode = this->ideal_Opcode();
22973     __ vabsnegf(opcode, $dst$$XMMRegister, $dst$$XMMRegister);
22974   %}
22975   ins_pipe( pipe_slow );
22976 %}
22977 
22978 instruct vabsnegD(vec dst, vec src) %{
        // Shared rule for AbsVD and NegVD on packed doubles. The ideal opcode is
        // forwarded to the vabsnegd helper, which picks the operation.
22979   match(Set dst (AbsVD  src));
22980   match(Set dst (NegVD  src));
22981   format %{ "vabsnegd $dst,$src,[mask]\t# absneg packedD" %}
22982   ins_encode %{
22983     int opcode = this->ideal_Opcode();
22984     uint vlen = Matcher::vector_length(this);
22985     if (vlen == 2) {
            // Two-element case: helper overload without a vector length encoding.
22986       __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister);
22987     } else {
22988       int vlen_enc = vector_length_encoding(this);
22989       __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
22990     }
22991   %}
22992   ins_pipe( pipe_slow );
22993 %}
22994 
22995 //------------------------------------- VectorTest --------------------------------------------
22996 
22997 instruct vptest_lt16(rFlagsRegU cr, legVec src1, legVec src2, legVec vtmp) %{
        // VectorTest on vector-register operands when the first input is shorter
        // than 16 bytes. The result is produced in the flags register $cr; $vtmp is
        // handed to the vectortest helper as scratch.
22998   predicate(Matcher::vector_length_in_bytes(n->in(1)) < 16);
22999   match(Set cr (VectorTest src1 src2));
23000   effect(TEMP vtmp);
23001   format %{ "vptest_lt16  $src1, $src2\t! using $vtmp as TEMP" %}
23002   ins_encode %{
          // Element type and byte length are taken from the first input, not from
          // this node (whose result is the flags).
23003     BasicType bt = Matcher::vector_element_basic_type(this, $src1);
23004     int vlen = Matcher::vector_length_in_bytes(this, $src1);
23005     __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister, vlen);
23006   %}
23007   ins_pipe( pipe_slow );
23008 %}
23009 
23010 instruct vptest_ge16(rFlagsRegU cr, legVec src1, legVec src2) %{
        // VectorTest on vector-register operands when the first input is at least
        // 16 bytes long. No scratch register is allocated; xnoreg is passed to the
        // vectortest helper in the temp position.
23011   predicate(Matcher::vector_length_in_bytes(n->in(1)) >= 16);
23012   match(Set cr (VectorTest src1 src2));
23013   format %{ "vptest_ge16  $src1, $src2\n\t" %}
23014   ins_encode %{
          // Element type and byte length are taken from the first input.
23015     BasicType bt = Matcher::vector_element_basic_type(this, $src1);
23016     int vlen = Matcher::vector_length_in_bytes(this, $src1);
23017     __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, vlen);
23018   %}
23019   ins_pipe( pipe_slow );
23020 %}
23021 
23022 instruct ktest_alltrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
        // VectorTest on opmask operands with predicate BoolTest::overflow, for masks
        // of fewer than 8 lanes, or exactly 8 lanes when AVX512DQ is unavailable.
        // The encoding only reads $src1; $src2 is matched but not used.
23023   predicate((Matcher::vector_length(n->in(1)) < 8 ||
23024              (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
23025             static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow);
23026   match(Set cr (VectorTest src1 src2));
23027   effect(TEMP tmp);
23028   format %{ "ktest_alltrue_le8  $src1, $src2\t! using $tmp as TEMP" %}
23029   ins_encode %{
23030     uint masklen = Matcher::vector_length(this, $src1);
          // Copy the opmask into a GPR, keep only the low masklen bits, then compare
          // against the all-ones pattern of that width to set the flags.
23031     __ kmovwl($tmp$$Register, $src1$$KRegister);
23032     __ andl($tmp$$Register, (1 << masklen) - 1);
23033     __ cmpl($tmp$$Register, (1 << masklen) - 1);
23034   %}
23035   ins_pipe( pipe_slow );
23036 %}
23037 
23038 instruct ktest_anytrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
        // VectorTest on opmask operands with predicate BoolTest::ne, for masks of
        // fewer than 8 lanes, or exactly 8 lanes when AVX512DQ is unavailable.
        // The encoding only reads $src1; $src2 is matched but not used.
23039   predicate((Matcher::vector_length(n->in(1)) < 8 ||
23040              (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
23041             static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne);
23042   match(Set cr (VectorTest src1 src2));
23043   effect(TEMP tmp);
23044   format %{ "ktest_anytrue_le8  $src1, $src2\t! using $tmp as TEMP" %}
23045   ins_encode %{
23046     uint masklen = Matcher::vector_length(this, $src1);
          // Copy the opmask into a GPR and keep only the low masklen bits; the andl
          // itself is the last instruction, so its flags are the result.
23047     __ kmovwl($tmp$$Register, $src1$$KRegister);
23048     __ andl($tmp$$Register, (1 << masklen) - 1);
23049   %}
23050   ins_pipe( pipe_slow );
23051 %}
23052 
23053 instruct ktest_ge8(rFlagsRegU cr, kReg src1, kReg src2) %{
        // VectorTest on opmask operands for masks of 16 or more lanes, or exactly
        // 8 lanes when AVX512DQ is available. No predicate kind is checked here, and
        // no scratch register is needed.
23054   predicate(Matcher::vector_length(n->in(1)) >= 16 ||
23055             (Matcher::vector_length(n->in(1)) == 8 && VM_Version::supports_avx512dq()));
23056   match(Set cr (VectorTest src1 src2));
23057   format %{ "ktest_ge8  $src1, $src2\n\t" %}
23058   ins_encode %{
23059     uint masklen = Matcher::vector_length(this, $src1);
          // $src1 is passed as both operands of kortest; $src2 is not used.
23060     __ kortest(masklen, $src1$$KRegister, $src1$$KRegister);
23061   %}
23062   ins_pipe( pipe_slow );
23063 %}
23064 
23065 //------------------------------------- LoadMask --------------------------------------------
23066 
23067 instruct loadMask(legVec dst, legVec src) %{
        // VectorLoadMask producing a mask in a vector register: selected when the
        // result type is not a pvectmask and AVX512VL+BW is not available.
        // $dst is marked TEMP so it is not allocated to the same register as $src.
23068   predicate(n->bottom_type()->isa_pvectmask() == nullptr && !VM_Version::supports_avx512vlbw());
23069   match(Set dst (VectorLoadMask src));
23070   effect(TEMP dst);
23071   format %{ "vector_loadmask_byte $dst, $src\n\t" %}
23072   ins_encode %{
23073     int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23074     BasicType elem_bt = Matcher::vector_element_basic_type(this);
          // NOTE(review): meaning of the trailing 'true' flag is defined by the
          // load_vector_mask helper, which is not visible here.
23075     __ load_vector_mask($dst$$XMMRegister, $src$$XMMRegister, vlen_in_bytes, elem_bt, true);
23076   %}
23077   ins_pipe( pipe_slow );
23078 %}
23079 
23080 instruct loadMask64(kReg dst, vec src, vec xtmp) %{
        // VectorLoadMask producing an opmask register when the result type is a
        // pvectmask but AVX512VL+BW is not available. Always emitted with the
        // 512-bit encoding; $xtmp is scratch for the helper.
23081   predicate(n->bottom_type()->isa_pvectmask() && !VM_Version::supports_avx512vlbw());
23082   match(Set dst (VectorLoadMask src));
23083   effect(TEMP xtmp);
23084   format %{ "vector_loadmask_64byte $dst, $src\t! using $xtmp as TEMP" %}
23085   ins_encode %{
          // NOTE(review): the 'true' flag selects a variant inside load_vector_mask;
          // its exact meaning is not visible here. loadMask_evex passes 'false'.
23086     __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
23087                         true, Assembler::AVX_512bit);
23088   %}
23089   ins_pipe( pipe_slow );
23090 %}
23091 
23092 instruct loadMask_evex(kReg dst, vec src,  vec xtmp) %{
        // VectorLoadMask producing an opmask register when the result type is a
        // pvectmask and AVX512VL+BW is available. $xtmp is scratch for the helper.
23093   predicate(n->bottom_type()->isa_pvectmask() && VM_Version::supports_avx512vlbw());
23094   match(Set dst (VectorLoadMask src));
23095   effect(TEMP xtmp);
23096   format %{ "vector_loadmask_byte $dst, $src\t! using $xtmp as TEMP" %}
23097   ins_encode %{
          // The length encoding is derived from the input vector (in(1)), not from
          // this node, whose result is a mask.
23098     int vlen_enc = vector_length_encoding(in(1));
23099     __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
23100                         false, vlen_enc);
23101   %}
23102   ins_pipe( pipe_slow );
23103 %}
23104 
23105 //------------------------------------- StoreMask --------------------------------------------
23106 
23107 instruct vstoreMask1B(vec dst, vec src, immI_1 size) %{
        // VectorStoreMask for 1-byte mask elements held in a vector register (input
        // is not a pvectmask), for fewer than 64 lanes. The stored form is the
        // byte-wise absolute value of the input lanes.
        // presumably mask lanes are 0 or -1, so abs yields 0 or 1 - TODO confirm
23108   predicate(Matcher::vector_length(n) < 64 && n->in(1)->bottom_type()->isa_pvectmask() == nullptr);
23109   match(Set dst (VectorStoreMask src size));
23110   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23111   ins_encode %{
23112     int vlen = Matcher::vector_length(this);
23113     if (vlen <= 16 && UseAVX <= 2) {
23114       assert(UseSSE >= 3, "required");
23115       __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
23116     } else {
23117       assert(UseAVX > 0, "required");
            // Encoding is taken from the source operand.
23118       int src_vlen_enc = vector_length_encoding(this, $src);
23119       __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
23120     }
23121   %}
23122   ins_pipe( pipe_slow );
23123 %}
23124 
23125 instruct vstoreMask2B(vec dst, vec src, vec xtmp, immI_2 size) %{
        // VectorStoreMask for 2-byte mask elements held in a vector register (input
        // is not a pvectmask), up to 16 lanes. Narrows each word lane to a byte and
        // takes the absolute value.
        // presumably mask lanes are 0 or -1, giving bytes of 0 or 1 - TODO confirm
23126   predicate(Matcher::vector_length(n) <= 16 && n->in(1)->bottom_type()->isa_pvectmask() == nullptr);
23127   match(Set dst (VectorStoreMask src size));
23128   effect(TEMP_DEF dst, TEMP xtmp);
23129   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23130   ins_encode %{
23131     int vlen_enc = Assembler::AVX_128bit;
23132     int vlen = Matcher::vector_length(this);
23133     if (vlen <= 8) {
23134       assert(UseSSE >= 3, "required");
            // Up to 8 lanes: abs on words first, then pack words to bytes with a
            // zeroed $xtmp as the second pack source.
23135       __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
23136       __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
23137       __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
23138     } else {
23139       assert(UseAVX > 0, "required");
            // 16 lanes: pull the upper 128 bits of $src into $dst, pack $src with
            // that upper half into bytes, then take abs on the bytes.
            // $xtmp is not used on this path.
23140       __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
23141       __ vpacksswb($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23142       __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23143     }
23144   %}
23145   ins_pipe( pipe_slow );
23146 %}
23147 
23148 instruct vstoreMask4B(vec dst, vec src, vec xtmp, immI_4 size) %{
        // VectorStoreMask for 4-byte mask elements held in a vector register (input
        // is not a pvectmask), up to 8 lanes, when UseAVX <= 2. Narrows each dword
        // lane to a byte in two pack steps and takes the absolute value.
        // presumably mask lanes are 0 or -1, giving bytes of 0 or 1 - TODO confirm
23149   predicate(UseAVX <= 2 && Matcher::vector_length(n) <= 8 && n->in(1)->bottom_type()->isa_pvectmask() == nullptr);
23150   match(Set dst (VectorStoreMask src size));
23151   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23152   effect(TEMP_DEF dst, TEMP xtmp);
23153   ins_encode %{
23154     int vlen_enc = Assembler::AVX_128bit;
23155     int vlen = Matcher::vector_length(this);
23156     if (vlen <= 4) {
23157       assert(UseSSE >= 3, "required");
            // Up to 4 lanes: abs on dwords, then dword->word and word->byte packs,
            // each with a zeroed $xtmp as the second pack source.
23158       __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
23159       __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
23160       __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister);
23161       __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
23162     } else {
23163       assert(UseAVX > 0, "required");
            // 8 lanes: pack $src with its extracted upper 128 bits into words, pack
            // those words with zero into bytes, then take abs on the bytes.
23164       __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
23165       __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
23166       __ vpackssdw($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23167       __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
23168       __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23169     }
23170   %}
23171   ins_pipe( pipe_slow );
23172 %}
23173 
23174 instruct storeMask8B(vec dst, vec src, vec xtmp, immI_8 size) %{
        // VectorStoreMask for 8-byte mask elements, exactly 2 lanes, when
        // UseAVX <= 2. Gathers one dword per qword lane, then narrows to bytes.
23175   predicate(UseAVX <= 2 && Matcher::vector_length(n) == 2);
23176   match(Set dst (VectorStoreMask src size));
23177   effect(TEMP_DEF dst, TEMP xtmp);
23178   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23179   ins_encode %{
23180     assert(UseSSE >= 3, "required");
23181     __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
          // Shuffle immediate 0x8 places source dwords 0 and 2 (the low dword of each
          // qword) into the two lowest dword positions of $dst.
23182     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x8);
23183     __ pabsd($dst$$XMMRegister, $dst$$XMMRegister);
          // Narrow dword -> word -> byte, with the zeroed $xtmp as second pack source.
23184     __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister);
23185     __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
23186   %}
23187   ins_pipe( pipe_slow );
23188 %}
23189 
23190 instruct storeMask8B_avx(vec dst, vec src, immI_8 size, vec vtmp) %{
        // VectorStoreMask for 8-byte mask elements, exactly 4 lanes, when
        // UseAVX <= 2. Collects one dword per qword lane into the low 128 bits,
        // then narrows to bytes and takes the absolute value.
23191   predicate(UseAVX <= 2 && Matcher::vector_length(n) == 4);
23192   match(Set dst (VectorStoreMask src size));
23193   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s], using $vtmp as TEMP" %}
23194   effect(TEMP_DEF dst, TEMP vtmp);
23195   ins_encode %{
23196     int vlen_enc = Assembler::AVX_128bit;
          // Within each 128-bit lane, select dwords 0 and 2 of $src (256-bit shuffle).
23197     __ vshufps($dst$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 0x88, Assembler::AVX_256bit);
          // Bring the upper lane down and blend it into dword positions 2 and 3.
23198     __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
23199     __ vblendps($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0xC, vlen_enc);
          // Reuse $vtmp as a zero vector for the two narrowing packs.
23200     __ vpxor($vtmp$$XMMRegister, $vtmp$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23201     __ vpackssdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23202     __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23203     __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
23204   %}
23205   ins_pipe( pipe_slow );
23206 %}
23207 
23208 instruct vstoreMask4B_evex_novectmask(vec dst, vec src, immI_4 size) %{
        // VectorStoreMask for 4-byte mask elements held in a vector register (input
        // is not a pvectmask) when UseAVX > 2: evpmovdb narrows dwords to bytes,
        // then vpabsb takes the byte-wise absolute value.
23209   predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_pvectmask() == nullptr);
23210   match(Set dst (VectorStoreMask src size));
23211   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23212   ins_encode %{
23213     int src_vlen_enc = vector_length_encoding(this, $src);
23214     int dst_vlen_enc = vector_length_encoding(this);
          // Without AVX512VL the narrowing move is emitted with the 512-bit encoding.
23215     if (!VM_Version::supports_avx512vl()) {
23216       src_vlen_enc = Assembler::AVX_512bit;
23217     }
23218     __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
23219     __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
23220   %}
23221   ins_pipe( pipe_slow );
23222 %}
23223 
23224 instruct vstoreMask8B_evex_novectmask(vec dst, vec src, immI_8 size) %{
        // VectorStoreMask for 8-byte mask elements held in a vector register (input
        // is not a pvectmask) when UseAVX > 2: evpmovqb narrows qwords to bytes,
        // then vpabsb takes the byte-wise absolute value.
23225   predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_pvectmask() == nullptr);
23226   match(Set dst (VectorStoreMask src size));
23227   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
23228   ins_encode %{
23229     int src_vlen_enc = vector_length_encoding(this, $src);
23230     int dst_vlen_enc = vector_length_encoding(this);
          // Without AVX512VL the narrowing move is emitted with the 512-bit encoding.
23231     if (!VM_Version::supports_avx512vl()) {
23232       src_vlen_enc = Assembler::AVX_512bit;
23233     }
23234     __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
23235     __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
23236   %}
23237   ins_pipe( pipe_slow );
23238 %}
23239 
23240 instruct vstoreMask_evex_vectmask(vec dst, kReg mask, immI size) %{
        // VectorStoreMask from an opmask register (input is a pvectmask) when
        // AVX512VL+BW is not available. Accepts any element size constant; the
        // encoding asserts that the mask input describes a 64-byte vector.
23241   predicate(n->in(1)->bottom_type()->isa_pvectmask() && !VM_Version::supports_avx512vlbw());
23242   match(Set dst (VectorStoreMask mask size));
23243   effect(TEMP_DEF dst);
23244   format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
23245   ins_encode %{
23246     assert(Matcher::vector_length_in_bytes(this, $mask) == 64, "");
          // Masked dword load from the vector_int_mask_cmp_bits() constant under
          // $mask, then narrow the dwords to bytes.
          // NOTE(review): the 'false' argument is presumably the merge flag
          // (i.e. zeroing of unselected lanes) - confirm against evmovdqul.
23247     __ evmovdqul($dst$$XMMRegister, $mask$$KRegister, ExternalAddress(vector_int_mask_cmp_bits()),
23248                  false, Assembler::AVX_512bit, noreg);
23249     __ evpmovdb($dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_512bit);
23250   %}
23251   ins_pipe( pipe_slow );
23252 %}
23253 
23254 instruct vstoreMask_evex(vec dst, kReg mask, immI size) %{
        // VectorStoreMask from an opmask register (input is a pvectmask) when
        // AVX512VL+BW is available. Accepts any element size constant.
23255   predicate(n->in(1)->bottom_type()->isa_pvectmask() && VM_Version::supports_avx512vlbw());
23256   match(Set dst (VectorStoreMask mask size));
23257   effect(TEMP_DEF dst);
23258   format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
23259   ins_encode %{
23260     int dst_vlen_enc = vector_length_encoding(this);
          // Expand the opmask into a byte vector, then take the byte-wise absolute
          // value of the result.
23261     __ evpmovm2b($dst$$XMMRegister, $mask$$KRegister, dst_vlen_enc);
23262     __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
23263   %}
23264   ins_pipe( pipe_slow );
23265 %}
23266 
23267 instruct vmaskcast_evex(kReg dst) %{
        // VectorMaskCast on an opmask register: input and output share the same
        // register ($dst), so no code is emitted and the cost is zero.
23268   match(Set dst (VectorMaskCast dst));
23269   ins_cost(0);
23270   format %{ "vector_mask_cast $dst" %}
23271   ins_encode %{
23272     // empty
23273   %}
23274   ins_pipe(empty);
23275 %}
23276 
23277 instruct vmaskcast(vec dst) %{
        // VectorMaskCast on a vector register when input and output have the same
        // size in bytes: performed in place on $dst with no code emitted.
23278   predicate(Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1)));
23279   match(Set dst (VectorMaskCast dst));
23280   ins_cost(0);
23281   format %{ "vector_mask_cast $dst" %}
23282   ins_encode %{
23283     // empty
23284   %}
23285   ins_pipe(empty);
23286 %}
23287 
23288 instruct vmaskcast_avx(vec dst, vec src) %{
        // VectorMaskCast on vector registers when input and output differ in size in
        // bytes: the conversion is delegated to the vector_mask_cast helper.
23289   predicate(Matcher::vector_length_in_bytes(n) != Matcher::vector_length_in_bytes(n->in(1)));
23290   match(Set dst (VectorMaskCast src));
23291   format %{ "vector_mask_cast $dst, $src" %}
23292   ins_encode %{
          // Lane count comes from this node; element types from source and result.
23293     int vlen = Matcher::vector_length(this);
23294     BasicType src_bt = Matcher::vector_element_basic_type(this, $src);
23295     BasicType dst_bt = Matcher::vector_element_basic_type(this);
23296     __ vector_mask_cast($dst$$XMMRegister, $src$$XMMRegister, dst_bt, src_bt, vlen);
23297   %}
23298   ins_pipe(pipe_slow);
23299 %}
23300 
23301 //-------------------------------- Load Iota Indices ----------------------------------
23302 
23303 instruct loadIotaIndices(vec dst, immI_0 src) %{
        // VectorLoadConst with a constant-zero input: loads the iota index vector
        // for this node's element type and byte length via load_iota_indices.
23304   match(Set dst (VectorLoadConst src));
23305   format %{ "vector_load_iota $dst CONSTANT_MEMORY\t! load iota indices" %}
23306   ins_encode %{
23307      int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23308      BasicType bt = Matcher::vector_element_basic_type(this);
23309      __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, bt);
23310   %}
23311   ins_pipe( pipe_slow );
23312 %}
23313 
23314 instruct VectorPopulateIndex(vec dst, rRegI src1, immI_1 src2, vec vtmp) %{
        // PopulateIndex with an int start value ($src1) and a constant step of 1
        // ($src2): broadcast the start value, load the iota indices, add the two.
23315   match(Set dst (PopulateIndex src1 src2));
23316   effect(TEMP dst, TEMP vtmp);
23317   format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
23318   ins_encode %{
23319      assert($src2$$constant == 1, "required");
23320      int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23321      int vlen_enc = vector_length_encoding(this);
23322      BasicType elem_bt = Matcher::vector_element_basic_type(this);
23323      __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc);
23324      __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt);
23325      __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23326   %}
23327   ins_pipe( pipe_slow );
23328 %}
23329 
23330 instruct VectorPopulateLIndex(vec dst, rRegL src1, immI_1 src2, vec vtmp) %{
        // PopulateIndex with a long start value ($src1) and a constant step of 1
        // ($src2): broadcast the start value, load the iota indices, add the two.
        // Same encoding as VectorPopulateIndex; only the $src1 operand class differs.
23331   match(Set dst (PopulateIndex src1 src2));
23332   effect(TEMP dst, TEMP vtmp);
23333   format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
23334   ins_encode %{
23335      assert($src2$$constant == 1, "required");
23336      int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23337      int vlen_enc = vector_length_encoding(this);
23338      BasicType elem_bt = Matcher::vector_element_basic_type(this);
23339      __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc);
23340      __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt);
23341      __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23342   %}
23343   ins_pipe( pipe_slow );
23344 %}
23345 
23346 //-------------------------------- Rearrange ----------------------------------
23347 
23348 // LoadShuffle/Rearrange for Byte
23349 instruct rearrangeB(vec dst, vec shuffle) %{
        // In-place VectorRearrange of byte vectors with fewer than 32 elements:
        // $dst is both the data source and the result, permuted by pshufb using
        // $shuffle as the control vector.
23350   predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23351             Matcher::vector_length(n) < 32);
23352   match(Set dst (VectorRearrange dst shuffle));
23353   format %{ "vector_rearrange $dst, $shuffle, $dst" %}
23354   ins_encode %{
23355     assert(UseSSE >= 4, "required");
23356     __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
23357   %}
23358   ins_pipe( pipe_slow );
23359 %}
23360 
23361 instruct rearrangeB_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
        // VectorRearrange of 32-element byte vectors when AVX512_VBMI is not
        // available. Two shuffles are performed - one on $src and one on a copy of
        // $src with its 128-bit lanes swapped - and the results are blended per
        // byte according to a mask derived from the shuffle indices.
23362   predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23363             Matcher::vector_length(n) == 32 && !VM_Version::supports_avx512_vbmi());
23364   match(Set dst (VectorRearrange src shuffle));
23365   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
23366   format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
23367   ins_encode %{
23368     assert(UseAVX >= 2, "required");
23369     // Swap src into vtmp1
23370     __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
23371     // Shuffle swapped src to get entries from other 128 bit lane
23372     __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
23373     // Shuffle original src to get entries from self 128 bit lane
23374     __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
23375     // Create a blend mask by setting high bits for entries coming from other lane in shuffle
23376     __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg);
23377     // Perform the blend
23378     __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
23379   %}
23380   ins_pipe( pipe_slow );
23381 %}
23382 
23383 
23384 instruct rearrangeB_evex(vec dst, vec src, vec shuffle, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegI rtmp) %{
        // VectorRearrange of byte vectors with more than 32 elements when
        // AVX512_VBMI is not available. The permutation is delegated to the
        // rearrange_bytes helper, which needs three vector temps, one opmask temp
        // and one general-purpose temp.
23385   predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23386             Matcher::vector_length(n) > 32 && !VM_Version::supports_avx512_vbmi());
23387   match(Set dst (VectorRearrange src shuffle));
23388   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
        // Separator written as "\t! using", matching the sibling rearrange rules.
23389   format %{ "vector_rearrange $dst, $shuffle, $src\t! using $xtmp1, $xtmp2, $xtmp3, $rtmp and $ktmp as TEMP" %}
23390   ins_encode %{
23391     int vlen_enc = vector_length_encoding(this);
23392     __ rearrange_bytes($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister,
23393                        $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister,
23394                        $rtmp$$Register, $ktmp$$KRegister, vlen_enc);
23395   %}
23396   ins_pipe( pipe_slow );
23397 %}
23398 
23399 instruct rearrangeB_evex_vbmi(vec dst, vec src, vec shuffle) %{
        // VectorRearrange of byte vectors with 32 or more elements when AVX512_VBMI
        // is available: a single vpermb with $shuffle as the index vector.
23400   predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
23401             Matcher::vector_length(n) >= 32 && VM_Version::supports_avx512_vbmi());
23402   match(Set dst (VectorRearrange src shuffle));
23403   format %{ "vector_rearrange $dst, $shuffle, $src" %}
23404   ins_encode %{
23405     int vlen_enc = vector_length_encoding(this);
23406     __ vpermb($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23407   %}
23408   ins_pipe( pipe_slow );
23409 %}
23410 
23411 // LoadShuffle/Rearrange for Short
23412 
23413 instruct loadShuffleS(vec dst, vec src, vec vtmp) %{
        // VectorLoadShuffle for short vectors when AVX512BW is not available.
        // Converts each 16-bit shuffle index i into the byte-index pair derived
        // from 2*i, so that the rearrange can be done with a byte shuffle.
        // The SSE and AVX branches compute the same result.
23414   predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23415             !VM_Version::supports_avx512bw());
23416   match(Set dst (VectorLoadShuffle src));
23417   effect(TEMP dst, TEMP vtmp);
23418   format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
23419   ins_encode %{
23420     // Create a byte shuffle mask from short shuffle mask
23421     // only byte shuffle instruction available on these platforms
23422     int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
23423     if (UseAVX == 0) {
23424       assert(vlen_in_bytes <= 16, "required");
23425       // Multiply each shuffle by two to get byte index
23426       __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister);
23427       __ psllw($vtmp$$XMMRegister, 1);
23428
23429       // Duplicate to create 2 copies of byte index
23430       __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
23431       __ psllw($dst$$XMMRegister, 8);
23432       __ por($dst$$XMMRegister, $vtmp$$XMMRegister);
23433
23434       // Add one to get alternate byte index
23435       __ movdqu($vtmp$$XMMRegister, ExternalAddress(vector_short_shufflemask()), noreg);
23436       __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
23437     } else {
23438       assert(UseAVX > 1 || vlen_in_bytes <= 16, "required");
23439       int vlen_enc = vector_length_encoding(this);
23440       // Multiply each shuffle by two to get byte index
23441       __ vpsllw($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc);
23442
23443       // Duplicate to create 2 copies of byte index
23444       __ vpsllw($dst$$XMMRegister, $vtmp$$XMMRegister,  8, vlen_enc);
23445       __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23446
23447       // Add one to get alternate byte index
23448       __ vpaddb($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_shufflemask()), vlen_enc, noreg);
23449     }
23450   %}
23451   ins_pipe( pipe_slow );
23452 %}
23453 
23454 instruct rearrangeS(vec dst, vec shuffle) %{
        // In-place VectorRearrange of short vectors with up to 8 elements when
        // AVX512BW is not available. Uses pshufb, so $shuffle is consumed as byte
        // indices.
        // presumably prepared by loadShuffleS - TODO confirm
23455   predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23456             Matcher::vector_length(n) <= 8 && !VM_Version::supports_avx512bw());
23457   match(Set dst (VectorRearrange dst shuffle));
23458   format %{ "vector_rearrange $dst, $shuffle, $dst" %}
23459   ins_encode %{
23460     assert(UseSSE >= 4, "required");
23461     __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
23462   %}
23463   ins_pipe( pipe_slow );
23464 %}
23465 
23466 instruct rearrangeS_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
        // VectorRearrange of 16-element short vectors when AVX512BW is not
        // available. Same instruction sequence as rearrangeB_avx: shuffle $src and a
        // lane-swapped copy of $src with byte shuffles, then blend the two results
        // using a mask derived from the shuffle indices.
23467   predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23468             Matcher::vector_length(n) == 16 && !VM_Version::supports_avx512bw());
23469   match(Set dst (VectorRearrange src shuffle));
23470   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
23471   format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
23472   ins_encode %{
23473     assert(UseAVX >= 2, "required");
23474     // Swap src into vtmp1
23475     __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
23476     // Shuffle swapped src to get entries from other 128 bit lane
23477     __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
23478     // Shuffle original src to get entries from self 128 bit lane
23479     __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
23480     // Create a blend mask by setting high bits for entries coming from other lane in shuffle
23481     __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg);
23482     // Perform the blend
23483     __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
23484   %}
23485   ins_pipe( pipe_slow );
23486 %}
23487 
23488 instruct rearrangeS_evex(vec dst, vec src, vec shuffle) %{
        // VectorRearrange of short vectors when AVX512BW is available: a single
        // vpermw with $shuffle as the index vector.
23489   predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
23490             VM_Version::supports_avx512bw());
23491   match(Set dst (VectorRearrange src shuffle));
23492   format %{ "vector_rearrange $dst, $shuffle, $src" %}
23493   ins_encode %{
23494     int vlen_enc = vector_length_encoding(this);
          // Without AVX512VL the permute is emitted with the 512-bit encoding.
23495     if (!VM_Version::supports_avx512vl()) {
23496       vlen_enc = Assembler::AVX_512bit;
23497     }
23498     __ vpermw($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23499   %}
23500   ins_pipe( pipe_slow );
23501 %}
23502 
23503 // LoadShuffle/Rearrange for Integer and Float
23504 
23505 instruct loadShuffleI(vec dst, vec src, vec vtmp) %{
        // VectorLoadShuffle for 4-element int/float vectors when UseAVX == 0.
        // Converts each 32-bit shuffle index i into four byte indices derived from
        // 4*i, so that the rearrange can be done with a byte shuffle.
23506   predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
23507             Matcher::vector_length(n) == 4 && UseAVX == 0);
23508   match(Set dst (VectorLoadShuffle src));
23509   effect(TEMP dst, TEMP vtmp);
23510   format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
23511   ins_encode %{
23512     assert(UseSSE >= 4, "required");
23513
23514     // Create a byte shuffle mask from int shuffle mask
23515     // only byte shuffle instruction available on these platforms
23516
23517     // Duplicate and multiply each shuffle by 4
23518     __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister);
23519     __ pshuflw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
23520     __ pshufhw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
23521     __ psllw($vtmp$$XMMRegister, 2);
23522
23523     // Duplicate again to create 4 copies of byte index
23524     __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
23525     __ psllw($dst$$XMMRegister, 8);
23526     __ por($vtmp$$XMMRegister, $dst$$XMMRegister);
23527
23528     // Add 3,2,1,0 to get alternate byte index
          // Here $dst is reloaded with the constant and the accumulated indices in
          // $vtmp are added into it, so the final result ends up in $dst.
23529     __ movdqu($dst$$XMMRegister, ExternalAddress(vector_int_shufflemask()), noreg);
23530     __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
23531   %}
23532   ins_pipe( pipe_slow );
23533 %}
23534 
23535 instruct rearrangeI(vec dst, vec shuffle) %{
        // In-place VectorRearrange of int/float vectors when UseAVX == 0. Uses
        // pshufb, so $shuffle is consumed as byte indices.
        // presumably prepared by loadShuffleI - TODO confirm
23536   predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
23537             UseAVX == 0);
23538   match(Set dst (VectorRearrange dst shuffle));
23539   format %{ "vector_rearrange $dst, $shuffle, $dst" %}
23540   ins_encode %{
23541     assert(UseSSE >= 4, "required");
23542     __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
23543   %}
23544   ins_pipe( pipe_slow );
23545 %}
23546 
23547 instruct rearrangeI_avx(vec dst, vec src, vec shuffle) %{
        // VectorRearrange of int/float vectors when UseAVX > 0: delegated to the
        // vector_rearrange_int_float helper, which receives the element type and
        // the vector length encoding.
23548   predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
23549             UseAVX > 0);
23550   match(Set dst (VectorRearrange src shuffle));
23551   format %{ "vector_rearrange $dst, $shuffle, $src" %}
23552   ins_encode %{
23553     int vlen_enc = vector_length_encoding(this);
23554     BasicType bt = Matcher::vector_element_basic_type(this);
23555     __ vector_rearrange_int_float(bt, $dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23556   %}
23557   ins_pipe( pipe_slow );
23558 %}
23559 
23560 // LoadShuffle/Rearrange for Long and Double
23561 
23562 instruct loadShuffleL(vec dst, vec src, vec vtmp) %{
        // VectorLoadShuffle for long/double vectors with fewer than 8 elements when
        // AVX512VL is not available. Converts each 64-bit shuffle index i into a
        // pair of 32-bit indices derived from 2*i, so that the rearrange can be
        // done with a double word permute.
23563   predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
23564             Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
23565   match(Set dst (VectorLoadShuffle src));
23566   effect(TEMP dst, TEMP vtmp);
23567   format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
23568   ins_encode %{
23569     assert(UseAVX >= 2, "required");
23570
23571     int vlen_enc = vector_length_encoding(this);
23572     // Create a double word shuffle mask from long shuffle mask
23573     // only double word shuffle instruction available on these platforms
23574
23575     // Multiply each shuffle by two to get double word index
23576     __ vpsllq($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc);
23577
23578     // Duplicate each double word shuffle
23579     __ vpsllq($dst$$XMMRegister, $vtmp$$XMMRegister, 32, vlen_enc);
23580     __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
23581
23582     // Add one to get alternate double word index
23583     __ vpaddd($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_long_shufflemask()), vlen_enc, noreg);
23584   %}
23585   ins_pipe( pipe_slow );
23586 %}
23587 
23588 instruct rearrangeL(vec dst, vec src, vec shuffle) %{
23589   predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
23590             Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
23591   match(Set dst (VectorRearrange src shuffle));
23592   format %{ "vector_rearrange $dst, $shuffle, $src" %}
23593   ins_encode %{
23594     assert(UseAVX >= 2, "required");
23595 
23596     int vlen_enc = vector_length_encoding(this);
23597     __ vpermd($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23598   %}
23599   ins_pipe( pipe_slow );
23600 %}
23601 
23602 instruct rearrangeL_evex(vec dst, vec src, vec shuffle) %{
23603   predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
23604             (Matcher::vector_length(n) == 8 || VM_Version::supports_avx512vl()));
23605   match(Set dst (VectorRearrange src shuffle));
23606   format %{ "vector_rearrange $dst, $shuffle, $src" %}
23607   ins_encode %{
23608     assert(UseAVX > 2, "required");
23609 
23610     int vlen_enc = vector_length_encoding(this);
23611     if (vlen_enc == Assembler::AVX_128bit) {
23612       vlen_enc = Assembler::AVX_256bit;
23613     }
23614     __ vpermq($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
23615   %}
23616   ins_pipe( pipe_slow );
23617 %}
23618 
23619 // --------------------------------- FMA --------------------------------------
23620 // a * b + c
23621 
23622 instruct vfmaF_reg(vec a, vec b, vec c) %{
23623   match(Set c (FmaVF  c (Binary a b)));
23624   format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
23625   ins_cost(150);
23626   ins_encode %{
23627     assert(UseFMA, "not enabled");
23628     int vlen_enc = vector_length_encoding(this);
23629     __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc);
23630   %}
23631   ins_pipe( pipe_slow );
23632 %}
23633 
23634 instruct vfmaF_mem(vec a, memory b, vec c) %{
23635   predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
23636   match(Set c (FmaVF  c (Binary a (LoadVector b))));
23637   format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
23638   ins_cost(150);
23639   ins_encode %{
23640     assert(UseFMA, "not enabled");
23641     int vlen_enc = vector_length_encoding(this);
23642     __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc);
23643   %}
23644   ins_pipe( pipe_slow );
23645 %}
23646 
23647 instruct vfmaD_reg(vec a, vec b, vec c) %{
23648   match(Set c (FmaVD  c (Binary a b)));
23649   format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
23650   ins_cost(150);
23651   ins_encode %{
23652     assert(UseFMA, "not enabled");
23653     int vlen_enc = vector_length_encoding(this);
23654     __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc);
23655   %}
23656   ins_pipe( pipe_slow );
23657 %}
23658 
23659 instruct vfmaD_mem(vec a, memory b, vec c) %{
23660   predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
23661   match(Set c (FmaVD  c (Binary a (LoadVector b))));
23662   format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
23663   ins_cost(150);
23664   ins_encode %{
23665     assert(UseFMA, "not enabled");
23666     int vlen_enc = vector_length_encoding(this);
23667     __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc);
23668   %}
23669   ins_pipe( pipe_slow );
23670 %}
23671 
23672 // --------------------------------- Vector Multiply Add --------------------------------------
23673 
23674 instruct vmuladdS2I_reg_sse(vec dst, vec src1) %{
23675   predicate(UseAVX == 0);
23676   match(Set dst (MulAddVS2VI dst src1));
23677   format %{ "pmaddwd $dst,$src1\t! muladd packedStoI" %}
23678   ins_encode %{
23679     __ pmaddwd($dst$$XMMRegister, $src1$$XMMRegister);
23680   %}
23681   ins_pipe( pipe_slow );
23682 %}
23683 
23684 instruct vmuladdS2I_reg_avx(vec dst, vec src1, vec src2) %{
23685   predicate(UseAVX > 0);
23686   match(Set dst (MulAddVS2VI src1 src2));
23687   format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packedStoI" %}
23688   ins_encode %{
23689     int vlen_enc = vector_length_encoding(this);
23690     __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
23691   %}
23692   ins_pipe( pipe_slow );
23693 %}
23694 
23695 // --------------------------------- Vector Multiply Add Add ----------------------------------
23696 
23697 instruct vmuladdaddS2I_reg(vec dst, vec src1, vec src2) %{
23698   predicate(VM_Version::supports_avx512_vnni());
23699   match(Set dst (AddVI (MulAddVS2VI src1 src2) dst));
23700   format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packedStoI" %}
23701   ins_encode %{
23702     assert(UseAVX > 2, "required");
23703     int vlen_enc = vector_length_encoding(this);
23704     __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
23705   %}
23706   ins_pipe( pipe_slow );
23707   ins_cost(10);
23708 %}
23709 
23710 // --------------------------------- PopCount --------------------------------------
23711 
23712 instruct vpopcount_integral_reg_evex(vec dst, vec src) %{
23713   predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
23714   match(Set dst (PopCountVI src));
23715   match(Set dst (PopCountVL src));
23716   format %{ "vector_popcount_integral $dst, $src" %}
23717   ins_encode %{
23718     int opcode = this->ideal_Opcode();
23719     int vlen_enc = vector_length_encoding(this, $src);
23720     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23721     __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, k0, true, vlen_enc);
23722   %}
23723   ins_pipe( pipe_slow );
23724 %}
23725 
23726 instruct vpopcount_integral_reg_evex_masked(vec dst, vec src, kReg mask) %{
23727   predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
23728   match(Set dst (PopCountVI src mask));
23729   match(Set dst (PopCountVL src mask));
23730   format %{ "vector_popcount_integral_masked $dst, $src, $mask" %}
23731   ins_encode %{
23732     int vlen_enc = vector_length_encoding(this, $src);
23733     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23734     __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
23735     __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, true, vlen_enc);
23736   %}
23737   ins_pipe( pipe_slow );
23738 %}
23739 
23740 instruct vpopcount_avx_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegP rtmp) %{
23741   predicate(!is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
23742   match(Set dst (PopCountVI src));
23743   match(Set dst (PopCountVL src));
23744   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
23745   format %{ "vector_popcount_integral $dst, $src\t! using $xtmp1, $xtmp2, and $rtmp as TEMP" %}
23746   ins_encode %{
23747     int opcode = this->ideal_Opcode();
23748     int vlen_enc = vector_length_encoding(this, $src);
23749     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23750     __ vector_popcount_integral(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23751                                 $xtmp2$$XMMRegister, $rtmp$$Register, vlen_enc);
23752   %}
23753   ins_pipe( pipe_slow );
23754 %}
23755 
23756 // --------------------------------- Vector Trailing Zeros Count --------------------------------------
23757 
23758 instruct vcount_trailing_zeros_reg_evex(vec dst, vec src, vec xtmp, rRegP rtmp) %{
23759   predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
23760                                               Matcher::vector_length_in_bytes(n->in(1))));
23761   match(Set dst (CountTrailingZerosV src));
23762   effect(TEMP dst, TEMP xtmp, TEMP rtmp);
23763   ins_cost(400);
23764   format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp and $rtmp as TEMP" %}
23765   ins_encode %{
23766     int vlen_enc = vector_length_encoding(this, $src);
23767     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23768     __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
23769                                         xnoreg, xnoreg, $xtmp$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
23770   %}
23771   ins_pipe( pipe_slow );
23772 %}
23773 
23774 instruct vcount_trailing_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
23775   predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
23776             VM_Version::supports_avx512cd() &&
23777             (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
23778   match(Set dst (CountTrailingZerosV src));
23779   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
23780   ins_cost(400);
23781   format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3 and $rtmp as TEMP" %}
23782   ins_encode %{
23783     int vlen_enc = vector_length_encoding(this, $src);
23784     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23785     __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23786                                         $xtmp2$$XMMRegister, xnoreg, $xtmp3$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
23787   %}
23788   ins_pipe( pipe_slow );
23789 %}
23790 
23791 instruct vcount_trailing_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, kReg ktmp, rRegP rtmp) %{
23792   predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
23793   match(Set dst (CountTrailingZerosV src));
23794   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP ktmp, TEMP rtmp);
23795   ins_cost(400);
23796   format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3, $xtmp4, $ktmp and $rtmp as TEMP" %}
23797   ins_encode %{
23798     int vlen_enc = vector_length_encoding(this, $src);
23799     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23800     __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23801                                         $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
23802                                         $ktmp$$KRegister, $rtmp$$Register, vlen_enc);
23803   %}
23804   ins_pipe( pipe_slow );
23805 %}
23806 
23807 instruct vcount_trailing_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
23808   predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
23809   match(Set dst (CountTrailingZerosV src));
23810   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
23811   format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
23812   ins_encode %{
23813     int vlen_enc = vector_length_encoding(this, $src);
23814     BasicType bt = Matcher::vector_element_basic_type(this, $src);
23815     __ vector_count_trailing_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
23816                                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
23817   %}
23818   ins_pipe( pipe_slow );
23819 %}
23820 
23821 
23822 // --------------------------------- Bitwise Ternary Logic ----------------------------------
23823 
23824 instruct vpternlog(vec dst, vec src2, vec src3, immU8 func) %{
23825   match(Set dst (MacroLogicV (Binary dst src2) (Binary src3 func)));
23826   effect(TEMP dst);
23827   format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
23828   ins_encode %{
23829     int vector_len = vector_length_encoding(this);
23830     __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$XMMRegister, vector_len);
23831   %}
23832   ins_pipe( pipe_slow );
23833 %}
23834 
23835 instruct vpternlog_mem(vec dst, vec src2, memory src3, immU8 func) %{
23836   predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) > 8);
23837   match(Set dst (MacroLogicV (Binary dst src2) (Binary (LoadVector src3) func)));
23838   effect(TEMP dst);
23839   format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
23840   ins_encode %{
23841     int vector_len = vector_length_encoding(this);
23842     __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$Address, vector_len);
23843   %}
23844   ins_pipe( pipe_slow );
23845 %}
23846 
23847 // --------------------------------- Rotation Operations ----------------------------------
23848 instruct vprotate_immI8(vec dst, vec src, immI8 shift) %{
23849   match(Set dst (RotateLeftV src shift));
23850   match(Set dst (RotateRightV src shift));
23851   format %{ "vprotate_imm8 $dst,$src,$shift\t! vector rotate" %}
23852   ins_encode %{
23853     int opcode      = this->ideal_Opcode();
23854     int vector_len  = vector_length_encoding(this);
23855     BasicType etype = this->bottom_type()->is_vect()->element_basic_type();
23856     __ vprotate_imm(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
23857   %}
23858   ins_pipe( pipe_slow );
23859 %}
23860 
23861 instruct vprorate(vec dst, vec src, vec shift) %{
23862   match(Set dst (RotateLeftV src shift));
23863   match(Set dst (RotateRightV src shift));
23864   format %{ "vprotate $dst,$src,$shift\t! vector rotate" %}
23865   ins_encode %{
23866     int opcode      = this->ideal_Opcode();
23867     int vector_len  = vector_length_encoding(this);
23868     BasicType etype = this->bottom_type()->is_vect()->element_basic_type();
23869     __ vprotate_var(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
23870   %}
23871   ins_pipe( pipe_slow );
23872 %}
23873 
23874 // ---------------------------------- Masked Operations ------------------------------------
23875 instruct vmasked_load_avx_non_subword(vec dst, memory mem, vec mask) %{
23876   predicate(!n->in(3)->bottom_type()->isa_pvectmask());
23877   match(Set dst (LoadVectorMasked mem mask));
23878   format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %}
23879   ins_encode %{
23880     BasicType elmType = this->bottom_type()->is_vect()->element_basic_type();
23881     int vlen_enc = vector_length_encoding(this);
23882     __ vmovmask(elmType, $dst$$XMMRegister, $mem$$Address, $mask$$XMMRegister, vlen_enc);
23883   %}
23884   ins_pipe( pipe_slow );
23885 %}
23886 
23887 
23888 instruct vmasked_load_evex(vec dst, memory mem, kReg mask) %{
23889   predicate(n->in(3)->bottom_type()->isa_pvectmask());
23890   match(Set dst (LoadVectorMasked mem mask));
23891   format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %}
23892   ins_encode %{
23893     BasicType elmType =  this->bottom_type()->is_vect()->element_basic_type();
23894     int vector_len = vector_length_encoding(this);
23895     __ evmovdqu(elmType, $mask$$KRegister, $dst$$XMMRegister, $mem$$Address, false, vector_len);
23896   %}
23897   ins_pipe( pipe_slow );
23898 %}
23899 
23900 instruct vmasked_store_avx_non_subword(memory mem, vec src, vec mask) %{
23901   predicate(!n->in(3)->in(2)->bottom_type()->isa_pvectmask());
23902   match(Set mem (StoreVectorMasked mem (Binary src mask)));
23903   format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
23904   ins_encode %{
23905     const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
23906     int vlen_enc = vector_length_encoding(src_node);
23907     BasicType elmType =  src_node->bottom_type()->is_vect()->element_basic_type();
23908     __ vmovmask(elmType, $mem$$Address, $src$$XMMRegister, $mask$$XMMRegister, vlen_enc);
23909   %}
23910   ins_pipe( pipe_slow );
23911 %}
23912 
23913 instruct vmasked_store_evex(memory mem, vec src, kReg mask) %{
23914   predicate(n->in(3)->in(2)->bottom_type()->isa_pvectmask());
23915   match(Set mem (StoreVectorMasked mem (Binary src mask)));
23916   format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
23917   ins_encode %{
23918     const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
23919     BasicType elmType =  src_node->bottom_type()->is_vect()->element_basic_type();
23920     int vlen_enc = vector_length_encoding(src_node);
23921     __ evmovdqu(elmType, $mask$$KRegister, $mem$$Address, $src$$XMMRegister, true, vlen_enc);
23922   %}
23923   ins_pipe( pipe_slow );
23924 %}
23925 
23926 instruct verify_vector_alignment(rRegP addr, immL32 mask, rFlagsReg cr) %{
23927   match(Set addr (VerifyVectorAlignment addr mask));
23928   effect(KILL cr);
23929   format %{ "verify_vector_alignment $addr $mask \t! verify alignment" %}
23930   ins_encode %{
23931     Label Lskip;
23932     // check if masked bits of addr are zero
23933     __ testq($addr$$Register, $mask$$constant);
23934     __ jccb(Assembler::equal, Lskip);
23935     __ stop("verify_vector_alignment found a misaligned vector memory access");
23936     __ bind(Lskip);
23937   %}
23938   ins_pipe(pipe_slow);
23939 %}
23940 
// VectorCmpMasked: compares src1 and src2 for equality under a kReg mask and
// produces an int in dst.
// dst is preset to -1 and left untouched when the kortest below sets the carry
// flag; otherwise dst receives the number of trailing zero bits of the
// inverted comparison result. Two mask temporaries are used and the flags are
// clobbered.
instruct vmask_cmp_node(rRegI dst, vec src1, vec src2, kReg mask, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
  match(Set dst (VectorCmpMasked src1 (Binary src2 mask)));
  effect(TEMP_DEF dst, TEMP ktmp1, TEMP ktmp2, KILL cr);
  format %{ "vector_mask_cmp $src1, $src2, $mask \t! vector mask comparison" %}
  ins_encode %{
    // Both vector inputs must agree in length encoding and element type.
    assert(vector_length_encoding(this, $src1) == vector_length_encoding(this, $src2), "mismatch");
    assert(Matcher::vector_element_basic_type(this, $src1) == Matcher::vector_element_basic_type(this, $src2), "mismatch");

    Label DONE;
    int vlen_enc = vector_length_encoding(this, $src1);
    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src1);

    // ktmp2 = bitwise NOT of the input mask
    __ knotql($ktmp2$$KRegister, $mask$$KRegister);
    // Preset the result to -1; it survives if the branch to DONE is taken.
    __ mov64($dst$$Register, -1L);
    // ktmp1 = lane-wise equality of src1 and src2, computed under the mask
    __ evpcmp(elem_bt, $ktmp1$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, Assembler::eq, vlen_enc);
    // Test (ktmp2 | ktmp1) and skip the index computation when carry is set.
    // NOTE(review): per the Intel SDM, KORTEST sets CF when the OR of its
    // operands is all ones, i.e. every lane is masked off or equal - confirm.
    __ kortestql($ktmp2$$KRegister, $ktmp1$$KRegister);
    __ jccb(Assembler::carrySet, DONE);
    // dst = count of trailing zero bits in the inverted equality mask
    __ kmovql($dst$$Register, $ktmp1$$KRegister);
    __ notq($dst$$Register);
    __ tzcntq($dst$$Register, $dst$$Register);
    __ bind(DONE);
  %}
  ins_pipe( pipe_slow );
%}
23965 
23966 
23967 instruct vmask_gen(kReg dst, rRegL len, rRegL temp, rFlagsReg cr) %{
23968   match(Set dst (VectorMaskGen len));
23969   effect(TEMP temp, KILL cr);
23970   format %{ "vector_mask_gen32 $dst, $len \t! vector mask generator" %}
23971   ins_encode %{
23972     __ genmask($dst$$KRegister, $len$$Register, $temp$$Register);
23973   %}
23974   ins_pipe( pipe_slow );
23975 %}
23976 
23977 instruct vmask_gen_imm(kReg dst, immL len, rRegL temp) %{
23978   match(Set dst (VectorMaskGen len));
23979   format %{ "vector_mask_gen $len \t! vector mask generator" %}
23980   effect(TEMP temp);
23981   ins_encode %{
23982     if ($len$$constant > 0) {
23983       __ mov64($temp$$Register, right_n_bits($len$$constant));
23984       __ kmovql($dst$$KRegister, $temp$$Register);
23985     } else {
23986       __ kxorql($dst$$KRegister, $dst$$KRegister, $dst$$KRegister);
23987     }
23988   %}
23989   ins_pipe( pipe_slow );
23990 %}
23991 
23992 instruct vmask_tolong_evex(rRegL dst, kReg mask, rFlagsReg cr) %{
23993   predicate(n->in(1)->bottom_type()->isa_pvectmask());
23994   match(Set dst (VectorMaskToLong mask));
23995   effect(TEMP dst, KILL cr);
23996   format %{ "vector_tolong_evex $dst, $mask \t! vector mask tolong" %}
23997   ins_encode %{
23998     int opcode = this->ideal_Opcode();
23999     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24000     int mask_len = Matcher::vector_length(this, $mask);
24001     int mask_size = mask_len * type2aelembytes(mbt);
24002     int vlen_enc = vector_length_encoding(this, $mask);
24003     __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
24004                              $dst$$Register, mask_len, mask_size, vlen_enc);
24005   %}
24006   ins_pipe( pipe_slow );
24007 %}
24008 
24009 instruct vmask_tolong_bool(rRegL dst, vec mask, vec xtmp, rFlagsReg cr) %{
24010   predicate(n->in(1)->bottom_type()->isa_pvectmask() == nullptr);
24011   match(Set dst (VectorMaskToLong mask));
24012   format %{ "vector_tolong_bool $dst, $mask \t! using $xtmp as TEMP" %}
24013   effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
24014   ins_encode %{
24015     int opcode = this->ideal_Opcode();
24016     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24017     int mask_len = Matcher::vector_length(this, $mask);
24018     int vlen_enc = vector_length_encoding(this, $mask);
24019     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24020                              $dst$$Register, mask_len, mbt, vlen_enc);
24021   %}
24022   ins_pipe( pipe_slow );
24023 %}
24024 
24025 instruct vmask_tolong_avx(rRegL dst, vec mask, immI size, vec xtmp, rFlagsReg cr) %{
24026   predicate(n->in(1)->in(1)->bottom_type()->isa_pvectmask() == nullptr);
24027   match(Set dst (VectorMaskToLong (VectorStoreMask mask size)));
24028   format %{ "vector_tolong_avx $dst, $mask \t! using $xtmp as TEMP" %}
24029   effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
24030   ins_encode %{
24031     int opcode = this->ideal_Opcode();
24032     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24033     int mask_len = Matcher::vector_length(this, $mask);
24034     int vlen_enc = vector_length_encoding(this, $mask);
24035     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24036                              $dst$$Register, mask_len, mbt, vlen_enc);
24037   %}
24038   ins_pipe( pipe_slow );
24039 %}
24040 
24041 instruct vmask_truecount_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
24042   predicate(n->in(1)->bottom_type()->isa_pvectmask());
24043   match(Set dst (VectorMaskTrueCount mask));
24044   effect(TEMP_DEF dst, TEMP tmp, KILL cr);
24045   format %{ "vector_truecount_evex $dst, $mask \t! using $tmp as TEMP" %}
24046   ins_encode %{
24047     int opcode = this->ideal_Opcode();
24048     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24049     int mask_len = Matcher::vector_length(this, $mask);
24050     int mask_size = mask_len * type2aelembytes(mbt);
24051     int vlen_enc = vector_length_encoding(this, $mask);
24052     __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
24053                              $tmp$$Register, mask_len, mask_size, vlen_enc);
24054   %}
24055   ins_pipe( pipe_slow );
24056 %}
24057 
24058 instruct vmask_truecount_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
24059   predicate(n->in(1)->bottom_type()->isa_pvectmask() == nullptr);
24060   match(Set dst (VectorMaskTrueCount mask));
24061   effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
24062   format %{ "vector_truecount_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
24063   ins_encode %{
24064     int opcode = this->ideal_Opcode();
24065     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24066     int mask_len = Matcher::vector_length(this, $mask);
24067     int vlen_enc = vector_length_encoding(this, $mask);
24068     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24069                              $tmp$$Register, mask_len, mbt, vlen_enc);
24070   %}
24071   ins_pipe( pipe_slow );
24072 %}
24073 
24074 instruct vmask_truecount_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
24075   predicate(n->in(1)->in(1)->bottom_type()->isa_pvectmask() == nullptr);
24076   match(Set dst (VectorMaskTrueCount (VectorStoreMask mask size)));
24077   effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
24078   format %{ "vector_truecount_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
24079   ins_encode %{
24080     int opcode = this->ideal_Opcode();
24081     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24082     int mask_len = Matcher::vector_length(this, $mask);
24083     int vlen_enc = vector_length_encoding(this, $mask);
24084     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24085                              $tmp$$Register, mask_len, mbt, vlen_enc);
24086   %}
24087   ins_pipe( pipe_slow );
24088 %}
24089 
24090 instruct vmask_first_or_last_true_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
24091   predicate(n->in(1)->bottom_type()->isa_pvectmask());
24092   match(Set dst (VectorMaskFirstTrue mask));
24093   match(Set dst (VectorMaskLastTrue mask));
24094   effect(TEMP_DEF dst, TEMP tmp, KILL cr);
24095   format %{ "vector_mask_first_or_last_true_evex $dst, $mask \t! using $tmp as TEMP" %}
24096   ins_encode %{
24097     int opcode = this->ideal_Opcode();
24098     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24099     int mask_len = Matcher::vector_length(this, $mask);
24100     int mask_size = mask_len * type2aelembytes(mbt);
24101     int vlen_enc = vector_length_encoding(this, $mask);
24102     __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
24103                              $tmp$$Register, mask_len, mask_size, vlen_enc);
24104   %}
24105   ins_pipe( pipe_slow );
24106 %}
24107 
24108 instruct vmask_first_or_last_true_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
24109   predicate(n->in(1)->bottom_type()->isa_pvectmask() == nullptr);
24110   match(Set dst (VectorMaskFirstTrue mask));
24111   match(Set dst (VectorMaskLastTrue mask));
24112   effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
24113   format %{ "vector_mask_first_or_last_true_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
24114   ins_encode %{
24115     int opcode = this->ideal_Opcode();
24116     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24117     int mask_len = Matcher::vector_length(this, $mask);
24118     int vlen_enc = vector_length_encoding(this, $mask);
24119     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24120                              $tmp$$Register, mask_len, mbt, vlen_enc);
24121   %}
24122   ins_pipe( pipe_slow );
24123 %}
24124 
24125 instruct vmask_first_or_last_true_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
24126   predicate(n->in(1)->in(1)->bottom_type()->isa_pvectmask() == nullptr);
24127   match(Set dst (VectorMaskFirstTrue (VectorStoreMask mask size)));
24128   match(Set dst (VectorMaskLastTrue (VectorStoreMask mask size)));
24129   effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
24130   format %{ "vector_mask_first_or_last_true_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
24131   ins_encode %{
24132     int opcode = this->ideal_Opcode();
24133     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
24134     int mask_len = Matcher::vector_length(this, $mask);
24135     int vlen_enc = vector_length_encoding(this, $mask);
24136     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
24137                              $tmp$$Register, mask_len, mbt, vlen_enc);
24138   %}
24139   ins_pipe( pipe_slow );
24140 %}
24141 
24142 // --------------------------------- Compress/Expand Operations ---------------------------
24143 instruct vcompress_reg_avx(vec dst, vec src, vec mask, rRegI rtmp, rRegL rscratch, vec perm, vec xtmp, rFlagsReg cr) %{
24144   predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
24145   match(Set dst (CompressV src mask));
24146   match(Set dst (ExpandV src mask));
24147   effect(TEMP_DEF dst, TEMP perm, TEMP xtmp, TEMP rtmp, TEMP rscratch, KILL cr);
24148   format %{ "vector_compress $dst, $src, $mask \t!using $xtmp, $rtmp, $rscratch and $perm as TEMP" %}
24149   ins_encode %{
24150     int opcode = this->ideal_Opcode();
24151     int vlen_enc = vector_length_encoding(this);
24152     BasicType bt  = Matcher::vector_element_basic_type(this);
24153     __ vector_compress_expand_avx2(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$XMMRegister, $rtmp$$Register,
24154                                    $rscratch$$Register, $perm$$XMMRegister, $xtmp$$XMMRegister, bt, vlen_enc);
24155   %}
24156   ins_pipe( pipe_slow );
24157 %}
24158 
24159 instruct vcompress_expand_reg_evex(vec dst, vec src, kReg mask) %{
24160   predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
24161   match(Set dst (CompressV src mask));
24162   match(Set dst (ExpandV src mask));
24163   format %{ "vector_compress_expand $dst, $src, $mask" %}
24164   ins_encode %{
24165     int opcode = this->ideal_Opcode();
24166     int vector_len = vector_length_encoding(this);
24167     BasicType bt  = Matcher::vector_element_basic_type(this);
24168     __ vector_compress_expand(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, false, bt, vector_len);
24169   %}
24170   ins_pipe( pipe_slow );
24171 %}
24172 
24173 instruct vcompress_mask_reg_evex(kReg dst, kReg mask, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
24174   match(Set dst (CompressM mask));
24175   effect(TEMP rtmp1, TEMP rtmp2, KILL cr);
24176   format %{ "mask_compress_evex $dst, $mask\t! using $rtmp1 and $rtmp2 as TEMP" %}
24177   ins_encode %{
24178     assert(this->in(1)->bottom_type()->isa_pvectmask(), "");
24179     int mask_len = Matcher::vector_length(this);
24180     __ vector_mask_compress($dst$$KRegister, $mask$$KRegister, $rtmp1$$Register, $rtmp2$$Register, mask_len);
24181   %}
24182   ins_pipe( pipe_slow );
24183 %}
24184 
24185 // -------------------------------- Bit and Byte Reversal Vector Operations ------------------------
24186 
// Vector bit reversal (ReverseV) for CPUs without GFNI (see predicate).
// dst, two vector temporaries and one integer register temporary are all
// declared TEMP. Element type and vector length encoding are read from this
// node and handed to vector_reverse_bit.
// The format string places the "!" comment marker after the tab, matching the
// "\t! using ..." convention used by the other rules in this file.
24187 instruct vreverse_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
24188   predicate(!VM_Version::supports_gfni());
24189   match(Set dst (ReverseV src));
24190   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
24191   format %{ "vector_reverse_bit_evex $dst, $src\t! using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
24192   ins_encode %{
24193     int vec_enc = vector_length_encoding(this);
24194     BasicType bt = Matcher::vector_element_basic_type(this);
24195     __ vector_reverse_bit(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24196                           $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
24197   %}
24198   ins_pipe( pipe_slow );
24199 %}
24200 
// Vector bit reversal (ReverseV) for CPUs with GFNI (see predicate).
// The 64-bit constant 0x8040201008040201 is emitted through $constantaddress
// and its address is passed to vector_reverse_bit_gfni along with one vector
// temporary.
// NOTE(review): the constant is presumably the GF(2^8) affine matrix that
// reverses the bits within each byte -- confirm against
// vector_reverse_bit_gfni.
// The format string places the "!" comment marker after the tab, matching the
// "\t! using ..." convention used by the other rules in this file.
24201 instruct vreverse_reg_gfni(vec dst, vec src, vec xtmp) %{
24202   predicate(VM_Version::supports_gfni());
24203   match(Set dst (ReverseV src));
24204   effect(TEMP dst, TEMP xtmp);
24205   format %{ "vector_reverse_bit_gfni $dst, $src\t! using $xtmp as TEMP" %}
24206   ins_encode %{
24207     int vec_enc = vector_length_encoding(this);
24208     BasicType bt  = Matcher::vector_element_basic_type(this);
24209     InternalAddress addr = $constantaddress(jlong(0x8040201008040201));
24210     __ vector_reverse_bit_gfni(bt, $dst$$XMMRegister, $src$$XMMRegister, addr, vec_enc,
24211                                $xtmp$$XMMRegister);
24212   %}
24213   ins_pipe( pipe_slow );
24214 %}
24215 
// Vector byte reversal (ReverseBytesV) when AVX512BW is available or the
// vector is narrower than 64 bytes (see predicate).
// dst is declared TEMP; no additional scratch registers are requested.
// Element type and vector length encoding are read from this node and handed
// to vector_reverse_byte.
24216 instruct vreverse_byte_reg(vec dst, vec src) %{
24217   predicate(VM_Version::supports_avx512bw() || Matcher::vector_length_in_bytes(n) < 64);
24218   match(Set dst (ReverseBytesV src));
24219   effect(TEMP dst);
24220   format %{ "vector_reverse_byte $dst, $src" %}
24221   ins_encode %{
24222     int vec_enc = vector_length_encoding(this);
24223     BasicType bt = Matcher::vector_element_basic_type(this);
24224     __ vector_reverse_byte(bt, $dst$$XMMRegister, $src$$XMMRegister, vec_enc);
24225   %}
24226   ins_pipe( pipe_slow );
24227 %}
24228 
// Vector byte reversal (ReverseBytesV) for 64-byte vectors on CPUs without
// AVX512BW (see predicate).
// Unlike the single-operand rule, this one reserves two vector temporaries and
// an integer register temporary and emits vector_reverse_byte64.
// The format string places the "!" comment marker after the tab, matching the
// "\t! using ..." convention used by the other rules in this file.
24229 instruct vreverse_byte64_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
24230   predicate(!VM_Version::supports_avx512bw() && Matcher::vector_length_in_bytes(n) == 64);
24231   match(Set dst (ReverseBytesV src));
24232   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
24233   format %{ "vector_reverse_byte $dst, $src\t! using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
24234   ins_encode %{
24235     int vec_enc = vector_length_encoding(this);
24236     BasicType bt = Matcher::vector_element_basic_type(this);
24237     __ vector_reverse_byte64(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24238                              $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
24239   %}
24240   ins_pipe( pipe_slow );
24241 %}
24242 
24243 // ---------------------------------- Vector Count Leading Zeros -----------------------------------
24244 
// Unmasked vector count-leading-zeros (CountLeadingZerosV).
// Selection is delegated to is_clz_non_subword_predicate_evex, evaluated on
// the element type and byte length of the input vector (n->in(1)).
// No scratch registers are requested: xnoreg / noreg are passed for every
// temporary slot and k0 for the opmask slot of
// vector_count_leading_zeros_evex.
// Length encoding and element type are derived from $src rather than from the
// result node.
// NOTE(review): the `true` argument is presumably the merge flag -- confirm
// against the vector_count_leading_zeros_evex declaration.
24245 instruct vcount_leading_zeros_IL_reg_evex(vec dst, vec src) %{
24246   predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
24247                                               Matcher::vector_length_in_bytes(n->in(1))));
24248   match(Set dst (CountLeadingZerosV src));
24249   format %{ "vector_count_leading_zeros $dst, $src" %}
24250   ins_encode %{
24251      int vlen_enc = vector_length_encoding(this, $src);
24252      BasicType bt = Matcher::vector_element_basic_type(this, $src);
24253      __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
24254                                         xnoreg, xnoreg, k0, noreg, true, vlen_enc);
24255   %}
24256   ins_pipe( pipe_slow );
24257 %}
24258 
// Masked vector count-leading-zeros (CountLeadingZerosV src mask).
// Uses the same predicate helper as the unmasked rule, evaluated on the input
// vector (n->in(1)).
// The encoding first copies $src into $dst with evmovdquq and then runs
// vector_count_leading_zeros_evex with $mask in the opmask slot.
// NOTE(review): the copy presumably lets lanes not selected by the mask keep
// the source value, assuming the `true` argument requests merge-masking --
// confirm against the vector_count_leading_zeros_evex declaration.
24259 instruct vcount_leading_zeros_IL_reg_evex_masked(vec dst, vec src, kReg mask) %{
24260   predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
24261                                               Matcher::vector_length_in_bytes(n->in(1))));
24262   match(Set dst (CountLeadingZerosV src mask));
24263   format %{ "vector_count_leading_zeros $dst, $src, $mask" %}
24264   ins_encode %{
24265     int vlen_enc = vector_length_encoding(this, $src);
24266     BasicType bt = Matcher::vector_element_basic_type(this, $src);
24267     __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
24268     __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg,
24269                                        xnoreg, $mask$$KRegister, noreg, true, vlen_enc);
24270   %}
24271   ins_pipe( pipe_slow );
24272 %}
24273 
// Vector count-leading-zeros for T_SHORT elements on AVX-512.
// Requires AVX512CD and either AVX512VL or a 64-byte vector (see predicate).
// Note the length test is made on the result node (n) while the element type
// is read from the input (n->in(1)).
// dst and two vector temporaries are declared TEMP; the remaining temporary
// slots of vector_count_leading_zeros_evex receive xnoreg / k0 / noreg.
// The format string places the "!" comment marker after the tab, matching the
// "\t! using ..." convention used by the other rules in this file.
24274 instruct vcount_leading_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2) %{
24275   predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
24276             VM_Version::supports_avx512cd() &&
24277             (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
24278   match(Set dst (CountLeadingZerosV src));
24279   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
24280   format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1 and $xtmp2 as TEMP" %}
24281   ins_encode %{
24282     int vlen_enc = vector_length_encoding(this, $src);
24283     BasicType bt = Matcher::vector_element_basic_type(this, $src);
24284     __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24285                                        $xtmp2$$XMMRegister, xnoreg, k0, noreg, true, vlen_enc);
24286   %}
24287   ins_pipe( pipe_slow );
24288 %}
24289 
// Vector count-leading-zeros for T_BYTE elements when
// VM_Version::supports_avx512vlbw() holds (see predicate).
// This is the only CLZ rule here that fills every temporary slot of
// vector_count_leading_zeros_evex: three vector temporaries, one opmask
// temporary and one pointer-class general register, all declared TEMP along
// with dst.
// The format string places the "!" comment marker after the tab, matching the
// "\t! using ..." convention used by the other rules in this file.
24290 instruct vcount_leading_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegP rtmp) %{
24291   predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
24292   match(Set dst (CountLeadingZerosV src));
24293   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
24294   format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, $ktmp and $rtmp as TEMP" %}
24295   ins_encode %{
24296     int vlen_enc = vector_length_encoding(this, $src);
24297     BasicType bt = Matcher::vector_element_basic_type(this, $src);
24298     __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24299                                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $ktmp$$KRegister,
24300                                        $rtmp$$Register, true, vlen_enc);
24301   %}
24302   ins_pipe( pipe_slow );
24303 %}
24304 
// Vector count-leading-zeros for T_INT elements on CPUs without AVX512VL, for
// input vectors narrower than 64 bytes (see predicate).
// dst and three vector temporaries are declared TEMP. No general register is
// needed for this element type, so noreg is passed in the register slot of
// vector_count_leading_zeros_avx.
24305 instruct vcount_leading_zeros_int_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3) %{
24306   predicate(Matcher::vector_element_basic_type(n->in(1)) == T_INT &&
24307             !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
24308   match(Set dst (CountLeadingZerosV src));
24309   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
24310   format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
24311   ins_encode %{
24312     int vlen_enc = vector_length_encoding(this, $src);
24313     BasicType bt = Matcher::vector_element_basic_type(this, $src);
24314     __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24315                                       $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, noreg, vlen_enc);
24316   %}
24317   ins_pipe( pipe_slow );
24318 %}
24319 
// Vector count-leading-zeros for every element type other than T_INT on CPUs
// without AVX512VL, for input vectors narrower than 64 bytes (see predicate).
// Same shape as the T_INT rule, but additionally reserves a pointer-class
// general register temporary and passes it to vector_count_leading_zeros_avx.
24320 instruct vcount_leading_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
24321   predicate(Matcher::vector_element_basic_type(n->in(1)) != T_INT &&
24322             !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
24323   match(Set dst (CountLeadingZerosV src));
24324   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
24325   format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
24326   ins_encode %{
24327     int vlen_enc = vector_length_encoding(this, $src);
24328     BasicType bt = Matcher::vector_element_basic_type(this, $src);
24329     __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
24330                                       $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
24331   %}
24332   ins_pipe( pipe_slow );
24333 %}
24334 
24335 // ---------------------------------- Vector Masked Operations ------------------------------------
24336 
// Masked vector add, register-register form.
// Matches AddVB/AddVS/AddVI/AddVL/AddVF/AddVD whose first input is also the
// destination (two-address form) and whose last input is an opmask register.
// The ideal opcode and element type of this node are forwarded to
// evmasked_op, with $dst used as both destination and first source.
// NOTE(review): the `true` argument is presumably the merge-masking flag --
// confirm against the evmasked_op declaration.
24337 instruct vadd_reg_masked(vec dst, vec src2, kReg mask) %{
24338   match(Set dst (AddVB (Binary dst src2) mask));
24339   match(Set dst (AddVS (Binary dst src2) mask));
24340   match(Set dst (AddVI (Binary dst src2) mask));
24341   match(Set dst (AddVL (Binary dst src2) mask));
24342   match(Set dst (AddVF (Binary dst src2) mask));
24343   match(Set dst (AddVD (Binary dst src2) mask));
24344   format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
24345   ins_encode %{
24346     int vlen_enc = vector_length_encoding(this);
24347     BasicType bt = Matcher::vector_element_basic_type(this);
24348     int opc = this->ideal_Opcode();
24349     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24350                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24351   %}
24352   ins_pipe( pipe_slow );
24353 %}
24354 
// Masked vector add, register-memory form.
// Same opcodes as the register form, but the second operand is a LoadVector
// of the memory operand src2, which the match rule folds into this
// instruction; the address is passed to evmasked_op via $src2$$Address.
// NOTE(review): the `true` argument is presumably the merge-masking flag --
// confirm against the evmasked_op declaration.
24355 instruct vadd_mem_masked(vec dst, memory src2, kReg mask) %{
24356   match(Set dst (AddVB (Binary dst (LoadVector src2)) mask));
24357   match(Set dst (AddVS (Binary dst (LoadVector src2)) mask));
24358   match(Set dst (AddVI (Binary dst (LoadVector src2)) mask));
24359   match(Set dst (AddVL (Binary dst (LoadVector src2)) mask));
24360   match(Set dst (AddVF (Binary dst (LoadVector src2)) mask));
24361   match(Set dst (AddVD (Binary dst (LoadVector src2)) mask));
24362   format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
24363   ins_encode %{
24364     int vlen_enc = vector_length_encoding(this);
24365     BasicType bt = Matcher::vector_element_basic_type(this);
24366     int opc = this->ideal_Opcode();
24367     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24368                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24369   %}
24370   ins_pipe( pipe_slow );
24371 %}
24372 
// Masked vector XOR, register-register form.
// Matches XorV with the destination as first input and an opmask register as
// last input; forwards the ideal opcode and element type to evmasked_op with
// $dst as both destination and first source.
// NOTE(review): the `true` argument is presumably the merge-masking flag --
// confirm against the evmasked_op declaration.
24373 instruct vxor_reg_masked(vec dst, vec src2, kReg mask) %{
24374   match(Set dst (XorV (Binary dst src2) mask));
24375   format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
24376   ins_encode %{
24377     int vlen_enc = vector_length_encoding(this);
24378     BasicType bt = Matcher::vector_element_basic_type(this);
24379     int opc = this->ideal_Opcode();
24380     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24381                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24382   %}
24383   ins_pipe( pipe_slow );
24384 %}
24385 
// Masked vector XOR, register-memory form.
// The second operand is a LoadVector of memory operand src2 folded into the
// match rule; its address is passed to evmasked_op via $src2$$Address.
// NOTE(review): the `true` argument is presumably the merge-masking flag --
// confirm against the evmasked_op declaration.
24386 instruct vxor_mem_masked(vec dst, memory src2, kReg mask) %{
24387   match(Set dst (XorV (Binary dst (LoadVector src2)) mask));
24388   format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
24389   ins_encode %{
24390     int vlen_enc = vector_length_encoding(this);
24391     BasicType bt = Matcher::vector_element_basic_type(this);
24392     int opc = this->ideal_Opcode();
24393     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24394                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24395   %}
24396   ins_pipe( pipe_slow );
24397 %}
24398 
// Masked vector OR, register-register form.
// Matches OrV with the destination as first input and an opmask register as
// last input; forwards the ideal opcode and element type to evmasked_op with
// $dst as both destination and first source.
// NOTE(review): the `true` argument is presumably the merge-masking flag --
// confirm against the evmasked_op declaration.
24399 instruct vor_reg_masked(vec dst, vec src2, kReg mask) %{
24400   match(Set dst (OrV (Binary dst src2) mask));
24401   format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
24402   ins_encode %{
24403     int vlen_enc = vector_length_encoding(this);
24404     BasicType bt = Matcher::vector_element_basic_type(this);
24405     int opc = this->ideal_Opcode();
24406     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24407                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24408   %}
24409   ins_pipe( pipe_slow );
24410 %}
24411 
// Masked vector OR, register-memory form.
// The second operand is a LoadVector of memory operand src2 folded into the
// match rule; its address is passed to evmasked_op via $src2$$Address.
// NOTE(review): the `true` argument is presumably the merge-masking flag --
// confirm against the evmasked_op declaration.
24412 instruct vor_mem_masked(vec dst, memory src2, kReg mask) %{
24413   match(Set dst (OrV (Binary dst (LoadVector src2)) mask));
24414   format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
24415   ins_encode %{
24416     int vlen_enc = vector_length_encoding(this);
24417     BasicType bt = Matcher::vector_element_basic_type(this);
24418     int opc = this->ideal_Opcode();
24419     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24420                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24421   %}
24422   ins_pipe( pipe_slow );
24423 %}
24424 
// Masked vector AND, register-register form.
// Matches AndV with the destination as first input and an opmask register as
// last input; forwards the ideal opcode and element type to evmasked_op with
// $dst as both destination and first source.
// NOTE(review): the `true` argument is presumably the merge-masking flag --
// confirm against the evmasked_op declaration.
24425 instruct vand_reg_masked(vec dst, vec src2, kReg mask) %{
24426   match(Set dst (AndV (Binary dst src2) mask));
24427   format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
24428   ins_encode %{
24429     int vlen_enc = vector_length_encoding(this);
24430     BasicType bt = Matcher::vector_element_basic_type(this);
24431     int opc = this->ideal_Opcode();
24432     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24433                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24434   %}
24435   ins_pipe( pipe_slow );
24436 %}
24437 
// Masked vector AND, register-memory form.
// The second operand is a LoadVector of memory operand src2 folded into the
// match rule; its address is passed to evmasked_op via $src2$$Address.
// NOTE(review): the `true` argument is presumably the merge-masking flag --
// confirm against the evmasked_op declaration.
24438 instruct vand_mem_masked(vec dst, memory src2, kReg mask) %{
24439   match(Set dst (AndV (Binary dst (LoadVector src2)) mask));
24440   format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
24441   ins_encode %{
24442     int vlen_enc = vector_length_encoding(this);
24443     BasicType bt = Matcher::vector_element_basic_type(this);
24444     int opc = this->ideal_Opcode();
24445     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24446                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24447   %}
24448   ins_pipe( pipe_slow );
24449 %}
24450 
// Masked vector subtract, register-register form.
// Matches SubVB/SubVS/SubVI/SubVL/SubVF/SubVD with the destination as first
// input and an opmask register as last input. The ideal opcode and element
// type select the concrete instruction inside evmasked_op.
// NOTE(review): the `true` argument is presumably the merge-masking flag --
// confirm against the evmasked_op declaration.
24451 instruct vsub_reg_masked(vec dst, vec src2, kReg mask) %{
24452   match(Set dst (SubVB (Binary dst src2) mask));
24453   match(Set dst (SubVS (Binary dst src2) mask));
24454   match(Set dst (SubVI (Binary dst src2) mask));
24455   match(Set dst (SubVL (Binary dst src2) mask));
24456   match(Set dst (SubVF (Binary dst src2) mask));
24457   match(Set dst (SubVD (Binary dst src2) mask));
24458   format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
24459   ins_encode %{
24460     int vlen_enc = vector_length_encoding(this);
24461     BasicType bt = Matcher::vector_element_basic_type(this);
24462     int opc = this->ideal_Opcode();
24463     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24464                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24465   %}
24466   ins_pipe( pipe_slow );
24467 %}
24468 
// Masked vector subtract, register-memory form.
// Same opcodes as the register form; the subtrahend is a LoadVector of memory
// operand src2 folded into the match rule and passed to evmasked_op via
// $src2$$Address.
// NOTE(review): the `true` argument is presumably the merge-masking flag --
// confirm against the evmasked_op declaration.
24469 instruct vsub_mem_masked(vec dst, memory src2, kReg mask) %{
24470   match(Set dst (SubVB (Binary dst (LoadVector src2)) mask));
24471   match(Set dst (SubVS (Binary dst (LoadVector src2)) mask));
24472   match(Set dst (SubVI (Binary dst (LoadVector src2)) mask));
24473   match(Set dst (SubVL (Binary dst (LoadVector src2)) mask));
24474   match(Set dst (SubVF (Binary dst (LoadVector src2)) mask));
24475   match(Set dst (SubVD (Binary dst (LoadVector src2)) mask));
24476   format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
24477   ins_encode %{
24478     int vlen_enc = vector_length_encoding(this);
24479     BasicType bt = Matcher::vector_element_basic_type(this);
24480     int opc = this->ideal_Opcode();
24481     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24482                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24483   %}
24484   ins_pipe( pipe_slow );
24485 %}
24486 
// Masked vector multiply, register-register form.
// Matches MulVS/MulVI/MulVL/MulVF/MulVD (there is no MulVB rule here) with the
// destination as first input and an opmask register as last input.
// The ideal opcode and element type are forwarded to evmasked_op.
// NOTE(review): the `true` argument is presumably the merge-masking flag --
// confirm against the evmasked_op declaration.
24487 instruct vmul_reg_masked(vec dst, vec src2, kReg mask) %{
24488   match(Set dst (MulVS (Binary dst src2) mask));
24489   match(Set dst (MulVI (Binary dst src2) mask));
24490   match(Set dst (MulVL (Binary dst src2) mask));
24491   match(Set dst (MulVF (Binary dst src2) mask));
24492   match(Set dst (MulVD (Binary dst src2) mask));
24493   format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
24494   ins_encode %{
24495     int vlen_enc = vector_length_encoding(this);
24496     BasicType bt = Matcher::vector_element_basic_type(this);
24497     int opc = this->ideal_Opcode();
24498     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24499                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24500   %}
24501   ins_pipe( pipe_slow );
24502 %}
24503 
// Masked vector multiply, register-memory form.
// Same opcodes as the register form; the second factor is a LoadVector of
// memory operand src2 folded into the match rule and passed to evmasked_op
// via $src2$$Address.
// NOTE(review): the `true` argument is presumably the merge-masking flag --
// confirm against the evmasked_op declaration.
24504 instruct vmul_mem_masked(vec dst, memory src2, kReg mask) %{
24505   match(Set dst (MulVS (Binary dst (LoadVector src2)) mask));
24506   match(Set dst (MulVI (Binary dst (LoadVector src2)) mask));
24507   match(Set dst (MulVL (Binary dst (LoadVector src2)) mask));
24508   match(Set dst (MulVF (Binary dst (LoadVector src2)) mask));
24509   match(Set dst (MulVD (Binary dst (LoadVector src2)) mask));
24510   format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
24511   ins_encode %{
24512     int vlen_enc = vector_length_encoding(this);
24513     BasicType bt = Matcher::vector_element_basic_type(this);
24514     int opc = this->ideal_Opcode();
24515     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24516                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24517   %}
24518   ins_pipe( pipe_slow );
24519 %}
24520 
// Masked vector square root (SqrtVF / SqrtVD), in-place on dst.
// The operation is unary, so $dst is passed in all three register slots of
// evmasked_op (destination, first source, second source).
// NOTE(review): the `true` argument is presumably the merge-masking flag --
// confirm against the evmasked_op declaration.
24521 instruct vsqrt_reg_masked(vec dst, kReg mask) %{
24522   match(Set dst (SqrtVF dst mask));
24523   match(Set dst (SqrtVD dst mask));
24524   format %{ "vpsqrt_masked $dst, $mask\t! sqrt masked operation" %}
24525   ins_encode %{
24526     int vlen_enc = vector_length_encoding(this);
24527     BasicType bt = Matcher::vector_element_basic_type(this);
24528     int opc = this->ideal_Opcode();
24529     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24530                    $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
24531   %}
24532   ins_pipe( pipe_slow );
24533 %}
24534 
// Masked vector divide, register-register form.
// Only the floating-point opcodes DivVF and DivVD are matched. The
// destination is the dividend (first input); src2 is the divisor.
// NOTE(review): the `true` argument is presumably the merge-masking flag --
// confirm against the evmasked_op declaration.
24535 instruct vdiv_reg_masked(vec dst, vec src2, kReg mask) %{
24536   match(Set dst (DivVF (Binary dst src2) mask));
24537   match(Set dst (DivVD (Binary dst src2) mask));
24538   format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
24539   ins_encode %{
24540     int vlen_enc = vector_length_encoding(this);
24541     BasicType bt = Matcher::vector_element_basic_type(this);
24542     int opc = this->ideal_Opcode();
24543     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24544                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24545   %}
24546   ins_pipe( pipe_slow );
24547 %}
24548 
// Masked vector divide, register-memory form.
// Matches DivVF and DivVD with the divisor supplied by a LoadVector of memory
// operand src2, folded into the match rule and passed to evmasked_op via
// $src2$$Address.
// NOTE(review): the `true` argument is presumably the merge-masking flag --
// confirm against the evmasked_op declaration.
24549 instruct vdiv_mem_masked(vec dst, memory src2, kReg mask) %{
24550   match(Set dst (DivVF (Binary dst (LoadVector src2)) mask));
24551   match(Set dst (DivVD (Binary dst (LoadVector src2)) mask));
24552   format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
24553   ins_encode %{
24554     int vlen_enc = vector_length_encoding(this);
24555     BasicType bt = Matcher::vector_element_basic_type(this);
24556     int opc = this->ideal_Opcode();
24557     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24558                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24559   %}
24560   ins_pipe( pipe_slow );
24561 %}
24562 
24563 
// Masked vector rotate by an 8-bit immediate.
// Despite the "vrol" name, this rule matches both RotateLeftV and
// RotateRightV; the direction is carried by the ideal opcode forwarded to
// evmasked_op. The rotate count is passed as $shift$$constant.
// NOTE(review): the `true` argument is presumably the merge-masking flag --
// confirm against the evmasked_op declaration.
24564 instruct vrol_imm_masked(vec dst, immI8 shift, kReg mask) %{
24565   match(Set dst (RotateLeftV (Binary dst shift) mask));
24566   match(Set dst (RotateRightV (Binary dst shift) mask));
24567   format %{ "vprotate_imm_masked $dst, $dst, $shift, $mask\t! rotate masked operation" %}
24568   ins_encode %{
24569     int vlen_enc = vector_length_encoding(this);
24570     BasicType bt = Matcher::vector_element_basic_type(this);
24571     int opc = this->ideal_Opcode();
24572     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24573                    $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24574   %}
24575   ins_pipe( pipe_slow );
24576 %}
24577 
// Masked vector rotate with the count supplied in a vector register.
// Despite the "vrol" name, this rule matches both RotateLeftV and
// RotateRightV; the direction is carried by the ideal opcode forwarded to
// evmasked_op.
// NOTE(review): the `true` argument is presumably the merge-masking flag --
// confirm against the evmasked_op declaration.
24578 instruct vrol_reg_masked(vec dst, vec src2, kReg mask) %{
24579   match(Set dst (RotateLeftV (Binary dst src2) mask));
24580   match(Set dst (RotateRightV (Binary dst src2) mask));
24581   format %{ "vrotate_masked $dst, $dst, $src2, $mask\t! rotate masked operation" %}
24582   ins_encode %{
24583     int vlen_enc = vector_length_encoding(this);
24584     BasicType bt = Matcher::vector_element_basic_type(this);
24585     int opc = this->ideal_Opcode();
24586     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24587                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24588   %}
24589   ins_pipe( pipe_slow );
24590 %}
24591 
// Masked vector left shift by an 8-bit immediate.
// Matches LShiftVS/LShiftVI/LShiftVL whose count input is a LShiftCntV of an
// immediate, so the count can be passed to evmasked_op as $shift$$constant
// instead of a register.
// NOTE(review): the `true` argument is presumably the merge-masking flag --
// confirm against the evmasked_op declaration.
24592 instruct vlshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
24593   match(Set dst (LShiftVS (Binary dst (LShiftCntV shift)) mask));
24594   match(Set dst (LShiftVI (Binary dst (LShiftCntV shift)) mask));
24595   match(Set dst (LShiftVL (Binary dst (LShiftCntV shift)) mask));
24596   format %{ "vplshift_imm_masked $dst, $dst, $shift, $mask\t! lshift masked operation" %}
24597   ins_encode %{
24598     int vlen_enc = vector_length_encoding(this);
24599     BasicType bt = Matcher::vector_element_basic_type(this);
24600     int opc = this->ideal_Opcode();
24601     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24602                    $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24603   %}
24604   ins_pipe( pipe_slow );
24605 %}
24606 
// Masked vector left shift with the count in a vector register, selected when
// the shift node is NOT a variable shift (see predicate).
// The trailing `false` passed to evmasked_op mirrors the predicate.
// NOTE(review): that trailing flag presumably tells evmasked_op whether the
// shift is per-lane variable, and the preceding `true` is presumably the
// merge-masking flag -- confirm against the evmasked_op declaration.
24607 instruct vlshift_reg_masked(vec dst, vec src2, kReg mask) %{
24608   predicate(!n->as_ShiftV()->is_var_shift());
24609   match(Set dst (LShiftVS (Binary dst src2) mask));
24610   match(Set dst (LShiftVI (Binary dst src2) mask));
24611   match(Set dst (LShiftVL (Binary dst src2) mask));
24612   format %{ "vplshift_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
24613   ins_encode %{
24614     int vlen_enc = vector_length_encoding(this);
24615     BasicType bt = Matcher::vector_element_basic_type(this);
24616     int opc = this->ideal_Opcode();
24617     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24618                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
24619   %}
24620   ins_pipe( pipe_slow );
24621 %}
24622 
// Masked vector left shift with the count in a vector register, selected when
// the shift node IS a variable shift (see predicate).
// Identical match rules to vlshift_reg_masked; only the predicate and the
// trailing `true` passed to evmasked_op differ.
// NOTE(review): that trailing flag presumably tells evmasked_op that the
// shift is per-lane variable, and the preceding `true` is presumably the
// merge-masking flag -- confirm against the evmasked_op declaration.
24623 instruct vlshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
24624   predicate(n->as_ShiftV()->is_var_shift());
24625   match(Set dst (LShiftVS (Binary dst src2) mask));
24626   match(Set dst (LShiftVI (Binary dst src2) mask));
24627   match(Set dst (LShiftVL (Binary dst src2) mask));
24628   format %{ "vplshiftv_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
24629   ins_encode %{
24630     int vlen_enc = vector_length_encoding(this);
24631     BasicType bt = Matcher::vector_element_basic_type(this);
24632     int opc = this->ideal_Opcode();
24633     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24634                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
24635   %}
24636   ins_pipe( pipe_slow );
24637 %}
24638 
// Masked vector right shift (RShiftV*) by an 8-bit immediate.
// Matches RShiftVS/RShiftVI/RShiftVL whose count input is a RShiftCntV of an
// immediate, so the count can be passed to evmasked_op as $shift$$constant.
// NOTE(review): the `true` argument is presumably the merge-masking flag --
// confirm against the evmasked_op declaration.
24639 instruct vrshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
24640   match(Set dst (RShiftVS (Binary dst (RShiftCntV shift)) mask));
24641   match(Set dst (RShiftVI (Binary dst (RShiftCntV shift)) mask));
24642   match(Set dst (RShiftVL (Binary dst (RShiftCntV shift)) mask));
24643   format %{ "vprshift_imm_masked $dst, $dst, $shift, $mask\t! rshift masked operation" %}
24644   ins_encode %{
24645     int vlen_enc = vector_length_encoding(this);
24646     BasicType bt = Matcher::vector_element_basic_type(this);
24647     int opc = this->ideal_Opcode();
24648     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24649                    $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24650   %}
24651   ins_pipe( pipe_slow );
24652 %}
24653 
// Masked vector right shift (RShiftV*) with the count in a vector register,
// selected when the shift node is NOT a variable shift (see predicate).
// The trailing `false` passed to evmasked_op mirrors the predicate.
// NOTE(review): that trailing flag presumably tells evmasked_op whether the
// shift is per-lane variable, and the preceding `true` is presumably the
// merge-masking flag -- confirm against the evmasked_op declaration.
24654 instruct vrshift_reg_masked(vec dst, vec src2, kReg mask) %{
24655   predicate(!n->as_ShiftV()->is_var_shift());
24656   match(Set dst (RShiftVS (Binary dst src2) mask));
24657   match(Set dst (RShiftVI (Binary dst src2) mask));
24658   match(Set dst (RShiftVL (Binary dst src2) mask));
24659   format %{ "vprshift_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
24660   ins_encode %{
24661     int vlen_enc = vector_length_encoding(this);
24662     BasicType bt = Matcher::vector_element_basic_type(this);
24663     int opc = this->ideal_Opcode();
24664     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24665                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
24666   %}
24667   ins_pipe( pipe_slow );
24668 %}
24669 
// Masked vector right shift (RShiftV*) with the count in a vector register,
// selected when the shift node IS a variable shift (see predicate).
// Identical match rules to vrshift_reg_masked; only the predicate and the
// trailing `true` passed to evmasked_op differ.
// NOTE(review): that trailing flag presumably tells evmasked_op that the
// shift is per-lane variable, and the preceding `true` is presumably the
// merge-masking flag -- confirm against the evmasked_op declaration.
24670 instruct vrshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
24671   predicate(n->as_ShiftV()->is_var_shift());
24672   match(Set dst (RShiftVS (Binary dst src2) mask));
24673   match(Set dst (RShiftVI (Binary dst src2) mask));
24674   match(Set dst (RShiftVL (Binary dst src2) mask));
24675   format %{ "vprshiftv_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
24676   ins_encode %{
24677     int vlen_enc = vector_length_encoding(this);
24678     BasicType bt = Matcher::vector_element_basic_type(this);
24679     int opc = this->ideal_Opcode();
24680     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24681                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
24682   %}
24683   ins_pipe( pipe_slow );
24684 %}
24685 
// Masked vector unsigned right shift (URShiftV*) by an 8-bit immediate.
// Matches URShiftVS/URShiftVI/URShiftVL whose count input is a RShiftCntV of
// an immediate (the same count node kind the RShiftV* immediate rule uses),
// so the count can be passed to evmasked_op as $shift$$constant.
// NOTE(review): the `true` argument is presumably the merge-masking flag --
// confirm against the evmasked_op declaration.
24686 instruct vurshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
24687   match(Set dst (URShiftVS (Binary dst (RShiftCntV shift)) mask));
24688   match(Set dst (URShiftVI (Binary dst (RShiftCntV shift)) mask));
24689   match(Set dst (URShiftVL (Binary dst (RShiftCntV shift)) mask));
24690   format %{ "vpurshift_imm_masked $dst, $dst, $shift, $mask\t! urshift masked operation" %}
24691   ins_encode %{
24692     int vlen_enc = vector_length_encoding(this);
24693     BasicType bt = Matcher::vector_element_basic_type(this);
24694     int opc = this->ideal_Opcode();
24695     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24696                    $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
24697   %}
24698   ins_pipe( pipe_slow );
24699 %}
24700 
// Masked vector unsigned right shift (URShiftV*) with the count in a vector
// register, selected when the shift node is NOT a variable shift (see
// predicate). The trailing `false` passed to evmasked_op mirrors the
// predicate.
// NOTE(review): that trailing flag presumably tells evmasked_op whether the
// shift is per-lane variable, and the preceding `true` is presumably the
// merge-masking flag -- confirm against the evmasked_op declaration.
24701 instruct vurshift_reg_masked(vec dst, vec src2, kReg mask) %{
24702   predicate(!n->as_ShiftV()->is_var_shift());
24703   match(Set dst (URShiftVS (Binary dst src2) mask));
24704   match(Set dst (URShiftVI (Binary dst src2) mask));
24705   match(Set dst (URShiftVL (Binary dst src2) mask));
24706   format %{ "vpurshift_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
24707   ins_encode %{
24708     int vlen_enc = vector_length_encoding(this);
24709     BasicType bt = Matcher::vector_element_basic_type(this);
24710     int opc = this->ideal_Opcode();
24711     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24712                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
24713   %}
24714   ins_pipe( pipe_slow );
24715 %}
24716 
// Masked vector unsigned right shift (URShiftV*) with the count in a vector
// register, selected when the shift node IS a variable shift (see predicate).
// Identical match rules to vurshift_reg_masked; only the predicate and the
// trailing `true` passed to evmasked_op differ.
// NOTE(review): that trailing flag presumably tells evmasked_op that the
// shift is per-lane variable, and the preceding `true` is presumably the
// merge-masking flag -- confirm against the evmasked_op declaration.
24717 instruct vurshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
24718   predicate(n->as_ShiftV()->is_var_shift());
24719   match(Set dst (URShiftVS (Binary dst src2) mask));
24720   match(Set dst (URShiftVI (Binary dst src2) mask));
24721   match(Set dst (URShiftVL (Binary dst src2) mask));
24722   format %{ "vpurshiftv_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
24723   ins_encode %{
24724     int vlen_enc = vector_length_encoding(this);
24725     BasicType bt = Matcher::vector_element_basic_type(this);
24726     int opc = this->ideal_Opcode();
24727     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24728                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
24729   %}
24730   ins_pipe( pipe_slow );
24731 %}
24732 
// Masked vector maximum, register-register form.
// Matches MaxV with the destination as first input and an opmask register as
// last input; forwards the ideal opcode and element type to evmasked_op with
// $dst as both destination and first source.
// NOTE(review): the `true` argument is presumably the merge-masking flag --
// confirm against the evmasked_op declaration.
24733 instruct vmaxv_reg_masked(vec dst, vec src2, kReg mask) %{
24734   match(Set dst (MaxV (Binary dst src2) mask));
24735   format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
24736   ins_encode %{
24737     int vlen_enc = vector_length_encoding(this);
24738     BasicType bt = Matcher::vector_element_basic_type(this);
24739     int opc = this->ideal_Opcode();
24740     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24741                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24742   %}
24743   ins_pipe( pipe_slow );
24744 %}
24745 
// Masked vector maximum, register-memory form.
// The second operand is a LoadVector of memory operand src2 folded into the
// match rule; its address is passed to evmasked_op via $src2$$Address.
// NOTE(review): the `true` argument is presumably the merge-masking flag --
// confirm against the evmasked_op declaration.
24746 instruct vmaxv_mem_masked(vec dst, memory src2, kReg mask) %{
24747   match(Set dst (MaxV (Binary dst (LoadVector src2)) mask));
24748   format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
24749   ins_encode %{
24750     int vlen_enc = vector_length_encoding(this);
24751     BasicType bt = Matcher::vector_element_basic_type(this);
24752     int opc = this->ideal_Opcode();
24753     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24754                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24755   %}
24756   ins_pipe( pipe_slow );
24757 %}
24758 
// Masked vector minimum, register-register form.
// Matches MinV with the destination as first input and an opmask register as
// last input; forwards the ideal opcode and element type to evmasked_op with
// $dst as both destination and first source.
// NOTE(review): the `true` argument is presumably the merge-masking flag --
// confirm against the evmasked_op declaration.
24759 instruct vminv_reg_masked(vec dst, vec src2, kReg mask) %{
24760   match(Set dst (MinV (Binary dst src2) mask));
24761   format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
24762   ins_encode %{
24763     int vlen_enc = vector_length_encoding(this);
24764     BasicType bt = Matcher::vector_element_basic_type(this);
24765     int opc = this->ideal_Opcode();
24766     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24767                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
24768   %}
24769   ins_pipe( pipe_slow );
24770 %}
24771 
// Masked vector minimum, register-memory form.
// The second operand is a LoadVector of memory operand src2 folded into the
// match rule; its address is passed to evmasked_op via $src2$$Address.
// NOTE(review): the `true` argument is presumably the merge-masking flag --
// confirm against the evmasked_op declaration.
24772 instruct vminv_mem_masked(vec dst, memory src2, kReg mask) %{
24773   match(Set dst (MinV (Binary dst (LoadVector src2)) mask));
24774   format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
24775   ins_encode %{
24776     int vlen_enc = vector_length_encoding(this);
24777     BasicType bt = Matcher::vector_element_basic_type(this);
24778     int opc = this->ideal_Opcode();
24779     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24780                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
24781   %}
24782   ins_pipe( pipe_slow );
24783 %}
24784 
// Masked vector rearrange (VectorRearrange) with dst as first input, src2 as
// second input and an opmask register.
// Unlike the neighbouring masked arithmetic rules, this one passes `false`
// in the boolean slot of evmasked_op.
// NOTE(review): that slot is presumably the merge-masking flag, which would
// make unselected lanes zeroed here rather than preserved -- confirm against
// the evmasked_op declaration.
24785 instruct vrearrangev_reg_masked(vec dst, vec src2, kReg mask) %{
24786   match(Set dst (VectorRearrange (Binary dst src2) mask));
24787   format %{ "vprearrange_masked $dst, $dst, $src2, $mask\t! rearrange masked operation" %}
24788   ins_encode %{
24789     int vlen_enc = vector_length_encoding(this);
24790     BasicType bt = Matcher::vector_element_basic_type(this);
24791     int opc = this->ideal_Opcode();
24792     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24793                    $dst$$XMMRegister, $src2$$XMMRegister, false, vlen_enc);
24794   %}
24795   ins_pipe( pipe_slow );
24796 %}
24797 
// Masked vector absolute value for integral element types
// (AbsVB / AbsVS / AbsVI / AbsVL), in-place on dst.
// The operation is unary, so $dst is passed in all three register slots of
// evmasked_op (destination, first source, second source).
// NOTE(review): the `true` argument is presumably the merge-masking flag --
// confirm against the evmasked_op declaration.
24798 instruct vabs_masked(vec dst, kReg mask) %{
24799   match(Set dst (AbsVB dst mask));
24800   match(Set dst (AbsVS dst mask));
24801   match(Set dst (AbsVI dst mask));
24802   match(Set dst (AbsVL dst mask));
24803   format %{ "vabs_masked $dst, $mask \t! vabs masked operation" %}
24804   ins_encode %{
24805     int vlen_enc = vector_length_encoding(this);
24806     BasicType bt = Matcher::vector_element_basic_type(this);
24807     int opc = this->ideal_Opcode();
24808     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24809                    $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
24810   %}
24811   ins_pipe( pipe_slow );
24812 %}
24813 
// Masked vector fused multiply-add (FmaVF / FmaVD), all-register form.
// The match tree packs the four inputs as (Binary dst src2) and
// (Binary src3 mask); dst is both an input and the result.
// The encoding asserts UseFMA and passes $dst, $src2 and $src3 to evmasked_op
// in that order.
// NOTE(review): the `true` argument is presumably the merge-masking flag --
// confirm against the evmasked_op declaration.
24814 instruct vfma_reg_masked(vec dst, vec src2, vec src3, kReg mask) %{
24815   match(Set dst (FmaVF (Binary dst src2) (Binary src3 mask)));
24816   match(Set dst (FmaVD (Binary dst src2) (Binary src3 mask)));
24817   format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
24818   ins_encode %{
24819     assert(UseFMA, "Needs FMA instructions support.");
24820     int vlen_enc = vector_length_encoding(this);
24821     BasicType bt = Matcher::vector_element_basic_type(this);
24822     int opc = this->ideal_Opcode();
24823     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24824                    $src2$$XMMRegister, $src3$$XMMRegister, true, vlen_enc);
24825   %}
24826   ins_pipe( pipe_slow );
24827 %}
24828 
// Masked vector fused multiply-add (FmaVF / FmaVD) with the third operand in
// memory.
// Same tree shape as the all-register form, except src3 is a LoadVector of a
// memory operand folded into the match rule and passed to evmasked_op via
// $src3$$Address. The encoding asserts UseFMA.
// NOTE(review): the `true` argument is presumably the merge-masking flag --
// confirm against the evmasked_op declaration.
24829 instruct vfma_mem_masked(vec dst, vec src2, memory src3, kReg mask) %{
24830   match(Set dst (FmaVF (Binary dst src2) (Binary (LoadVector src3) mask)));
24831   match(Set dst (FmaVD (Binary dst src2) (Binary (LoadVector src3) mask)));
24832   format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
24833   ins_encode %{
24834     assert(UseFMA, "Needs FMA instructions support.");
24835     int vlen_enc = vector_length_encoding(this);
24836     BasicType bt = Matcher::vector_element_basic_type(this);
24837     int opc = this->ideal_Opcode();
24838     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
24839                    $src2$$XMMRegister, $src3$$Address, true, vlen_enc);
24840   %}
24841   ins_pipe( pipe_slow );
24842 %}
24843 
24844 instruct evcmp_masked(kReg dst, vec src1, vec src2, immI8 cond, kReg mask) %{
24845   match(Set dst (VectorMaskCmp (Binary src1 src2) (Binary cond mask)));
24846   format %{ "vcmp_masked $dst, $src1, $src2, $cond, $mask" %}
24847   ins_encode %{
24848     assert(bottom_type()->isa_pvectmask(), "TypePVectMask expected");
24849     int vlen_enc = vector_length_encoding(this, $src1);
24850     BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
24851 
24852     // Comparison i
24853     switch (src1_elem_bt) {
24854       case T_BYTE: {
24855         bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24856         Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24857         __ evpcmpb($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24858         break;
24859       }
24860       case T_SHORT: {
24861         bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24862         Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24863         __ evpcmpw($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24864         break;
24865       }
24866       case T_INT: {
24867         bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24868         Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24869         __ evpcmpd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24870         break;
24871       }
24872       case T_LONG: {
24873         bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
24874         Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
24875         __ evpcmpq($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
24876         break;
24877       }
24878       case T_FLOAT: {
24879         Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
24880         __ evcmpps($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
24881         break;
24882       }
24883       case T_DOUBLE: {
24884         Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
24885         __ evcmppd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
24886         break;
24887       }
24888       default: assert(false, "%s", type2name(src1_elem_bt)); break;
24889     }
24890   %}
24891   ins_pipe( pipe_slow );
24892 %}
24893 
24894 instruct mask_all_evexI_LE32(kReg dst, rRegI src) %{
24895   predicate(Matcher::vector_length(n) <= 32);
24896   match(Set dst (MaskAll src));
24897   format %{ "mask_all_evexI_LE32 $dst, $src \t" %}
24898   ins_encode %{
24899     int mask_len = Matcher::vector_length(this);
24900     __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
24901   %}
24902   ins_pipe( pipe_slow );
24903 %}
24904 
24905 instruct mask_not_immLT8(kReg dst, kReg src, rRegI rtmp, kReg ktmp, immI_M1 cnt) %{
24906   predicate(Matcher::vector_length(n) < 8 && VM_Version::supports_avx512dq());
24907   match(Set dst (XorVMask src (MaskAll cnt)));
24908   effect(TEMP_DEF dst, TEMP rtmp, TEMP ktmp);
24909   format %{ "mask_not_LT8 $dst, $src, $cnt \t!using $ktmp and $rtmp as TEMP" %}
24910   ins_encode %{
24911     uint masklen = Matcher::vector_length(this);
24912     __ knot(masklen, $dst$$KRegister, $src$$KRegister, $ktmp$$KRegister, $rtmp$$Register);
24913   %}
24914   ins_pipe( pipe_slow );
24915 %}
24916 
24917 instruct mask_not_imm(kReg dst, kReg src, immI_M1 cnt) %{
24918   predicate((Matcher::vector_length(n) == 8 && VM_Version::supports_avx512dq()) ||
24919             (Matcher::vector_length(n) == 16) ||
24920             (Matcher::vector_length(n) > 16 && VM_Version::supports_avx512bw()));
24921   match(Set dst (XorVMask src (MaskAll cnt)));
24922   format %{ "mask_not $dst, $src, $cnt \t! mask not operation" %}
24923   ins_encode %{
24924     uint masklen = Matcher::vector_length(this);
24925     __ knot(masklen, $dst$$KRegister, $src$$KRegister);
24926   %}
24927   ins_pipe( pipe_slow );
24928 %}
24929 
24930 instruct long_to_maskLE8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2) %{
24931   predicate(n->bottom_type()->isa_pvectmask() == nullptr && Matcher::vector_length(n) <= 8);
24932   match(Set dst (VectorLongToMask src));
24933   effect(TEMP dst, TEMP rtmp1, TEMP rtmp2);
24934   format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2" %}
24935   ins_encode %{
24936     int mask_len = Matcher::vector_length(this);
24937     int vec_enc  = vector_length_encoding(mask_len);
24938     __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
24939                               $rtmp2$$Register, xnoreg, mask_len, vec_enc);
24940   %}
24941   ins_pipe( pipe_slow );
24942 %}
24943 
24944 
24945 instruct long_to_maskGT8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp1, rFlagsReg cr) %{
24946   predicate(n->bottom_type()->isa_pvectmask() == nullptr && Matcher::vector_length(n) > 8);
24947   match(Set dst (VectorLongToMask src));
24948   effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp1, KILL cr);
24949   format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp1, as TEMP" %}
24950   ins_encode %{
24951     int mask_len = Matcher::vector_length(this);
24952     assert(mask_len <= 32, "invalid mask length");
24953     int vec_enc  = vector_length_encoding(mask_len);
24954     __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
24955                               $rtmp2$$Register, $xtmp1$$XMMRegister, mask_len, vec_enc);
24956   %}
24957   ins_pipe( pipe_slow );
24958 %}
24959 
24960 instruct long_to_mask_evex(kReg dst, rRegL src) %{
24961   predicate(n->bottom_type()->isa_pvectmask());
24962   match(Set dst (VectorLongToMask src));
24963   format %{ "long_to_mask_evex $dst, $src\t!" %}
24964   ins_encode %{
24965     __ kmov($dst$$KRegister, $src$$Register);
24966   %}
24967   ins_pipe( pipe_slow );
24968 %}
24969 
24970 instruct mask_opers_evex(kReg dst, kReg src1, kReg src2, kReg kscratch) %{
24971   match(Set dst (AndVMask src1 src2));
24972   match(Set dst (OrVMask src1 src2));
24973   match(Set dst (XorVMask src1 src2));
24974   effect(TEMP kscratch);
24975   format %{ "mask_opers_evex $dst, $src1, $src2\t! using $kscratch as TEMP" %}
24976   ins_encode %{
24977     const MachNode* mask1 = static_cast<const MachNode*>(this->in(this->operand_index($src1)));
24978     const MachNode* mask2 = static_cast<const MachNode*>(this->in(this->operand_index($src2)));
24979     assert(Type::equals(mask1->bottom_type(), mask2->bottom_type()), "Mask types must be equal");
24980     uint masklen = Matcher::vector_length(this);
24981     masklen = (masklen < 16 && !VM_Version::supports_avx512dq()) ? 16 : masklen;
24982     __ masked_op(this->ideal_Opcode(), masklen, $dst$$KRegister, $src1$$KRegister, $src2$$KRegister);
24983   %}
24984   ins_pipe( pipe_slow );
24985 %}
24986 
24987 instruct vternlog_reg_masked(vec dst, vec src2, vec src3, immU8 func, kReg mask) %{
24988   match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
24989   format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
24990   ins_encode %{
24991     int vlen_enc = vector_length_encoding(this);
24992     BasicType bt = Matcher::vector_element_basic_type(this);
24993     __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
24994                   $src2$$XMMRegister, $src3$$XMMRegister, true, bt, vlen_enc);
24995   %}
24996   ins_pipe( pipe_slow );
24997 %}
24998 
24999 instruct vternlogd_mem_masked(vec dst, vec src2, memory src3, immU8 func, kReg mask) %{
25000   match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
25001   format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
25002   ins_encode %{
25003     int vlen_enc = vector_length_encoding(this);
25004     BasicType bt = Matcher::vector_element_basic_type(this);
25005     __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
25006                   $src2$$XMMRegister, $src3$$Address, true, bt, vlen_enc);
25007   %}
25008   ins_pipe( pipe_slow );
25009 %}
25010 
25011 instruct castMM(kReg dst)
25012 %{
25013   match(Set dst (CastVV dst));
25014 
25015   size(0);
25016   format %{ "# castVV of $dst" %}
25017   ins_encode(/* empty encoding */);
25018   ins_cost(0);
25019   ins_pipe(empty);
25020 %}
25021 
25022 instruct castVV(vec dst)
25023 %{
25024   match(Set dst (CastVV dst));
25025 
25026   size(0);
25027   format %{ "# castVV of $dst" %}
25028   ins_encode(/* empty encoding */);
25029   ins_cost(0);
25030   ins_pipe(empty);
25031 %}
25032 
25033 instruct castVVLeg(legVec dst)
25034 %{
25035   match(Set dst (CastVV dst));
25036 
25037   size(0);
25038   format %{ "# castVV of $dst" %}
25039   ins_encode(/* empty encoding */);
25040   ins_cost(0);
25041   ins_pipe(empty);
25042 %}
25043 
25044 instruct FloatClassCheck_reg_reg_vfpclass(rRegI dst, regF src, kReg ktmp, rFlagsReg cr)
25045 %{
25046   match(Set dst (IsInfiniteF src));
25047   effect(TEMP ktmp, KILL cr);
25048   format %{ "float_class_check $dst, $src" %}
25049   ins_encode %{
25050     __ vfpclassss($ktmp$$KRegister, $src$$XMMRegister, 0x18);
25051     __ kmovbl($dst$$Register, $ktmp$$KRegister);
25052   %}
25053   ins_pipe(pipe_slow);
25054 %}
25055 
25056 instruct DoubleClassCheck_reg_reg_vfpclass(rRegI dst, regD src, kReg ktmp, rFlagsReg cr)
25057 %{
25058   match(Set dst (IsInfiniteD src));
25059   effect(TEMP ktmp, KILL cr);
25060   format %{ "double_class_check $dst, $src" %}
25061   ins_encode %{
25062     __ vfpclasssd($ktmp$$KRegister, $src$$XMMRegister, 0x18);
25063     __ kmovbl($dst$$Register, $ktmp$$KRegister);
25064   %}
25065   ins_pipe(pipe_slow);
25066 %}
25067 
25068 instruct vector_addsub_saturating_subword_reg(vec dst, vec src1, vec src2)
25069 %{
25070   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25071             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25072   match(Set dst (SaturatingAddV src1 src2));
25073   match(Set dst (SaturatingSubV src1 src2));
25074   format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
25075   ins_encode %{
25076     int vlen_enc = vector_length_encoding(this);
25077     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25078     __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25079                             $src1$$XMMRegister, $src2$$XMMRegister, false, vlen_enc);
25080   %}
25081   ins_pipe(pipe_slow);
25082 %}
25083 
25084 instruct vector_addsub_saturating_unsigned_subword_reg(vec dst, vec src1, vec src2)
25085 %{
25086   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25087             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25088   match(Set dst (SaturatingAddV src1 src2));
25089   match(Set dst (SaturatingSubV src1 src2));
25090   format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
25091   ins_encode %{
25092     int vlen_enc = vector_length_encoding(this);
25093     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25094     __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25095                             $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
25096   %}
25097   ins_pipe(pipe_slow);
25098 %}
25099 
25100 instruct vector_addsub_saturating_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2)
25101 %{
25102   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25103             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
25104             (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
25105   match(Set dst (SaturatingAddV src1 src2));
25106   match(Set dst (SaturatingSubV src1 src2));
25107   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2);
25108   format %{ "vector_addsub_saturating_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
25109   ins_encode %{
25110     int vlen_enc = vector_length_encoding(this);
25111     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25112     __ vector_addsub_dq_saturating_evex(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25113                                         $src1$$XMMRegister, $src2$$XMMRegister,
25114                                         $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
25115                                         $ktmp1$$KRegister, $ktmp2$$KRegister, vlen_enc);
25116   %}
25117   ins_pipe(pipe_slow);
25118 %}
25119 
25120 instruct vector_addsub_saturating_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4)
25121 %{
25122   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25123             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
25124             Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
25125   match(Set dst (SaturatingAddV src1 src2));
25126   match(Set dst (SaturatingSubV src1 src2));
25127   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4);
25128   format %{ "vector_addsub_saturating_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
25129   ins_encode %{
25130     int vlen_enc = vector_length_encoding(this);
25131     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25132     __ vector_addsub_dq_saturating_avx(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister, $src1$$XMMRegister,
25133                                        $src2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
25134                                        $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, vlen_enc);
25135   %}
25136   ins_pipe(pipe_slow);
25137 %}
25138 
25139 instruct vector_add_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp)
25140 %{
25141   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25142             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
25143             (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
25144   match(Set dst (SaturatingAddV src1 src2));
25145   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp);
25146   format %{ "vector_add_saturating_unsigned_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $ktmp as TEMP" %}
25147   ins_encode %{
25148     int vlen_enc = vector_length_encoding(this);
25149     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25150     __ vector_add_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
25151                                               $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp$$KRegister, vlen_enc);
25152   %}
25153   ins_pipe(pipe_slow);
25154 %}
25155 
25156 instruct vector_add_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3)
25157 %{
25158   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25159             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
25160             Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
25161   match(Set dst (SaturatingAddV src1 src2));
25162   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
25163   format %{ "vector_add_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
25164   ins_encode %{
25165     int vlen_enc = vector_length_encoding(this);
25166     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25167     __ vector_add_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
25168                                              $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, vlen_enc);
25169   %}
25170   ins_pipe(pipe_slow);
25171 %}
25172 
25173 instruct vector_sub_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, kReg ktmp)
25174 %{
25175   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25176             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
25177             (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
25178   match(Set dst (SaturatingSubV src1 src2));
25179   effect(TEMP ktmp);
25180   format %{ "vector_sub_saturating_unsigned_evex $dst, $src1, $src2 \t! using $ktmp as TEMP" %}
25181   ins_encode %{
25182     int vlen_enc = vector_length_encoding(this);
25183     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25184     __ vector_sub_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister,
25185                                               $src2$$XMMRegister, $ktmp$$KRegister, vlen_enc);
25186   %}
25187   ins_pipe(pipe_slow);
25188 %}
25189 
25190 instruct vector_sub_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2)
25191 %{
25192   predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
25193             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
25194             Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
25195   match(Set dst (SaturatingSubV src1 src2));
25196   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
25197   format %{ "vector_sub_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1 and $xtmp2 as TEMP" %}
25198   ins_encode %{
25199     int vlen_enc = vector_length_encoding(this);
25200     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25201     __ vector_sub_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
25202                                              $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
25203   %}
25204   ins_pipe(pipe_slow);
25205 %}
25206 
25207 instruct vector_addsub_saturating_subword_mem(vec dst, vec src1, memory src2)
25208 %{
25209   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25210             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25211   match(Set dst (SaturatingAddV src1 (LoadVector src2)));
25212   match(Set dst (SaturatingSubV src1 (LoadVector src2)));
25213   format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
25214   ins_encode %{
25215     int vlen_enc = vector_length_encoding(this);
25216     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25217     __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25218                             $src1$$XMMRegister, $src2$$Address, false, vlen_enc);
25219   %}
25220   ins_pipe(pipe_slow);
25221 %}
25222 
25223 instruct vector_addsub_saturating_unsigned_subword_mem(vec dst, vec src1, memory src2)
25224 %{
25225   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25226             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25227   match(Set dst (SaturatingAddV src1 (LoadVector src2)));
25228   match(Set dst (SaturatingSubV src1 (LoadVector src2)));
25229   format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
25230   ins_encode %{
25231     int vlen_enc = vector_length_encoding(this);
25232     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25233     __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
25234                             $src1$$XMMRegister, $src2$$Address, true, vlen_enc);
25235   %}
25236   ins_pipe(pipe_slow);
25237 %}
25238 
25239 instruct vector_addsub_saturating_subword_masked_reg(vec dst, vec src, kReg mask) %{
25240   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25241             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25242   match(Set dst (SaturatingAddV (Binary dst src) mask));
25243   match(Set dst (SaturatingSubV (Binary dst src) mask));
25244   format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
25245   ins_encode %{
25246     int vlen_enc = vector_length_encoding(this);
25247     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25248     __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25249                               $dst$$XMMRegister, $src$$XMMRegister, false, true, vlen_enc);
25250   %}
25251   ins_pipe( pipe_slow );
25252 %}
25253 
25254 instruct vector_addsub_saturating_unsigned_subword_masked_reg(vec dst, vec src, kReg mask) %{
25255   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25256             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25257   match(Set dst (SaturatingAddV (Binary dst src) mask));
25258   match(Set dst (SaturatingSubV (Binary dst src) mask));
25259   format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
25260   ins_encode %{
25261     int vlen_enc = vector_length_encoding(this);
25262     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25263     __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25264                               $dst$$XMMRegister, $src$$XMMRegister, true, true, vlen_enc);
25265   %}
25266   ins_pipe( pipe_slow );
25267 %}
25268 
25269 instruct vector_addsub_saturating_subword_masked_mem(vec dst, memory src, kReg mask) %{
25270   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25271             n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
25272   match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
25273   match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
25274   format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
25275   ins_encode %{
25276     int vlen_enc = vector_length_encoding(this);
25277     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25278     __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25279                               $dst$$XMMRegister, $src$$Address, false, true, vlen_enc);
25280   %}
25281   ins_pipe( pipe_slow );
25282 %}
25283 
25284 instruct vector_addsub_saturating_unsigned_subword_masked_mem(vec dst, memory src, kReg mask) %{
25285   predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
25286             n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
25287   match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
25288   match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
25289   format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
25290   ins_encode %{
25291     int vlen_enc = vector_length_encoding(this);
25292     BasicType elem_bt = Matcher::vector_element_basic_type(this);
25293     __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
25294                               $dst$$XMMRegister, $src$$Address, true, true, vlen_enc);
25295   %}
25296   ins_pipe( pipe_slow );
25297 %}
25298 
25299 instruct vector_selectfrom_twovectors_reg_evex(vec index, vec src1, vec src2)
25300 %{
25301   match(Set index (SelectFromTwoVector (Binary index src1) src2));
25302   format %{ "select_from_two_vector $index, $src1, $src2 \t!" %}
25303   ins_encode %{
25304     int vlen_enc = vector_length_encoding(this);
25305     BasicType bt = Matcher::vector_element_basic_type(this);
25306     __ select_from_two_vectors_evex(bt, $index$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
25307   %}
25308   ins_pipe(pipe_slow);
25309 %}
25310 
25311 instruct reinterpretS2HF(regF dst, rRegI src)
25312 %{
25313   match(Set dst (ReinterpretS2HF src));
25314   format %{ "evmovw $dst, $src" %}
25315   ins_encode %{
25316     __ evmovw($dst$$XMMRegister, $src$$Register);
25317   %}
25318   ins_pipe(pipe_slow);
25319 %}
25320 
25321 instruct reinterpretHF2S(rRegI dst, regF src)
25322 %{
25323   match(Set dst (ReinterpretHF2S src));
25324   format %{ "evmovw $dst, $src" %}
25325   ins_encode %{
25326     __ evmovw($dst$$Register, $src$$XMMRegister);
25327     __ narrow_subword_type($dst$$Register, T_SHORT);
25328   %}
25329   ins_pipe(pipe_slow);
25330 %}
25331 
25332 instruct convF2HFAndS2HF(regF dst, regF src)
25333 %{
25334   match(Set dst (ReinterpretS2HF (ConvF2HF src)));
25335   format %{ "convF2HFAndS2HF $dst, $src" %}
25336   ins_encode %{
25337     __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
25338   %}
25339   ins_pipe(pipe_slow);
25340 %}
25341 
25342 instruct convHF2SAndHF2F(regF dst, regF src)
25343 %{
25344   match(Set dst (ConvHF2F (ReinterpretHF2S src)));
25345   format %{ "convHF2SAndHF2F $dst, $src" %}
25346   ins_encode %{
25347     __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, Assembler::AVX_128bit);
25348   %}
25349   ins_pipe(pipe_slow);
25350 %}
25351 
25352 instruct scalar_sqrt_HF_reg(regF dst, regF src)
25353 %{
25354   match(Set dst (SqrtHF src));
25355   format %{ "scalar_sqrt_fp16 $dst, $src" %}
25356   ins_encode %{
25357     __ vsqrtsh($dst$$XMMRegister, $src$$XMMRegister);
25358   %}
25359   ins_pipe(pipe_slow);
25360 %}
25361 
25362 instruct scalar_binOps_HF_reg(regF dst, regF src1, regF src2)
25363 %{
25364   match(Set dst (AddHF src1 src2));
25365   match(Set dst (DivHF src1 src2));
25366   match(Set dst (MulHF src1 src2));
25367   match(Set dst (SubHF src1 src2));
25368   format %{ "scalar_binop_fp16 $dst, $src1, $src2" %}
25369   ins_encode %{
25370     int opcode = this->ideal_Opcode();
25371     __ efp16sh(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
25372   %}
25373   ins_pipe(pipe_slow);
25374 %}
25375 
25376 instruct scalar_minmax_HF_reg_avx10_2(regF dst, regF src1, regF src2)
25377 %{
25378   predicate(VM_Version::supports_avx10_2());
25379   match(Set dst (MaxHF src1 src2));
25380   match(Set dst (MinHF src1 src2));
25381 
25382   format %{ "scalar_min_max_fp16 $dst, $src1, $src2" %}
25383   ins_encode %{
25384     int opcode = this->ideal_Opcode();
25385     __ sminmax_fp16_avx10_2(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, k0);
25386   %}
25387   ins_pipe( pipe_slow );
25388 %}
25389 
25390 instruct scalar_minmax_HF_reg(regF dst, regF src1, regF src2, kReg ktmp, regF xtmp1, regF xtmp2)
25391 %{
25392   predicate(!VM_Version::supports_avx10_2());
25393   match(Set dst (MaxHF src1 src2));
25394   match(Set dst (MinHF src1 src2));
25395   effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);
25396 
25397   format %{ "scalar_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
25398   ins_encode %{
25399     int opcode = this->ideal_Opcode();
25400     __ sminmax_fp16(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $ktmp$$KRegister,
25401                     $xtmp1$$XMMRegister, $xtmp2$$XMMRegister);
25402   %}
25403   ins_pipe( pipe_slow );
25404 %}
25405 
25406 instruct scalar_fma_HF_reg(regF dst, regF src1, regF src2)
25407 %{
25408   match(Set dst (FmaHF  src2 (Binary dst src1)));
25409   effect(DEF dst);
25410   format %{ "scalar_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
25411   ins_encode %{
25412     __ vfmadd132sh($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister);
25413   %}
25414   ins_pipe( pipe_slow );
25415 %}
25416 
25417 
25418 instruct vector_sqrt_HF_reg(vec dst, vec src)
25419 %{
25420   match(Set dst (SqrtVHF src));
25421   format %{ "vector_sqrt_fp16 $dst, $src" %}
25422   ins_encode %{
25423     int vlen_enc = vector_length_encoding(this);
25424     __ evsqrtph($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
25425   %}
25426   ins_pipe(pipe_slow);
25427 %}
25428 
25429 instruct vector_sqrt_HF_mem(vec dst, memory src)
25430 %{
25431   match(Set dst (SqrtVHF (VectorReinterpret (LoadVector src))));
25432   format %{ "vector_sqrt_fp16_mem $dst, $src" %}
25433   ins_encode %{
25434     int vlen_enc = vector_length_encoding(this);
25435     __ evsqrtph($dst$$XMMRegister, $src$$Address, vlen_enc);
25436   %}
25437   ins_pipe(pipe_slow);
25438 %}
25439 
25440 instruct vector_binOps_HF_reg(vec dst, vec src1, vec src2)
25441 %{
25442   match(Set dst (AddVHF src1 src2));
25443   match(Set dst (DivVHF src1 src2));
25444   match(Set dst (MulVHF src1 src2));
25445   match(Set dst (SubVHF src1 src2));
25446   format %{ "vector_binop_fp16 $dst, $src1, $src2" %}
25447   ins_encode %{
25448     int vlen_enc = vector_length_encoding(this);
25449     int opcode = this->ideal_Opcode();
25450     __ evfp16ph(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
25451   %}
25452   ins_pipe(pipe_slow);
25453 %}
25454 
25455 
25456 instruct vector_binOps_HF_mem(vec dst, vec src1, memory src2)
25457 %{
25458   match(Set dst (AddVHF src1 (VectorReinterpret (LoadVector src2))));
25459   match(Set dst (DivVHF src1 (VectorReinterpret (LoadVector src2))));
25460   match(Set dst (MulVHF src1 (VectorReinterpret (LoadVector src2))));
25461   match(Set dst (SubVHF src1 (VectorReinterpret (LoadVector src2))));
25462   format %{ "vector_binop_fp16_mem $dst, $src1, $src2" %}
25463   ins_encode %{
25464     int vlen_enc = vector_length_encoding(this);
25465     int opcode = this->ideal_Opcode();
25466     __ evfp16ph(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address, vlen_enc);
25467   %}
25468   ins_pipe(pipe_slow);
25469 %}
25470 
25471 instruct vector_fma_HF_reg(vec dst, vec src1, vec src2)
25472 %{
25473   match(Set dst (FmaVHF src2 (Binary dst src1)));
25474   format %{ "vector_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
25475   ins_encode %{
25476     int vlen_enc = vector_length_encoding(this);
25477     __ evfmadd132ph($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, vlen_enc);
25478   %}
25479   ins_pipe( pipe_slow );
25480 %}
25481 
25482 instruct vector_fma_HF_mem(vec dst, memory src1, vec src2)
25483 %{
25484   match(Set dst (FmaVHF src2 (Binary dst (VectorReinterpret (LoadVector src1)))));
25485   format %{ "vector_fma_fp16_mem $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
25486   ins_encode %{
25487     int vlen_enc = vector_length_encoding(this);
25488     __ evfmadd132ph($dst$$XMMRegister, $src2$$XMMRegister, $src1$$Address, vlen_enc);
25489   %}
25490   ins_pipe( pipe_slow );
25491 %}
25492 
25493 instruct vector_minmax_HF_mem_avx10_2(vec dst, vec src1, memory src2)
25494 %{
        // Packed FP16 min/max with the second input taken from memory. Only
        // selectable when VM_Version::supports_avx10_2() is true. One rule covers
        // both MinVHF and MaxVHF; the ideal opcode is forwarded to the macro
        // assembler helper together with the operands.
25495   predicate(VM_Version::supports_avx10_2());
25496   match(Set dst (MinVHF src1 (VectorReinterpret (LoadVector src2))));
25497   match(Set dst (MaxVHF src1 (VectorReinterpret (LoadVector src2))));
25498   format %{ "vector_min_max_fp16_mem $dst, $src1, $src2" %}
25499   ins_encode %{
25500     int vlen_enc = vector_length_encoding(this);
25501     int opcode = this->ideal_Opcode();
          // NOTE(review): k0 is passed as the mask register argument; presumably
          // this means "no masking" - confirm against vminmax_fp16_avx10_2.
25502     __ vminmax_fp16_avx10_2(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address,
25503                             k0, vlen_enc);
25504   %}
25505   ins_pipe( pipe_slow );
25506 %}
25507 
25508 instruct vector_minmax_HF_reg_avx10_2(vec dst, vec src1, vec src2)
25509 %{
        // Packed FP16 min/max with both inputs in vector registers. Only
        // selectable when VM_Version::supports_avx10_2() is true; the rule with
        // the same match patterns and the negated predicate is
        // vector_minmax_HF_reg. No TEMP operands are declared for this form.
25510   predicate(VM_Version::supports_avx10_2());
25511   match(Set dst (MinVHF src1 src2));
25512   match(Set dst (MaxVHF src1 src2));
25513   format %{ "vector_min_max_fp16 $dst, $src1, $src2" %}
25514   ins_encode %{
25515     int vlen_enc = vector_length_encoding(this);
          // The ideal opcode (MinVHF or MaxVHF) is handed to the helper.
25516     int opcode = this->ideal_Opcode();
          // NOTE(review): k0 is passed as the mask register argument; presumably
          // this means "no masking" - confirm against vminmax_fp16_avx10_2.
25517     __ vminmax_fp16_avx10_2(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
25518                             k0, vlen_enc);
25519   %}
25520   ins_pipe( pipe_slow );
25521 %}
25522 
25523 instruct vector_minmax_HF_reg(vec dst, vec src1, vec src2, kReg ktmp, vec xtmp1, vec xtmp2)
25524 %{
        // Packed FP16 min/max for CPUs where VM_Version::supports_avx10_2() is
        // false. Unlike the AVX10.2 rule, this one needs a mask register and two
        // vector registers as temporaries, all of which are passed to the
        // vminmax_fp16 helper.
25525   predicate(!VM_Version::supports_avx10_2());
25526   match(Set dst (MinVHF src1 src2));
25527   match(Set dst (MaxVHF src1 src2));
        // NOTE(review): TEMP_DEF on dst presumably keeps dst from sharing a
        // register with the inputs - confirm against the adlc effect semantics.
25528   effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);
25529   format %{ "vector_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
25530   ins_encode %{
25531     int vlen_enc = vector_length_encoding(this);
25532     int opcode = this->ideal_Opcode();
          // NOTE(review): src2 is passed ahead of src1 here, the reverse of the
          // operand order in the match rules; presumably intentional for the
          // helper's operand convention - confirm against vminmax_fp16.
25533     __ vminmax_fp16(opcode, $dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister, $ktmp$$KRegister,
25534                     $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
25535   %}
25536   ins_pipe( pipe_slow );
25537 %}
25538 
25539 //----------PEEPHOLE RULES-----------------------------------------------------
25540 // These must follow all instruction definitions as they use the names
25541 // defined in the instructions definitions.
25542 //
25543 // peeppredicate ( rule_predicate );
25544 // // the predicate that must hold for the peephole rule to apply; otherwise the rule is ignored
25545 //
25546 // peepmatch ( root_instr_name [preceding_instruction]* );
25547 //
25548 // peepprocedure ( procedure_name );
25549 // // provide the name of a procedure that performs the optimization. The procedure
25550 // // should reside in the architecture-dependent peephole file. The method has the
25551 // // signature of MachNode* (Block*, int, PhaseRegAlloc*, (MachNode*)(*)(), int...)
25552 // // with the arguments being the basic block, the current node index inside the
25553 // // block, the register allocator, functions that, when invoked, return a new node
25554 // // defined in peepreplace, and the rules of the nodes appearing in the
25555 // // corresponding peepmatch. The function returns true if successful, else
25556 // // it returns false
25557 //
25558 // peepconstraint %{
25559 // (instruction_number.operand_name relational_op instruction_number.operand_name
25560 //  [, ...] );
25561 // // instruction numbers are zero-based using left to right order in peepmatch
25562 //
25563 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
25564 // // provide an instruction_number.operand_name for each operand that appears
25565 // // in the replacement instruction's match rule
25566 //
25567 // ---------VM FLAGS---------------------------------------------------------
25568 //
25569 // All peephole optimizations can be turned off using -XX:-OptoPeephole
25570 //
25571 // Each peephole rule is given an identifying number starting with zero and
25572 // increasing by one in the order seen by the parser.  An individual peephole
25573 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
25574 // on the command-line.
25575 //
25576 // ---------CURRENT LIMITATIONS----------------------------------------------
25577 //
25578 // Only transformations inside a basic block (do we need more for peephole?)
25579 //
25580 // ---------EXAMPLE----------------------------------------------------------
25581 //
25582 // // pertinent parts of existing instructions in architecture description
25583 // instruct movI(rRegI dst, rRegI src)
25584 // %{
25585 //   match(Set dst (CopyI src));
25586 // %}
25587 //
25588 // instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
25589 // %{
25590 //   match(Set dst (AddI dst src));
25591 //   effect(KILL cr);
25592 // %}
25593 //
25594 // instruct leaI_rReg_immI(rRegI dst, immI_1 src)
25595 // %{
25596 //   match(Set dst (AddI dst src));
25597 // %}
25598 //
25599 // 1. Simple replacement
25600 // - Only match adjacent instructions in same basic block
25601 // - Only equality constraints
25602 // - Only constraints between operands, not (0.dest_reg == RAX_enc)
25603 // - Only one replacement instruction
25604 //
25605 // // Change (inc mov) to lea
25606 // peephole %{
25607 //   // lea should only be emitted when beneficial
25608 //   peeppredicate( VM_Version::supports_fast_2op_lea() );
25609 //   // increment preceded by register-register move
25610 //   peepmatch ( incI_rReg movI );
25611 //   // require that the destination register of the increment
25612 //   // match the destination register of the move
25613 //   peepconstraint ( 0.dst == 1.dst );
25614 //   // construct a replacement instruction that sets
25615 //   // the destination to ( move's source register + one )
25616 //   peepreplace ( leaI_rReg_immI( 0.dst 1.src 0.src ) );
25617 // %}
25618 //
25619 // 2. Procedural replacement
25620 // - More flexible in finding relevant nodes
25621 // - More flexible constraints
25622 // - More flexible transformations
25623 // - May utilise architecture-dependent API more effectively
25624 // - Currently only one replacement instruction due to adlc parsing capabilities
25625 //
25626 // // Change (inc mov) to lea
25627 // peephole %{
25628 //   // lea should only be emitted when beneficial
25629 //   peeppredicate( VM_Version::supports_fast_2op_lea() );
25630 //   // the rule numbers of these nodes inside are passed into the function below
25631 //   peepmatch ( incI_rReg movI );
25632 //   // the method that takes the responsibility of transformation
25633 //   peepprocedure ( inc_mov_to_lea );
25634 //   // the replacement is a leaI_rReg_immI; a lambda that creates this node when
25635 //   // invoked is passed into the function above
25636 //   peepreplace ( leaI_rReg_immI() );
25637 // %}
25638 
25639 // These instructions are not matched by the matcher but are used by the peephole rules
25640 instruct leaI_rReg_rReg_peep(rRegI dst, rRegI src1, rRegI src2)
25641 %{
        // 32-bit "lea dst, [src1 + src2]". predicate(false) means the matcher
        // never selects this rule; it is named as the peepreplace target of the
        // addI_rReg peephole rule.
25642   predicate(false);
25643   match(Set dst (AddI src1 src2));
25644   format %{ "leal    $dst, [$src1 + $src2]" %}
25645   ins_encode %{
25646     Register dst = $dst$$Register;
25647     Register src1 = $src1$$Register;
25648     Register src2 = $src2$$Register;
          // Use src1 as the base register unless it is rbp or r13; in that case
          // swap the roles so src2 becomes the base (the sum is the same).
          // NOTE(review): presumably rbp/r13 are avoided as base because that
          // encoding needs an explicit displacement - confirm.
25649     if (src1 != rbp && src1 != r13) {
25650       __ leal(dst, Address(src1, src2, Address::times_1));
25651     } else {
            // The swap only helps if src2 is not itself rbp/r13.
25652       assert(src2 != rbp && src2 != r13, "");
25653       __ leal(dst, Address(src2, src1, Address::times_1));
25654     }
25655   %}
25656   ins_pipe(ialu_reg_reg);
25657 %}
25658 
25659 instruct leaI_rReg_immI_peep(rRegI dst, rRegI src1, immI src2)
25660 %{
        // 32-bit "lea dst, [src1 + constant]". predicate(false) means the matcher
        // never selects this rule; it is named as the peepreplace target of the
        // addI_rReg_imm, incI_rReg and decI_rReg peephole rules.
25661   predicate(false);
25662   match(Set dst (AddI src1 src2));
25663   format %{ "leal    $dst, [$src1 + $src2]" %}
25664   ins_encode %{
          // src1 is the base register and the immediate src2 is the displacement.
25665     __ leal($dst$$Register, Address($src1$$Register, $src2$$constant));
25666   %}
25667   ins_pipe(ialu_reg_reg);
25668 %}
25669 
25670 instruct leaI_rReg_immI2_peep(rRegI dst, rRegI src, immI2 shift)
25671 %{
        // 32-bit left shift by a small constant expressed as a lea.
        // predicate(false) means the matcher never selects this rule; it is named
        // as the peepreplace target of the salI_rReg_immI2 peephole rule.
25672   predicate(false);
25673   match(Set dst (LShiftI src shift));
25674   format %{ "leal    $dst, [$src << $shift]" %}
25675   ins_encode %{
          // The shift constant is used directly as the address scale factor.
25676     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($shift$$constant);
25677     Register src = $src$$Register;
          // For a times_2 scale, emit [src + src] instead of a scaled index with no
          // base, unless src is rbp or r13.
          // NOTE(review): presumably [src + src] is the shorter encoding and
          // rbp/r13 are excluded because they need a displacement as base - confirm.
25678     if (scale == Address::times_2 && src != rbp && src != r13) {
25679       __ leal($dst$$Register, Address(src, src, Address::times_1));
25680     } else {
            // General form: no base register, src as the scaled index.
25681       __ leal($dst$$Register, Address(noreg, src, scale));
25682     }
25683   %}
25684   ins_pipe(ialu_reg_reg);
25685 %}
25686 
25687 instruct leaL_rReg_rReg_peep(rRegL dst, rRegL src1, rRegL src2)
25688 %{
        // 64-bit "lea dst, [src1 + src2]". predicate(false) means the matcher
        // never selects this rule; it is named as the peepreplace target of the
        // addL_rReg peephole rule.
25689   predicate(false);
25690   match(Set dst (AddL src1 src2));
25691   format %{ "leaq    $dst, [$src1 + $src2]" %}
25692   ins_encode %{
25693     Register dst = $dst$$Register;
25694     Register src1 = $src1$$Register;
25695     Register src2 = $src2$$Register;
          // Use src1 as the base register unless it is rbp or r13; in that case
          // swap the roles so src2 becomes the base (the sum is the same).
          // NOTE(review): presumably rbp/r13 are avoided as base because that
          // encoding needs an explicit displacement - confirm.
25696     if (src1 != rbp && src1 != r13) {
25697       __ leaq(dst, Address(src1, src2, Address::times_1));
25698     } else {
            // The swap only helps if src2 is not itself rbp/r13.
25699       assert(src2 != rbp && src2 != r13, "");
25700       __ leaq(dst, Address(src2, src1, Address::times_1));
25701     }
25702   %}
25703   ins_pipe(ialu_reg_reg);
25704 %}
25705 
25706 instruct leaL_rReg_immL32_peep(rRegL dst, rRegL src1, immL32 src2)
25707 %{
        // 64-bit "lea dst, [src1 + constant]". predicate(false) means the matcher
        // never selects this rule; it is named as the peepreplace target of the
        // addL_rReg_imm, incL_rReg and decL_rReg peephole rules.
25708   predicate(false);
25709   match(Set dst (AddL src1 src2));
25710   format %{ "leaq    $dst, [$src1 + $src2]" %}
25711   ins_encode %{
          // src1 is the base register and the immediate src2 is the displacement.
25712     __ leaq($dst$$Register, Address($src1$$Register, $src2$$constant));
25713   %}
25714   ins_pipe(ialu_reg_reg);
25715 %}
25716 
25717 instruct leaL_rReg_immI2_peep(rRegL dst, rRegL src, immI2 shift)
25718 %{
        // 64-bit left shift by a small constant expressed as a lea.
        // predicate(false) means the matcher never selects this rule; it is named
        // as the peepreplace target of the salL_rReg_immI2 peephole rule.
25719   predicate(false);
25720   match(Set dst (LShiftL src shift));
25721   format %{ "leaq    $dst, [$src << $shift]" %}
25722   ins_encode %{
          // The shift constant is used directly as the address scale factor.
25723     Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($shift$$constant);
25724     Register src = $src$$Register;
          // For a times_2 scale, emit [src + src] instead of a scaled index with no
          // base, unless src is rbp or r13.
          // NOTE(review): presumably [src + src] is the shorter encoding and
          // rbp/r13 are excluded because they need a displacement as base - confirm.
25725     if (scale == Address::times_2 && src != rbp && src != r13) {
25726       __ leaq($dst$$Register, Address(src, src, Address::times_1));
25727     } else {
            // General form: no base register, src as the scaled index.
25728       __ leaq($dst$$Register, Address(noreg, src, scale));
25729     }
25730   %}
25731   ins_pipe(ialu_reg_reg);
25732 %}
25733 
25734 // These peephole rules replace mov + I pairs (where I is one of {add, inc, dec,
25735 // sal}) with lea instructions. The {add, sal} rules are beneficial in
25736 // processors with at least partial ALU support for lea
25737 // (supports_fast_2op_lea()), whereas the {inc, dec} rules are only generally
25738 // beneficial for processors with full ALU support
25739 // (VM_Version::supports_fast_3op_lea()) and Intel Cascade Lake.
25740 
25741 peephole
25742 %{
        // int register-register add: rooted at addI_rReg and enabled only when
        // VM_Version::supports_fast_2op_lea() holds. The lea_coalesce_reg
        // procedure performs the transformation, with leaI_rReg_rReg_peep as the
        // replacement node.
25743   peeppredicate(VM_Version::supports_fast_2op_lea());
25744   peepmatch (addI_rReg);
25745   peepprocedure (lea_coalesce_reg);
25746   peepreplace (leaI_rReg_rReg_peep());
25747 %}
25748 
25749 peephole
25750 %{
        // int register-immediate add: rooted at addI_rReg_imm and enabled only
        // when VM_Version::supports_fast_2op_lea() holds. The lea_coalesce_imm
        // procedure performs the transformation, with leaI_rReg_immI_peep as the
        // replacement node.
25751   peeppredicate(VM_Version::supports_fast_2op_lea());
25752   peepmatch (addI_rReg_imm);
25753   peepprocedure (lea_coalesce_imm);
25754   peepreplace (leaI_rReg_immI_peep());
25755 %}
25756 
25757 peephole
25758 %{
        // int increment: rooted at incI_rReg. Enabled when either
        // VM_Version::supports_fast_3op_lea() or
        // VM_Version::is_intel_cascade_lake() holds. The lea_coalesce_imm
        // procedure performs the transformation, with leaI_rReg_immI_peep as the
        // replacement node.
25759   peeppredicate(VM_Version::supports_fast_3op_lea() ||
25760                 VM_Version::is_intel_cascade_lake());
25761   peepmatch (incI_rReg);
25762   peepprocedure (lea_coalesce_imm);
25763   peepreplace (leaI_rReg_immI_peep());
25764 %}
25765 
25766 peephole
25767 %{
        // int decrement: rooted at decI_rReg. Enabled when either
        // VM_Version::supports_fast_3op_lea() or
        // VM_Version::is_intel_cascade_lake() holds. The lea_coalesce_imm
        // procedure performs the transformation, with leaI_rReg_immI_peep as the
        // replacement node.
25768   peeppredicate(VM_Version::supports_fast_3op_lea() ||
25769                 VM_Version::is_intel_cascade_lake());
25770   peepmatch (decI_rReg);
25771   peepprocedure (lea_coalesce_imm);
25772   peepreplace (leaI_rReg_immI_peep());
25773 %}
25774 
25775 peephole
25776 %{
        // int shift-left by a small constant: rooted at salI_rReg_immI2 and
        // enabled only when VM_Version::supports_fast_2op_lea() holds. The
        // lea_coalesce_imm procedure performs the transformation, with
        // leaI_rReg_immI2_peep as the replacement node.
25777   peeppredicate(VM_Version::supports_fast_2op_lea());
25778   peepmatch (salI_rReg_immI2);
25779   peepprocedure (lea_coalesce_imm);
25780   peepreplace (leaI_rReg_immI2_peep());
25781 %}
25782 
25783 peephole
25784 %{
        // long register-register add: rooted at addL_rReg and enabled only when
        // VM_Version::supports_fast_2op_lea() holds. The lea_coalesce_reg
        // procedure performs the transformation, with leaL_rReg_rReg_peep as the
        // replacement node.
25785   peeppredicate(VM_Version::supports_fast_2op_lea());
25786   peepmatch (addL_rReg);
25787   peepprocedure (lea_coalesce_reg);
25788   peepreplace (leaL_rReg_rReg_peep());
25789 %}
25790 
25791 peephole
25792 %{
        // long register-immediate add: rooted at addL_rReg_imm and enabled only
        // when VM_Version::supports_fast_2op_lea() holds. The lea_coalesce_imm
        // procedure performs the transformation, with leaL_rReg_immL32_peep as
        // the replacement node.
25793   peeppredicate(VM_Version::supports_fast_2op_lea());
25794   peepmatch (addL_rReg_imm);
25795   peepprocedure (lea_coalesce_imm);
25796   peepreplace (leaL_rReg_immL32_peep());
25797 %}
25798 
25799 peephole
25800 %{
        // long increment: rooted at incL_rReg. Enabled when either
        // VM_Version::supports_fast_3op_lea() or
        // VM_Version::is_intel_cascade_lake() holds. The lea_coalesce_imm
        // procedure performs the transformation, with leaL_rReg_immL32_peep as
        // the replacement node.
25801   peeppredicate(VM_Version::supports_fast_3op_lea() ||
25802                 VM_Version::is_intel_cascade_lake());
25803   peepmatch (incL_rReg);
25804   peepprocedure (lea_coalesce_imm);
25805   peepreplace (leaL_rReg_immL32_peep());
25806 %}
25807 
25808 peephole
25809 %{
        // long decrement: rooted at decL_rReg. Enabled when either
        // VM_Version::supports_fast_3op_lea() or
        // VM_Version::is_intel_cascade_lake() holds. The lea_coalesce_imm
        // procedure performs the transformation, with leaL_rReg_immL32_peep as
        // the replacement node.
25810   peeppredicate(VM_Version::supports_fast_3op_lea() ||
25811                 VM_Version::is_intel_cascade_lake());
25812   peepmatch (decL_rReg);
25813   peepprocedure (lea_coalesce_imm);
25814   peepreplace (leaL_rReg_immL32_peep());
25815 %}
25816 
25817 peephole
25818 %{
        // long shift-left by a small constant: rooted at salL_rReg_immI2 and
        // enabled only when VM_Version::supports_fast_2op_lea() holds. The
        // lea_coalesce_imm procedure performs the transformation, with
        // leaL_rReg_immI2_peep as the replacement node.
25819   peeppredicate(VM_Version::supports_fast_2op_lea());
25820   peepmatch (salL_rReg_immI2);
25821   peepprocedure (lea_coalesce_imm);
25822   peepreplace (leaL_rReg_immI2_peep());
25823 %}
25824 
25825 peephole
25826 %{
        // Hands leaPCompressedOopOffset nodes to the lea_remove_redundant
        // procedure. No peeppredicate and no peepreplace are given for this rule.
25827   peepmatch (leaPCompressedOopOffset);
25828   peepprocedure (lea_remove_redundant);
25829 %}
25830 
25831 peephole
25832 %{
        // Hands leaP8Narrow nodes to the lea_remove_redundant procedure. No
        // peeppredicate and no peepreplace are given for this rule.
25833   peepmatch (leaP8Narrow);
25834   peepprocedure (lea_remove_redundant);
25835 %}
25836 
25837 peephole
25838 %{
        // Hands leaP32Narrow nodes to the lea_remove_redundant procedure. No
        // peeppredicate and no peepreplace are given for this rule.
25839   peepmatch (leaP32Narrow);
25840   peepprocedure (lea_remove_redundant);
25841 %}
25842 
25843 // These peephole rules match instructions which set flags and are followed by a testI/L_reg.
25844 // The test instruction is redundant in case the downstream instructions (like JCC or CMOV) only use flags that are already set by the previous instruction.
25845 
25846 //int variant
25847 peephole
25848 %{
        // Hands testI_reg nodes to the test_may_remove procedure. No
        // peeppredicate and no peepreplace are given for this rule.
25849   peepmatch (testI_reg);
25850   peepprocedure (test_may_remove);
25851 %}
25852 
25853 //long variant
25854 peephole
25855 %{
        // Hands testL_reg nodes to the test_may_remove procedure. No
        // peeppredicate and no peepreplace are given for this rule.
25856   peepmatch (testL_reg);
25857   peepprocedure (test_may_remove);
25858 %}
25859 
25860 
25861 //----------SMARTSPILL RULES---------------------------------------------------
25862 // These must follow all instruction definitions as they use the names
25863 // defined in the instructions definitions.